diff --git a/README copy.md b/README copy.md new file mode 100644 index 0000000000000000000000000000000000000000..7e2cae89414a353a034eea6212b8bc8f479dca55 --- /dev/null +++ b/README copy.md @@ -0,0 +1,13 @@ +--- +title: Snnetv2 Semantic Segmentation +emoji: 🐨 +colorFrom: green +colorTo: red +sdk: gradio +sdk_version: 4.14.0 +app_file: app.py +pinned: false +license: apache-2.0 +--- + +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..f1681ae913d80f04bdf4be6d86eda68d46bda54f --- /dev/null +++ b/app.py @@ -0,0 +1,251 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from argparse import ArgumentParser + +import cv2 +from mmengine.model.utils import revert_sync_batchnorm + +from mmseg.apis import inference_model, init_model +from mmseg.apis.inference import show_result_pyplot +import torch +import time +import gradio as gr +import plotly.express as px +import json + +def main(): + parser = ArgumentParser() + parser.add_argument('--config', default='configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.py', help='Config file') + parser.add_argument('--checkpoint', help='Checkpoint file', default='setr_naive_512x512_160k_b16_ade20k_snnetv2_deit3_s_l_lora_16_iter_160000.pth') + # parser.add_argument('--video', help='Video file or webcam id') + + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--palette', + default='cityscapes', + help='Color palette used for segmentation map') + parser.add_argument( + '--show', action='store_true', help='Whether to show draw result') + parser.add_argument( + '--show-wait-time', default=1, type=int, help='Wait time after imshow') + parser.add_argument( + '--output-file', default=None, type=str, help='Output video file path') + parser.add_argument( + '--output-fourcc', + default='MJPG', + type=str, + help='Fourcc of the output video') + parser.add_argument( + '--output-fps', default=30, type=int, help='FPS of the output video') + parser.add_argument( + '--output-height', + default=-1, + type=int, + help='Frame height of the output video') + parser.add_argument( + '--output-width', + default=-1, + type=int, + help='Frame width of the output video') + parser.add_argument( + '--opacity', + type=float, + default=0.5, + help='Opacity of painted segmentation map. 
In (0, 1] range.') + args = parser.parse_args() + + # build the model from a config file and a checkpoint file + model = init_model(args.config, args.checkpoint, device=args.device) + if args.device == 'cpu': + model = revert_sync_batchnorm(model) + + from mmseg.models.backbones.snnet import get_stitch_configs_bidirection + stitch_configs_info, _, _, anchor_ids, sl_ids, ls_ids, lsl_ids, sls_ids = get_stitch_configs_bidirection([12, 24]) + + stitch_configs_info = {i: cfg for i, cfg in enumerate(stitch_configs_info)} + + + with open('./model_flops/snnet_flops_setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.json', 'r') as f: + flops_params = json.load(f) + + with open('./results/eval_single_scale_20230507_235400.json', 'r') as f: + results = json.load(f) + + config_ids = list(results.keys()) + flops_res = {} + eval_res = {} + total_data = {} + for i, cfg_id in enumerate(config_ids): + flops = flops_params[cfg_id] + miou_res = results[cfg_id]['metric']['mIoU'] * 100 + eval_res[int(cfg_id)] = miou_res + flops_res[int(cfg_id)] = flops / 1e9 + total_data[int(cfg_id)] = [flops // 1e9, miou_res] + + + def visualize_stitch_pos(stitch_id): + if stitch_id == 13: + # 13 is equivalent to 0 + stitch_id = 0 + + names = [f'ID {key}' for key in flops_res.keys()] + + fig = px.scatter(x=flops_res.values(), y=eval_res.values(), hover_name=names) + fig.update_layout( + title=f"SN-Netv2 - Stitch ID - {stitch_id}", + title_x=0.5, + xaxis_title="GFLOPs", + yaxis_title="mIoU", + font=dict( + family="Courier New, monospace", + size=18, + color="RebeccaPurple" + ), + legend=dict( + yanchor="bottom", + y=0.99, + xanchor="left", + x=0.01), + ) + # continent, DarkSlateGrey + fig.update_traces(marker=dict(size=10, + line=dict(width=2)), + selector=dict(mode='markers')) + + fig.add_scatter(x=[flops_res[stitch_id]], y=[eval_res[stitch_id]], mode='markers', marker=dict(size=15), name='Current Stitch') + return fig + + + def segment_video(video, stitch_id): + + if stitch_id == 13: + # 13 is equivalent to 0 + stitch_id = 0 + + model.backbone.reset_stitch_id(stitch_id) + output_video_path = './temp_video.avi' + cap = cv2.VideoCapture(video) + assert (cap.isOpened()) + input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) + input_fps = cap.get(cv2.CAP_PROP_FPS) + + + fourcc = cv2.VideoWriter_fourcc(*args.output_fourcc) + output_fps = args.output_fps if args.output_fps > 0 else input_fps + output_height = args.output_height if args.output_height > 0 else int( + input_height) + output_width = args.output_width if args.output_width > 0 else int( + input_width) + writer = cv2.VideoWriter(output_video_path, fourcc, output_fps, + (output_width, output_height), True) + + try: + while True: + start_time = time.time() + flag, frame = cap.read() + if not flag: + break + + # test a single image + result = inference_model(model, frame) + + # blend raw image and prediction + draw_img = show_result_pyplot(model, frame, result, + show=False, + with_labels=False, + ) + + if draw_img.shape[0] != output_height or draw_img.shape[ + 1] != output_width: + draw_img = cv2.resize(draw_img, + (output_width, output_height)) + writer.write(draw_img) + finally: + if writer: + writer.release() + cap.release() + + fig = visualize_stitch_pos(stitch_id) + + return output_video_path, fig + + def segment_image(image, stitch_id): + if stitch_id == 13: + # 13 is equivalent to 0 + stitch_id = 0 + + model.backbone.reset_stitch_id(stitch_id) + result = inference_model(model, image) + draw_img = 
show_result_pyplot(model, image, result, + show=False, + with_labels=True, + ) + fig = visualize_stitch_pos(stitch_id) + return draw_img, fig + + + + with gr.Blocks() as image_demo: + with gr.Row(): + with gr.Column(): + image_input = gr.Image(label='Input Image') + stitch_slider = gr.Slider(minimum=0, maximum=134, step=1, label="Stitch ID") + with gr.Row(): + clear_button = gr.ClearButton() + submit_button = gr.Button() + + with gr.Column(): + image_output = gr.Image(label='Segmentation Results') + stitch_plot = gr.Plot(label='Stitch Position') + + submit_button.click( + fn=segment_image, + inputs=[image_input, stitch_slider], + outputs=[image_output, stitch_plot], + ) + + stitch_slider.change( + fn=visualize_stitch_pos, + inputs=[stitch_slider], + outputs=[stitch_plot], + show_progress=False + ) + + clear_button.click( + lambda: [None, 0, None, None], + outputs=[image_input, stitch_slider, image_output, stitch_plot], + ) + + gr.Examples( + [ + ['./demo_1.jpg', 0], + ['./demo_2.jpg', 1], + ['./demo_3.jpg', 93], + ['./demo_4.jpg', 3], + ], + inputs=[ + image_input, + stitch_slider + ], + outputs=[ + image_input, + stitch_plot + ], + ) + + with gr.Blocks() as demo: + with gr.Column(): + gr.HTML(""" +

+ <h1 style="text-align: center;">Stitched ViTs are Flexible Vision Backbones</h1>
+ <p>
+ This is the segmentation demo page of SN-Netv2, a flexible vision backbone that supports 100+ runtime speed and performance trade-offs. You can also run this Gradio demo on your local GPUs: https://github.com/ziplab/SN-Netv2. Paper: https://arxiv.org/abs/2307.00154.
+ </p>
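+ <p>
+ Use the Stitch ID slider (0-134) to pick a stitching configuration at runtime; the scatter plot shows where the selected stitch sits on the GFLOPs-mIoU trade-off among all configurations.
+ </p>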
+ """) + tabbed_page = gr.TabbedInterface([image_demo,], ['Image']) + + + demo.launch(allowed_paths=['./']) + + +if __name__ == '__main__': + main() diff --git a/configs/_base_/datasets/ade20k.py b/configs/_base_/datasets/ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..48340d11eeaf44d1e8e973d5b0ce6108e42d3215 --- /dev/null +++ b/configs/_base_/datasets/ade20k.py @@ -0,0 +1,68 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=(2048, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/training', seg_map_path='annotations/training'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/ade20k_640x640.py b/configs/_base_/datasets/ade20k_640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..c1f642da7fd55077471841f04b177dd4ebe3d150 --- /dev/null +++ b/configs/_base_/datasets/ade20k_640x640.py @@ -0,0 +1,68 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +crop_size = (640, 640) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=(2560, 640), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2560, 640), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +img_ratios 
= [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/training', seg_map_path='annotations/training'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/bdd100k.py b/configs/_base_/datasets/bdd100k.py new file mode 100644 index 0000000000000000000000000000000000000000..24cec69bfeb2211ca387c774e99ee4b83511991f --- /dev/null +++ b/configs/_base_/datasets/bdd100k.py @@ -0,0 +1,70 @@ +# dataset settings +dataset_type = 'BDD100KDataset' +data_root = 'data/bdd100k/' + +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=2, + num_workers=2, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/10k/train', + seg_map_path='labels/sem_seg/masks/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/10k/val', + seg_map_path='labels/sem_seg/masks/val'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/chase_db1.py b/configs/_base_/datasets/chase_db1.py new file mode 100644 
index 0000000000000000000000000000000000000000..ed47c2dbe5edd66ce1e853a9f3a6097683315ddf --- /dev/null +++ b/configs/_base_/datasets/chase_db1.py @@ -0,0 +1,75 @@ +# dataset settings +dataset_type = 'ChaseDB1Dataset' +data_root = 'data/CHASE_DB1' +img_scale = (960, 999) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=img_scale, + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] + +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/training', + seg_map_path='annotations/training'), + pipeline=train_pipeline))) + +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b63a4cdfe78e8c75c32723ff419c62add54a0640 --- /dev/null +++ b/configs/_base_/datasets/cityscapes.py @@ -0,0 +1,67 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., 
direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=2, + num_workers=2, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='leftImg8bit/train', seg_map_path='gtFine/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='leftImg8bit/val', seg_map_path='gtFine/val'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/cityscapes_1024x1024.py b/configs/_base_/datasets/cityscapes_1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..72be307b406cffa52a03916b8cbca73208a120ce --- /dev/null +++ b/configs/_base_/datasets/cityscapes_1024x1024.py @@ -0,0 +1,29 @@ +_base_ = './cityscapes.py' +crop_size = (1024, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/cityscapes_768x768.py b/configs/_base_/datasets/cityscapes_768x768.py new file mode 100644 index 0000000000000000000000000000000000000000..fcee0143ac75ada8c43b122b0c952df1be455512 --- /dev/null +++ b/configs/_base_/datasets/cityscapes_768x768.py @@ -0,0 +1,29 @@ +_base_ = './cityscapes.py' +crop_size = (768, 768) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2049, 1025), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2049, 1025), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/cityscapes_769x769.py b/configs/_base_/datasets/cityscapes_769x769.py new file mode 100644 index 
0000000000000000000000000000000000000000..ae40ac8c5fb02dcee69d93a63e8adbe998b28b47 --- /dev/null +++ b/configs/_base_/datasets/cityscapes_769x769.py @@ -0,0 +1,29 @@ +_base_ = './cityscapes.py' +crop_size = (769, 769) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2049, 1025), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2049, 1025), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/cityscapes_832x832.py b/configs/_base_/datasets/cityscapes_832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..0254580357f5e38be1b89ad3812d3dcedf1ccde4 --- /dev/null +++ b/configs/_base_/datasets/cityscapes_832x832.py @@ -0,0 +1,29 @@ +_base_ = './cityscapes.py' +crop_size = (832, 832) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/coco-stuff10k.py b/configs/_base_/datasets/coco-stuff10k.py new file mode 100644 index 0000000000000000000000000000000000000000..5d6bb12b97549c4ebf1ffd07863282d6705df794 --- /dev/null +++ b/configs/_base_/datasets/coco-stuff10k.py @@ -0,0 +1,69 @@ +# dataset settings +dataset_type = 'COCOStuffDataset' +data_root = 'data/coco_stuff10k' +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=(2048, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + 
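+ # test-time augmentation: 6 scale factors x 2 horizontal-flip states = 12 augmented views per image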
dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + reduce_zero_label=True, + data_prefix=dict( + img_path='images/train2014', seg_map_path='annotations/train2014'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + reduce_zero_label=True, + data_prefix=dict( + img_path='images/test2014', seg_map_path='annotations/test2014'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/coco-stuff164k.py b/configs/_base_/datasets/coco-stuff164k.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b9d90117a173a5c4800f23eb6a9cb57c263063 --- /dev/null +++ b/configs/_base_/datasets/coco-stuff164k.py @@ -0,0 +1,67 @@ +# dataset settings +dataset_type = 'COCOStuffDataset' +data_root = 'data/coco_stuff164k' +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/train2017', seg_map_path='annotations/train2017'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/val2017', seg_map_path='annotations/val2017'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/drive.py b/configs/_base_/datasets/drive.py new file mode 100644 index 
0000000000000000000000000000000000000000..6a3dd82c643330a8a00890efbd874e8abb1b2551 --- /dev/null +++ b/configs/_base_/datasets/drive.py @@ -0,0 +1,73 @@ +# dataset settings +dataset_type = 'DRIVEDataset' +data_root = 'data/DRIVE' +img_scale = (584, 565) +crop_size = (64, 64) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=img_scale, + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/training', + seg_map_path='annotations/training'), + pipeline=train_pipeline))) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/hrf.py b/configs/_base_/datasets/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..353d070472e5272ff7611265a43846abc1e4014f --- /dev/null +++ b/configs/_base_/datasets/hrf.py @@ -0,0 +1,73 @@ +# dataset settings +dataset_type = 'HRFDataset' +data_root = 'data/HRF' +img_scale = (2336, 3504) +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=img_scale, + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', 
prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/training', + seg_map_path='annotations/training'), + pipeline=train_pipeline))) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/isaid.py b/configs/_base_/datasets/isaid.py new file mode 100644 index 0000000000000000000000000000000000000000..5cd4309f6df6309267e1684abec5c338e24d36b1 --- /dev/null +++ b/configs/_base_/datasets/isaid.py @@ -0,0 +1,73 @@ +# dataset settings +dataset_type = 'iSAIDDataset' +data_root = 'data/iSAID' +""" +This crop_size setting is followed by the implementation of +`PointFlow: Flowing Semantics Through Points for Aerial Image +Segmentation `_. +""" + +crop_size = (896, 896) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(896, 896), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(896, 896), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/train', seg_map_path='ann_dir/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/levir_256x256.py b/configs/_base_/datasets/levir_256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..a2a69aa9e9c0c35601d2b9452f8256372a1b128c --- /dev/null +++ b/configs/_base_/datasets/levir_256x256.py @@ -0,0 
+1,59 @@ +# dataset settings +dataset_type = 'LEVIRCDDataset' +data_root = r'data/LEVIRCD' + +albu_train_transforms = [ + dict(type='RandomBrightnessContrast', p=0.2), + dict(type='HorizontalFlip', p=0.5), + dict(type='VerticalFlip', p=0.5) +] + +train_pipeline = [ + dict(type='LoadMultipleRSImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Albu', transforms=albu_train_transforms), + dict(type='ConcatCDInput'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadMultipleRSImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='ConcatCDInput'), + dict(type='PackSegInputs') +] + +tta_pipeline = [ + dict(type='LoadMultipleRSImageFromFile'), + dict( + type='TestTimeAug', + transforms=[[dict(type='LoadAnnotations')], + [dict(type='ConcatCDInput')], + [dict(type='PackSegInputs')]]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='train/A', + img_path2='train/B', + seg_map_path='train/label'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='test/A', img_path2='test/B', seg_map_path='test/label'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/loveda.py b/configs/_base_/datasets/loveda.py new file mode 100644 index 0000000000000000000000000000000000000000..b93bc74af1b4536f35c9f9caeb4adbd009795996 --- /dev/null +++ b/configs/_base_/datasets/loveda.py @@ -0,0 +1,66 @@ +# dataset settings +dataset_type = 'LoveDADataset' +data_root = 'data/loveDA' +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=(2048, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(1024, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/train', seg_map_path='ann_dir/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + 
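+ # DefaultSampler (shuffle=False) makes one ordered pass over the split, unlike the training InfiniteSampler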
dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/mapillary_v1.py b/configs/_base_/datasets/mapillary_v1.py new file mode 100644 index 0000000000000000000000000000000000000000..611aa4741b5afceb5998dffcf75933f72403f805 --- /dev/null +++ b/configs/_base_/datasets/mapillary_v1.py @@ -0,0 +1,68 @@ +# dataset settings +dataset_type = 'MapillaryDataset_v1' +data_root = 'data/mapillary/' +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=2, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='training/images', seg_map_path='training/v1.2/labels'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='validation/images', + seg_map_path='validation/v1.2/labels'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/mapillary_v1_65.py b/configs/_base_/datasets/mapillary_v1_65.py new file mode 100644 index 0000000000000000000000000000000000000000..f594f373337e403f9d01743103bcd8167781c940 --- /dev/null +++ b/configs/_base_/datasets/mapillary_v1_65.py @@ -0,0 +1,37 @@ +# dataset settings +_base_ = './mapillary_v1.py' +metainfo = dict( + classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier', + 'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking', + 'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane', + 'Sidewalk', 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist', + 'Motorcyclist', 'Other Rider', 'Lane Marking - Crosswalk', + 'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow', + 'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', 'Bike Rack', + 'Billboard', 'Catch Basin', 'CCTV Camera', 'Fire Hydrant', + 'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth', 'Pothole', + 'Street Light', 'Pole', 'Traffic Sign 
Frame', 'Utility Pole', + 'Traffic Light', 'Traffic Sign (Back)', 'Traffic Sign (Front)', + 'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', + 'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', 'Truck', + 'Wheeled Slow', 'Car Mount', 'Ego Vehicle'), + palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], + [180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255], + [140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232], + [150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60], + [255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128], + [255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180], + [190, 255, 255], [152, 251, 152], [107, 142, 35], [0, 170, 30], + [255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220], + [220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40], + [33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150], + [210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80], + [250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20], + [119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142], + [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], + [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10]]) + +train_dataloader = dict(dataset=dict(metainfo=metainfo)) +val_dataloader = dict(dataset=dict(metainfo=metainfo)) +test_dataloader = val_dataloader diff --git a/configs/_base_/datasets/mapillary_v2.py b/configs/_base_/datasets/mapillary_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..7cb7a958e516caec34f8a1f82af8d05a0a84472f --- /dev/null +++ b/configs/_base_/datasets/mapillary_v2.py @@ -0,0 +1,68 @@ +# dataset settings +dataset_type = 'MapillaryDataset_v2' +data_root = 'data/mapillary/' +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=2, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='training/images', seg_map_path='training/v2.0/labels'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='validation/images', + 
seg_map_path='validation/v2.0/labels'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/nyu.py b/configs/_base_/datasets/nyu.py new file mode 100644 index 0000000000000000000000000000000000000000..74d57c5fc50b89754424a22d0988e18e2a0596c7 --- /dev/null +++ b/configs/_base_/datasets/nyu.py @@ -0,0 +1,67 @@ +# dataset settings +dataset_type = 'NYUDataset' +data_root = 'data/nyu' + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), + dict(type='RandomDepthMix', prob=0.25), + dict(type='RandomFlip', prob=0.5), + dict(type='RandomCrop', crop_size=(480, 480)), + dict( + type='Albu', + transforms=[ + dict(type='RandomBrightnessContrast'), + dict(type='RandomGamma'), + dict(type='HueSaturationValue'), + ]), + dict( + type='PackSegInputs', + meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', + 'pad_shape', 'scale_factor', 'flip', 'flip_direction', + 'category_id')), +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2000, 480), keep_ratio=True), + dict(dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3)), + dict( + type='PackSegInputs', + meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', + 'pad_shape', 'scale_factor', 'flip', 'flip_direction', + 'category_id')) +] + +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/train', depth_map_path='annotations/train'), + pipeline=train_pipeline)) + +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + test_mode=True, + data_prefix=dict( + img_path='images/test', depth_map_path='annotations/test'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='DepthMetric', + min_depth_eval=0.001, + max_depth_eval=10.0, + crop_type='nyu_crop') +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/nyu_512x512.py b/configs/_base_/datasets/nyu_512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..88e3878d33429f8be1f67c42ebda5604b58c4bc0 --- /dev/null +++ b/configs/_base_/datasets/nyu_512x512.py @@ -0,0 +1,72 @@ +# dataset settings +dataset_type = 'NYUDataset' +data_root = 'data/nyu' + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3), + dict(type='RandomDepthMix', prob=0.25), + dict(type='RandomFlip', prob=0.5), + dict( + type='RandomResize', + scale=(768, 512), + ratio_range=(0.8, 1.5), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(512, 512)), + dict( + type='Albu', + transforms=[ + dict(type='RandomBrightnessContrast'), + dict(type='RandomGamma'), + dict(type='HueSaturationValue'), + ]), + dict( + type='PackSegInputs', + meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape', + 'pad_shape', 'scale_factor', 'flip', 'flip_direction', + 'category_id')), +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + dict(dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3)), + dict( + type='PackSegInputs', + meta_keys=('img_path', 
'depth_map_path', 'ori_shape', 'img_shape', + 'pad_shape', 'scale_factor', 'flip', 'flip_direction', + 'category_id')) +] + +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/train', depth_map_path='annotations/train'), + pipeline=train_pipeline)) + +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + test_mode=True, + data_prefix=dict( + img_path='images/test', depth_map_path='annotations/test'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='DepthMetric', + min_depth_eval=0.001, + max_depth_eval=10.0, + crop_type='nyu_crop') +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/pascal_context.py b/configs/_base_/datasets/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..dfb1f858b352f70b2831e1885d8ab80f2b7b497a --- /dev/null +++ b/configs/_base_/datasets/pascal_context.py @@ -0,0 +1,56 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=img_scale, + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='JPEGImages', seg_map_path='SegmentationClassContext'), + ann_file='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='JPEGImages', seg_map_path='SegmentationClassContext'), + ann_file='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/pascal_context_59.py b/configs/_base_/datasets/pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..7f31043ed0e7952ce4c8d43825ba5cbecb7ea500 --- /dev/null +++ b/configs/_base_/datasets/pascal_context_59.py @@ -0,0 +1,72 @@ +# dataset settings +dataset_type = 'PascalContextDataset59' +data_root = 'data/VOCdevkit/VOC2010/' + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=img_scale, + ratio_range=(0.5, 2.0), + keep_ratio=True), + 
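+ # cat_max_ratio=0.75: the random crop is re-drawn if a single class would cover more than 75% of it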
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='JPEGImages', seg_map_path='SegmentationClassContext'), + ann_file='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='JPEGImages', seg_map_path='SegmentationClassContext'), + ann_file='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/pascal_voc12.py b/configs/_base_/datasets/pascal_voc12.py new file mode 100644 index 0000000000000000000000000000000000000000..5235ca9cfe27f3fb8115acf4238c2b2c50621dc6 --- /dev/null +++ b/configs/_base_/datasets/pascal_voc12.py @@ -0,0 +1,69 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='JPEGImages', 
seg_map_path='SegmentationClass'), + ann_file='ImageSets/Segmentation/train.txt', + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='JPEGImages', seg_map_path='SegmentationClass'), + ann_file='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/pascal_voc12_aug.py b/configs/_base_/datasets/pascal_voc12_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..69c36548805321b62ffcb34758fa0f0976ec6817 --- /dev/null +++ b/configs/_base_/datasets/pascal_voc12_aug.py @@ -0,0 +1,81 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Pad', size=crop_size), + dict(type='PackSegInputs') +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +dataset_train = dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='JPEGImages', seg_map_path='SegmentationClass'), + ann_file='ImageSets/Segmentation/train.txt', + pipeline=train_pipeline) + +dataset_aug = dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='JPEGImages', seg_map_path='SegmentationClassAug'), + ann_file='ImageSets/Segmentation/aug.txt', + pipeline=train_pipeline) + +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict(type='ConcatDataset', datasets=[dataset_train, dataset_aug])) + +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='JPEGImages', seg_map_path='SegmentationClass'), + ann_file='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/potsdam.py b/configs/_base_/datasets/potsdam.py new file mode 100644 index 0000000000000000000000000000000000000000..95f6039351a9e1640d06a7576f47617354224773 --- /dev/null +++ b/configs/_base_/datasets/potsdam.py @@ -0,0 +1,66 @@ +# dataset 
settings +dataset_type = 'PotsdamDataset' +data_root = 'data/potsdam' +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=(512, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(512, 512), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/train', seg_map_path='ann_dir/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/refuge.py b/configs/_base_/datasets/refuge.py new file mode 100644 index 0000000000000000000000000000000000000000..79bb4d4e945f67286e31f9d6851e8ded0854d090 --- /dev/null +++ b/configs/_base_/datasets/refuge.py @@ -0,0 +1,90 @@ +# dataset settings +dataset_type = 'REFUGEDataset' +data_root = 'data/REFUGE' +train_img_scale = (2056, 2124) +val_img_scale = (1634, 1634) +test_img_scale = (1634, 1634) +crop_size = (512, 512) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=False), + dict( + type='RandomResize', + scale=train_img_scale, + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=val_img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=False), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=test_img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=False), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', 
backend_args=dict(backend='local')), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/training', seg_map_path='annotations/training'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + pipeline=val_pipeline)) +test_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/test', seg_map_path='annotations/test'), + pipeline=val_pipeline)) + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/stare.py b/configs/_base_/datasets/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..b7545dc62379d40fa4dc307f4e6ed0e5501a62a2 --- /dev/null +++ b/configs/_base_/datasets/stare.py @@ -0,0 +1,73 @@ +# dataset settings +dataset_type = 'STAREDataset' +data_root = 'data/STARE' +img_scale = (605, 700) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=img_scale, + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/training', + seg_map_path='annotations/training'), + pipeline=train_pipeline))) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + 
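# The STARE training split is tiny, so the train_dataloader above wraps it
# in RepeatDataset with times=40000 to keep the InfiniteSampler fed during
# iteration-based training. The wrapper only re-indexes modulo the base
# dataset; a minimal sketch of the behaviour (the real class is mmengine's
# RepeatDataset):
class RepeatDatasetSketch:
    def __init__(self, dataset, times):
        self.dataset, self.times = dataset, times

    def __len__(self):
        return self.times * len(self.dataset)

    def __getitem__(self, idx):
        # indices 0 .. times*N-1 all map back onto the N real samples
        return self.dataset[idx % len(self.dataset)]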
pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/synapse.py b/configs/_base_/datasets/synapse.py new file mode 100644 index 0000000000000000000000000000000000000000..86852918cd76916913b8c5ef5591e30583bf4a42 --- /dev/null +++ b/configs/_base_/datasets/synapse.py @@ -0,0 +1,41 @@ +dataset_type = 'SynapseDataset' +data_root = 'data/synapse/' +img_scale = (224, 224) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', scale=img_scale, keep_ratio=True), + dict(type='RandomRotFlip', rotate_prob=0.5, flip_prob=0.5, degree=20), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=img_scale, keep_ratio=True), + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict( + batch_size=6, + num_workers=2, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/train', seg_map_path='ann_dir/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) +test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/vaihingen.py b/configs/_base_/datasets/vaihingen.py new file mode 100644 index 0000000000000000000000000000000000000000..6c78994fe7d84876810acac14ef76adef6a912c4 --- /dev/null +++ b/configs/_base_/datasets/vaihingen.py @@ -0,0 +1,66 @@ +# dataset settings +dataset_type = 'ISPRSDataset' +data_root = 'data/vaihingen' +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=(512, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(512, 512), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/train', seg_map_path='ann_dir/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + 
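# The aerial configs here (Potsdam, Vaihingen) load annotations with
# reduce_zero_label=True: raw label 0 is treated as ignore, so it is mapped
# to 255 and every remaining id shifts down by one. A numpy sketch of that
# remapping, assuming ignore_index=255 as elsewhere in these configs:
import numpy as np

def reduce_zero_label(seg: np.ndarray, ignore_index: int = 255) -> np.ndarray:
    out = seg.copy()
    out[out == 0] = ignore_index                 # label 0 becomes ignore
    out = out - 1                                # shift remaining labels
    out[out == ignore_index - 1] = ignore_index  # keep ignore pixels at 255
    return out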
persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..272b4d2467992b0f584a3b9d825061c0db474842 --- /dev/null +++ b/configs/_base_/default_runtime.py @@ -0,0 +1,15 @@ +default_scope = 'mmseg' +env_cfg = dict( + cudnn_benchmark=True, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer') +log_processor = dict(by_epoch=False) +log_level = 'INFO' +load_from = None +resume = False + +tta_model = dict(type='SegTTAModel') diff --git a/configs/_base_/models/ann_r50-d8.py b/configs/_base_/models/ann_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..a1ef956948627b7ca674abaf3a2369e27bc70329 --- /dev/null +++ b/configs/_base_/models/ann_r50-d8.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ANNHead', + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/apcnet_r50-d8.py b/configs/_base_/models/apcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..63269f9987115d4709623c9dcb2ee3e74bb2bee8 --- /dev/null +++ b/configs/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + 
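# default_runtime above registers tta_model = dict(type='SegTTAModel'),
# which consumes the tta_pipeline blocks defined with the datasets: six
# scale ratios times two horizontal flips, with the logits averaged before
# the final argmax. A simplified sketch of that merge (the real logic
# lives in mmseg's SegTTAModel; `model` here is assumed to return logits):
import torch
import torch.nn.functional as F

def tta_predict(model, img, ratios=(0.5, 0.75, 1.0, 1.25, 1.5, 1.75)):
    h, w = img.shape[-2:]
    acc = 0.
    for r in ratios:
        scaled = F.interpolate(img, scale_factor=r, mode='bilinear',
                               align_corners=False)
        for flip in (False, True):
            x = torch.flip(scaled, dims=[-1]) if flip else scaled
            logits = model(x)
            if flip:
                logits = torch.flip(logits, dims=[-1])
            acc = acc + F.interpolate(logits, size=(h, w), mode='bilinear',
                                      align_corners=False)
    return acc / (len(ratios) * 2)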
decode_head=dict( + type='APCHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/bisenetv1_r18-d32.py b/configs/_base_/models/bisenetv1_r18-d32.py new file mode 100644 index 0000000000000000000000000000000000000000..2aecb9e2efd788577ed9634eec9659a91381ba1e --- /dev/null +++ b/configs/_base_/models/bisenetv1_r18-d32.py @@ -0,0 +1,76 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='BiSeNetV1', + in_channels=3, + context_channels=(128, 256, 512), + spatial_channels=(64, 64, 64, 128), + out_indices=(0, 1, 2), + out_channels=256, + backbone_cfg=dict( + type='ResNet', + in_channels=3, + depth=18, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=0, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/bisenetv2.py b/configs/_base_/models/bisenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..ae845129db9f25573653659ef564ddadc790377f --- /dev/null +++ b/configs/_base_/models/bisenetv2.py @@ -0,0 +1,88 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='BiSeNetV2', + detail_channels=(64, 64, 128), + semantic_channels=(16, 32, 64, 128), + semantic_expansion_ratio=6, + bga_channels=128, + out_indices=(0, 1, 2, 3, 4), + init_cfg=None, + 
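# These model configs default to norm_cfg=dict(type='SyncBN', ...), which
# presumes distributed multi-GPU training. For a single-GPU or CPU run the
# usual recipe is to convert SyncBN back to plain BatchNorm after the
# model is built; a sketch, assuming an already-constructed `model`:
from mmengine.model.utils import revert_sync_batchnorm

# model = revert_sync_batchnorm(model)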
align_corners=False), + decode_head=dict( + type='FCNHead', + in_channels=128, + in_index=0, + channels=1024, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=16, + channels=16, + num_convs=2, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=32, + channels=64, + num_convs=2, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=64, + channels=256, + num_convs=2, + num_classes=19, + in_index=3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=128, + channels=1024, + num_convs=2, + num_classes=19, + in_index=4, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/ccnet_r50-d8.py b/configs/_base_/models/ccnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..575d8eb4590393ffb024fe0fecace32ac96f4b2b --- /dev/null +++ b/configs/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='CCHead', + in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/cgnet.py b/configs/_base_/models/cgnet.py new file mode 100644 index 0000000000000000000000000000000000000000..93c6f5b6d1a5f1d325189b78f5a95d36434246a1 --- /dev/null +++ b/configs/_base_/models/cgnet.py @@ -0,0 +1,43 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[72.39239876, 82.90891754, 73.15835921], + std=[1, 1, 1], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + 
data_preprocessor=data_preprocessor, + backbone=dict( + type='CGNet', + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16)), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, + 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, + 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, + 10.396974, 10.055647 + ])), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/danet_r50-d8.py b/configs/_base_/models/danet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..8163b3d69116eb069829d66694f0ebee37831532 --- /dev/null +++ b/configs/_base_/models/danet_r50-d8.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/deeplabv3_r50-d8.py b/configs/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..22efe9a6cad1713ff4eb4071252e21e782e3ac85 --- /dev/null +++ b/configs/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + 
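# The CGNet head above weights its cross-entropy per class (the
# class_weight list) to counter Cityscapes' class imbalance. The plain
# PyTorch equivalent of that loss call, with illustrative shapes and
# uniform weights standing in for the configured values:
import torch
import torch.nn.functional as F

logits = torch.randn(2, 19, 64, 64)          # N x num_classes x H x W
target = torch.randint(0, 19, (2, 64, 64))   # N x H x W label map
weight = torch.ones(19)                      # config supplies real values
loss = F.cross_entropy(logits, target, weight=weight, ignore_index=255)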
in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/deeplabv3_unet_s5-d16.py b/configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..92df52c35d402b9f2446cb82f5c49e53b4a48107 --- /dev/null +++ b/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,58 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/configs/_base_/models/deeplabv3plus_r50-d8.py b/configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..74dbed5593d5b4ce246eebb03b6ebb2138fdc59b --- /dev/null +++ b/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + 
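# The UNet variants above test with mode='slide' (crop_size=256,
# stride=170) instead of whole-image inference: the network runs on
# overlapping crops and the accumulated logits are averaged. A simplified
# sketch of that loop (the real logic is EncoderDecoder.slide_inference):
import torch

def slide_inference(model, img, crop=256, stride=170, num_classes=2):
    _, _, h, w = img.shape
    logits = torch.zeros(1, num_classes, h, w)
    count = torch.zeros(1, 1, h, w)
    h_grids = max(h - crop + stride - 1, 0) // stride + 1
    w_grids = max(w - crop + stride - 1, 0) // stride + 1
    for i in range(h_grids):
        for j in range(w_grids):
            y1 = min(i * stride, max(h - crop, 0))
            x1 = min(j * stride, max(w - crop, 0))
            patch = img[:, :, y1:y1 + crop, x1:x1 + crop]
            logits[:, :, y1:y1 + crop, x1:x1 + crop] += model(patch)
            count[:, :, y1:y1 + crop, x1:x1 + crop] += 1
    return logits / count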
loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/dmnet_r50-d8.py b/configs/_base_/models/dmnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..f66a042f1de47434a53a8ecc9557a63df015b234 --- /dev/null +++ b/configs/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DMHead', + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/dnl_r50-d8.py b/configs/_base_/models/dnl_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..ee64056c0ee861050d90ede3c0e201771dd62d48 --- /dev/null +++ b/configs/_base_/models/dnl_r50-d8.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DNLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/dpt_vit-b16.py b/configs/_base_/models/dpt_vit-b16.py new file mode 100644 index 0000000000000000000000000000000000000000..90845b37b5a312448d6a910d29a8d013cbf4b88d --- /dev/null +++ 
b/configs/_base_/models/dpt_vit-b16.py @@ -0,0 +1,39 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa + backbone=dict( + type='VisionTransformer', + img_size=224, + embed_dims=768, + num_layers=12, + num_heads=12, + out_indices=(2, 5, 8, 11), + final_norm=False, + with_cls_token=True, + output_cls_token=True), + decode_head=dict( + type='DPTHead', + in_channels=(768, 768, 768, 768), + channels=256, + embed_dims=768, + post_process_channels=[96, 192, 384, 768], + num_classes=150, + readout_type='project', + input_transform='multiple_select', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=None, + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) # yapf: disable diff --git a/configs/_base_/models/emanet_r50-d8.py b/configs/_base_/models/emanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..c55af4f11d1d7dc2fe5b046904bdd9e247169dc3 --- /dev/null +++ b/configs/_base_/models/emanet_r50-d8.py @@ -0,0 +1,55 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EMAHead', + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/encnet_r50-d8.py b/configs/_base_/models/encnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..63cec9e3cbe9c1a221e788b7bac1eba589a69c1d --- /dev/null +++ b/configs/_base_/models/encnet_r50-d8.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', 
+ contract_dilation=True), + decode_head=dict( + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/erfnet_fcn.py b/configs/_base_/models/erfnet_fcn.py new file mode 100644 index 0000000000000000000000000000000000000000..4d68a7229687b36bcbdfc87fec8f11d8e2ee89ea --- /dev/null +++ b/configs/_base_/models/erfnet_fcn.py @@ -0,0 +1,40 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='ERFNet', + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + init_cfg=None), + decode_head=dict( + type='FCNHead', + in_channels=16, + channels=128, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fast_scnn.py b/configs/_base_/models/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..11127b0115890bfee2ae1b28e58792df9dd9719c --- /dev/null +++ b/configs/_base_/models/fast_scnn.py @@ -0,0 +1,65 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='FastSCNN', + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='DepthwiseSeparableFCNHead', + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + 
concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py b/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py new file mode 100644 index 0000000000000000000000000000000000000000..a200b4bac6420f77c6c31a47a40a4301b2bf2b1e --- /dev/null +++ b/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py @@ -0,0 +1,61 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + dilations=(1, 1, 2, 4), + strides=(1, 2, 2, 2), + out_indices=(1, 2, 3), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='JPU', + in_channels=(512, 1024, 2048), + mid_channels=512, + start_level=0, + end_level=-1, + dilations=(1, 2, 4, 8), + align_corners=False, + norm_cfg=norm_cfg), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=2, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=1, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fcn_hr18.py b/configs/_base_/models/fcn_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..01a447aabe67e1226cd60424f4808ffeff15a0a6 --- /dev/null +++ b/configs/_base_/models/fcn_hr18.py @@ -0,0 +1,60 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 
72, 144]), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fcn_r50-d8.py b/configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..9a76a6c3fbe2d645e607abe914c47402234ab383 --- /dev/null +++ b/configs/_base_/models/fcn_r50-d8.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fcn_unet_s5-d16.py b/configs/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..9f880d21e205a56fb4e2504ec4eca2617284b1bd --- /dev/null +++ b/configs/_base_/models/fcn_unet_s5-d16.py @@ -0,0 +1,59 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='FCNHead', + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and 
testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/configs/_base_/models/fpn_poolformer_s12.py b/configs/_base_/models/fpn_poolformer_s12.py new file mode 100644 index 0000000000000000000000000000000000000000..086c804837aa48fd871a2c0ba24d928ea62fd767 --- /dev/null +++ b/configs/_base_/models/fpn_poolformer_s12.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa +# TODO: delete custom_imports after mmpretrain supports auto import +# please install mmpretrain >= 1.0.0rc7 +# import mmpretrain.models to trigger register_module in mmpretrain +custom_imports = dict( + imports=['mmpretrain.models'], allow_failed_imports=False) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='mmpretrain.PoolFormer', + arch='s12', + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'), + in_patch_size=7, + in_stride=4, + in_pad=2, + down_patch_size=3, + down_stride=2, + down_pad=1, + drop_rate=0., + drop_path_rate=0., + out_indices=(0, 2, 4, 6), + frozen_stages=0, + ), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fpn_r50.py b/configs/_base_/models/fpn_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..3baa0970fbc65866ff0edfc4a099d725483fb883 --- /dev/null +++ b/configs/_base_/models/fpn_r50.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/gcnet_r50-d8.py b/configs/_base_/models/gcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..8238d4b5885f9562de6d7d31fc73e2dea4adb0c7 --- /dev/null +++ 
b/configs/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='GCHead', + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/icnet_r50-d8.py b/configs/_base_/models/icnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..4377053bdaed3271893cdeef82b5c929f74419c2 --- /dev/null +++ b/configs/_base_/models/icnet_r50-d8.py @@ -0,0 +1,82 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='ICNet', + backbone_cfg=dict( + type='ResNetV1c', + in_channels=3, + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + in_channels=3, + layer_channels=(512, 2048), + light_branch_middle_channels=32, + psp_out_channels=512, + out_channels=(64, 256, 256), + norm_cfg=norm_cfg, + align_corners=False, + ), + neck=dict( + type='ICNeck', + in_channels=(64, 256, 256), + out_channels=128, + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='FCNHead', + in_channels=128, + channels=128, + num_convs=1, + in_index=2, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=128, + num_convs=1, + num_classes=19, + in_index=0, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=128, + channels=128, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/isanet_r50-d8.py 
b/configs/_base_/models/isanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..e028ba85b4308e8bbb7a3cc7b8f9f79ef328c4fa --- /dev/null +++ b/configs/_base_/models/isanet_r50-d8.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ISAHead', + in_channels=2048, + in_index=3, + channels=512, + isa_channels=256, + down_factor=(8, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/lraspp_m-v3-d8.py b/configs/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..acf70e7107b9bd2200a3e3542a0d6e31bdaf5b47 --- /dev/null +++ b/configs/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,33 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='MobileNetV3', + arch='large', + out_indices=(1, 3, 16), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 24, 960), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/nonlocal_r50-d8.py b/configs/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..7d73a8486011992d936637cb0065dd38a43bf52c --- /dev/null +++ b/configs/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( 
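# The NLHead that follows uses 'embedded_gaussian' pairwise similarity
# with use_scale=True, i.e. softmax(theta(x) @ phi(x)^T / sqrt(C'))
# applied to g(x). A compact sketch of that attention step on flattened
# spatial features (illustrative shapes; the real block is mmcv's
# NonLocal2d):
import torch

def embedded_gaussian_attention(theta, phi, g, use_scale=True):
    # theta, phi, g: (N, H*W, C') projections of the feature map
    if use_scale:
        theta = theta / theta.shape[-1] ** 0.5
    attn = torch.softmax(theta @ phi.transpose(1, 2), dim=-1)  # (N, HW, HW)
    return attn @ g                                            # (N, HW, C')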
+ type='NLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/ocrnet_hr18.py b/configs/_base_/models/ocrnet_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..6c7fcfe3d61bb0d4f16ae51ec807697afca7e04e --- /dev/null +++ b/configs/_base_/models/ocrnet_hr18.py @@ -0,0 +1,76 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='CascadeEncoderDecoder', + data_preprocessor=data_preprocessor, + num_stages=2, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/ocrnet_r50-d8.py b/configs/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..0a2588f9838857480d1aace3cd80e4acba02659f --- /dev/null +++ b/configs/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,55 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='CascadeEncoderDecoder', + data_preprocessor=data_preprocessor, + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, 
+ norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=[ + dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/pointrend_r50.py b/configs/_base_/models/pointrend_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..8a27e856f5c10a02972bcf10b4ea8e4e64b2e9cf --- /dev/null +++ b/configs/_base_/models/pointrend_r50.py @@ -0,0 +1,64 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='CascadeEncoderDecoder', + data_preprocessor=data_preprocessor, + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict( + num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict( + mode='whole', + subdivision_steps=2, + subdivision_num_points=8196, + scale_factor=2)) diff --git a/configs/_base_/models/psanet_r50-d8.py b/configs/_base_/models/psanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..40fd5a91374463982458fe3bad73c3512922328e --- /dev/null +++ b/configs/_base_/models/psanet_r50-d8.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSAHead', + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type='bi-direction', 
+ compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/pspnet_r50-d8.py b/configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..c257b8ba276f35b66db3fec96e70ff3ff930ce44 --- /dev/null +++ b/configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/pspnet_unet_s5-d16.py b/configs/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..834a22ad00c6314cfcfaafa9f339ef56f318a1ca --- /dev/null +++ b/configs/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,58 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + 
type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/configs/_base_/models/san_vit-b16.py b/configs/_base_/models/san_vit-b16.py new file mode 100644 index 0000000000000000000000000000000000000000..96ac41b8dade5663b19dbc4a0fc67df5e0250c05 --- /dev/null +++ b/configs/_base_/models/san_vit-b16.py @@ -0,0 +1,137 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) + +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[122.7709, 116.7460, 104.0937], + std=[68.5005, 66.6322, 70.3232], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size_divisor=640, + test_cfg=dict(size_divisor=32)) + +num_classes = 171 +model = dict( + type='MultimodalEncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='pretrain/clip_vit_base_patch16_224.pth', + asymetric_input=True, + encoder_resolution=0.5, + image_encoder=dict( + type='VisionTransformer', + img_size=(224, 224), + patch_size=16, + patch_pad=0, + in_channels=3, + embed_dims=768, + num_layers=9, + num_heads=12, + mlp_ratio=4, + out_origin=True, + out_indices=(2, 5, 8), + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + with_cls_token=True, + output_cls_token=True, + patch_bias=False, + pre_norm=True, + norm_cfg=dict(type='LN', eps=1e-5), + act_cfg=dict(type='QuickGELU'), + norm_eval=False, + interpolate_mode='bicubic', + frozen_exclude=['pos_embed']), + text_encoder=dict( + type='CLIPTextEncoder', + dataset_name=None, + templates='vild', + embed_dims=512, + num_layers=12, + num_heads=8, + mlp_ratio=4, + output_dims=512, + cache_feature=True, + cat_bg=True, + norm_cfg=dict(type='LN', eps=1e-5) + ), + decode_head=dict( + type='SideAdapterCLIPHead', + num_classes=num_classes, + deep_supervision_idxs=[7], + san_cfg=dict( + in_channels=3, + clip_channels=768, + embed_dims=240, + patch_size=16, + patch_bias=True, + num_queries=100, + cfg_encoder=dict( + num_encode_layer=8, + num_heads=6, + mlp_ratio=4 + ), + fusion_index=[0, 1, 2, 3], + cfg_decoder=dict( + num_heads=12, + num_layers=1, + embed_channels=256, + mlp_channels=256, + num_mlp=3, + rescale=True), + norm_cfg=dict(type='LN', eps=1e-6), + ), + maskgen_cfg=dict( + sos_token_format='cls_token', + sos_token_num=100, + cross_attn=False, + num_layers=3, + embed_dims=768, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + out_dims=512, + final_norm=True, + act_cfg=dict(type='QuickGELU'), + norm_cfg=dict(type='LN', eps=1e-5), + frozen_exclude=[] + ), + align_corners=False, + train_cfg=dict( + num_points=12544, + oversample_ratio=3.0, + importance_sample_ratio=0.75, + assigner=dict( + type='HungarianAssigner', + match_costs=[ + dict(type='ClassificationCost', weight=2.0), + dict( + type='CrossEntropyLossCost', + weight=5.0, + use_sigmoid=True), + dict( + type='DiceCost', + weight=5.0, + pred_act=True, + eps=1.0) + ])), + loss_decode=[dict(type='CrossEntropyLoss', + loss_name='loss_cls_ce', + loss_weight=2.0, + class_weight=[1.0] * num_classes + [0.1]), + dict(type='CrossEntropyLoss', + use_sigmoid=True, + loss_name='loss_mask_ce', + loss_weight=5.0), + dict(type='DiceLoss', + ignore_index=None, + naive_dice=True, + eps=1, + loss_name='loss_mask_dice', + loss_weight=5.0) + ]), + + # model training 
and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) # yapf: disable diff --git a/configs/_base_/models/segformer_mit-b0.py b/configs/_base_/models/segformer_mit-b0.py new file mode 100644 index 0000000000000000000000000000000000000000..46841adc076aa34720b575d170d48563ae0b4511 --- /dev/null +++ b/configs/_base_/models/segformer_mit-b0.py @@ -0,0 +1,42 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='MixVisionTransformer', + in_channels=3, + embed_dims=32, + num_stages=4, + num_layers=[2, 2, 2, 2], + num_heads=[1, 2, 5, 8], + patch_sizes=[7, 3, 3, 3], + sr_ratios=[8, 4, 2, 1], + out_indices=(0, 1, 2, 3), + mlp_ratio=4, + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1), + decode_head=dict( + type='SegformerHead', + in_channels=[32, 64, 160, 256], + in_index=[0, 1, 2, 3], + channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/segmenter_vit-b16_mask.py b/configs/_base_/models/segmenter_vit-b16_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..8f3dad1536daee6e0cc8da03a0726396c916a4d2 --- /dev/null +++ b/configs/_base_/models/segmenter_vit-b16_mask.py @@ -0,0 +1,44 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa +# model settings +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[127.5, 127.5, 127.5], + std=[127.5, 127.5, 127.5], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=checkpoint, + backbone=dict( + type='VisionTransformer', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + drop_path_rate=0.1, + attn_drop_rate=0.0, + drop_rate=0.0, + final_norm=True, + norm_cfg=backbone_norm_cfg, + with_cls_token=True, + interpolate_mode='bicubic', + ), + decode_head=dict( + type='SegmenterMaskTransformerHead', + in_channels=768, + channels=768, + num_classes=150, + num_layers=2, + num_heads=12, + embed_dims=768, + dropout_ratio=0.0, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)), +) diff --git a/configs/_base_/models/setr_mla.py b/configs/_base_/models/setr_mla.py new file mode 100644 index 0000000000000000000000000000000000000000..dedf169cacfba5f11bd8cc885bb0e080d2953647 --- /dev/null +++ b/configs/_base_/models/setr_mla.py @@ -0,0 +1,103 @@ +# model settings +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + 
pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', + backbone=dict( + type='VisionTransformer', + img_size=(768, 768), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + out_indices=(5, 11, 17, 23), + drop_rate=0.1, + norm_cfg=backbone_norm_cfg, + with_cls_token=False, + interpolate_mode='bilinear', + ), + neck=dict( + type='MLANeck', + in_channels=[1024, 1024, 1024, 1024], + out_channels=256, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + ), + decode_head=dict( + type='SETRMLAHead', + in_channels=(256, 256, 256, 256), + channels=512, + in_index=(0, 1, 2, 3), + dropout_ratio=0, + mla_channels=128, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=0, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=1, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=2, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=3, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/setr_naive.py b/configs/_base_/models/setr_naive.py new file mode 100644 index 0000000000000000000000000000000000000000..ccf5b3398ba02978cb0c870fdb089804db43f80e --- /dev/null +++ b/configs/_base_/models/setr_naive.py @@ -0,0 +1,88 @@ +# model settings +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', + backbone=dict( + type='VisionTransformer', + img_size=(768, 768), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + out_indices=(9, 14, 19, 23), + drop_rate=0.1, + norm_cfg=backbone_norm_cfg, + with_cls_token=True, + interpolate_mode='bilinear', + ), + decode_head=dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=3, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', 
use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/setr_pup.py b/configs/_base_/models/setr_pup.py new file mode 100644 index 0000000000000000000000000000000000000000..df1bc1890d2c59b344c1460986d08aceb11e0bcb --- /dev/null +++ b/configs/_base_/models/setr_pup.py @@ -0,0 +1,88 @@ +# model settings +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', + backbone=dict( + type='VisionTransformer', + img_size=(768, 768), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + out_indices=(9, 14, 19, 23), + drop_rate=0.1, + norm_cfg=backbone_norm_cfg, + with_cls_token=True, + interpolate_mode='bilinear', + ), + decode_head=dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=3, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=4, + up_scale=2, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/stdc.py b/configs/_base_/models/stdc.py new file mode 100644 index 0000000000000000000000000000000000000000..01bf2b925e550d8645e82f2a41405bc21a989151 --- /dev/null +++ b/configs/_base_/models/stdc.py @@ -0,0 +1,91 @@ +norm_cfg = dict(type='BN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='STDCContextPathNet', + 
backbone_cfg=dict( + type='STDCNet', + stdc_type='STDCNet1', + in_channels=3, + channels=(32, 64, 256, 512, 1024), + bottleneck_type='cat', + num_convs=4, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + with_final_conv=False), + last_in_channels=(1024, 512), + out_channels=128, + ffm_cfg=dict(in_channels=384, out_channels=256, scale_factor=4)), + decode_head=dict( + type='FCNHead', + in_channels=256, + channels=256, + num_convs=1, + num_classes=19, + in_index=3, + concat_input=False, + dropout_ratio=0.1, + norm_cfg=norm_cfg, + align_corners=True, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='STDCHead', + in_channels=256, + channels=64, + num_convs=1, + num_classes=2, + boundary_threshold=0.1, + in_index=0, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=True, + loss_decode=[ + dict( + type='CrossEntropyLoss', + loss_name='loss_ce', + use_sigmoid=True, + loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=1.0) + ]), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/twins_pcpvt-s_fpn.py b/configs/_base_/models/twins_pcpvt-s_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..059210b5e1976d5b856720a6941071e509c8b223 --- /dev/null +++ b/configs/_base_/models/twins_pcpvt-s_fpn.py @@ -0,0 +1,53 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa + +# model settings +backbone_norm_cfg = dict(type='LN') +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='PCPVT', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + in_channels=3, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + mlp_ratios=[8, 8, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=True, + norm_cfg=backbone_norm_cfg, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + norm_after_stage=False, + drop_rate=0.0, + attn_drop_rate=0., + drop_path_rate=0.2), + neck=dict( + type='FPN', + in_channels=[64, 128, 320, 512], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + 
train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/twins_pcpvt-s_upernet.py b/configs/_base_/models/twins_pcpvt-s_upernet.py new file mode 100644 index 0000000000000000000000000000000000000000..585a76f8586e90ba7a59ed4313ccd96161d29a02 --- /dev/null +++ b/configs/_base_/models/twins_pcpvt-s_upernet.py @@ -0,0 +1,61 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa + +# model settings +backbone_norm_cfg = dict(type='LN') +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='PCPVT', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + in_channels=3, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + mlp_ratios=[8, 8, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=True, + norm_cfg=backbone_norm_cfg, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + norm_after_stage=False, + drop_rate=0.0, + attn_drop_rate=0., + drop_path_rate=0.2), + decode_head=dict( + type='UPerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=320, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/upernet_beit.py b/configs/_base_/models/upernet_beit.py new file mode 100644 index 0000000000000000000000000000000000000000..691e288dbfdf8a79b3b365b70c086626e2ab82d1 --- /dev/null +++ b/configs/_base_/models/upernet_beit.py @@ -0,0 +1,58 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='BEiT', + img_size=(640, 640), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=(3, 5, 7, 11), + qv_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + init_values=0.1), + neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + type='UPerHead', + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=768, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=768, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + 
loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/upernet_convnext.py b/configs/_base_/models/upernet_convnext.py new file mode 100644 index 0000000000000000000000000000000000000000..958994c91e65dd39ebebf41a2c7145ae22184d56 --- /dev/null +++ b/configs/_base_/models/upernet_convnext.py @@ -0,0 +1,52 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +custom_imports = dict(imports='mmpretrain.models', allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='mmpretrain.ConvNeXt', + arch='base', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + type='UPerHead', + in_channels=[128, 256, 512, 1024], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=384, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/upernet_mae.py b/configs/_base_/models/upernet_mae.py new file mode 100644 index 0000000000000000000000000000000000000000..b833b67645981473b60a505cf244d611cf4817a1 --- /dev/null +++ b/configs/_base_/models/upernet_mae.py @@ -0,0 +1,57 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='MAE', + img_size=(640, 640), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=(3, 5, 7, 11), + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + init_values=0.1), + neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + type='UPerHead', + in_channels=[384, 384, 384, 384], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=384, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/upernet_r50.py b/configs/_base_/models/upernet_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..97f2eb8c4866addab1b7d99754f7696effd92a1a --- /dev/null +++ b/configs/_base_/models/upernet_r50.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/upernet_swin.py b/configs/_base_/models/upernet_swin.py new file mode 100644 index 0000000000000000000000000000000000000000..61cfce035ebbb33e1f4f9d0b716815fa84772bda --- /dev/null +++ b/configs/_base_/models/upernet_swin.py @@ -0,0 +1,62 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +backbone_norm_cfg = dict(type='LN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='SwinTransformer', + pretrain_img_size=224, + embed_dims=96, + patch_size=4, + window_size=7, + mlp_ratio=4, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + strides=(4, 2, 2, 2), + out_indices=(0, 1, 2, 3), + qkv_bias=True, + qk_scale=None, + patch_norm=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + use_abs_pos_embed=False, + act_cfg=dict(type='GELU'), + norm_cfg=backbone_norm_cfg), + decode_head=dict( + type='UPerHead', + in_channels=[96, 192, 384, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=384, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git 
a/configs/_base_/models/upernet_vit-b16_ln_mln.py b/configs/_base_/models/upernet_vit-b16_ln_mln.py new file mode 100644 index 0000000000000000000000000000000000000000..776525ad98f07187f18cdb1e86c5f3cf8f2ef63c --- /dev/null +++ b/configs/_base_/models/upernet_vit-b16_ln_mln.py @@ -0,0 +1,65 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth', + backbone=dict( + type='VisionTransformer', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=(2, 5, 8, 11), + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + with_cls_token=True, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + interpolate_mode='bicubic'), + neck=dict( + type='MultiLevelNeck', + in_channels=[768, 768, 768, 768], + out_channels=768, + scales=[4, 2, 1, 0.5]), + decode_head=dict( + type='UPerHead', + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=768, + in_index=3, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) # yapf: disable diff --git a/configs/_base_/models/vpd_sd.py b/configs/_base_/models/vpd_sd.py new file mode 100644 index 0000000000000000000000000000000000000000..87321e74f04aacbf67d5bcb1677f60399a66fd34 --- /dev/null +++ b/configs/_base_/models/vpd_sd.py @@ -0,0 +1,86 @@ +# model settings +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[127.5, 127.5, 127.5], + std=[127.5, 127.5, 127.5], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=0) + +# adapted from stable-diffusion/configs/stable-diffusion/v1-inference.yaml +stable_diffusion_cfg = dict( + base_learning_rate=0.0001, + target='ldm.models.diffusion.ddpm.LatentDiffusion', + checkpoint='https://download.openmmlab.com/mmsegmentation/v0.5/' + 'vpd/stable_diffusion_v1-5_pretrain_third_party.pth', + params=dict( + linear_start=0.00085, + linear_end=0.012, + num_timesteps_cond=1, + log_every_t=200, + timesteps=1000, + first_stage_key='jpg', + cond_stage_key='txt', + image_size=64, + channels=4, + cond_stage_trainable=False, + conditioning_key='crossattn', + monitor='val/loss_simple_ema', + scale_factor=0.18215, + use_ema=False, + scheduler_config=dict( + target='ldm.lr_scheduler.LambdaLinearScheduler', + params=dict( + warm_up_steps=[10000], + cycle_lengths=[10000000000000], + f_start=[1e-06], + f_max=[1.0], + f_min=[1.0])), + unet_config=dict( + target='ldm.modules.diffusionmodules.openaimodel.UNetModel', + params=dict( + image_size=32, + in_channels=4, + out_channels=4, + model_channels=320, + attention_resolutions=[4, 2, 1], + num_res_blocks=2, + channel_mult=[1, 2, 4, 4], + num_heads=8, + use_spatial_transformer=True, + transformer_depth=1, + 
context_dim=768, + use_checkpoint=True, + legacy=False)), + first_stage_config=dict( + target='ldm.models.autoencoder.AutoencoderKL', + params=dict( + embed_dim=4, + monitor='val/rec_loss', + ddconfig=dict( + double_z=True, + z_channels=4, + resolution=256, + in_channels=3, + out_ch=3, + ch=128, + ch_mult=[1, 2, 4, 4], + num_res_blocks=2, + attn_resolutions=[], + dropout=0.0), + lossconfig=dict(target='torch.nn.Identity'))), + cond_stage_config=dict( + target='ldm.modules.encoders.modules.AbstractEncoder'))) + +model = dict( + type='DepthEstimator', + data_preprocessor=data_preprocessor, + backbone=dict( + type='VPD', + diffusion_cfg=stable_diffusion_cfg, + ), +) + +# some of the parameters in stable-diffusion model will not be updated +# during training +find_unused_parameters = True diff --git a/configs/_base_/schedules/schedule_160k.py b/configs/_base_/schedules/schedule_160k.py new file mode 100644 index 0000000000000000000000000000000000000000..60d7bec76244e86ec4635173a45985d4f7023e74 --- /dev/null +++ b/configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,25 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=160000, + by_epoch=False) +] +# training schedule for 160k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=160000, val_interval=16000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_20k.py b/configs/_base_/schedules/schedule_20k.py new file mode 100644 index 0000000000000000000000000000000000000000..e809e3e88092446b8ee233ebbc6feccabdbccaac --- /dev/null +++ b/configs/_base_/schedules/schedule_20k.py @@ -0,0 +1,24 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=20000, + by_epoch=False) +] +# training schedule for 20k +train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_240k.py b/configs/_base_/schedules/schedule_240k.py new file mode 100644 index 0000000000000000000000000000000000000000..feb2ce9637fd539a28881d1ddf516e5bc3e58be5 --- /dev/null +++ b/configs/_base_/schedules/schedule_240k.py @@ -0,0 +1,25 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + 
dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=240000, + by_epoch=False) +] +# training schedule for 240k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=240000, val_interval=24000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_25k.py b/configs/_base_/schedules/schedule_25k.py new file mode 100644 index 0000000000000000000000000000000000000000..825e141ed12cd36567030de94c6bd081718510ee --- /dev/null +++ b/configs/_base_/schedules/schedule_25k.py @@ -0,0 +1,28 @@ +# optimizer +optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.1) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=3e-2, begin=0, end=12000, + by_epoch=False), + dict( + type='PolyLRRatio', + eta_min_ratio=3e-2, + power=0.9, + begin=12000, + end=24000, + by_epoch=False), + dict(type='ConstantLR', by_epoch=False, factor=1, begin=24000, end=25000) +] +# training schedule for 25k +train_cfg = dict(type='IterBasedTrainLoop', max_iters=25000, val_interval=1000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_320k.py b/configs/_base_/schedules/schedule_320k.py new file mode 100644 index 0000000000000000000000000000000000000000..70b063afc9f89d62cb2f2dce8a6f225ad4d39220 --- /dev/null +++ b/configs/_base_/schedules/schedule_320k.py @@ -0,0 +1,25 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=320000, + by_epoch=False) +] +# training schedule for 320k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=320000, val_interval=32000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_40k.py b/configs/_base_/schedules/schedule_40k.py new file mode 100644 index 0000000000000000000000000000000000000000..4b823339a28cfc19159e4e93603fbf1beb81ac94 --- /dev/null +++ b/configs/_base_/schedules/schedule_40k.py @@ -0,0 +1,24 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + 
dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=40000, + by_epoch=False) +] +# training schedule for 40k +train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_80k.py b/configs/_base_/schedules/schedule_80k.py new file mode 100644 index 0000000000000000000000000000000000000000..0dcd6c4d1bc0158107276fc9abe7d5d62c0880c2 --- /dev/null +++ b/configs/_base_/schedules/schedule_80k.py @@ -0,0 +1,24 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=80000, + by_epoch=False) +] +# training schedule for 80k +train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/ann/README.md b/configs/ann/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1281a9ee14f2aabcfaa128ace7e945a77c361038 --- /dev/null +++ b/configs/ann/README.md @@ -0,0 +1,68 @@ +# ANN + +> [Asymmetric Non-local Neural Networks for Semantic Segmentation](https://arxiv.org/abs/1908.07678) + +## Introduction + + + +[Official Repo](https://github.com/MendelXu/ANN) + +Code Snippet + +## Abstract + + + +The non-local module works as a particularly useful technique for semantic segmentation while criticized for its prohibitive computation and GPU memory occupation. In this paper, we present Asymmetric Non-local Neural Network to semantic segmentation, which has two prominent components: Asymmetric Pyramid Non-local Block (APNB) and Asymmetric Fusion Non-local Block (AFNB). APNB leverages a pyramid sampling module into the non-local block to largely reduce the computation and memory consumption without sacrificing the performance. AFNB is adapted from APNB to fuse the features of different levels under a sufficient consideration of long range dependencies and thus considerably improves the performance. Extensive experiments on semantic segmentation benchmarks demonstrate the effectiveness and efficiency of our work. In particular, we report the state-of-the-art performance of 81.3 mIoU on the Cityscapes test set. For a 256x128 input, APNB is around 6 times faster than a non-local block on GPU while 28 times smaller in GPU running memory occupation. Code is available at: [this https URL](https://github.com/MendelXu/ANN). + +
+ +
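+As a rough intuition for the asymmetry described in the abstract, the sketch below pyramid-pools the key/value features to a small, fixed number of anchor points before computing non-local attention. This is an illustrative PyTorch sketch only, not mmsegmentation's actual `ANNHead` implementation; the class name, `key_channels`, and the pooling scales are assumptions chosen to mirror the paper's description.
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class PyramidSampledNonLocal(nn.Module):
+    """APNB-style block: attention over S pooled anchors instead of all N pixels."""
+
+    def __init__(self, channels, key_channels=64, scales=(1, 3, 6, 8)):
+        super().__init__()
+        self.scales = scales
+        self.query = nn.Conv2d(channels, key_channels, 1)
+        self.key = nn.Conv2d(channels, key_channels, 1)
+        self.value = nn.Conv2d(channels, key_channels, 1)
+        self.out = nn.Conv2d(key_channels, channels, 1)
+
+    def _sample(self, x):
+        # pool to each pyramid scale and flatten: (B, Ck, S) with S = sum(s * s)
+        return torch.cat(
+            [F.adaptive_avg_pool2d(x, s).flatten(2) for s in self.scales], dim=2)
+
+    def forward(self, x):
+        b, _, h, w = x.shape
+        q = self.query(x).flatten(2).transpose(1, 2)     # (B, N, Ck), N = h * w
+        k = self._sample(self.key(x))                    # (B, Ck, S)
+        v = self._sample(self.value(x)).transpose(1, 2)  # (B, S, Ck)
+        attn = torch.softmax(q @ k, dim=-1)              # (B, N, S): O(N*S), not O(N*N)
+        ctx = (attn @ v).transpose(1, 2).reshape(b, -1, h, w)
+        return x + self.out(ctx)                         # residual connection
+```
+
+With `scales=(1, 3, 6, 8)` there are only S = 110 anchors regardless of input resolution, which is where the claimed speed and memory savings over the vanilla non-local block come from.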
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ANN | R-50-D8 | 512x1024 | 40000 | 6 | 3.71 | V100 | 77.40 | 78.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211.log.json) | +| ANN | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.55 | V100 | 76.55 | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243-adf6eece.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243.log.json) | +| ANN | R-50-D8 | 769x769 | 40000 | 6.8 | 1.70 | V100 | 78.89 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712-2b46b04d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712.log.json) | +| ANN | R-101-D8 | 769x769 | 40000 | 10.7 | 1.15 | V100 | 79.32 | 80.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720-059bff28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720.log.json) | +| ANN | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 77.34 | 78.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911-5a9ad545.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911.log.json) | +| ANN | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 77.14 | 78.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r101-d8_4xb2-80k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728-aceccc6e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728.log.json) | +| ANN | R-50-D8 | 769x769 | 80000 | - | - | V100 | 78.88 | 80.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426-cc7ff323.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426.log.json) | +| ANN | R-101-D8 | 769x769 | 80000 | - | - | V100 | 78.80 | 80.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713-a9d4be8d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ANN | R-50-D8 | 512x512 | 80000 | 9.1 | 21.01 | V100 | 41.01 | 42.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818-26f75e11.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818.log.json) | +| ANN | R-101-D8 | 512x512 | 80000 | 12.5 | 14.12 | V100 | 42.94 | 44.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818-c0153543.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818.log.json) | +| ANN | R-50-D8 | 512x512 | 160000 | - | - | V100 | 41.74 | 42.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733-892247bc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733.log.json) | +| ANN | R-101-D8 | 
512x512 | 160000 | - | - | V100 | 42.94 | 44.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733-955eb1ec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ANN | R-50-D8 | 512x512 | 20000 | 6 | 20.92 | V100 | 74.86 | 76.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246-dfcb1c62.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246.log.json) | +| ANN | R-101-D8 | 512x512 | 20000 | 9.5 | 13.94 | V100 | 77.47 | 78.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246-2fad0042.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246.log.json) | +| ANN | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.56 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314-b5dac322.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314.log.json) | +| ANN | R-101-D8 | 512x512 | 40000 | - | - | V100 | 76.70 | 78.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann/ann_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314-bd205bbe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314.log.json) | + +## Citation + +```bibtex +@inproceedings{zhu2019asymmetric, + title={Asymmetric non-local neural networks for semantic segmentation}, + author={Zhu, Zhen and Xu, Mengde and Bai, Song and Huang, Tengteng and Bai, Xiang}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={593--602}, + year={2019} +} +``` 
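For quick reference, every `config`/`model` pair in the tables above can be run through the MMSegmentation Python API. The snippet below is a minimal sketch rather than part of the original repo: it assumes the first Cityscapes checkpoint from the table has been downloaded locally, and `demo.png` is a placeholder input image.

```python
# Minimal inference sketch for one of the ANN checkpoints listed above.
# Assumptions: the config path is taken from the table's "config" column,
# the .pth file was downloaded from the matching "model" link, and
# "demo.png" stands in for any test image.
from mmseg.apis import inference_model, init_model
from mmseg.apis.inference import show_result_pyplot

config = 'configs/ann/ann_r50-d8_4xb2-40k_cityscapes-512x1024.py'
checkpoint = 'ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth'

model = init_model(config, checkpoint, device='cuda:0')  # or device='cpu'
result = inference_model(model, 'demo.png')

# Blend the input image with the predicted segmentation map.
vis = show_result_pyplot(model, 'demo.png', result, show=False)
```

The R-101 variants in the config files that follow reuse these settings via `_base_` inheritance, overriding only the pretrained weights and the backbone depth.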
diff --git a/configs/ann/ann_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/ann/ann_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0da7e0b7021ec3e8ead76c1a81f3ebb761b36a05 --- /dev/null +++ b/configs/ann/ann_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/ann/ann_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..08459c0a509fd396d0af8967e5ee4a98c6734571 --- /dev/null +++ b/configs/ann/ann_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/ann/ann_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..46781fa9f244e157e8e7cfe5b6023dec4c62a97c --- /dev/null +++ b/configs/ann/ann_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/ann/ann_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..c951d8704ce1b59977b2643cf458789e891d37be --- /dev/null +++ b/configs/ann/ann_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/ann/ann_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9f14327542f84bdf58a99cee4212c8d1c9e091eb --- /dev/null +++ b/configs/ann/ann_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/ann/ann_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c3c1a3f70693a1d3770ffa9b619df40ed73435dd --- /dev/null +++ b/configs/ann/ann_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/ann/ann_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c3c1a3f70693a1d3770ffa9b619df40ed73435dd --- /dev/null +++ b/configs/ann/ann_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/ann/ann_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..3cc5b8e30042c965498fc03afec8ca2f5076a068 --- /dev/null +++ b/configs/ann/ann_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ =
'./ann_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ann/ann_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/ann/ann_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..119eb7644789831d1434408a1fab525fa2bb8b66 --- /dev/null +++ b/configs/ann/ann_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/ann/ann_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/ann/ann_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..3152b929a64e096720b791a98369732fa6a326ed --- /dev/null +++ b/configs/ann/ann_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ann/ann_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/ann/ann_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..793437f7a8e7e38785a0488c10627b26dd4187dc --- /dev/null +++ b/configs/ann/ann_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/ann/ann_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/ann/ann_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..4e392ca1662cbacaa10f6aeb8192ea357303855f --- /dev/null +++ b/configs/ann/ann_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ann/ann_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/ann/ann_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..900381dd1fb8aa4c17d832bb1ca595895dabc65b --- /dev/null +++ b/configs/ann/ann_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git 
a/configs/ann/ann_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/ann/ann_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..6921218c3122cc5ffc551873fc81eaf0161b65b3 --- /dev/null +++ b/configs/ann/ann_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/ann/ann_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/ann/ann_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..e1c236049c891b4e8fd5e5c18f190c87000683d5 --- /dev/null +++ b/configs/ann/ann_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/ann/ann_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/ann/ann_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb26efcf4e5a30ae272265e92643c0215c5f897 --- /dev/null +++ b/configs/ann/ann_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/ann/metafile.yaml b/configs/ann/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d118681fdceb502dffeaab01c9753fefc728361 --- /dev/null +++ b/configs/ann/metafile.yaml @@ -0,0 +1,391 @@ +Collections: +- Name: ANN + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + README: configs/ann/README.md + Frameworks: + - PyTorch +Models: +- Name: ann_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.4 + mIoU(ms+flip): 78.57 + Config: configs/ann/ann_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ANN + Training Resources: 4x V100 GPUS + Memory (GB): 6.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- 
Name: ann_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.55 + mIoU(ms+flip): 78.85 + Config: configs/ann/ann_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ANN + Training Resources: 4x V100 GPUS + Memory (GB): 9.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243-adf6eece.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.89 + mIoU(ms+flip): 80.46 + Config: configs/ann/ann_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ANN + Training Resources: 4x V100 GPUS + Memory (GB): 6.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712-2b46b04d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.32 + mIoU(ms+flip): 80.94 + Config: configs/ann/ann_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ANN + Training Resources: 4x V100 GPUS + Memory (GB): 10.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720-059bff28.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.34 + mIoU(ms+flip): 78.65 + Config: configs/ann/ann_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ANN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911-5a9ad545.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.14 + mIoU(ms+flip): 78.81 + Config: configs/ann/ann_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ANN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728-aceccc6e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.88 + mIoU(ms+flip): 80.57 + Config: configs/ann/ann_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ANN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426-cc7ff323.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.8 + mIoU(ms+flip): 80.34 + Config: configs/ann/ann_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ANN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713-a9d4be8d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.01 + mIoU(ms+flip): 42.3 + Config: configs/ann/ann_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + 
Architecture: + - R-50-D8 + - ANN + Training Resources: 4x V100 GPUS + Memory (GB): 9.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818-26f75e11.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.94 + mIoU(ms+flip): 44.18 + Config: configs/ann/ann_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - ANN + Training Resources: 4x V100 GPUS + Memory (GB): 12.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818-c0153543.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.74 + mIoU(ms+flip): 42.62 + Config: configs/ann/ann_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - ANN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733-892247bc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.94 + mIoU(ms+flip): 44.06 + Config: configs/ann/ann_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - ANN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733-955eb1ec.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: ANN + Results: + Task: Semantic 
Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.86 + mIoU(ms+flip): 76.13 + Config: configs/ann/ann_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - ANN + Training Resources: 4x V100 GPUS + Memory (GB): 6.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246-dfcb1c62.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.47 + mIoU(ms+flip): 78.7 + Config: configs/ann/ann_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - ANN + Training Resources: 4x V100 GPUS + Memory (GB): 9.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246-2fad0042.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.56 + mIoU(ms+flip): 77.51 + Config: configs/ann/ann_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - ANN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314-b5dac322.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314.log.json + Paper: + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch +- Name: ann_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: ANN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.7 + mIoU(ms+flip): 78.06 + Config: configs/ann/ann_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - ANN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314-bd205bbe.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314.log.json + Paper: + 
Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1908.07678 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Framework: PyTorch diff --git a/configs/apcnet/README.md b/configs/apcnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9104f3c87f584c6af115a39bb0b175f44b65c964 --- /dev/null +++ b/configs/apcnet/README.md @@ -0,0 +1,59 @@ +# APCNet + +> [Adaptive Pyramid Context Network for Semantic Segmentation](https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Recent studies witnessed that context features can significantly improve the performance of deep semantic segmentation networks. Current context based segmentation methods differ from each other in how to construct context features and perform differently in practice. This paper firstly introduces three desirable properties of context features in the segmentation task. In particular, we find that Global-guided Local Affinity (GLA) can play a vital role in constructing effective context features, while this property has been largely ignored in previous works. Based on this analysis, this paper proposes Adaptive Pyramid Context Network (APCNet) for semantic segmentation. APCNet adaptively constructs multi-scale contextual representations with multiple well-designed Adaptive Context Modules (ACMs). Specifically, each ACM leverages a global image representation as a guidance to estimate the local affinity coefficients for each sub-region, and then calculates a context vector with these affinities. We empirically evaluate our APCNet on three semantic segmentation and scene parsing datasets, including PASCAL VOC 2012, Pascal-Context, and ADE20K dataset. Experimental results show that APCNet achieves state-of-the-art performance on all three benchmarks, and obtains a new record 84.2% on PASCAL VOC 2012 test set without MS COCO pre-training or any post-processing. + + +
+ +
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| APCNet | R-50-D8 | 512x1024 | 40000 | 7.7 | 3.57 | V100 | 78.02 | 79.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes_20201214_115717-5e88fa33.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes-20201214_115717.log.json) | +| APCNet | R-101-D8 | 512x1024 | 40000 | 11.2 | 2.15 | V100 | 79.08 | 80.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes_20201214_115716-abc9d111.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes-20201214_115716.log.json) | +| APCNet | R-50-D8 | 769x769 | 40000 | 8.7 | 1.52 | V100 | 77.89 | 79.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes_20201214_115717-2a2628d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes-20201214_115717.log.json) | +| APCNet | R-101-D8 | 769x769 | 40000 | 12.7 | 1.03 | V100 | 77.96 | 79.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes_20201214_115718-b650de90.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes-20201214_115718.log.json) | +| APCNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 78.96 | 79.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes_20201214_115716-987f51e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes-20201214_115716.log.json) | +| APCNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 79.64 | 
80.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes_20201214_115705-b1ff208a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes-20201214_115705.log.json) | +| APCNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 78.79 | 80.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes_20201214_115718-7ea9fa12.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes-20201214_115718.log.json) | +| APCNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 78.45 | 79.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes_20201214_115716-a7fbc2ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes-20201214_115716.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| APCNet | R-50-D8 | 512x512 | 80000 | 10.1 | 19.61 | V100 | 42.20 | 43.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k-20201214_115705.log.json) | +| APCNet | R-101-D8 | 512x512 | 80000 | 13.6 | 13.10 | V100 | 45.54 | 46.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k_20201214_115704-c656c3fb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k-20201214_115704.log.json) | +| APCNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 43.40 | 43.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k_20201214_115706-25fb92c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k-20201214_115706.log.json) | +| APCNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 45.41 | 46.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet/apcnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k_20201214_115705-73f9a8d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k-20201214_115705.log.json) | + +## Citation + +```bibtex +@InProceedings{He_2019_CVPR, +author = {He, Junjun and Deng, Zhongying and Zhou, Lei and Wang, Yali and Qiao, Yu}, +title = {Adaptive Pyramid Context Network for Semantic Segmentation}, +booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2019} +} +``` diff --git a/configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..754b2d1a089cb48f66a0f287f36030deebca687a --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..d2b5fe1360f0ff662727b666198c5e4f360f7066 --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..03b018d2ffbaf9ef0631b96c555105456567676f --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..0cbbfadbddbb5e152b7865b8b1b7bc9e80f0de09 --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/apcnet/apcnet_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..f0aacc06e0c381ba143cf2a75a434df3ffd480b9 --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_4xb4-160k_ade20k-512x512.py' 
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/apcnet/apcnet_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..219d07ae5564072b682a9f208da72586d22c005c --- /dev/null +++ b/configs/apcnet/apcnet_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..b440771d01975cb3d9a77570b6b4e389727a6630 --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..9ff897c977069fc04ca2aeb43388fde4c2dbce65 --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6de10330b3d0947bb6900b56e61ddee921f211e1 --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..d6ec8985cc363d9ae1d6da7a97987b74c1f8a892 --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/apcnet/apcnet_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/apcnet/apcnet_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 
0000000000000000000000000000000000000000..37b23d1c539c8133733b87d973ae4c1595a65c5f --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/apcnet/apcnet_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/apcnet/apcnet_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b0fbe275b63b4aa351af6509ee56b302c4d1a432 --- /dev/null +++ b/configs/apcnet/apcnet_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/apcnet/metafile.yaml b/configs/apcnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f4072c8fda9343755e333550abbc2e98f6716d1 --- /dev/null +++ b/configs/apcnet/metafile.yaml @@ -0,0 +1,296 @@ +Collections: +- Name: APCNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + README: configs/apcnet/README.md + Frameworks: + - PyTorch +Models: +- Name: apcnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.02 + mIoU(ms+flip): 79.26 + Config: configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - APCNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes_20201214_115717-5e88fa33.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes-20201214_115717.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.08 + mIoU(ms+flip): 80.34 + Config: configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - APCNet + Training Resources: 4x V100 GPUS + Memory (GB): 11.2 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes_20201214_115716-abc9d111.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes-20201214_115716.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.89 + mIoU(ms+flip): 79.75 + Config: configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - APCNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes_20201214_115717-2a2628d7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes-20201214_115717.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.96 + mIoU(ms+flip): 79.24 + Config: configs/apcnet/apcnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - APCNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes_20201214_115718-b650de90.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes-20201214_115718.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.96 + mIoU(ms+flip): 79.94 + Config: configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - APCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes_20201214_115716-987f51e3.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes-20201214_115716.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.64 + mIoU(ms+flip): 80.61 + Config: configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - APCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes_20201214_115705-b1ff208a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes-20201214_115705.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.79 + mIoU(ms+flip): 80.35 + Config: configs/apcnet/apcnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - APCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes_20201214_115718-7ea9fa12.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes-20201214_115718.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.45 + mIoU(ms+flip): 79.91 + Config: configs/apcnet/apcnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - APCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes_20201214_115716-a7fbc2ab.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes-20201214_115716.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: 
https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.2 + mIoU(ms+flip): 43.3 + Config: configs/apcnet/apcnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - APCNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k-20201214_115705.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.54 + mIoU(ms+flip): 46.65 + Config: configs/apcnet/apcnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - APCNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k_20201214_115704-c656c3fb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k-20201214_115704.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: APCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.4 + mIoU(ms+flip): 43.94 + Config: configs/apcnet/apcnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - APCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k_20201214_115706-25fb92c2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k-20201214_115706.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: apcnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: APCNet + Results: + 
Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.41 + mIoU(ms+flip): 46.63 + Config: configs/apcnet/apcnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - APCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k_20201214_115705-73f9a8d7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k-20201214_115705.log.json + Paper: + Title: Adaptive Pyramid Context Network for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch diff --git a/configs/beit/README.md b/configs/beit/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b005c88c501bd50a692aa4cb7db3e13b3f4ec29a --- /dev/null +++ b/configs/beit/README.md @@ -0,0 +1,85 @@ +# BEiT + +> [BEiT: BERT Pre-Training of Image Transformers](https://arxiv.org/abs/2106.08254) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We introduce a self-supervised vision representation model BEiT, which stands for Bidirectional Encoder representation from Image Transformers. Following BERT developed in the natural language processing area, we propose a masked image modeling task to pretrain vision Transformers. Specifically, each image has two views in our pre-training, i.e., image patches (such as 16x16 pixels), and visual tokens (i.e., discrete tokens). We first "tokenize" the original image into visual tokens. Then we randomly mask some image patches and feed them into the backbone Transformer. The pre-training objective is to recover the original visual tokens based on the corrupted image patches. After pre-training BEiT, we directly fine-tune the model parameters on downstream tasks by appending task layers upon the pretrained encoder. Experimental results on image classification and semantic segmentation show that our model achieves competitive results with previous pre-training methods. For example, base-size BEiT achieves 83.2% top-1 accuracy on ImageNet-1K, significantly outperforming from-scratch DeiT training (81.8%) with the same setup. Moreover, large-size BEiT obtains 86.3% only using ImageNet-1K, even outperforming ViT-L with supervised pre-training on ImageNet-22K (85.2%). The code and pretrained models are available at [this https URL](https://github.com/microsoft/unilm/tree/master/beit). + + +
+ +
+ +## Usage + +To use other repositories' pre-trained models, it is necessary to convert the model keys first. + +We provide a script [`beit2mmseg.py`](../../tools/model_converters/beit2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/microsoft/unilm/tree/master/beit/semantic_segmentation) to MMSegmentation style. + +```shell +python tools/model_converters/beit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +For example: + +```shell +python tools/model_converters/beit2mmseg.py https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_224_pt22k_ft22k.pth pretrain/beit_base_patch16_224_pt22k_ft22k.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +In our default setting, the pretrained models correspond to the following original models: + +| pretrained models | original models | +| ----------------- | --------------------------------------------------------------------------------------------------------------------------- | +| BEiT_base.pth | [BEiT_base](https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_224_pt22k_ft22k.pth) | +| BEiT_large.pth | [BEiT_large](https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_large_patch16_224_pt22k_ft22k.pth) | + +Verify the single-scale results of the model: + +```shell +sh tools/dist_test.sh \ +configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py \ +upernet_beit-large_fp16_8x1_640x640_160k_ade20k-8fc0dd5d.pth $GPUS --eval mIoU +``` + +Since the relative position embedding requires the input height and width to be equal, a sliding window is adopted for multi-scale inference, with `min_size=640` so that the shortest edge is 640. Multi-scale inference is therefore run through a separate config rather than `--aug-test`.
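+
+The sliding-window behaviour is controlled by `test_cfg` in the BEiT configs added in this commit; a minimal sketch of the relevant fragment (values copied from the configs below):
+
+```python
+# Sliding-window inference: 640x640 windows moved with a 426-pixel stride,
+# so neighbouring windows overlap by roughly one third and the logits are
+# averaged where windows overlap.
+test_cfg = dict(mode='slide', crop_size=(640, 640), stride=(426, 426))
+```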
For multi-scale inference: + +```shell +sh tools/dist_test.sh \ +configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py \ +upernet_beit-large_fp16_8x1_640x640_160k_ade20k-8fc0dd5d.pth $GPUS --eval mIoU +``` + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------------ | ----------------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | BEiT-B | 640x640 | ImageNet-22K | 224x224 | 16 | 160000 | 15.88 | 2.00 | V100 | 53.08 | 53.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/beit/beit-base_upernet_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-base_8x2_640x640_160k_ade20k/upernet_beit-base_8x2_640x640_160k_ade20k-eead221d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-base_8x2_640x640_160k_ade20k/upernet_beit-base_8x2_640x640_160k_ade20k.log.json) | +| UPerNet | BEiT-L | 640x640 | ImageNet-22K | 224x224 | 8 | 320000 | 22.64 | 0.96 | V100 | 56.33 | 56.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/beit/beit-large_upernet_8xb1-amp-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k/upernet_beit-large_fp16_8x1_640x640_160k_ade20k-8fc0dd5d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.log.json) | + +## Citation + +```bibtex +@inproceedings{beit, + title={{BEiT}: {BERT} Pre-Training of Image Transformers}, + author={Hangbo Bao and Li Dong and Songhao Piao and Furu Wei}, + booktitle={International Conference on Learning Representations}, + year={2022}, + url={https://openreview.net/forum?id=p-BhZSz59o4} +} +``` diff --git a/configs/beit/beit-base_upernet_8xb2-160k_ade20k-640x640.py b/configs/beit/beit-base_upernet_8xb2-160k_ade20k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..1cd7d0e8a8847019f9e3b71b60a7ddf7e58e8ed8 --- /dev/null +++ b/configs/beit/beit-base_upernet_8xb2-160k_ade20k-640x640.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/upernet_beit.py', '../_base_/datasets/ade20k_640x640.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='pretrain/beit_base_patch16_224_pt22k_ft22k.pth', + test_cfg=dict(mode='slide', crop_size=(640, 640), stride=(426, 426))) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=3e-5, betas=(0.9, 0.999), weight_decay=0.05), + constructor='LayerDecayOptimizerConstructor', + paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.9)) + +param_scheduler 
= [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + power=1.0, + begin=1500, + end=160000, + eta_min=0.0, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/beit/beit-base_upernet_8xb2-160k_ade20k-640x640_ms.py b/configs/beit/beit-base_upernet_8xb2-160k_ade20k-640x640_ms.py new file mode 100644 index 0000000000000000000000000000000000000000..02480222c454b0da0fdb161c2e0e615af8f67224 --- /dev/null +++ b/configs/beit/beit-base_upernet_8xb2-160k_ade20k-640x640_ms.py @@ -0,0 +1,16 @@ +_base_ = './beit-base_upernet_8xb2-160k_ade20k-640x640.py' + +test_pipeline = [ + dict(type='LoadImageFromFile'), + # TODO: Refactor 'MultiScaleFlipAug' which supports + # `min_size` feature in `Resize` class + # img_ratios is [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] + # original image scale is (2560, 640) + dict(type='Resize', scale=(2560, 640), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs'), +] +val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/beit/beit-large_upernet_8xb1-amp-160k_ade20k-640x640.py b/configs/beit/beit-large_upernet_8xb1-amp-160k_ade20k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..4fd5cd20adda2685d6d553c4d238840e9e297386 --- /dev/null +++ b/configs/beit/beit-large_upernet_8xb1-amp-160k_ade20k-640x640.py @@ -0,0 +1,50 @@ +_base_ = [ + '../_base_/models/upernet_beit.py', '../_base_/datasets/ade20k_640x640.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_320k.py' +] +crop_size = (640, 640) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='pretrain/beit_large_patch16_224_pt22k_ft22k.pth', + backbone=dict( + type='BEiT', + embed_dims=1024, + num_layers=24, + num_heads=16, + mlp_ratio=4, + qv_bias=True, + init_values=1e-6, + drop_path_rate=0.2, + out_indices=[7, 11, 15, 23]), + neck=dict(embed_dim=1024, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + in_channels=[1024, 1024, 1024, 1024], num_classes=150, channels=1024), + auxiliary_head=dict(in_channels=1024, num_classes=150), + test_cfg=dict(mode='slide', crop_size=(640, 640), stride=(426, 426))) + +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=dict( + type='AdamW', lr=2e-5, betas=(0.9, 0.999), weight_decay=0.05), + constructor='LayerDecayOptimizerConstructor', + paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95), + accumulative_counts=2) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=3000), + dict( + type='PolyLR', + power=1.0, + begin=3000, + end=160000, + eta_min=0.0, + by_epoch=False, + ) +] + +train_dataloader = dict(batch_size=1) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/beit/beit-large_upernet_8xb1-amp-160k_ade20k-640x640_ms.py b/configs/beit/beit-large_upernet_8xb1-amp-160k_ade20k-640x640_ms.py new file mode 100644 index 0000000000000000000000000000000000000000..fc6f049d11f097c00870cd687df152e996dcb879 --- /dev/null +++ b/configs/beit/beit-large_upernet_8xb1-amp-160k_ade20k-640x640_ms.py @@ -0,0 +1,16 @@ +_base_ = 
'./beit-large_upernet_8xb1-amp-160k_ade20k-640x640.py' + +test_pipeline = [ + dict(type='LoadImageFromFile'), + # TODO: Refactor 'MultiScaleFlipAug' which supports + # `min_size` feature in `Resize` class + # img_ratios is [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] + # original image scale is (2560, 640) + dict(type='Resize', scale=(2560, 640), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs'), +] +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/beit/metafile.yaml b/configs/beit/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef6124e8dcd2cc6541035ae001890eb332c37c3a --- /dev/null +++ b/configs/beit/metafile.yaml @@ -0,0 +1,49 @@ +Models: +- Name: beit-base_upernet_8xb2-160k_ade20k-640x640 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 53.08 + mIoU(ms+flip): 53.84 + Config: configs/beit/beit-base_upernet_8xb2-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - BEiT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 15.88 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-base_8x2_640x640_160k_ade20k/upernet_beit-base_8x2_640x640_160k_ade20k-eead221d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-base_8x2_640x640_160k_ade20k/upernet_beit-base_8x2_640x640_160k_ade20k.log.json + Paper: + Title: 'BEiT: BERT Pre-Training of Image Transformers' + URL: https://arxiv.org/abs/2106.08254 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/backbones/beit.py#L1404 + Framework: PyTorch +- Name: beit-large_upernet_8xb1-amp-160k_ade20k-640x640 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 56.33 + mIoU(ms+flip): 56.84 + Config: configs/beit/beit-large_upernet_8xb1-amp-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - BEiT-L + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 22.64 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k/upernet_beit-large_fp16_8x1_640x640_160k_ade20k-8fc0dd5d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.log.json + Paper: + Title: 'BEiT: BERT Pre-Training of Image Transformers' + URL: https://arxiv.org/abs/2106.08254 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/backbones/beit.py#L1404 + Framework: PyTorch diff --git a/configs/bisenetv1/README.md b/configs/bisenetv1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a5058957f0c43cc74185e3a6b9796b401e25296d --- /dev/null +++ b/configs/bisenetv1/README.md @@ -0,0 +1,64 @@ +# BiSeNetV1 + +> [BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Semantic segmentation requires both rich spatial information and sizeable receptive field. However, modern approaches usually compromise spatial resolution to achieve real-time inference speed, which leads to poor performance.
In this paper, we address this dilemma with a novel Bilateral Segmentation Network (BiSeNet). We first design a Spatial Path with a small stride to preserve the spatial information and generate high-resolution features. Meanwhile, a Context Path with a fast downsampling strategy is employed to obtain sufficient receptive field. On top of the two paths, we introduce a new Feature Fusion Module to combine features efficiently. The proposed architecture strikes the right balance between speed and segmentation performance on the Cityscapes, CamVid, and COCO-Stuff datasets. Specifically, for a 2048x1024 input, we achieve 68.4% Mean IOU on the Cityscapes test dataset at a speed of 105 FPS on one NVIDIA Titan XP card, which is significantly faster than existing methods with comparable performance. + + +
+ +
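+
+For reference, the Spatial Path / Context Path split described above maps directly onto the backbone options used by the configs in this commit; a minimal sketch (fragment copied from `bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py` below, with explanatory comments added):
+
+```python
+backbone = dict(
+    type='BiSeNetV1',
+    # Context Path: fast-downsampling ResNet stages for a large receptive field.
+    context_channels=(512, 1024, 2048),
+    # Spatial Path: shallow, small-stride layers that keep spatial resolution.
+    spatial_channels=(256, 256, 256, 512),
+    # Width of the representation fused by the Feature Fusion Module.
+    out_channels=1024,
+    backbone_cfg=dict(type='ResNet', depth=50))
+```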
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | ---------------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| BiSeNetV1 | R-18-D32 (No Pretrain) | 1024x1024 | 160000 | 5.69 | 31.77 | V100 | 74.44 | 77.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239-c55e78e2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239.log.json) | +| BiSeNetV1 | R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | V100 | 74.37 | 76.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251-8ba80eff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251.log.json) | +| BiSeNetV1 | R-18-D32 (4x8) | 1024x1024 | 160000 | 11.17 | 31.77 | V100 | 75.16 | 77.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb8-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322-bb8db75f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322.log.json) | +| BiSeNetV1 | R-50-D32 (No Pretrain) | 1024x1024 | 160000 | 15.39 | 7.71 | V100 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) | +| BiSeNetV1 | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | V100 | 77.68 | 79.57 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628.log.json) | + +### COCO-Stuff 164k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | ----------------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| BiSeNetV1 | R-18-D32 (No Pretrain) | 512x512 | 160000 | - | - | V100 | 25.45 | 26.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328-046aa2f2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328.log.json) | +| BiSeNetV1 | R-18-D32 | 512x512 | 160000 | 6.33 | 74.24 | V100 | 28.55 | 29.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100-f700dbf7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100.log.json) | +| BiSeNetV1 | R-50-D32 (No Pretrain) | 512x512 | 160000 | - | - | V100 | 29.82 | 30.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616-d2bb0df4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616.log.json) | +| BiSeNetV1 | R-50-D32 | 512x512 | 160000 | 9.28 | 32.60 | V100 | 34.88 | 35.37 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932-66747911.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932.log.json) | +| BiSeNetV1 | R-101-D32 (No Pretrain) | 512x512 | 160000 | - | - | V100 | 31.14 | 31.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r101-d32_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147-c6b32c3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147.log.json) | +| BiSeNetV1 | R-101-D32 | 512x512 | 160000 | 10.36 | 25.25 | V100 | 37.38 | 37.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1/bisenetv1_r101-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220-28c8f092.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220.log.json) | + +Note: + +- `4x8`: Using 4 GPUs with 8 samples per GPU in training (see the config sketch below). +- For BiSeNetV1 on the Cityscapes dataset, the default setting is 4 GPUs with 4 samples per GPU in training. +- `No Pretrain` means the model is trained from scratch.
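+
+The `4x8` variant only overrides the dataloaders; a minimal sketch of the whole override (copied from `bisenetv1_r18-d32-in1k-pre_4xb8-160k_cityscapes-1024x1024.py` in this commit):
+
+```python
+_base_ = './bisenetv1_r18-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py'
+# 4 GPUs x 8 samples per GPU: only the per-GPU train batch size changes;
+# everything else is inherited from the 4x4 config.
+train_dataloader = dict(batch_size=8, num_workers=4)
+val_dataloader = dict(batch_size=1, num_workers=4)
+test_dataloader = val_dataloader
+```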
+ +## Citation + +```bibtex +@inproceedings{yu2018bisenet, + title={Bisenet: Bilateral segmentation network for real-time semantic segmentation}, + author={Yu, Changqian and Wang, Jingbo and Peng, Chao and Gao, Changxin and Yu, Gang and Sang, Nong}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={325--341}, + year={2018} +} +``` diff --git a/configs/bisenetv1/bisenetv1_r101-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py b/configs/bisenetv1/bisenetv1_r101-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ac63447d47fc7cdf27517546bd254764145ddd4d --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r101-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,6 @@ +_base_ = './bisenetv1_r101-d32_4xb4-160k_coco-stuff164k-512x512.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c')))) diff --git a/configs/bisenetv1/bisenetv1_r101-d32_4xb4-160k_coco-stuff164k-512x512.py b/configs/bisenetv1/bisenetv1_r101-d32_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..02e4e9be05d9859af91299bbbae8f408360d7ad8 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r101-d32_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,58 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + context_channels=(512, 1024, 2048), + spatial_channels=(256, 256, 256, 512), + out_channels=1024, + backbone_cfg=dict(type='ResNet', depth=101)), + decode_head=dict(in_channels=1024, channels=1024, num_classes=171), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=512, + channels=256, + num_convs=1, + num_classes=171, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=512, + channels=256, + num_convs=1, + num_classes=171, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=160000, + by_epoch=False, + ) +] +optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py b/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..da3e598127d4c0430f053262f01be7eec819fba2 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py @@ -0,0 +1,29 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] 
+crop_size = (1024, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')))) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=160000, + by_epoch=False, + ) +] +optimizer = dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py b/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9de889f001591150ae9ed9bf0b90fc83e4552323 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,10 @@ +_base_ = './bisenetv1_r18-d32_4xb4-160k_coco-stuff164k-512x512.py' +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))), +) diff --git a/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb8-160k_cityscapes-1024x1024.py b/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb8-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0580ce11e6b84a9a61b9a02a670d806196d891d9 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb8-160k_cityscapes-1024x1024.py @@ -0,0 +1,4 @@ +_base_ = './bisenetv1_r18-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py' +train_dataloader = dict(batch_size=8, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_cityscapes-1024x1024.py b/configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6c3e12b24f5a7ddab6e6ec74075147110487fdea --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_cityscapes-1024x1024.py @@ -0,0 +1,24 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (1024, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=160000, + by_epoch=False, + ) +] +optimizer = dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_coco-stuff164k-512x512.py b/configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..2109d689d0a39f90f1363e4af65a3af36a0db52c --- /dev/null +++ 
b/configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,53 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=171, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=171, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=160000, + by_epoch=False, + ) +] +optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py b/configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..013c4ff1629b6dd961ac8eb7b90a6cfef5f0596b --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py @@ -0,0 +1,7 @@ +_base_ = './bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py' +model = dict( + type='EncoderDecoder', + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py b/configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b35259c72587bdb4f4e4b04308906cdc69bc667f --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,7 @@ +_base_ = './bisenetv1_r50-d32_4xb4-160k_coco-stuff164k-512x512.py' + +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py b/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..9753c10231d7030274fe16fb6b1832e8c1dea2f8 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py @@ -0,0 +1,55 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +crop_size = (1024, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='BiSeNetV1', + context_channels=(512, 1024, 
2048), + spatial_channels=(256, 256, 256, 512), + out_channels=1024, + backbone_cfg=dict(type='ResNet', depth=50)), + decode_head=dict( + type='FCNHead', in_channels=1024, in_index=0, channels=1024), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=512, + channels=256, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False), + dict( + type='FCNHead', + in_channels=512, + channels=256, + num_convs=1, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False), + ]) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=160000, + by_epoch=False, + ) +] +optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_coco-stuff164k-512x512.py b/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..8b6ef74c1a7a2a93f451742cff1041825db52933 --- /dev/null +++ b/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,58 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + context_channels=(512, 1024, 2048), + spatial_channels=(256, 256, 256, 512), + out_channels=1024, + backbone_cfg=dict(type='ResNet', depth=50)), + decode_head=dict(in_channels=1024, channels=1024, num_classes=171), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=512, + channels=256, + num_convs=1, + num_classes=171, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=512, + channels=256, + num_convs=1, + num_classes=171, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=160000, + by_epoch=False, + ) +] +optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv1/metafile.yaml b/configs/bisenetv1/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e37f632b2fd263429b4a01b2c2008c7a53dd2bcd --- /dev/null +++ b/configs/bisenetv1/metafile.yaml @@ -0,0 +1,275 @@ +Collections: +- Name: BiSeNetV1 + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - COCO-Stuff 164k + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + README: 
configs/bisenetv1/README.md + Frameworks: + - PyTorch +Models: +- Name: bisenetv1_r18-d32_4xb4-160k_cityscapes-1024x1024 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.44 + mIoU(ms+flip): 77.05 + Config: configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-18-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Memory (GB): 5.69 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239-c55e78e2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r18-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.37 + mIoU(ms+flip): 76.91 + Config: configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-18-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Memory (GB): 5.69 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251-8ba80eff.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r18-d32-in1k-pre_4xb8-160k_cityscapes-1024x1024 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.16 + mIoU(ms+flip): 77.24 + Config: configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb8-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 32 + Architecture: + - R-18-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Memory (GB): 11.17 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322-bb8db75f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024 + In Collection: BiSeNetV1 + Results: + Task: Semantic 
Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.92 + mIoU(ms+flip): 78.87 + Config: configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-50-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Memory (GB): 15.39 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r50-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.68 + mIoU(ms+flip): 79.57 + Config: configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-50-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Memory (GB): 15.39 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r18-d32_4xb4-160k_coco-stuff164k-512x512 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 25.45 + mIoU(ms+flip): 26.15 + Config: configs/bisenetv1/bisenetv1_r18-d32_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-18-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328-046aa2f2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r18-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 28.55 + mIoU(ms+flip): 29.26 + Config: configs/bisenetv1/bisenetv1_r18-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py + 
Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-18-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Memory (GB): 6.33 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100-f700dbf7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r50-d32_4xb4-160k_coco-stuff164k-512x512 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 29.82 + mIoU(ms+flip): 30.33 + Config: configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616-d2bb0df4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r50-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 34.88 + mIoU(ms+flip): 35.37 + Config: configs/bisenetv1/bisenetv1_r50-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Memory (GB): 9.28 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932-66747911.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r101-d32_4xb4-160k_coco-stuff164k-512x512 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 31.14 + mIoU(ms+flip): 31.76 + Config: configs/bisenetv1/bisenetv1_r101-d32_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + 
Batch Size: 16 + Architecture: + - R-101-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147-c6b32c3b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch +- Name: bisenetv1_r101-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512 + In Collection: BiSeNetV1 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 37.38 + mIoU(ms+flip): 37.99 + Config: configs/bisenetv1/bisenetv1_r101-d32-in1k-pre_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D32 + - BiSeNetV1 + Training Resources: 4x V100 GPUS + Memory (GB): 10.36 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220-28c8f092.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220.log.json + Paper: + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + URL: https://arxiv.org/abs/1808.00897 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Framework: PyTorch diff --git a/configs/bisenetv2/README.md b/configs/bisenetv2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a5871dfeb986cddf2235fb40c1036fe32e5aacfc --- /dev/null +++ b/configs/bisenetv2/README.md @@ -0,0 +1,53 @@ +# BiSeNetV2 + +> [Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic Segmentation](https://arxiv.org/abs/2004.02147) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The low-level details and high-level semantics are both essential to the semantic segmentation task. However, to speed up the model inference, current approaches almost always sacrifice the low-level details, which leads to a considerable accuracy decrease. We propose to treat these spatial details and categorical semantics separately to achieve high accuracy and high efficiency for realtime semantic segmentation. To this end, we propose an efficient and effective architecture with a good trade-off between speed and accuracy, termed Bilateral Segmentation Network (BiSeNet V2). This architecture involves: (i) a Detail Branch, with wide channels and shallow layers to capture low-level details and generate high-resolution feature representation; (ii) a Semantic Branch, with narrow channels and deep layers to obtain high-level semantic context. The Semantic Branch is lightweight due to reducing the channel capacity and a fast-downsampling strategy. Furthermore, we design a Guided Aggregation Layer to enhance mutual connections and fuse both types of feature representation. 
Besides, a booster training strategy is designed to improve the segmentation performance without any extra inference cost. Extensive quantitative and qualitative evaluations demonstrate that the proposed architecture performs favourably against a few state-of-the-art real-time semantic segmentation approaches. Specifically, for a 2,048x1,024 input, we achieve 72.6% Mean IoU on the Cityscapes test set with a speed of 156 FPS on one NVIDIA GeForce GTX 1080 Ti card, which is significantly faster than existing methods, yet we achieve better segmentation accuracy. + + + +
+ +
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | ---------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| BiSeNetV2 | BiSeNetV2 | 1024x1024 | 160000 | 7.64 | 31.77 | V100 | 73.21 | 75.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv2/bisenetv2_fcn_4xb4-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes_20210902_015551-bcf10f09.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes_20210902_015551.log.json) | +| BiSeNetV2 | BiSeNetV2 (OHEM) | 1024x1024 | 160000 | 7.64 | - | V100 | 73.57 | 75.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv2/bisenetv2_fcn_4xb4-ohem-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes_20210902_112947-5f8103b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes_20210902_112947.log.json) | +| BiSeNetV2 | BiSeNetV2 (4x8) | 1024x1024 | 160000 | 15.05 | - | V100 | 75.76 | 77.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv2/bisenetv2_fcn_4xb8-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes_20210903_000032-e1a2eed6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes_20210903_000032.log.json) | +| BiSeNetV2 | BiSeNetV2 (FP16) | 1024x1024 | 160000 | 5.77 | 36.65 | V100 | 73.07 | 75.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv2/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes_20210902_045942-b979777b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes_20210902_045942.log.json) | + +Note: + +- `OHEM` means Online Hard Example Mining (OHEM) is adopted in training. +- `FP16` means Mixed Precision (FP16) is adopted in training. +- `4x8` means 4 GPUs with 8 samples per GPU in training. 
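+
+The `OHEM` and `FP16` rows above differ from the baseline only by small config overrides; minimal sketches of the relevant fragments (copied from the two configs added below):
+
+```python
+# OHEM: the decode head (and each auxiliary head) keeps only hard pixels
+# when computing the loss (from bisenetv2_fcn_4xb4-ohem-160k_cityscapes-1024x1024.py).
+model = dict(
+    decode_head=dict(
+        sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000)))
+
+# FP16: swap the optimizer wrapper for an AMP one with static loss scaling
+# (from bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py).
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005),
+    loss_scale=512.)
+```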
+ +## Citation + +```bibtex +@article{yu2021bisenet, + title={Bisenet v2: Bilateral network with guided aggregation for real-time semantic segmentation}, + author={Yu, Changqian and Gao, Changxin and Wang, Jingbo and Yu, Gang and Shen, Chunhua and Sang, Nong}, + journal={International Journal of Computer Vision}, + pages={1--18}, + year={2021}, + publisher={Springer} +} +``` diff --git a/configs/bisenetv2/bisenetv2_fcn_4xb4-160k_cityscapes-1024x1024.py b/configs/bisenetv2/bisenetv2_fcn_4xb4-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6462ce76247bcf0ec1cb0bb4649bccaf393f7458 --- /dev/null +++ b/configs/bisenetv2/bisenetv2_fcn_4xb4-160k_cityscapes-1024x1024.py @@ -0,0 +1,24 @@ +_base_ = [ + '../_base_/models/bisenetv2.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (1024, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=160000, + by_epoch=False, + ) +] +optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv2/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py b/configs/bisenetv2/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..8ed338c00b20e7bcbd80d884155ef7c8b4ad3934 --- /dev/null +++ b/configs/bisenetv2/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py @@ -0,0 +1,6 @@ +_base_ = './bisenetv2_fcn_4xb4-160k_cityscapes-1024x1024.py' +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005), + loss_scale=512.) 
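The FP16 variant above also illustrates the MMEngine inheritance rule these configs rely on: a child config merges dict fields into its `_base_` by default, so `_delete_=True` is needed when the inherited `OptimWrapper` should be replaced wholesale by `AmpOptimWrapper` rather than merged with it. Conversely, a hypothetical override (not a file in this repo) that only tweaks one field can rely on the default merge:

```python
# Hypothetical child config, shown only to illustrate the merge semantics:
# every field not written here (optimizer type, momentum, loss_scale, ...)
# is inherited unchanged from the AMP config above.
_base_ = './bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py'
optim_wrapper = dict(optimizer=dict(lr=0.025))  # merged into AmpOptimWrapper, not replacing it
```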
diff --git a/configs/bisenetv2/bisenetv2_fcn_4xb4-ohem-160k_cityscapes-1024x1024.py b/configs/bisenetv2/bisenetv2_fcn_4xb4-ohem-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..8d5cbcb4e5e83e5592560ffb88f03ab0f01e8442 --- /dev/null +++ b/configs/bisenetv2/bisenetv2_fcn_4xb4-ohem-160k_cityscapes-1024x1024.py @@ -0,0 +1,83 @@ +_base_ = [ + '../_base_/models/bisenetv2.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (1024, 1024) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict( + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=16, + channels=16, + num_convs=2, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=32, + channels=64, + num_convs=2, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=64, + channels=256, + num_convs=2, + num_classes=19, + in_index=3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=128, + channels=1024, + num_convs=2, + num_classes=19, + in_index=4, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], +) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=160000, + by_epoch=False, + ) +] +optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv2/bisenetv2_fcn_4xb8-160k_cityscapes-1024x1024.py b/configs/bisenetv2/bisenetv2_fcn_4xb8-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..8fcba64713e142db38c29f489fd7ab83c8b28e9c --- /dev/null +++ b/configs/bisenetv2/bisenetv2_fcn_4xb8-160k_cityscapes-1024x1024.py @@ -0,0 +1,24 @@ +_base_ = [ + '../_base_/models/bisenetv2.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (1024, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=160000, + by_epoch=False, + ) +] +optimizer = dict(type='SGD', lr=0.05, momentum=0.9,
weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict(batch_size=8, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/bisenetv2/metafile.yaml b/configs/bisenetv2/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5430ec3071f96eb75fcd12c4084042c39b45fed8 --- /dev/null +++ b/configs/bisenetv2/metafile.yaml @@ -0,0 +1,114 @@ +Collections: +- Name: BiSeNetV2 + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: 'Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic + Segmentation' + URL: https://arxiv.org/abs/2004.02147 + README: configs/bisenetv2/README.md + Frameworks: + - PyTorch +Models: +- Name: bisenetv2_fcn_4xb4-160k_cityscapes-1024x1024 + In Collection: BiSeNetV2 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.21 + mIoU(ms+flip): 75.74 + Config: configs/bisenetv2/bisenetv2_fcn_4xb4-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - BiSeNetV2 + - BiSeNetV2 + Training Resources: 4x V100 GPUS + Memory (GB): 7.64 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes_20210902_015551-bcf10f09.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes_20210902_015551.log.json + Paper: + Title: 'Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic + Segmentation' + URL: https://arxiv.org/abs/2004.02147 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv2.py#L545 + Framework: PyTorch +- Name: bisenetv2_fcn_4xb4-ohem-160k_cityscapes-1024x1024 + In Collection: BiSeNetV2 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.57 + mIoU(ms+flip): 75.8 + Config: configs/bisenetv2/bisenetv2_fcn_4xb4-ohem-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - BiSeNetV2 + - BiSeNetV2 + Training Resources: 4x V100 GPUS + Memory (GB): 7.64 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes_20210902_112947-5f8103b4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes_20210902_112947.log.json + Paper: + Title: 'Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic + Segmentation' + URL: https://arxiv.org/abs/2004.02147 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv2.py#L545 + Framework: PyTorch +- Name: bisenetv2_fcn_4xb8-160k_cityscapes-1024x1024 + In Collection: BiSeNetV2 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.76 + mIoU(ms+flip): 77.79 + Config: configs/bisenetv2/bisenetv2_fcn_4xb8-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 32 + Architecture: + - BiSeNetV2 + - BiSeNetV2 + Training Resources: 4x V100 GPUS + Memory (GB): 15.05 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes_20210903_000032-e1a2eed6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes_20210903_000032.log.json + Paper: + Title: 'Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic + Segmentation' + URL: https://arxiv.org/abs/2004.02147 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv2.py#L545 + Framework: PyTorch +- Name: bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024 + In Collection: BiSeNetV2 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.07 + mIoU(ms+flip): 75.13 + Config: configs/bisenetv2/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - BiSeNetV2 + - BiSeNetV2 + Training Resources: 4x V100 GPUS + Memory (GB): 5.77 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes_20210902_045942-b979777b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes_20210902_045942.log.json + Paper: + Title: 'Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic + Segmentation' + URL: https://arxiv.org/abs/2004.02147 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv2.py#L545 + Framework: PyTorch diff --git a/configs/ccnet/README.md b/configs/ccnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..64dd5f0298aa4610ea070b96955c6ea4a9151c18 --- /dev/null +++ b/configs/ccnet/README.md @@ -0,0 +1,67 @@ +# CCNet + +> [CCNet: Criss-Cross Attention for Semantic Segmentation](https://arxiv.org/abs/1811.11721) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Contextual information is vital in visual understanding problems, such as semantic segmentation and object detection. We propose a Criss-Cross Network (CCNet) for obtaining full-image contextual information in a very effective and efficient way. Concretely, for each pixel, a novel criss-cross attention module harvests the contextual information of all the pixels on its criss-cross path. By taking a further recurrent operation, each pixel can finally capture the full-image dependencies. Besides, a category consistent loss is proposed to enforce the criss-cross attention module to produce more discriminative features. Overall, CCNet is with the following merits: 1) GPU memory friendly. Compared with the non-local block, the proposed recurrent criss-cross attention module requires 11x less GPU memory usage. 2) High computational efficiency. The recurrent criss-cross attention significantly reduces FLOPs by about 85% of the non-local block. 3) The state-of-the-art performance. We conduct extensive experiments on semantic segmentation benchmarks including Cityscapes, ADE20K, human parsing benchmark LIP, instance segmentation benchmark COCO, video segmentation benchmark CamVid. 
In particular, CCNet achieves mIoU scores of 81.9%, 45.76% and 55.47% on the Cityscapes test set, the ADE20K validation set and the LIP validation set, respectively, all new state-of-the-art results. The source code is available at [this https URL](https://github.com/speedinghzl/CCNet). + + + +
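The criss-cross operation described above can be sketched in a few lines of PyTorch. The sketch below is a deliberate simplification for illustration: it computes row and column attention separately, whereas the official implementation applies a joint softmax over the H+W-1 criss-cross positions and repeats the module (R=2) so that full-image dependencies are captured:

```python
import torch
import torch.nn as nn

class CrissCrossAttentionSketch(nn.Module):
    """Each pixel attends to all pixels in its own row and column."""

    def __init__(self, channels, reduction=8):
        super().__init__()
        inter = channels // reduction
        self.query = nn.Conv2d(channels, inter, 1)
        self.key = nn.Conv2d(channels, inter, 1)
        self.value = nn.Conv2d(channels, channels, 1)
        self.gamma = nn.Parameter(torch.zeros(1))  # learnable residual weight

    @staticmethod
    def _attend(q, k, v):
        # q, k: (batch, length, inter); v: (batch, length, channels)
        attn = torch.softmax(q @ k.transpose(1, 2), dim=-1)
        return attn @ v

    def forward(self, x):
        b, c, h, w = x.shape
        q, k, v = self.query(x), self.key(x), self.value(x)
        # Rows: reshape so each of the b*h rows is its own attention problem.
        row = self._attend(
            q.permute(0, 2, 3, 1).reshape(b * h, w, -1),
            k.permute(0, 2, 3, 1).reshape(b * h, w, -1),
            v.permute(0, 2, 3, 1).reshape(b * h, w, -1),
        ).reshape(b, h, w, c).permute(0, 3, 1, 2)
        # Columns: the same trick with height and width swapped.
        col = self._attend(
            q.permute(0, 3, 2, 1).reshape(b * w, h, -1),
            k.permute(0, 3, 2, 1).reshape(b * w, h, -1),
            v.permute(0, 3, 2, 1).reshape(b * w, h, -1),
        ).reshape(b, w, h, c).permute(0, 3, 2, 1)
        return self.gamma * (row + col) + x  # residual connection
```

Because each pixel attends to only H+W-1 positions instead of HxW, the attention maps stay far smaller than a non-local block's, which is where the memory and FLOPs savings quoted above come from.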
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CCNet | R-50-D8 | 512x1024 | 40000 | 6 | 3.32 | V100 | 77.76 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517-4123f401.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517.log.json) | +| CCNet | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.31 | V100 | 76.35 | 78.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540-a3b84ba6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540.log.json) | +| CCNet | R-50-D8 | 769x769 | 40000 | 6.8 | 1.43 | V100 | 78.46 | 79.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125-76d11884.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125.log.json) | +| CCNet | R-101-D8 | 769x769 | 40000 | 10.7 | 1.01 | V100 | 76.94 | 78.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428-4f57c8d0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428.log.json) | +| CCNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 79.03 | 80.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421-869a3423.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421.log.json) | +| CCNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 78.87 | 79.90 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935-ffae8917.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935.log.json) | +| CCNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.29 | 81.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421-73eed8ca.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421.log.json) | +| CCNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.45 | 80.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502-ad3cd481.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CCNet | R-50-D8 | 512x512 | 80000 | 8.8 | 20.89 | V100 | 41.78 | 42.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848-aa37f61e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848.log.json) | +| CCNet | R-101-D8 | 512x512 | 80000 | 12.2 | 14.11 | V100 | 43.97 | 45.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848-1f4929a3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848.log.json) | +| CCNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.08 | 43.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435-7c97193b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435.log.json) | +| CCNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 43.71 | 45.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644-e849e007.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CCNet | R-50-D8 | 512x512 | 20000 | 6 | 20.45 | V100 | 76.17 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212-fad81784.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212.log.json) | +| CCNet | R-101-D8 | 512x512 | 20000 | 9.5 | 13.64 | V100 | 77.27 | 79.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212-0007b61d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212.log.json) | +| CCNet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 75.96 | 77.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127-c2a15f02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127.log.json) | +| CCNet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 77.87 | 78.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet/ccnet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127-c30da577.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127.log.json) | + +## Citation + +```bibtex +@inproceedings{huang2018ccnet, + title={CCNet: Criss-Cross Attention for Semantic Segmentation}, + author={Huang, Zilong and Wang, Xinggang and Huang, Lichao and Huang, Chang and Wei, Yunchao and Liu, Wenyu}, + booktitle={ICCV}, + year={2019} +} +``` diff --git a/configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0c49e1edc29451586321a1a7c1f5466d99b34f0e --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..f24f5a70edaa6844d3dc9fa2f7dd37f239909697 --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..b358e12c4e643c769064eb147a13fd4ea929aa37 --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..75750768b29a98dee6d99dbe79b996c3035fbc26 --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/ccnet/ccnet_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a29d118f41e293d47d4cacfad56667679079314e --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/ccnet/ccnet_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..fd421a2ed56e0b717a90fa47117ec09276b18797 --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/ccnet/ccnet_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..425dfcf33939bc579ec83b1bfe88c0fa1fcc7486 --- /dev/null +++
b/configs/ccnet/ccnet_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/ccnet/ccnet_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..f6dcb9cf50cd1404952e2627a36d98ddf2e01cb3 --- /dev/null +++ b/configs/ccnet/ccnet_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..84fc51a6b35d15ffd50e2aeca9b59d271237275c --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..a93079406587445759f92e4acb0d51f3615f9903 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..89bfe81825dfab56af222d69ef965c99bf1e1629 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..5f7c954aecbaf2be5e7b6803eb4beedde5046c07 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/ccnet/ccnet_r50-d8_4xb4-160k_ade20k-512x512.py 
b/configs/ccnet/ccnet_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..cee810cd852abfd3d0bcdc07b19b6df9d9b38433 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/ccnet/ccnet_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/ccnet/ccnet_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..76a90d98a83d5518a91a95551425a90d43bd37b5 --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/ccnet/ccnet_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/ccnet/ccnet_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a8aeb85dc6157e77d1db04dd6b8f9c17f2e2ce1e --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/ccnet/ccnet_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/ccnet/ccnet_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..f7fced0a7b83949d5c7ce3a7eb0e17ddb092d7bd --- /dev/null +++ b/configs/ccnet/ccnet_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/ccnet/metafile.yaml b/configs/ccnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62e5694e4770018e5c30402c4072686c39386bcc --- /dev/null +++ b/configs/ccnet/metafile.yaml @@ -0,0 +1,391 @@ +Collections: +- Name: CCNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + README: configs/ccnet/README.md + Frameworks: + - PyTorch +Models: +- Name: ccnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.76 + mIoU(ms+flip): 78.87 + Config: configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch 
Size: 8 + Architecture: + - R-50-D8 + - CCNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517-4123f401.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.35 + mIoU(ms+flip): 78.19 + Config: configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - CCNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540-a3b84ba6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.46 + mIoU(ms+flip): 79.93 + Config: configs/ccnet/ccnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - CCNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125-76d11884.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.94 + mIoU(ms+flip): 78.62 + Config: configs/ccnet/ccnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - CCNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428-4f57c8d0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.03 + mIoU(ms+flip): 80.16 + Config: configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - CCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421-869a3423.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.87 + mIoU(ms+flip): 79.9 + Config: configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - CCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935-ffae8917.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.29 + mIoU(ms+flip): 81.08 + Config: configs/ccnet/ccnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - CCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421-73eed8ca.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.45 + mIoU(ms+flip): 80.66 + Config: configs/ccnet/ccnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - CCNet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502-ad3cd481.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.78 + mIoU(ms+flip): 42.98 + Config: configs/ccnet/ccnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - CCNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848-aa37f61e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.97 + mIoU(ms+flip): 45.13 + Config: configs/ccnet/ccnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - CCNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848-1f4929a3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.08 + mIoU(ms+flip): 43.13 + Config: configs/ccnet/ccnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - CCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435-7c97193b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: 
ADE20K + Metrics: + mIoU: 43.71 + mIoU(ms+flip): 45.04 + Config: configs/ccnet/ccnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - CCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644-e849e007.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.17 + mIoU(ms+flip): 77.51 + Config: configs/ccnet/ccnet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - CCNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212-fad81784.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.27 + mIoU(ms+flip): 79.02 + Config: configs/ccnet/ccnet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - CCNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212-0007b61d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.96 + mIoU(ms+flip): 77.04 + Config: configs/ccnet/ccnet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - CCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127-c2a15f02.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127.log.json + 
Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch +- Name: ccnet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: CCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.87 + mIoU(ms+flip): 78.9 + Config: configs/ccnet/ccnet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - CCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127-c30da577.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127.log.json + Paper: + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.11721 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Framework: PyTorch diff --git a/configs/cgnet/README.md b/configs/cgnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96c9fcf515c5f58b437fa7cab26190aabeada5bf --- /dev/null +++ b/configs/cgnet/README.md @@ -0,0 +1,46 @@ +# CGNet + +> [CGNet: A Light-weight Context Guided Network for Semantic Segmentation](https://arxiv.org/abs/1811.08201) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The demand of applying semantic segmentation model on mobile devices has been increasing rapidly. Current state-of-the-art networks have enormous amount of parameters hence unsuitable for mobile devices, while other small memory footprint models follow the spirit of classification network and ignore the inherent characteristic of semantic segmentation. To tackle this problem, we propose a novel Context Guided Network (CGNet), which is a light-weight and efficient network for semantic segmentation. We first propose the Context Guided (CG) block, which learns the joint feature of both local feature and surrounding context, and further improves the joint feature with the global context. Based on the CG block, we develop CGNet which captures contextual information in all stages of the network and is specially tailored for increasing segmentation accuracy. CGNet is also elaborately designed to reduce the number of parameters and save memory footprint. Under an equivalent number of parameters, the proposed CGNet significantly outperforms existing segmentation networks. Extensive experiments on Cityscapes and CamVid datasets verify the effectiveness of the proposed approach. Specifically, without any post-processing and multi-scale testing, the proposed CGNet achieves 64.8% mean IoU on Cityscapes with less than 0.5 M parameters. The source code for the complete system can be found at [this https URL](https://github.com/wutianyiRosun/CGNet). + + + +
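As a rough sketch of the CG block described above (illustrative only; the repo's real implementation lives in `mmseg/models/backbones/cgnet.py`): a depthwise 3x3 convolution extracts the local feature, a dilated depthwise 3x3 convolution the surrounding context, the two are concatenated into the joint feature, and a squeeze-and-excitation-style gate injects the global context:

```python
import torch
import torch.nn as nn

class CGBlockSketch(nn.Module):
    """Context Guided block sketch: local + surrounding + global context."""

    def __init__(self, channels, dilation=2, reduction=8):
        super().__init__()
        half = channels // 2
        self.reduce = nn.Conv2d(channels, half, 1)               # 1x1 channel reduction
        self.f_loc = nn.Conv2d(half, half, 3, padding=1,         # local feature
                               groups=half, bias=False)
        self.f_sur = nn.Conv2d(half, half, 3, padding=dilation,  # surrounding context
                               dilation=dilation, groups=half, bias=False)
        self.bn_act = nn.Sequential(nn.BatchNorm2d(channels), nn.PReLU(channels))
        self.f_glo = nn.Sequential(                              # global context as a channel gate
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, channels // reduction, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // reduction, channels, 1),
            nn.Sigmoid())

    def forward(self, x):
        x = self.reduce(x)
        joint = torch.cat([self.f_loc(x), self.f_sur(x)], dim=1)  # joint feature
        joint = self.bn_act(joint)
        return joint * self.f_glo(joint)                          # refine with global context
```

Grouped (channel-wise) convolutions like these are one of the tricks the paper uses to keep the full network under 0.5 M parameters.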
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| CGNet | M3N21 | 680x680 | 60000 | 7.5 | 30.51 | V100 | 65.63 | 68.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/cgnet/cgnet_fcn_4xb4-60k_cityscapes-680x680.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes_20201101_110253-4c0b2f2d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes-20201101_110253.log.json) | +| CGNet | M3N21 | 512x1024 | 60000 | 8.3 | 31.14 | V100 | 68.27 | 70.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/cgnet/cgnet_fcn_4xb8-60k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes_20201101_110254-124ea03b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes-20201101_110254.log.json) | + +## Citation + +```bibtex +@article{wu2020cgnet, + title={Cgnet: A light-weight context guided network for semantic segmentation}, + author={Wu, Tianyi and Tang, Sheng and Zhang, Rui and Cao, Juan and Zhang, Yongdong}, + journal={IEEE Transactions on Image Processing}, + volume={30}, + pages={1169--1179}, + year={2020}, + publisher={IEEE} +} +``` diff --git a/configs/cgnet/cgnet_fcn_4xb4-60k_cityscapes-680x680.py b/configs/cgnet/cgnet_fcn_4xb4-60k_cityscapes-680x680.py new file mode 100644 index 0000000000000000000000000000000000000000..6a2c0ed12596f3043b7805cf8197f34576b6f72c --- /dev/null +++ b/configs/cgnet/cgnet_fcn_4xb4-60k_cityscapes-680x680.py @@ -0,0 +1,59 @@ +_base_ = [ + '../_base_/models/cgnet.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py' +] + +# optimizer +optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + by_epoch=False, + begin=0, + end=60000) +] +# runtime settings +total_iters = 60000 +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=total_iters, val_interval=4000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000), + sampler_seed=dict(type='DistSamplerSeedHook')) + +crop_size = (680, 680) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5,
2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size), + dict(type='RandomFlip', prob=0.5), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict( + batch_size=8, num_workers=4, dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, num_workers=4, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/cgnet/cgnet_fcn_4xb8-60k_cityscapes-512x1024.py b/configs/cgnet/cgnet_fcn_4xb8-60k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..8be29de479ab958eece9dc37ba6efbad24b106a2 --- /dev/null +++ b/configs/cgnet/cgnet_fcn_4xb8-60k_cityscapes-512x1024.py @@ -0,0 +1,38 @@ +_base_ = [ + '../_base_/models/cgnet.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py' +] + +# optimizer +optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + by_epoch=False, + begin=0, + end=60000) +] +# runtime settings +total_iters = 60000 +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=total_iters, val_interval=4000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000), + sampler_seed=dict(type='DistSamplerSeedHook')) + +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) + +train_dataloader = dict(batch_size=8) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/cgnet/metafile.yaml b/configs/cgnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..063fc8b3c6bc9d5e6bebdeaac9d334409de80310 --- /dev/null +++ b/configs/cgnet/metafile.yaml @@ -0,0 +1,61 @@ +Collections: +- Name: CGNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: 'CGNet: A Light-weight Context Guided Network for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.08201 + README: configs/cgnet/README.md + Frameworks: + - PyTorch +Models: +- Name: cgnet_fcn_4xb4-60k_cityscapes-680x680 + In Collection: CGNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 65.63 + mIoU(ms+flip): 68.04 + Config: configs/cgnet/cgnet_fcn_4xb4-60k_cityscapes-680x680.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - M3N21 + - CGNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes_20201101_110253-4c0b2f2d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes-20201101_110253.log.json + Paper: + Title: 'CGNet: A Light-weight Context Guided Network for Semantic Segmentation' + URL: https://arxiv.org/abs/1811.08201 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/cgnet.py#L187
+  Framework: PyTorch
+- Name: cgnet_fcn_4xb8-60k_cityscapes-512x1024
+  In Collection: CGNet
+  Results:
+    Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 68.27
+      mIoU(ms+flip): 70.33
+  Config: configs/cgnet/cgnet_fcn_4xb8-60k_cityscapes-512x1024.py
+  Metadata:
+    Training Data: Cityscapes
+    Batch Size: 32
+    Architecture:
+    - M3N21
+    - CGNet
+    Training Resources: 4x V100 GPUS
+    Memory (GB): 8.3
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes_20201101_110254-124ea03b.pth
+  Training log: https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes-20201101_110254.log.json
+  Paper:
+    Title: 'CGNet: A Light-weight Context Guided Network for Semantic Segmentation'
+    URL: https://arxiv.org/abs/1811.08201
+  Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/cgnet.py#L187
+  Framework: PyTorch
diff --git a/configs/convnext/README.md b/configs/convnext/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d78fe6ee1bb06cce56a6e3d13c97db09f4d11120
--- /dev/null
+++ b/configs/convnext/README.md
@@ -0,0 +1,74 @@
+# ConvNeXt
+
+> [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545)
+
+## Introduction
+
+[Official Repo](https://github.com/facebookresearch/ConvNeXt)
+
+[Code Snippet](https://github.com/open-mmlab/mmclassification/blob/v0.20.1/mmcls/models/backbones/convnext.py#L133)
+
+## Abstract
+
+The "Roaring 20s" of visual recognition began with the introduction of Vision Transformers (ViTs), which quickly superseded ConvNets as the state-of-the-art image classification model. A vanilla ViT, on the other hand, faces difficulties when applied to general computer vision tasks such as object detection and semantic segmentation. It is the hierarchical Transformers (e.g., Swin Transformers) that reintroduced several ConvNet priors, making Transformers practically viable as a generic vision backbone and demonstrating remarkable performance on a wide variety of vision tasks. However, the effectiveness of such hybrid approaches is still largely credited to the intrinsic superiority of Transformers, rather than the inherent inductive biases of convolutions. In this work, we reexamine the design spaces and test the limits of what a pure ConvNet can achieve. We gradually "modernize" a standard ResNet toward the design of a vision Transformer, and discover several key components that contribute to the performance difference along the way. The outcome of this exploration is a family of pure ConvNet models dubbed ConvNeXt. Constructed entirely from standard ConvNet modules, ConvNeXts compete favorably with Transformers in terms of accuracy and scalability, achieving 87.8% ImageNet top-1 accuracy and outperforming Swin Transformers on COCO detection and ADE20K segmentation, while maintaining the simplicity and efficiency of standard ConvNets.
+
+ +
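+The "standard ConvNet modules" mentioned above are easy to state in code. Below is a minimal sketch of a single ConvNeXt block in plain PyTorch (the class name and default values are illustrative, not the mmpretrain implementation): a 7x7 depthwise convolution, LayerNorm, a pointwise MLP with 4x expansion and GELU, layer scale, and a residual connection.
+
+```python
+import torch
+import torch.nn as nn
+
+
+class ConvNeXtBlock(nn.Module):
+    """Minimal ConvNeXt block sketch (illustrative, not mmpretrain's)."""
+
+    def __init__(self, dim: int, layer_scale_init_value: float = 1e-6):
+        super().__init__()
+        # 7x7 depthwise convolution mixes spatial information per channel.
+        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)
+        self.norm = nn.LayerNorm(dim)  # applied in channels-last layout
+        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise expansion
+        self.act = nn.GELU()
+        self.pwconv2 = nn.Linear(4 * dim, dim)  # pointwise projection
+        # Layer scale: a learnable per-channel weight on the residual branch.
+        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones(dim))
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        shortcut = x
+        x = self.dwconv(x)
+        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
+        x = self.pwconv2(self.act(self.pwconv1(self.norm(x))))
+        x = self.gamma * x
+        x = x.permute(0, 3, 1, 2)  # back to (N, C, H, W)
+        return shortcut + x
+
+
+feat = torch.randn(1, 96, 56, 56)  # e.g. a ConvNeXt-T stage-1 feature map
+print(ConvNeXtBlock(96)(feat).shape)  # torch.Size([1, 96, 56, 56])
+```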
+
+### Usage
+
+- The ConvNeXt backbone requires [MMClassification](https://github.com/open-mmlab/mmclassification) (now distributed as the `mmpretrain` package) to be installed first, as it provides abundant backbones for downstream tasks; a minimal inference sketch is also given after the results table below.
+
+```shell
+pip install "mmpretrain>=1.0.0rc7"
+```
+
+### Pre-trained Models
+
+Models pre-trained on ImageNet-1k or ImageNet-21k are used for fine-tuning on the downstream tasks.
+
+| Model | Training Data | Params(M) | Flops(G) | Download |
+| :-----------: | :-----------: | :-------: | :------: | :------------: |
+| ConvNeXt-T\*  | ImageNet-1k   | 28.59     | 4.46     | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth) |
+| ConvNeXt-S\*  | ImageNet-1k   | 50.22     | 8.69     | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth) |
+| ConvNeXt-B\*  | ImageNet-1k   | 88.59     | 15.36    | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth) |
+| ConvNeXt-B\*  | ImageNet-21k  | 88.59     | 15.36    | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_in21k_20220301-262fd037.pth) |
+| ConvNeXt-L\*  | ImageNet-21k  | 197.77    | 34.37    | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-large_3rdparty_in21k_20220301-e6e0ea0a.pth) |
+| ConvNeXt-XL\* | ImageNet-21k  | 350.20    | 60.93    | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-xlarge_3rdparty_in21k_20220301-08aa5ddc.pth) |
+
+Models with \* are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt/tree/main/semantic_segmentation#results-and-fine-tuned-models).
+
+## Results and models
+
+### ADE20K
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ------- | ----------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ------ | -------- |
+| UPerNet | ConvNeXt-T  | 512x512   | 160000  | 4.23     | 19.90          | V100   | 46.11 | 46.62         | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/convnext/convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k/upernet_convnext_tiny_fp16_512x512_160k_ade20k_20220227_124553-cad485de.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k/upernet_convnext_tiny_fp16_512x512_160k_ade20k_20220227_124553.log.json) |
+| UPerNet | ConvNeXt-S  | 512x512   | 160000  | 5.16     | 15.18          | V100   | 48.56 | 49.02         | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/convnext/convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k/upernet_convnext_small_fp16_512x512_160k_ade20k_20220227_131208-1b1e394f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k/upernet_convnext_small_fp16_512x512_160k_ade20k_20220227_131208.log.json) | +| UPerNet | ConvNeXt-B | 512x512 | 160000 | 6.33 | 14.41 | V100 | 48.71 | 49.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k/upernet_convnext_base_fp16_512x512_160k_ade20k_20220227_181227-02a24fc6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k/upernet_convnext_base_fp16_512x512_160k_ade20k_20220227_181227.log.json) | +| UPerNet | ConvNeXt-B | 640x640 | 160000 | 8.53 | 10.88 | V100 | 52.13 | 52.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k/upernet_convnext_base_fp16_640x640_160k_ade20k_20220227_182859-9280e39b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k/upernet_convnext_base_fp16_640x640_160k_ade20k_20220227_182859.log.json) | +| UPerNet | ConvNeXt-L | 640x640 | 160000 | 12.08 | 7.69 | V100 | 53.16 | 53.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/convnext/convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k/upernet_convnext_large_fp16_640x640_160k_ade20k_20220226_040532-e57aa54d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k/upernet_convnext_large_fp16_640x640_160k_ade20k_20220226_040532.log.json) | +| UPerNet | ConvNeXt-XL | 640x640 | 160000 | 26.16\* | 6.33 | V100 | 53.58 | 54.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/convnext/convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k/upernet_convnext_xlarge_fp16_640x640_160k_ade20k_20220226_080344-95fc38c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k/upernet_convnext_xlarge_fp16_640x640_160k_ade20k_20220226_080344.log.json) | + +Note: + +- `Mem (GB)` with * is collected when `cudnn_benchmark=True`, and hardware is V100. 
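+
+For quick verification, any config/checkpoint pair from the table can be run through the `mmseg` Python API. A minimal sketch, assuming the ConvNeXt-T files above have been downloaded locally and that `demo.png` is any test image (both paths are placeholders):
+
+```python
+from mmseg.apis import inference_model, init_model, show_result_pyplot
+
+config = 'configs/convnext/convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py'
+checkpoint = 'upernet_convnext_tiny_fp16_512x512_160k_ade20k_20220227_124553-cad485de.pth'
+
+model = init_model(config, checkpoint, device='cuda:0')  # or device='cpu'
+result = inference_model(model, 'demo.png')  # image path or loaded ndarray
+# Blend the prediction over the input image without opening a window.
+vis = show_result_pyplot(model, 'demo.png', result, show=False, opacity=0.5)
+```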
+
+## Citation
+
+```bibtex
+@inproceedings{liu2022convnet,
+  title={A ConvNet for the 2020s},
+  author={Liu, Zhuang and Mao, Hanzi and Wu, Chao-Yuan and Feichtenhofer, Christoph and Darrell, Trevor and Xie, Saining},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2022}
+}
+```
diff --git a/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py b/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..09c2aa6961c57b332069a00fc52fc22c22a186e3
--- /dev/null
+++ b/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py
@@ -0,0 +1,43 @@
+_base_ = [
+    '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+crop_size = (512, 512)
+data_preprocessor = dict(size=crop_size)
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150),
+    auxiliary_head=dict(in_channels=512, num_classes=150),
+    test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)),
+)
+
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=dict(
+        type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05),
+    paramwise_cfg={
+        'decay_rate': 0.9,
+        'decay_type': 'stage_wise',
+        'num_layers': 12
+    },
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')
+
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=1500,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]
+
+# By default, models are trained on 8 GPUs with 2 images per GPU
+train_dataloader = dict(batch_size=2)
+val_dataloader = dict(batch_size=1)
+test_dataloader = val_dataloader
diff --git a/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py b/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..06a86431442cadbc865deb5d2f8a06d6feb82938
--- /dev/null
+++ b/configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py
@@ -0,0 +1,58 @@
+_base_ = [
+    '../_base_/models/upernet_convnext.py',
+    '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_160k.py'
+]
+crop_size = (640, 640)
+data_preprocessor = dict(size=crop_size)
+checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_in21k_20220301-262fd037.pth'  # noqa
+model = dict(
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type='mmpretrain.ConvNeXt',
+        arch='base',
+        out_indices=[0, 1, 2, 3],
+        drop_path_rate=0.4,
+        layer_scale_init_value=1.0,
+        gap_before_final_norm=False,
+        init_cfg=dict(
+            type='Pretrained', checkpoint=checkpoint_file,
+            prefix='backbone.')),
+    decode_head=dict(
+        in_channels=[128, 256, 512, 1024],
+        num_classes=150,
+    ),
+    auxiliary_head=dict(in_channels=512, num_classes=150),
+    test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)),
+)
+
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=dict(
+        type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05),
+    paramwise_cfg={
+        'decay_rate': 0.9,
+        'decay_type': 'stage_wise',
+        'num_layers': 12
+    },
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')
+
+param_scheduler = [
+    
dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + power=1.0, + begin=1500, + end=160000, + eta_min=0.0, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/convnext/convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py b/configs/convnext/convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..2956e86f04253900f2037cfec05f5a756530f932 --- /dev/null +++ b/configs/convnext/convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py @@ -0,0 +1,58 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', + '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) +data_preprocessor = dict(size=crop_size) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-large_3rdparty_in21k_20220301-e6e0ea0a.pth' # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + type='mmpretrain.ConvNeXt', + arch='large', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + in_channels=[192, 384, 768, 1536], + num_classes=150, + ), + auxiliary_head=dict(in_channels=768, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), +) + +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=dict( + type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 12 + }, + constructor='LearningRateDecayOptimizerConstructor', + loss_scale='dynamic') + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + power=1.0, + begin=1500, + end=160000, + eta_min=0.0, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/convnext/convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py b/configs/convnext/convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..dbe45f10e0b4af1a23732a1df99f34d7597a1db8 --- /dev/null +++ b/configs/convnext/convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py @@ -0,0 +1,57 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + type='mmpretrain.ConvNeXt', + arch='small', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.3, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + in_channels=[96, 192, 384, 768], + num_classes=150, + ), + 
auxiliary_head=dict(in_channels=384, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), +) + +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=dict( + type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 12 + }, + constructor='LearningRateDecayOptimizerConstructor', + loss_scale='dynamic') + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + power=1.0, + begin=1500, + end=160000, + eta_min=0.0, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/convnext/convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py b/configs/convnext/convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..d2e545a76d07635a3da76ade6c92590fa0deb0ff --- /dev/null +++ b/configs/convnext/convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py @@ -0,0 +1,57 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + type='mmpretrain.ConvNeXt', + arch='tiny', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + in_channels=[96, 192, 384, 768], + num_classes=150, + ), + auxiliary_head=dict(in_channels=384, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), +) + +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=dict( + type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 6 + }, + constructor='LearningRateDecayOptimizerConstructor', + loss_scale='dynamic') + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + power=1.0, + begin=1500, + end=160000, + eta_min=0.0, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/convnext/convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py b/configs/convnext/convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..dfad73452150ec94d9e5614ec7ad027817a5ea76 --- /dev/null +++ b/configs/convnext/convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py @@ -0,0 +1,58 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', + '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) +data_preprocessor = dict(size=crop_size) +checkpoint_file = 
'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-xlarge_3rdparty_in21k_20220301-08aa5ddc.pth' # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + type='mmpretrain.ConvNeXt', + arch='xlarge', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + in_channels=[256, 512, 1024, 2048], + num_classes=150, + ), + auxiliary_head=dict(in_channels=1024, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), +) + +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00008, betas=(0.9, 0.999), weight_decay=0.05), + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 12 + }, + constructor='LearningRateDecayOptimizerConstructor', + loss_scale='dynamic') + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + power=1.0, + begin=1500, + end=160000, + eta_min=0.0, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/convnext/metafile.yaml b/configs/convnext/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8340a373c2f0b3e21f8408cc7b2dcc05855d55fc --- /dev/null +++ b/configs/convnext/metafile.yaml @@ -0,0 +1,145 @@ +Models: +- Name: convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.11 + mIoU(ms+flip): 46.62 + Config: configs/convnext/convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ConvNeXt-T + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 4.23 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k/upernet_convnext_tiny_fp16_512x512_160k_ade20k_20220227_124553-cad485de.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k/upernet_convnext_tiny_fp16_512x512_160k_ade20k_20220227_124553.log.json + Paper: + Title: A ConvNet for the 2020s + URL: https://arxiv.org/abs/2201.03545 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.20.1/mmcls/models/backbones/convnext.py#L133 + Framework: PyTorch +- Name: convnext-small_upernet_8xb2-amp-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.56 + mIoU(ms+flip): 49.02 + Config: configs/convnext/convnext-small_upernet_8xb2-amp-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ConvNeXt-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 5.16 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k/upernet_convnext_small_fp16_512x512_160k_ade20k_20220227_131208-1b1e394f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k/upernet_convnext_small_fp16_512x512_160k_ade20k_20220227_131208.log.json + Paper: + Title: A ConvNet for the 2020s + URL: 
https://arxiv.org/abs/2201.03545 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.20.1/mmcls/models/backbones/convnext.py#L133 + Framework: PyTorch +- Name: convnext-base_upernet_8xb2-amp-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.71 + mIoU(ms+flip): 49.54 + Config: configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ConvNeXt-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 6.33 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k/upernet_convnext_base_fp16_512x512_160k_ade20k_20220227_181227-02a24fc6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k/upernet_convnext_base_fp16_512x512_160k_ade20k_20220227_181227.log.json + Paper: + Title: A ConvNet for the 2020s + URL: https://arxiv.org/abs/2201.03545 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.20.1/mmcls/models/backbones/convnext.py#L133 + Framework: PyTorch +- Name: convnext-base_upernet_8xb2-amp-160k_ade20k-640x640 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.13 + mIoU(ms+flip): 52.66 + Config: configs/convnext/convnext-base_upernet_8xb2-amp-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ConvNeXt-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 8.53 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k/upernet_convnext_base_fp16_640x640_160k_ade20k_20220227_182859-9280e39b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k/upernet_convnext_base_fp16_640x640_160k_ade20k_20220227_182859.log.json + Paper: + Title: A ConvNet for the 2020s + URL: https://arxiv.org/abs/2201.03545 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.20.1/mmcls/models/backbones/convnext.py#L133 + Framework: PyTorch +- Name: convnext-large_upernet_8xb2-amp-160k_ade20k-640x640 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 53.16 + mIoU(ms+flip): 53.38 + Config: configs/convnext/convnext-large_upernet_8xb2-amp-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ConvNeXt-L + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 12.08 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k/upernet_convnext_large_fp16_640x640_160k_ade20k_20220226_040532-e57aa54d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k/upernet_convnext_large_fp16_640x640_160k_ade20k_20220226_040532.log.json + Paper: + Title: A ConvNet for the 2020s + URL: https://arxiv.org/abs/2201.03545 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.20.1/mmcls/models/backbones/convnext.py#L133 + Framework: PyTorch +- Name: convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 53.58 + mIoU(ms+flip): 54.11 + Config: configs/convnext/convnext-xlarge_upernet_8xb2-amp-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K 
+    Batch Size: 16
+    Architecture:
+    - ConvNeXt-XL
+    - UPerNet
+    Training Resources: 8x V100 GPUS
+    Memory (GB): 26.16
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k/upernet_convnext_xlarge_fp16_640x640_160k_ade20k_20220226_080344-95fc38c2.pth
+  Training log: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k/upernet_convnext_xlarge_fp16_640x640_160k_ade20k_20220226_080344.log.json
+  Paper:
+    Title: A ConvNet for the 2020s
+    URL: https://arxiv.org/abs/2201.03545
+  Code: https://github.com/open-mmlab/mmclassification/blob/v0.20.1/mmcls/models/backbones/convnext.py#L133
+  Framework: PyTorch
diff --git a/configs/danet/README.md b/configs/danet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..90194f3073e0e16933bdac610f490c751110596d
--- /dev/null
+++ b/configs/danet/README.md
@@ -0,0 +1,67 @@
+# DANet
+
+> [Dual Attention Network for Scene Segmentation](https://arxiv.org/abs/1809.02983)
+
+## Introduction
+
+[Official Repo](https://github.com/junfu1115/DANet)
+
+[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76)
+
+## Abstract
+
+In this paper, we address the scene segmentation task by capturing rich contextual dependencies based on the self-attention mechanism. Unlike previous works that capture contexts by multi-scale feature fusion, we propose a Dual Attention Network (DANet) to adaptively integrate local features with their global dependencies. Specifically, we append two types of attention modules on top of traditional dilated FCN, which model the semantic interdependencies in spatial and channel dimensions respectively. The position attention module selectively aggregates the features at each position by a weighted sum of the features at all positions. Similar features would be related to each other regardless of their distances. Meanwhile, the channel attention module selectively emphasizes interdependent channel maps by integrating associated features among all channel maps. We sum the outputs of the two attention modules to further improve feature representation which contributes to more precise segmentation results. We achieve new state-of-the-art segmentation performance on three challenging scene segmentation datasets, i.e., Cityscapes, PASCAL Context and COCO Stuff dataset. In particular, a Mean IoU score of 81.5% on Cityscapes test set is achieved without using coarse data. We make the code and trained model publicly available at [this https URL](https://github.com/junfu1115/DANet).
+
+ +
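+To make the position attention idea above concrete, the sketch below implements it in plain PyTorch: each position is re-estimated as a softmax-weighted sum of the features at all positions and added back through a learned residual weight. Names and the channel-reduction factor are illustrative, not mmseg's `DAHead`.
+
+```python
+import torch
+import torch.nn as nn
+
+
+class PositionAttention(nn.Module):
+    """Minimal position attention sketch (illustrative, not mmseg's)."""
+
+    def __init__(self, in_channels: int):
+        super().__init__()
+        self.query = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1)
+        self.key = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1)
+        self.value = nn.Conv2d(in_channels, in_channels, kernel_size=1)
+        self.gamma = nn.Parameter(torch.zeros(1))  # learned residual weight
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        n, c, h, w = x.shape
+        q = self.query(x).flatten(2).transpose(1, 2)  # (N, HW, C//8)
+        k = self.key(x).flatten(2)  # (N, C//8, HW)
+        attn = torch.softmax(q @ k, dim=-1)  # (N, HW, HW) position affinities
+        v = self.value(x).flatten(2)  # (N, C, HW)
+        # Weighted sum of the features at all positions, for every position.
+        out = (v @ attn.transpose(1, 2)).view(n, c, h, w)
+        return self.gamma * out + x
+
+
+x = torch.randn(2, 64, 32, 32)
+print(PositionAttention(64)(x).shape)  # torch.Size([2, 64, 32, 32])
+```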
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DANet | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.66 | V100 | 78.74 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324-c0dbfa5f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324.log.json) | +| DANet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.99 | V100 | 80.52 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831-c57a7157.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831.log.json) | +| DANet | R-50-D8 | 769x769 | 40000 | 8.8 | 1.56 | V100 | 78.88 | 80.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703-76681c60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703.log.json) | +| DANet | R-101-D8 | 769x769 | 40000 | 12.8 | 1.07 | V100 | 79.88 | 81.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717-dcb7fd4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717.log.json) | +| DANet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 79.34 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029-2bfa2293.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029.log.json) | +| DANet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 80.41 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918-955e6350.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918.log.json) | +| DANet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.27 | 80.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954-495689b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954.log.json) | +| DANet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 80.47 | 82.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918-f3a929e7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DANet | R-50-D8 | 512x512 | 80000 | 11.5 | 21.20 | V100 | 41.66 | 42.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125-edb18e08.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125.log.json) | +| DANet | R-101-D8 | 512x512 | 80000 | 15 | 14.18 | V100 | 43.64 | 45.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126-d0357c73.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126.log.json) | +| DANet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.45 | 43.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340-9cb35dcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340.log.json) | +| DANet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 44.17 | 45.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348-23bf12f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DANet | R-50-D8 | 512x512 | 20000 | 6.5 | 20.94 | V100 | 74.45 | 75.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026-9e9e3ab3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026.log.json) | +| DANet | R-101-D8 | 512x512 | 20000 | 9.9 | 13.76 | V100 | 76.02 | 77.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026-d48d23b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026.log.json) | +| DANet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.37 | 77.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526-426e3a64.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526.log.json) | +| DANet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 76.51 | 77.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet/danet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031-788e232a.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031.log.json) |
+
+## Citation
+
+```bibtex
+@inproceedings{fu2018dual,
+  title={Dual Attention Network for Scene Segmentation},
+  author={Fu, Jun and Liu, Jing and Tian, Haijie and Li, Yong and Bao, Yongjun and Fang, Zhiwei and Lu, Hanqing},
+  booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2019}
+}
+```
diff --git a/configs/danet/danet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/danet/danet_r101-d8_4xb2-40k_cityscapes-512x1024.py
new file mode 100644
index 0000000000000000000000000000000000000000..4602f3318f96f2ca6f7025910aecdd3b0c2270e4
--- /dev/null
+++ b/configs/danet/danet_r101-d8_4xb2-40k_cityscapes-512x1024.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/danet/danet_r101-d8_4xb2-40k_cityscapes-769x769.py
new file mode 100644
index 0000000000000000000000000000000000000000..a08c18ee468e848f93ffdf2ac2f21bcd61150500
--- /dev/null
+++ b/configs/danet/danet_r101-d8_4xb2-40k_cityscapes-769x769.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_4xb2-40k_cityscapes-769x769.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/danet/danet_r101-d8_4xb2-80k_cityscapes-512x1024.py
new file mode 100644
index 0000000000000000000000000000000000000000..98b1c6490b51dc5c7ff062717b7a6c51abac4789
--- /dev/null
+++ b/configs/danet/danet_r101-d8_4xb2-80k_cityscapes-512x1024.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_4xb2-80k_cityscapes-512x1024.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/danet/danet_r101-d8_4xb2-80k_cityscapes-769x769.py
new file mode 100644
index 0000000000000000000000000000000000000000..9affe306cbf5f83cfb4b86af24f412170dc7f01e
--- /dev/null
+++ b/configs/danet/danet_r101-d8_4xb2-80k_cityscapes-769x769.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_4xb2-80k_cityscapes-769x769.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/danet/danet_r101-d8_4xb4-160k_ade20k-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..0079ad65e87a81bf7edc460626df4e6072f4c7a1
--- /dev/null
+++ b/configs/danet/danet_r101-d8_4xb4-160k_ade20k-512x512.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_4xb4-160k_ade20k-512x512.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/danet/danet_r101-d8_4xb4-20k_voc12aug-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..48444514b792a3a475a53cab3e8f0ee2f6f3ab3a
--- /dev/null
+++ b/configs/danet/danet_r101-d8_4xb4-20k_voc12aug-512x512.py
@@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_4xb4-20k_voc12aug-512x512.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/danet/danet_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/danet/danet_r101-d8_4xb4-40k_voc12aug-512x512.py
new file mode 100644
index 
0000000000000000000000000000000000000000..2f2df7a595563f3e489a80247e3ef81f1964715d --- /dev/null +++ b/configs/danet/danet_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/danet/danet_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..dd75bc16b8517c6e990683440a302d540cb78c00 --- /dev/null +++ b/configs/danet/danet_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/danet/danet_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/danet/danet_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..3bc2a7792d503e028480c461e0fb121d7b33efa2 --- /dev/null +++ b/configs/danet/danet_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/danet/danet_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/danet/danet_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..3a01fb9eb54ed5e66af90837f362233dd259d784 --- /dev/null +++ b/configs/danet/danet_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/danet/danet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/danet/danet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..95d5df09cbf9051437eceb366a585f860951d492 --- /dev/null +++ b/configs/danet/danet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/danet/danet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/danet/danet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..42557164da0e41dde9cce8b1e31274610c0c1463 --- /dev/null +++ b/configs/danet/danet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), 
stride=(513, 513))) diff --git a/configs/danet/danet_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/danet/danet_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a8f082d1ea32a245c7d48e20be9eb3309d357f2e --- /dev/null +++ b/configs/danet/danet_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/danet/danet_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/danet/danet_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..fab574fa5b36f7f41543b0620eebd2f99e46cdfd --- /dev/null +++ b/configs/danet/danet_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/danet/danet_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/danet/danet_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..148fa39d7287ede9b37a4d1f666c93a51216c93b --- /dev/null +++ b/configs/danet/danet_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/danet/danet_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/danet/danet_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..efbd908a9210e6c027e47a6bfaf2e7549bbe4b98 --- /dev/null +++ b/configs/danet/danet_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/danet/metafile.yaml b/configs/danet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..daff925baf8efc411a12e29afbe8821c66d038db --- /dev/null +++ b/configs/danet/metafile.yaml @@ -0,0 +1,387 @@ +Collections: +- Name: DANet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + README: configs/danet/README.md + Frameworks: + - PyTorch +Models: +- Name: danet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.74 + Config: 
configs/danet/danet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DANet + Training Resources: 4x V100 GPUS + Memory (GB): 7.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324-c0dbfa5f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.52 + Config: configs/danet/danet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831-c57a7157.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.88 + mIoU(ms+flip): 80.62 + Config: configs/danet/danet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DANet + Training Resources: 4x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703-76681c60.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.88 + mIoU(ms+flip): 81.47 + Config: configs/danet/danet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DANet + Training Resources: 4x V100 GPUS + Memory (GB): 12.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717-dcb7fd4e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: 
https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.34 + Config: configs/danet/danet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029-2bfa2293.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.41 + Config: configs/danet/danet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918-955e6350.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.27 + mIoU(ms+flip): 80.96 + Config: configs/danet/danet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954-495689b4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.47 + mIoU(ms+flip): 82.02 + Config: configs/danet/danet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918-f3a929e7.pth + 
Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.66 + mIoU(ms+flip): 42.9 + Config: configs/danet/danet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DANet + Training Resources: 4x V100 GPUS + Memory (GB): 11.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125-edb18e08.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.64 + mIoU(ms+flip): 45.19 + Config: configs/danet/danet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DANet + Training Resources: 4x V100 GPUS + Memory (GB): 15.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126-d0357c73.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.45 + mIoU(ms+flip): 43.25 + Config: configs/danet/danet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340-9cb35dcd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.17 + mIoU(ms+flip): 45.02 + Config: configs/danet/danet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DANet + 
Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348-23bf12f9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.45 + mIoU(ms+flip): 75.69 + Config: configs/danet/danet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DANet + Training Resources: 4x V100 GPUS + Memory (GB): 6.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026-9e9e3ab3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.02 + mIoU(ms+flip): 77.23 + Config: configs/danet/danet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026-d48d23b2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: danet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.37 + mIoU(ms+flip): 77.29 + Config: configs/danet/danet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526-426e3a64.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch +- Name: 
danet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: DANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.51 + mIoU(ms+flip): 77.32 + Config: configs/danet/danet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031-788e232a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031.log.json + Paper: + Title: Dual Attention Network for Scene Segmentation + URL: https://arxiv.org/abs/1809.02983 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76 + Framework: PyTorch diff --git a/configs/ddrnet/README.md b/configs/ddrnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ccbfcdff359525e825803bf0413d0cef06ed8918 --- /dev/null +++ b/configs/ddrnet/README.md @@ -0,0 +1,46 @@ +# DDRNet + +> [Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes](http://arxiv.org/abs/2101.06085) + +## Introduction + + + +Official Repo + +## Abstract + + + +Semantic segmentation is a key technology for autonomous vehicles to understand the surrounding scenes. The appealing performances of contemporary models usually come at the expense of heavy computations and lengthy inference time, which is intolerable for self-driving. Using light-weight architectures (encoder-decoder or two-pathway) or reasoning on low-resolution images, recent methods realize very fast scene parsing, even running at more than 100 FPS on a single 1080Ti GPU. However, there is still a significant gap in performance between these real-time methods and the models based on dilation backbones. To tackle this problem, we proposed a family of efficient backbones specially designed for real-time semantic segmentation. The proposed deep dual-resolution networks (DDRNets) are composed of two deep branches between which multiple bilateral fusions are performed. Additionally, we design a new contextual information extractor named Deep Aggregation Pyramid Pooling Module (DAPPM) to enlarge effective receptive fields and fuse multi-scale context based on low-resolution feature maps. Our method achieves a new state-of-the-art trade-off between accuracy and speed on both Cityscapes and CamVid dataset. In particular, on a single 2080Ti GPU, DDRNet-23-slim yields 77.4% mIoU at 102 FPS on Cityscapes test set and 74.7% mIoU at 230 FPS on CamVid test set. With widely used test augmentation, our method is superior to most state-of-the-art models and requires much less computation. Codes and trained models are available online. + + + +
+<!-- [IMAGE] -->
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DDRNet | DDRNet23-slim | 1024x1024 | 120000 | 1.70 | 85.85 | A100 | 77.84 | 80.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024_20230426_145312-6a5e5174.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024_20230426_145312.json) | +| DDRNet | DDRNet23 | 1024x1024 | 120000 | 7.26 | 33.41 | A100 | 79.99 | 81.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ddrnet/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024_20230425_162633-81601db0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024_20230425_162633.json) | + +## Notes + +The pretrained weights in config files are converted from [the official repo](https://github.com/ydhongHIT/DDRNet#pretrained-models). 
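+
+As a quick sanity check, these configs can be loaded with the standard mmseg 1.x Python API. A minimal sketch, assuming mmseg 1.x is installed; the checkpoint is the DDRNet23-slim file linked in the table above downloaded locally, and `demo.png` is a placeholder input image:
+
+```python
+from mmseg.apis import inference_model, init_model
+
+config = 'configs/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024.py'
+# Checkpoint downloaded from the "model" link in the Cityscapes table above.
+checkpoint = 'ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024_20230426_145312-6a5e5174.pth'
+
+model = init_model(config, checkpoint, device='cuda:0')
+result = inference_model(model, 'demo.png')  # placeholder image path
+```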
+ +## Citation + +```bibtex +@article{pan2022deep, + title={Deep Dual-Resolution Networks for Real-Time and Accurate Semantic Segmentation of Traffic Scenes}, + author={Pan, Huihui and Hong, Yuanduo and Sun, Weichao and Jia, Yisong}, + journal={IEEE Transactions on Intelligent Transportation Systems}, + year={2022}, + publisher={IEEE} +} +``` diff --git a/configs/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024.py b/configs/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..65b0ead547a4a3ee0cdceb848f40ad3598b0e721 --- /dev/null +++ b/configs/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024.py @@ -0,0 +1,93 @@ +_base_ = [ + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', +] + +# The class_weight is borrowed from https://github.com/openseg-group/OCNet.pytorch/issues/14 # noqa +# Licensed under the MIT License +class_weight = [ + 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786, + 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 1.0865, 1.1529, + 1.0507 +] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/pretrain/ddrnet23s-in1kpre_3rdparty-1ccac5b1.pth' # noqa +crop_size = (1024, 1024) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='DDRNet', + in_channels=3, + channels=32, + ppm_channels=128, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=dict(type='Pretrained', checkpoint=checkpoint)), + decode_head=dict( + type='DDRHead', + in_channels=32 * 4, + channels=64, + dropout_ratio=0., + num_classes=19, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +train_dataloader = dict(batch_size=6, num_workers=4) + +iters = 120000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for 120k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=iters, val_interval=iters // 10) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', by_epoch=False, interval=iters // 10), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024.py b/configs/ddrnet/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..444efe2b883611792d2f33a2e5ddaea112524499 --- /dev/null +++ 
b/configs/ddrnet/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024.py @@ -0,0 +1,93 @@ +_base_ = [ + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', +] + +# The class_weight is borrowed from https://github.com/openseg-group/OCNet.pytorch/issues/14 # noqa +# Licensed under the MIT License +class_weight = [ + 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786, + 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 1.0865, 1.1529, + 1.0507 +] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/pretrain/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa +crop_size = (1024, 1024) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='DDRNet', + in_channels=3, + channels=64, + ppm_channels=128, + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=dict(type='Pretrained', checkpoint=checkpoint)), + decode_head=dict( + type='DDRHead', + in_channels=64 * 4, + channels=128, + dropout_ratio=0., + num_classes=19, + align_corners=False, + norm_cfg=norm_cfg, + loss_decode=[ + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=1.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=0.4), + ]), + + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +train_dataloader = dict(batch_size=6, num_workers=4) + +iters = 120000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for 120k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=iters, val_interval=iters // 10) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', by_epoch=False, interval=iters // 10), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/ddrnet/metafile.yaml b/configs/ddrnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07074702c222de649fc7ddf90cfac6071faa7450 --- /dev/null +++ b/configs/ddrnet/metafile.yaml @@ -0,0 +1,64 @@ +Collections: +- Name: DDRNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation + of Road Scenes + URL: http://arxiv.org/abs/2101.06085 + README: configs/ddrnet/README.md + Frameworks: + - PyTorch +Models: +- Name: ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024 + In Collection: DDRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.84 + mIoU(ms+flip): 80.15 + Config: configs/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 12 + 
Architecture: + - DDRNet23-slim + - DDRNet + Training Resources: 2x A100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024_20230426_145312-6a5e5174.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024/ddrnet_23-slim_in1k-pre_2xb6-120k_cityscapes-1024x1024_20230426_145312.json + Paper: + Title: Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation + of Road Scenes + URL: http://arxiv.org/abs/2101.06085 + Code: '' + Framework: PyTorch +- Name: ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024 + In Collection: DDRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.99 + mIoU(ms+flip): 81.71 + Config: configs/ddrnet/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 12 + Architecture: + - DDRNet23 + - DDRNet + Training Resources: 2x A100 GPUS + Memory (GB): 7.26 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024_20230425_162633-81601db0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024/ddrnet_23_in1k-pre_2xb6-120k_cityscapes-1024x1024_20230425_162633.json + Paper: + Title: Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation + of Road Scenes + URL: http://arxiv.org/abs/2101.06085 + Code: '' + Framework: PyTorch diff --git a/configs/deeplabv3/README.md b/configs/deeplabv3/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df50b7f90a0067b15d08043e4c59cb604e0e57da --- /dev/null +++ b/configs/deeplabv3/README.md @@ -0,0 +1,118 @@ +# DeepLabV3 + +> [Rethinking atrous convolution for semantic image segmentation](https://arxiv.org/abs/1706.05587) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +In this work, we revisit atrous convolution, a powerful tool to explicitly adjust filter's field-of-view as well as control the resolution of feature responses computed by Deep Convolutional Neural Networks, in the application of semantic image segmentation. To handle the problem of segmenting objects at multiple scales, we design modules which employ atrous convolution in cascade or in parallel to capture multi-scale context by adopting multiple atrous rates. Furthermore, we propose to augment our previously proposed Atrous Spatial Pyramid Pooling module, which probes convolutional features at multiple scales, with image-level features encoding global context and further boost performance. We also elaborate on implementation details and share our experience on training our system. The proposed \`DeepLabv3' system significantly improves over our previous DeepLab versions without DenseCRF post-processing and attains comparable performance with other state-of-art models on the PASCAL VOC 2012 semantic image segmentation benchmark. + + + +
+<!-- [IMAGE] DEEPLABv3_ResNet-D8 model structure -->
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | --------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3 | R-50-D8 | 512x1024 | 40000 | 6.1 | 2.57 | V100 | 79.09 | 80.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 40000 | 9.6 | 1.92 | V100 | 77.12 | 79.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json) | +| DeepLabV3 | R-50-D8 | 769x769 | 40000 | 6.9 | 1.11 | V100 | 78.58 | 79.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json) | +| DeepLabV3 | R-101-D8 | 769x769 | 40000 | 10.9 | 0.83 | V100 | 79.27 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json) | +| DeepLabV3 | R-18-D8 | 512x1024 | 80000 | 1.7 | 13.78 | V100 | 76.70 | 78.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes-20201225_021506.log.json) | +| DeepLabV3 | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 79.32 | 80.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 80.20 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json) | +| DeepLabV3 (FP16) | R-101-D8 | 512x1024 | 80000 | 5.75 | 3.86 | V100 | 80.48 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-774d9cec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json) | +| DeepLabV3 | R-18-D8 | 769x769 | 80000 | 1.9 | 5.55 | V100 | 76.60 | 78.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes-20201225_021506.log.json) | +| DeepLabV3 | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.89 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json) | +| DeepLabV3 | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.67 | 80.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json) | +| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 40000 | 4.7 | 6.96 | V100 | 76.71 | 78.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-67b0c992.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | +| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 80000 | - | - | V100 | 78.36 | 79.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | +| DeepLabV3 | R-18b-D8 | 512x1024 | 80000 | 1.6 | 13.93 | V100 | 76.26 | 77.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json) | +| DeepLabV3 | R-50b-D8 | 512x1024 | 80000 | 6.0 | 2.74 | V100 | 79.63 | 80.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json) | +| DeepLabV3 | R-101b-D8 | 512x1024 | 80000 | 9.5 | 1.81 | V100 | 80.01 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json) | +| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | V100 | 75.63 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) | +| DeepLabV3 | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.16 | V100 | 78.80 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json) | +| DeepLabV3 | R-101b-D8 | 769x769 | 80000 | 10.7 | 0.82 | V100 | 79.41 | 80.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 8.9 | 14.76 | V100 | 42.42 | 43.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 12.4 | 10.14 | V100 | 44.08 | 45.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.66 | 44.09 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | V100 | 45.00 | 46.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 6.1 | 13.88 | V100 | 76.17 | 77.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | V100 | 78.70 | 79.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | V100 | 77.68 | 78.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | V100 | 77.92 | 79.18 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | 9.2 | 7.09 | V100 | 46.55 | 47.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | V100 | 46.42 | 47.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | - | - | V100 | 52.61 | 54.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59_20210416_110332-cb08ea46.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59-20210416_110332.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | V100 | 52.46 | 54.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59_20210416_113002-26303993.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59-20210416_113002.log.json) | + +### COCO-Stuff 10k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 9.6 | 10.8 | V100 | 34.66 | 36.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-b35f789d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 13.2 | 8.7 | V100 | 37.30 | 38.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-c49752cb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | V100 | 35.73 | 37.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-dc76f3ff.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | V100 | 37.81 | 38.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-636cb433.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json) | + +### COCO-Stuff 164k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 9.6 | 10.8 | V100 | 39.38 | 40.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 13.2 | 8.7 | V100 | 40.87 | 41.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252-13600dc2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | V100 | 41.09 | 41.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016-49f2812b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | V100 | 41.82 | 42.49 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402-f035acfd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 320000 | - | - | V100 | 41.37 | 42.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403-51b21115.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 320000 | - | - | V100 | 42.61 | 43.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402-3cbca14d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402.log.json) | + +Note: + +- `D-8` here corresponds to the output stride 8 setting used in the DeepLab series. +- `FP16` means Mixed Precision (FP16) is adopted in training (see the sketch below).
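+
+The `FP16` entry above corresponds to `deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py`: mixed precision is enabled by swapping the default optimizer wrapper for MMEngine's `AmpOptimWrapper`. A sketch of the relevant override, mirroring that config:
+
+```python
+_base_ = './deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py'
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,           # discard the optim_wrapper inherited from _base_
+    type='AmpOptimWrapper',  # MMEngine wrapper that runs training in FP16
+    optimizer=optimizer,
+    loss_scale=512.)         # static loss scale for AMP
+```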
+ +## Citation + +```bibtex +@article{chen2017rethinking, + title={Rethinking atrous convolution for semantic image segmentation}, + author={Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig}, + journal={arXiv preprint arXiv:1706.05587}, + year={2017} +} +``` diff --git a/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..b9f3c178df033da62d2ce51b6a27964240ca516a --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..da3a88f998902e04329cc739b87de0c06f28fde9 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..d01803ce1f033693794c264dc735375e336aa231 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..7964b514463e41babb5a2c7bbeac9e84e5c7f068 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..1d1a6201a0bc9e7e7f1eb538e20680cbebcc4237 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..78205468d799f3f0ddfe126d992d3538e7e0630a --- /dev/null +++ 
b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..84174166ce4420776ada7b5e48f7322f1b456fd9 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = './deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py' +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=optimizer, + loss_scale=512.) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..0ed6eee83348a60b70caf7aff2f095b844118875 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..add008345f70844254482c66ef257c37fe59b855 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..349cc88f0a7f137fa97c5504c0ae2943a3198b3f --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1c527e0c5367b61b637b2fdd26d6997160e076d7 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ea27bedc04aeeefaf1fb37d04a05cb038a9db6fd --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512.py 
b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a43a786e0e550e338793f4fd74d476abd8931d36 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..8879d5394f09d2c13ce403c70faa4a5c1bcaa6b6 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-40k_pascal-context-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..54671d4dc622d5b5f8ed6722777b9156d357ba9e --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-40k_pascal-context-59-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1b2635d1c28cc488b92eabb111449977c5b33101 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b7bb0b64488aee8580a0e5808af5d64a22483062 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..2d4f6f747b24efd0bebb9240711a7788adfc54e4 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..9d64ca29fecd47b381042f41067af833d4a2043f --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-80k_pascal-context-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480.py b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..54671d4dc622d5b5f8ed6722777b9156d357ba9e --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb4-80k_pascal-context-59-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..708932da859cd163b4637b63960278989e84c764 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..a0f634d0819e1d86cb8d99ae0aa1412a1154a0af --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..bc353bb564a488b3a5517ca0003661a1bbe6874c --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..021c98c3762fac98ee7d6513c1eea5ce7e5124e1 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..c747cd74a2c0ef85cfd60c276a518872d75de501 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py
b/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..6506abf69628b6e48a9b9bd6830d1744af97f966 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..4a2a971eb94f2cc07081d04d9c8e84d0311b6205 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..a52f29e4ce6a934aa09557ab43a1da5752eac1ad --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..1bd29b96e1ef10e2e714c4d046a7ccebbbd7637b --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..27f0fc4cae202eb6e1a45f2ee6f128d717ee5fa4 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py 
b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..04e15f0f0fc36b9616765371edd9e61aaf3bfa1b --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ba76a594193147a03ed1035d0280595ce7d88fdf --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..d0559c8bfc6fd0f106faa273b4ad45725dd0f1b3 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff10k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c5458d908bdd27504793e74bb421e6b2cfbfc308 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c3b4f94f8c62682007db3160b9b3cb0f25c66156 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_320k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + 
auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..40dbffad4571a6b944dd45d35db11f4b433d36a5 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff10k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_pascal-context-480x480.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..3c4e753a2d04dcc329e54520f11fa1574a772d47 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_pascal-context-59-480x480.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..e3b6c36909b58e24d5079d4bc95b545575d1c008 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..8333cc670159e93a2fb9e05b34e6bb37d383ac26 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py new file 
mode 100644 index 0000000000000000000000000000000000000000..0bcdab51498f6c95ee778197d2bca750dce05025 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..519df5a23b4f437580be00d9ce658e53f96ec780 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_pascal-context-480x480.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..ba8c7ded96ac05df978eebeabddaeca996baa974 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_pascal-context-59-480x480.py b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..d34bd89339b13bc4d6dc68ba06495716fa077de5 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..818519f263186a0b504d65af918bd459e5f34401 --- /dev/null +++ 
b/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..07a234be753b8b7a16567465bb52b95cdff78a54 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/deeplabv3/metafile.yaml b/configs/deeplabv3/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..650f7d695db64ae5429ee0516b078572119a81a4 --- /dev/null +++ b/configs/deeplabv3/metafile.yaml @@ -0,0 +1,985 @@ +Collections: +- Name: DeepLabV3 + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + - COCO-Stuff 10k + - COCO-Stuff 164k + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + README: configs/deeplabv3/README.md + Frameworks: + - PyTorch +Models: +- Name: deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.09 + mIoU(ms+flip): 80.45 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.12 + mIoU(ms+flip): 79.61 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769 + In 
Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.58 + mIoU(ms+flip): 79.89 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.27 + mIoU(ms+flip): 80.11 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.7 + mIoU(ms+flip): 78.27 + Config: configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes-20201225_021506.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.32 + mIoU(ms+flip): 80.57 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.2 + mIoU(ms+flip): 81.21 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.48 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + - (FP16) + Training Resources: 4x V100 GPUS + Memory (GB): 5.75 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-774d9cec.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.6 + mIoU(ms+flip): 78.26 + Config: configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes-20201225_021506.log.json + Paper: + Title: Rethinking atrous convolution for semantic image 
segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.89 + mIoU(ms+flip): 81.06 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.67 + mIoU(ms+flip): 80.81 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.71 + mIoU(ms+flip): 78.63 + Config: configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16-MG124 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 4.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-67b0c992.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.36 + mIoU(ms+flip): 79.84 + Config: 
configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16-MG124 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.26 + mIoU(ms+flip): 77.88 + Config: configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.63 + mIoU(ms+flip): 80.98 + Config: configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.01 + mIoU(ms+flip): 81.21 + Config: configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.5 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.63 + mIoU(ms+flip): 77.51 + Config: configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.8 + mIoU(ms+flip): 80.27 + Config: configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.41 + mIoU(ms+flip): 80.73 + Config: configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 10.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json + Paper: + Title: Rethinking atrous convolution for semantic 
image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.42 + mIoU(ms+flip): 43.28 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.08 + mIoU(ms+flip): 45.19 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 12.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.66 + mIoU(ms+flip): 44.09 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.0 + mIoU(ms+flip): 46.66 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + 
Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.17 + mIoU(ms+flip): 77.42 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.7 + mIoU(ms+flip): 79.95 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.68 + mIoU(ms+flip): 78.78 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json + Paper: + Title: Rethinking atrous convolution 
for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.92 + mIoU(ms+flip): 79.18 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.55 + mIoU(ms+flip): 47.81 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.42 + mIoU(ms+flip): 47.53 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.61 + 
mIoU(ms+flip): 54.28 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59_20210416_110332-cb08ea46.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59-20210416_110332.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.46 + mIoU(ms+flip): 54.09 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59_20210416_113002-26303993.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59-20210416_113002.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 34.66 + mIoU(ms+flip): 36.08 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-b35f789d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.3 + mIoU(ms+flip): 38.42 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-c49752cb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 35.73 + mIoU(ms+flip): 37.09 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-dc76f3ff.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.81 + mIoU(ms+flip): 38.8 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-636cb433.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 39.38 + mIoU(ms+flip): 40.03 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 40.87 + mIoU(ms+flip): 41.5 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252-13600dc2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.09 + mIoU(ms+flip): 41.69 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016-49f2812b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.82 + mIoU(ms+flip): 42.49 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402-f035acfd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.37 + mIoU(ms+flip): 42.22 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403-51b21115.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 42.61 + mIoU(ms+flip): 43.42 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402-3cbca14d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch diff --git a/configs/deeplabv3plus/README.md b/configs/deeplabv3plus/README.md new file mode 100644 index 0000000000000000000000000000000000000000..04d01fa5124c5f7dd8d7fdb94245247ce5f398e0 --- /dev/null +++ b/configs/deeplabv3plus/README.md @@ -0,0 +1,138 @@ +# DeepLabV3+ + +> [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Spatial pyramid pooling module or encode-decoder structure are used in deep neural networks for semantic segmentation task. The former networks are able to encode multi-scale contextual information by probing the incoming features with filters or pooling operations at multiple rates and multiple effective fields-of-view, while the latter networks can capture sharper object boundaries by gradually recovering the spatial information. In this work, we propose to combine the advantages from both methods. Specifically, our proposed model, DeepLabv3+, extends DeepLabv3 by adding a simple yet effective decoder module to refine the segmentation results especially along object boundaries. 
We further explore the Xception model and apply the depthwise separable convolution to both Atrous Spatial Pyramid Pooling and decoder modules, resulting in a faster and stronger encoder-decoder network. We demonstrate the effectiveness of the proposed model on PASCAL VOC 2012 and Cityscapes datasets, achieving the test set performance of 89.0% and 82.1% without any post-processing. Our paper is accompanied with a publicly available reference implementation of the proposed models in Tensorflow at [this https URL](https://github.com/tensorflow/models/tree/master/research/deeplab). + + + +
+ +
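+The decoder design described above rests on one primitive worth seeing concretely: the depthwise separable (and optionally atrous) convolution, which factorizes a 3x3 convolution into a per-channel depthwise convolution followed by a 1x1 pointwise convolution. A minimal PyTorch sketch of the idea (illustration only; the class name is hypothetical and this is not the implementation used by this repo):
+
+```python
+import torch
+import torch.nn as nn
+
+
+class DepthwiseSeparableConv(nn.Module):
+    """3x3 depthwise conv (atrous via `dilation`) + 1x1 pointwise conv."""
+
+    def __init__(self, in_channels, out_channels, dilation=1):
+        super().__init__()
+        # groups=in_channels: each channel is filtered independently
+        self.depthwise = nn.Conv2d(
+            in_channels, in_channels, kernel_size=3, padding=dilation,
+            dilation=dilation, groups=in_channels, bias=False)
+        # the 1x1 conv then mixes information across channels
+        self.pointwise = nn.Conv2d(
+            in_channels, out_channels, kernel_size=1, bias=False)
+
+    def forward(self, x):
+        return self.pointwise(self.depthwise(x))
+
+
+x = torch.randn(1, 256, 64, 64)
+# atrous rate 12 (as in ASPP); padding=dilation keeps the spatial size
+print(DepthwiseSeparableConv(256, 256, dilation=12)(x).shape)  # (1, 256, 64, 64)
+```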
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------------- | --------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3+ | R-50-D8 | 512x1024 | 40000 | 7.5 | 3.94 | V100 | 79.61 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 40000 | 11 | 2.60 | V100 | 80.21 | 81.82 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json) | +| DeepLabV3+ | R-50-D8 | 769x769 | 40000 | 8.5 | 1.72 | V100 | 78.97 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json) | +| DeepLabV3+ | R-101-D8 | 769x769 | 40000 | 12.5 | 1.15 | V100 | 79.46 | 80.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json) | +| DeepLabV3+ | R-18-D8 | 512x1024 | 80000 | 2.2 | 14.27 | V100 | 76.89 | 78.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes-20201226_080942.log.json) | +| DeepLabV3+ | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 80.09 | 81.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 80.97 | 82.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json) | +| DeepLabV3+ (FP16) | R-101-D8 | 512x1024 | 80000 | 6.35 | 7.87 | V100 | 80.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json) | +| DeepLabV3+ | R-18-D8 | 769x769 | 80000 | 2.5 | 5.74 | V100 | 76.26 | 77.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes-20201226_083346.log.json) | +| DeepLabV3+ | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.83 | 81.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json) | +| DeepLabV3+ | R-101-D8 | 769x769 | 80000 | - | - | V100 | 80.65 | 81.47 | 
[config\[1\]](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720-dfcc0b68.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 40000 | 5.8 | 7.48 | V100 | 79.09 | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 80000 | 9.9 | - | V100 | 79.90 | 81.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-18b-D8 | 512x1024 | 80000 | 2.1 | 14.95 | V100 | 75.87 | 77.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes-20201226_090828.log.json) | +| DeepLabV3+ | R-50b-D8 | 512x1024 | 80000 | 7.4 | 3.94 | V100 | 80.28 | 81.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes-20201225_213645.log.json) | +| DeepLabV3+ | R-101b-D8 | 512x1024 | 80000 | 10.9 | 2.60 | V100 | 80.16 | 81.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes-20201226_190843.log.json) | +| DeepLabV3+ | R-18b-D8 | 769x769 | 80000 | 2.4 | 5.96 | V100 | 76.36 | 78.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes-20201226_151312.log.json) | +| DeepLabV3+ | R-50b-D8 | 769x769 | 80000 | 8.4 | 1.72 | V100 | 79.41 | 80.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes-20201225_224655.log.json) | +| DeepLabV3+ | R-101b-D8 | 769x769 | 80000 | 12.3 | 1.10 | V100 | 79.88 | 81.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes-20201226_205041.log.json) | + +\[1\] The training of this model is sensitive to the random seed; the seed used to train it is 1111. 
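+
+To try one of the Cityscapes checkpoints above, here is a minimal sketch using the `mmseg` Python API (assuming `mmsegmentation` v1.x is installed and the script runs from the repository root; `demo.png` stands for any test image):
+
+```python
+from mmseg.apis import inference_model, init_model, show_result_pyplot
+
+config = 'configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py'
+# init_model also accepts the checkpoint URL from the table directly
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth'
+
+model = init_model(config, checkpoint, device='cuda:0')  # or device='cpu'
+result = inference_model(model, 'demo.png')              # single-image inference
+vis = show_result_pyplot(model, 'demo.png', result, show=False, opacity=0.5)
+```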
+ +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 10.6 | 21.01 | V100 | 42.72 | 43.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 14.1 | 14.16 | V100 | 44.60 | 46.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 160000 | - | - | V100 | 43.95 | 44.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 160000 | - | - | V100 | 45.47 | 46.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-50-D8 | 512x512 | 20000 | 7.6 | 21 | V100 | 75.93 | 77.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | V100 | 77.22 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.81 | 77.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | V100 | 78.62 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 
DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | 9.09 | V100 | 47.30 | 48.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) | +| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | V100 | 47.23 | 48.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | - | V100 | 52.86 | 54.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59_20210416_111233-ed937f15.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59-20210416_111233.log.json) | +| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | V100 | 53.2 | 54.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59_20210416_111127-7ca0331d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59-20210416_111127.log.json) | + +### LoveDA + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | 
-------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.93 | 25.57 | V100 | 50.28 | 50.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800-ce0fa0ca.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.37 | 6.00 | V100 | 50.99 | 50.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442-f0720392.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.84 | 4.33 | V100 | 51.47 | 51.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759-4c1f297e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759.log.json) | + +### Potsdam + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.91 | 81.68 | V100 | 77.09 | 78.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601-75fd5bc3.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.44 | V100 | 78.33 | 79.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 17.56 | V100 | 78.7 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508.log.json) | + +### Vaihingen + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.91 | 72.79 | V100 | 72.50 | 74.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.91 | V100 | 73.97 | 75.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 18.59 | V100 | 73.06 | 74.14 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) | + +### iSAID + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-18-D8 | 896x896 | 80000 | 6.19 | 24.81 | V100 | 61.35 | 62.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526-7059991d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | +| DeepLabV3+ | R-50-D8 | 896x896 | 80000 | 21.45 | 8.42 | V100 | 67.06 | 68.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526-598be439.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | + +### Mapillary Vistas v1.2 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-50-D8 | 1280x1280 | 300000 | 24.04 | 17.92 | A100 | 47.35 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280_20230301_110504-655f8e43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280_20230301_110504.json) | + +Note: + +- `D-8`/`D-16` correspond to the output stride 8/16 setting of the DeepLab series. +- `MG-124` stands for multi-grid dilation in the last stage of ResNet. +- `FP16` means mixed-precision (FP16) training was adopted. +- `896x896` is the crop size used for the iSAID dataset, following the implementation of [PointFlow: Flowing Semantics Through Points for Aerial Image Segmentation](https://arxiv.org/pdf/2103.06564.pdf) + +## Citation + +```bibtex +@inproceedings{deeplabv3plus2018, + title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation}, + author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam}, + booktitle={ECCV}, + year={2018} +} +``` diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..71c9118e1d5bd2e8e61112c42921fbe7bd38baf4 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..7d1ccf0b30acb4633c7892e5377bf3c38064e498 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..884b526d4843d215b1a838ba4c1bf2b0586c0cfc --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 
0000000000000000000000000000000000000000..debb0255fcc49e5b3284661ad98d660cfa9a8ad0 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..bc9334e67d5b72ca34d43567a3764d92f38cf0ae --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..4af9aa26825826c46854fbe71285897f6f592925 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..9c9883dc4f51aafc15136144b5c284fd31833ec8 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = './deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py' +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=optimizer, + loss_scale=512.) 
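The `strides`/`dilations` pairs in the backbone dicts above are what realize the `D-8`/`D-16` output-stride naming used in the README tables: a ResNet stage that keeps stride 1 preserves resolution and compensates with dilation instead. A small hypothetical helper (not from the codebase; it assumes the `_base_` `deeplabv3plus_r50-d8` model sets `strides=(1, 2, 1, 1)` and `dilations=(1, 1, 2, 4)`) to check the arithmetic:

```python
from math import prod


def resnet_output_stride(stage_strides, stem_stride=4):
    """Overall output stride: the ResNet stem (7x7 conv + max-pool) downsamples
    by 4, and each stage multiplies in its own stride."""
    return stem_stride * prod(stage_strides)


print(resnet_output_stride((1, 2, 1, 1)))  # 8  -> 'D-8'  (assumed base -d8 setting)
print(resnet_output_stride((1, 2, 2, 1)))  # 16 -> 'D-16' (the d16-mg124 configs above)
```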
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c38a802e1052634c60baf77e0eb8e009eb87075e --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..97bb827722a06b76a395215e8e0423c9354e73ee --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-480x480.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..e4b401162de4bb91e9197179f6ebd71fa3b1e7da --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-40k_pascal-context-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..eeefae49275725c98950a188a83818342277cd1d --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-40k_pascal-context-59-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..0755c53aaef6467f44bfe61d06e9de9ce8ab1ed0 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..844ac9613b431a918277fc6f363c7333573a871e --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..87c6da9d6a3be3cd1f4ec2d04abca666ab40c7c5 --- /dev/null +++ 
b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-480x480.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..115b1c90586ab92b063c019e1a501b69f1e66eef --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_pascal-context-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..9aaa65382274652ee62c245f9b28a0d6014cd41c --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_pascal-context-59-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5063b1332c243c6395f3d511a3e125d5b7175cb4 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b99c2c7ee0f51376d19ee49df7c2089cf58af1f8 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..d1bcb0914428485769b401d7310c2636caac77b9 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..c78fc1e209a9915f2f713858d59e77d1fe0267ed --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git 
a/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..5f54913e9459d46d24ea452eeff956b5460f0d0a --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..1b361d6d7af4a959b2138ed2860800737ca13561 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896.py b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896.py new file mode 100644 index 0000000000000000000000000000000000000000..3a1a753b26defa81e161223747ab0f28fe4aaca7 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..01bbf9bca98d8fa6e453e922e79400260fb3f8b3 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..134f2cfc2a560e3f21345170ea1c1e28cf580dfb --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512.py new file mode 
100644 index 0000000000000000000000000000000000000000..2194838510e53de964d4afd0c706870611b793d8 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..ea86219692e3e63cef05f53962031bb608d26d0f --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..34ee7ed3df5007cf5cd9d8d9955eac42d44a470a --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py new file mode 100644 index 0000000000000000000000000000000000000000..133c45ae1deea20935755d6c37a46b2643331c3a --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py @@ -0,0 +1,58 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/mapillary_v1_65.py', + '../_base_/default_runtime.py', +] + +crop_size = (1280, 1280) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict(depth=50), + decode_head=dict(num_classes=65), + auxiliary_head=dict(num_classes=65)) + +iters = 300000 +# optimizer +optimizer = dict( + type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.0001) +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + clip_grad=dict(max_norm=0.01, norm_type=2), + paramwise_cfg=dict( + custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for 300k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=iters, val_interval=iters // 10) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + 
checkpoint=dict( + type='CheckpointHook', by_epoch=False, interval=iters // 10), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +train_dataloader = dict(batch_size=2) + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (4 GPUs) x (2 samples per GPU). +auto_scale_lr = dict(enable=False, base_batch_size=8) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..32f994d9b383460d0d5ef2961d6486d87959fc44 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..8cdf534ef43aff12adfffcaff32ca05327806663 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0d249b065a6ee79e363604472f8ca84c56b3c2f6 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..863a46e1b33eef38683b9adf97ea90915c6263b8 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git 
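The `auto_scale_lr` comment above describes MMEngine's linear LR scaling rule: when enabled, the configured learning rate is multiplied by the ratio of the actual total batch size to `base_batch_size` (4 GPUs x 2 samples per GPU = 8 here). A hedged sketch of the arithmetic:

```python
# Linear-scaling rule behind `auto_scale_lr` (a sketch of the arithmetic,
# not MMEngine's implementation).
def scale_lr(base_lr: float, num_gpus: int, samples_per_gpu: int,
             base_batch_size: int = 8) -> float:
    return base_lr * (num_gpus * samples_per_gpu) / base_batch_size

print(scale_lr(0.0001, num_gpus=8, samples_per_gpu=2))  # 0.0002 on 8 GPUs
```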
a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9a899fb830c007606520cd16859455938630c148 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1876d0ccf4d8f4c0f38a1c0818834a0cd52be644 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_pascal-context-480x480.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..95b56d03ee11787e4a4848d157a7fee379b474c8 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_pascal-context-59-480x480.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..459c62dc5029c324ebddb971c79f5e38aca67682 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py 
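The Pascal-Context configs above test with `mode='slide'`: a 480x480 window is slid across the image every 320 px, the final row/column of windows is clamped to the image border, and overlapping logits are averaged. A hypothetical helper (an illustration, not mmseg's implementation) that enumerates the crop origins:

```python
# Enumerate top-left corners of 480x480 slide-inference crops (illustrative).
def slide_windows(h, w, crop=480, stride=320):
    ys = list(range(0, max(h - crop, 0) + 1, stride))
    xs = list(range(0, max(w - crop, 0) + 1, stride))
    if ys[-1] + crop < h:  # clamp a final window to the bottom border
        ys.append(h - crop)
    if xs[-1] + crop < w:  # clamp a final window to the right border
        xs.append(w - crop)
    return [(y, x) for y in ys for x in xs]

print(len(slide_windows(520, 520)))  # 4 overlapping crops for a 520x520 image
```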
b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..0d61b509dc9d324dee7c472792d2d2c91b9a683f --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..6f872cacf7025b04bdfc3e86568a077a19f0d3be --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py new file mode 100644 index 0000000000000000000000000000000000000000..7edec14bf848145aa64d601278df592934abe47a --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/isaid.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (896, 896) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=16), + auxiliary_head=dict(num_classes=16)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..64e262cf88ee4250bcd46de9cd3a75c6189da904 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/loveda.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=7), + auxiliary_head=dict(num_classes=7)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_pascal-context-480x480.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..5ff7fcb41e313744e6a0d0db4780045689089295 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + 
decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_pascal-context-59-480x480.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..84aaf25b529b8d0aebec6903d6a00fa349c5459e --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5810d6bece129b9fb089e18b5323c50a1bffc2b8 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/potsdam.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a7f4b2d27a5d45aa2d88698933e7db44997b01c8 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/vaihingen.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..3e2813534d7c9c423efebc72898ce9188d3f6498 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769.py b/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 
0000000000000000000000000000000000000000..6366bd4e3a23d90c62d737ec978daa17cc181449 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/deeplabv3plus/metafile.yaml b/configs/deeplabv3plus/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b41de4dee224b52dcceeb0b3bb9ec8de2173cc3b --- /dev/null +++ b/configs/deeplabv3plus/metafile.yaml @@ -0,0 +1,1041 @@ +Collections: +- Name: DeepLabV3+ + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + - LoveDA + - Potsdam + - Vaihingen + - iSAID + - Mapillary Vistas v1.2 + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + README: configs/deeplabv3plus/README.md + Frameworks: + - PyTorch +Models: +- Name: deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.61 + mIoU(ms+flip): 81.01 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.21 + mIoU(ms+flip): 81.82 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 11.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.97 + mIoU(ms+flip): 80.46 + Config: 
configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 8.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.46 + mIoU(ms+flip): 80.5 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 12.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.89 + mIoU(ms+flip): 78.76 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 2.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes-20201226_080942.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.09 + mIoU(ms+flip): 81.13 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.97 + mIoU(ms+flip): 82.03 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.46 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + - (FP16) + Training Resources: 4x V100 GPUS + Memory (GB): 6.35 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.26 + mIoU(ms+flip): 77.91 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 2.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes-20201226_083346.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.83 + mIoU(ms+flip): 81.48 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.65 + mIoU(ms+flip): 81.47 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720-dfcc0b68.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.09 + mIoU(ms+flip): 80.36 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16-MG124 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 5.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable 
Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.9 + mIoU(ms+flip): 81.33 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16-MG124 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 9.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.87 + mIoU(ms+flip): 77.52 + Config: configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 2.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes-20201226_090828.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.28 + mIoU(ms+flip): 81.44 + Config: configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes-20201225_213645.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.16 + mIoU(ms+flip): 81.41 + Config: configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes-20201226_190843.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.36 + mIoU(ms+flip): 78.24 + Config: configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 2.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes-20201226_151312.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.41 + mIoU(ms+flip): 80.56 + Config: configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 8.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes-20201225_224655.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: 
Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.88 + mIoU(ms+flip): 81.46 + Config: configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 12.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes-20201226_205041.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.72 + mIoU(ms+flip): 43.75 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.6 + mIoU(ms+flip): 46.06 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 14.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.95 + mIoU(ms+flip): 44.93 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.47 + mIoU(ms+flip): 46.35 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.93 + mIoU(ms+flip): 77.5 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.22 + mIoU(ms+flip): 78.59 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 11.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.81 + mIoU(ms+flip): 77.57 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.62 + mIoU(ms+flip): 79.53 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-40k_pascal-context-480x480 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 47.3 + mIoU(ms+flip): 48.47 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic 
Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_pascal-context-480x480 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 47.23 + mIoU(ms+flip): 48.26 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.86 + mIoU(ms+flip): 54.54 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59_20210416_111233-ed937f15.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59-20210416_111233.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 53.2 + mIoU(ms+flip): 54.67 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59_20210416_111127-7ca0331d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59-20210416_111127.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.28 + mIoU(ms+flip): 50.47 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 1.93 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800-ce0fa0ca.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.99 + mIoU(ms+flip): 50.65 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.37 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442-f0720392.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 51.47 + mIoU(ms+flip): 51.32 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.84 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759-4c1f297e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 77.09 + mIoU(ms+flip): 78.44 + Config: 
configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 1.91 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601-75fd5bc3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.33 + mIoU(ms+flip): 79.27 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.36 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.7 + mIoU(ms+flip): 79.47 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.83 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.5 + mIoU(ms+flip): 74.13 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 1.91 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 73.97 + mIoU(ms+flip): 75.05 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.36 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 73.06 + mIoU(ms+flip): 74.14 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.83 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 61.35 + mIoU(ms+flip): 62.61 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 6.19 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526-7059991d.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 67.06 + mIoU(ms+flip): 68.02 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 21.45 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526-598be439.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Mapillary Vistas v1.2 + Metrics: + mIoU: 47.35 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py + Metadata: + Training Data: Mapillary Vistas v1.2 + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x A100 GPUS + Memory (GB): 24.04 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280_20230301_110504-655f8e43.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280_20230301_110504.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch diff --git a/configs/dmnet/README.md b/configs/dmnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b0cf94455e1a40708dfb6ea74919deb49bcfdc34 --- /dev/null +++ b/configs/dmnet/README.md @@ -0,0 +1,59 @@ +# DMNet + +> [Dynamic Multi-scale Filters for Semantic Segmentation](https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Multi-scale representation provides an effective way to address scale variation of objects and stuff in semantic segmentation. Previous works construct multi-scale representation by utilizing different filter sizes, expanding filter sizes with dilated filters or pooling grids, and the parameters of these filters are fixed after training. These methods often suffer from heavy computational cost or have more parameters, and are not adaptive to the input image during inference. To address these problems, this paper proposes a Dynamic Multi-scale Network (DMNet) to adaptively capture multi-scale contents for predicting pixel-level semantic labels. DMNet is composed of multiple Dynamic Convolutional Modules (DCMs) arranged in parallel, each of which exploits context-aware filters to estimate semantic representation for a specific scale. The outputs of multiple DCMs are further integrated for final segmentation. We conduct extensive experiments to evaluate our DMNet on three challenging semantic segmentation and scene parsing datasets, PASCAL VOC 2012, Pascal-Context, and ADE20K. DMNet achieves a new record 84.4% mIoU on PASCAL VOC 2012 test set without MS COCO pre-trained and post-processing, and also obtains state-of-the-art performance on Pascal-Context and ADE20K. + + +
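The DCM idea above translates directly into code. The following is a minimal PyTorch sketch for illustration only (it is not the repository's actual `DCM` implementation in `mmseg/models/decode_heads/dm_head.py`; the `DCMSketch` name and the normalization choice are assumptions): a k x k kernel is pooled from the input itself, so the filter adapts to each image at inference time instead of being fixed after training.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class DCMSketch(nn.Module):
    """Dynamic Convolutional Module sketch: the k x k filter is predicted
    from the input's pooled context rather than being fixed after training."""

    def __init__(self, channels: int, filter_size: int):
        super().__init__()
        self.filter_size = filter_size
        # 1x1 conv that turns pooled context into per-channel kernel values.
        self.filter_gen = nn.Conv2d(channels, channels, 1)
        self.norm = nn.GroupNorm(1, channels)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, c, h, w = x.shape
        k = self.filter_size
        # Pool the input to k x k; these values become the dynamic kernel.
        kernel = self.filter_gen(F.adaptive_avg_pool2d(x, k))  # (B, C, k, k)
        kernel = kernel.reshape(b * c, 1, k, k)
        # Apply the input-dependent kernel as a depth-wise convolution.
        out = F.conv2d(x.reshape(1, b * c, h, w), kernel,
                       padding=(k - 1) // 2, groups=b * c)
        return F.relu(self.norm(out.reshape(b, c, h, w)))


# Parallel DCM branches at different scales, fused for the final prediction.
feats = torch.randn(2, 64, 32, 32)
branches = [DCMSketch(64, k) for k in (1, 3, 5)]
fused = torch.cat([branch(feats) for branch in branches] + [feats], dim=1)
```

Running several such branches with different `filter_size` values and concatenating their outputs, as in the last lines, mirrors the parallel multi-scale arrangement the abstract describes.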
+ +
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DMNet | R-50-D8 | 512x1024 | 40000 | 7.0 | 3.66 | V100 | 77.78 | 79.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201215_042326-615373cf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes-20201215_042326.log.json) | +| DMNet | R-101-D8 | 512x1024 | 40000 | 10.6 | 2.54 | V100 | 78.37 | 79.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201215_043100-8291e976.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes-20201215_043100.log.json) | +| DMNet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.57 | V100 | 78.49 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201215_093706-e7f0e23e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes-20201215_093706.log.json) | +| DMNet | R-101-D8 | 769x769 | 40000 | 12.0 | 1.01 | V100 | 77.62 | 78.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201215_081348-a74261f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes-20201215_081348.log.json) | +| DMNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 79.07 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201215_053728-3c8893b9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes-20201215_053728.log.json) | +| DMNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 79.64 | 80.67 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201215_031718-fa081cb8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes-20201215_031718.log.json) | +| DMNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.22 | 80.55 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201215_034006-6060840e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes-20201215_034006.log.json) | +| DMNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.19 | 80.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201215_082810-7f0de59a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes-20201215_082810.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DMNet | R-50-D8 | 512x512 | 80000 | 9.4 | 20.95 | V100 | 42.37 | 43.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201215_144744-f89092a6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k-20201215_144744.log.json) | +| DMNet | R-101-D8 | 512x512 | 80000 | 13.0 | 13.88 | V100 | 45.34 | 46.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201215_104812-bfa45311.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k-20201215_104812.log.json) | +| DMNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 43.15 | 44.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201215_115313-025ab3f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k-20201215_115313.log.json) | +| DMNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 45.42 | 46.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201215_111145-a0bc02ef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k-20201215_111145.log.json) | + +## Citation + +```bibtex +@InProceedings{He_2019_ICCV, +author = {He, Junjun and Deng, Zhongying and Qiao, Yu}, +title = {Dynamic Multi-Scale Filters for Semantic Segmentation}, +booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, +month = {October}, +year = {2019} +} +``` diff --git a/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..9832b62a29087a88111157fa49b74abbb1b14b18 --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..03346c5d9beb3b16069f3b5a45394399ad050531 --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..fd7e9acd1c0de671eebf1f703dc21dee0ac3ee73 --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..2205e601ce9a2e1ea40790a50be8af6e302c3c3b --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/dmnet/dmnet_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..23e215bf2fdcb0a2cb828f7e00ca5bf76a425ab7 --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/configs/dmnet/dmnet_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/dmnet/dmnet_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5c25587e647fec6bf71b06565ad856db691631af --- /dev/null +++ b/configs/dmnet/dmnet_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..aa86b013988f28479533adc97e69a4f822c36027 --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..8c2dbf31bde29f7017a09d88c5634459eab8df75 --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..bc2160634b5efef0642c3f8a270d3bbb7876cabd --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..e32ae71765e4bcc8ac30a67cc611da66e556adb3 --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dmnet/dmnet_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/dmnet/dmnet_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..71d0a046ba90f884133e16b94583a437e52e65ef --- /dev/null +++ 
b/configs/dmnet/dmnet_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/dmnet/dmnet_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/dmnet/dmnet_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..727bed0ea1c95d5c2e3e0a16c4252feaca987d54 --- /dev/null +++ b/configs/dmnet/dmnet_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/dmnet/metafile.yaml b/configs/dmnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f5e5367538694d9aba9f4434059298e235c6bbb --- /dev/null +++ b/configs/dmnet/metafile.yaml @@ -0,0 +1,296 @@ +Collections: +- Name: DMNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + README: configs/dmnet/README.md + Frameworks: + - PyTorch +Models: +- Name: dmnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.78 + mIoU(ms+flip): 79.14 + Config: configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201215_042326-615373cf.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes-20201215_042326.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.37 + mIoU(ms+flip): 79.72 + Config: configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201215_043100-8291e976.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes-20201215_043100.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.49 + mIoU(ms+flip): 80.27 + Config: configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201215_093706-e7f0e23e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes-20201215_093706.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.62 + mIoU(ms+flip): 78.94 + Config: configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201215_081348-a74261f6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes-20201215_081348.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.07 + mIoU(ms+flip): 80.22 + Config: configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201215_053728-3c8893b9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes-20201215_053728.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: 
https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.64 + mIoU(ms+flip): 80.67 + Config: configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201215_031718-fa081cb8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes-20201215_031718.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.22 + mIoU(ms+flip): 80.55 + Config: configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201215_034006-6060840e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes-20201215_034006.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.19 + mIoU(ms+flip): 80.65 + Config: configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201215_082810-7f0de59a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes-20201215_082810.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: DMNet + Results: + Task: Semantic 
Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.37 + mIoU(ms+flip): 43.62 + Config: configs/dmnet/dmnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201215_144744-f89092a6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k-20201215_144744.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.34 + mIoU(ms+flip): 46.13 + Config: configs/dmnet/dmnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201215_104812-bfa45311.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k-20201215_104812.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.15 + mIoU(ms+flip): 44.17 + Config: configs/dmnet/dmnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201215_115313-025ab3f9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k-20201215_115313.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.42 + mIoU(ms+flip): 46.76 + Config: configs/dmnet/dmnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201215_111145-a0bc02ef.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k-20201215_111145.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch diff --git a/configs/dnlnet/README.md b/configs/dnlnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6835ffd1ed9d94f1f7dbd70dc6a3cef0ca470dc3 --- /dev/null +++ b/configs/dnlnet/README.md @@ -0,0 +1,62 @@ +# DNLNet + +> [Disentangled Non-Local Neural Networks](https://arxiv.org/abs/2006.06668) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The non-local block is a popular module for strengthening the context modeling ability of a regular convolutional neural network. This paper first studies the non-local block in depth, where we find that its attention computation can be split into two terms, a whitened pairwise term accounting for the relationship between two pixels and a unary term representing the saliency of every pixel. We also observe that the two terms trained alone tend to model different visual clues, e.g. the whitened pairwise term learns within-region relationships while the unary term learns salient boundaries. However, the two terms are tightly coupled in the non-local block, which hinders the learning of each. Based on these findings, we present the disentangled non-local block, where the two terms are decoupled to facilitate learning for both terms. We demonstrate the effectiveness of the decoupled design on various tasks, such as semantic segmentation on Cityscapes, ADE20K and PASCAL Context, object detection on COCO, and action recognition on Kinetics. + + + +
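The decomposition described above can be sketched in a few lines of attention code. The block below is an illustration under assumed names (`DisentangledNonLocalSketch` and its layers are ours, not the exact `DNLHead` code linked in the metafile): the pairwise term is whitened by subtracting the query and key means, while a separate 1x1 `unary` branch scores per-pixel saliency and is shared by every query position.

```python
import torch
import torch.nn as nn

class DisentangledNonLocalSketch(nn.Module):
    """Disentangled non-local block sketch: whitened pairwise attention
    plus a decoupled unary (saliency) term, with a residual connection."""

    def __init__(self, in_channels: int, reduction: int = 2):
        super().__init__()
        inter = in_channels // reduction
        self.theta = nn.Conv2d(in_channels, inter, 1)  # queries
        self.phi = nn.Conv2d(in_channels, inter, 1)    # keys
        self.g = nn.Conv2d(in_channels, inter, 1)      # values
        self.unary = nn.Conv2d(in_channels, 1, 1)      # per-pixel saliency
        self.out = nn.Conv2d(inter, in_channels, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, c, h, w = x.shape
        q = self.theta(x).flatten(2).transpose(1, 2)   # (B, HW, C')
        k = self.phi(x).flatten(2)                     # (B, C', HW)
        v = self.g(x).flatten(2).transpose(1, 2)       # (B, HW, C')
        # Whitening: subtracting the means decouples pure pairwise
        # relationships between pixels from per-pixel saliency.
        q = q - q.mean(dim=1, keepdim=True)
        k = k - k.mean(dim=2, keepdim=True)
        pairwise = torch.softmax(q @ k, dim=-1)        # (B, HW, HW)
        # Unary term: one attention map shared by every query position.
        unary = torch.softmax(self.unary(x).flatten(2), dim=-1)  # (B, 1, HW)
        y = pairwise @ v + unary @ v                   # unary broadcasts
        y = y.transpose(1, 2).reshape(b, -1, h, w)
        return x + self.out(y)


block = DisentangledNonLocalSketch(64)
print(block(torch.randn(2, 64, 16, 16)).shape)  # torch.Size([2, 64, 16, 16])
```

The `theta`/`phi` layer names here deliberately echo the `custom_keys` entries that the 769x769 config further below exempts from weight decay in its `optim_wrapper`.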
+ +
+ +## Results and models (in progress) + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DNLNet | R-50-D8 | 512x1024 | 40000 | 7.3 | 2.56 | V100 | 78.61 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.96 | V100 | 78.31 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-50-D8 | 769x769 | 40000 | 9.2 | 1.50 | V100 | 78.44 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes-20200820_232206.log.json) | +| DNLNet | R-101-D8 | 769x769 | 40000 | 12.6 | 1.02 | V100 | 76.39 | 77.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes-20200820_171256.log.json) | +| DNLNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 79.33 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 80.41 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.36 | 80.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes-20200820_011925.log.json) | +| DNLNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.41 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes-20200821_051111.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DNLNet | R-50-D8 | 512x512 | 80000 | 8.8 | 20.66 | V100 | 41.76 | 42.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| DNLNet | R-101-D8 | 512x512 | 80000 | 12.8 | 12.54 | V100 | 43.76 | 44.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| DNLNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 41.87 | 43.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k-20200826_183350.log.json) | +| DNLNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 44.25 | 45.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k-20200826_183350.log.json) | + +## Notes + +This example is to reproduce ["Disentangled Non-Local Neural Networks"](https://arxiv.org/abs/2006.06668) for semantic segmentation. It is still in progress. + +## Citation + +```bibtex +@misc{yin2020disentangled, + title={Disentangled Non-Local Neural Networks}, + author={Minghao Yin and Zhuliang Yao and Yue Cao and Xiu Li and Zheng Zhang and Stephen Lin and Han Hu}, + year={2020}, + booktitle={ECCV} +} +``` diff --git a/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..310d84e5740b65ad50d3c480492fe92cee081faf --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..a94dbb89b3b528b7a0ad977f5f54212655427e03 --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..f9b6d5ee3df8d2be4ca3e65554641dc64b94c9e0 --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..9c7d557d028f82195f99c38b8e24c73e54769dd7 --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/dnlnet/dnl_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1edc26fd8c620f51ab27566a64e6fdce8e5d1507 --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_4xb4-160k_ade20k-512x512.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/dnlnet/dnl_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..d29c17ef5b84bb87a0bc05079af2e3a90f4fafee --- /dev/null +++ b/configs/dnlnet/dnl_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..be389927ce050e0e21e1b5b7e2ee8c488a847b5c --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..9eaaa63ef279fcd044acf1ac9e1cbc92aa56a94c --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..2e431783ad52981ee4de63ee8f8fd81f2c3be8e1 --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..cb379c1e08cb9154393599481bd7dbbd2e7758a5 --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,16 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) + +optim_wrapper = dict( + paramwise_cfg=dict( + custom_keys=dict(theta=dict(wd_mult=0.), phi=dict(wd_mult=0.)))) diff --git a/configs/dnlnet/dnl_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/dnlnet/dnl_r50-d8_4xb4-160k_ade20k-512x512.py new file 
mode 100644 index 0000000000000000000000000000000000000000..b2ae2a85da973689fb0d7271442f02dc18fd990f --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/dnlnet/dnl_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/dnlnet/dnl_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..f310a4ebab43ae73844c191c23bbbf3b3080ecb7 --- /dev/null +++ b/configs/dnlnet/dnl_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/dnlnet/metafile.yaml b/configs/dnlnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22e48d3dc5361729ac1e1ebf94ffe9594d1ad35e --- /dev/null +++ b/configs/dnlnet/metafile.yaml @@ -0,0 +1,292 @@ +Collections: +- Name: DNLNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + README: configs/dnlnet/README.md + Frameworks: + - PyTorch +Models: +- Name: dnl_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.61 + Config: configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes-20200904_233629.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.31 + Config: configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes-20200904_233629.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.44 + mIoU(ms+flip): 80.27 + Config: configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes-20200820_232206.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.39 + mIoU(ms+flip): 77.77 + Config: configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes-20200820_171256.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.33 + Config: configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes-20200904_233629.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.41 + Config: configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes-20200904_233629.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.36 + mIoU(ms+flip): 80.7 + Config: configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes-20200820_011925.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.41 + mIoU(ms+flip): 80.68 + Config: configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes-20200821_051111.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.76 + mIoU(ms+flip): 42.99 + Config: configs/dnlnet/dnl_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k-20200826_183354.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.76 + mIoU(ms+flip): 44.91 + Config: configs/dnlnet/dnl_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 
GPUS + Memory (GB): 12.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k-20200826_183354.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.87 + mIoU(ms+flip): 43.01 + Config: configs/dnlnet/dnl_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k-20200826_183350.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.25 + mIoU(ms+flip): 45.78 + Config: configs/dnlnet/dnl_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k-20200826_183350.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch diff --git a/configs/dpt/README.md b/configs/dpt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b3a5573a65f1e6b60c8de63ae3452ec95049e812 --- /dev/null +++ b/configs/dpt/README.md @@ -0,0 +1,67 @@ +# DPT + +> [Vision Transformers for Dense Prediction](https://arxiv.org/abs/2103.13413) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We introduce dense vision transformers, an architecture that leverages vision transformers in place of convolutional networks as a backbone for dense prediction tasks. We assemble tokens from various stages of the vision transformer into image-like representations at various resolutions and progressively combine them into full-resolution predictions using a convolutional decoder. The transformer backbone processes representations at a constant and relatively high resolution and has a global receptive field at every stage. These properties allow the dense vision transformer to provide finer-grained and more globally coherent predictions when compared to fully-convolutional networks. 
Our experiments show that this architecture yields substantial improvements on dense prediction tasks, especially when a large amount of training data is available. For monocular depth estimation, we observe an improvement of up to 28% in relative performance when compared to a state-of-the-art fully-convolutional network. When applied to semantic segmentation, dense vision transformers set a new state of the art on ADE20K with 49.02% mIoU. We further show that the architecture can be fine-tuned on smaller datasets such as NYUv2, KITTI, and Pascal Context where it also sets the new state of the art. Our models are available at [this https URL](https://github.com/isl-org/DPT). + + + +
+ +
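To make the token-reassembly step described in the abstract concrete, here is a minimal sketch, assuming a plain ViT with a class token and a 16x16 patch grid; the names and shapes are illustrative, not mmseg's API:

```python
import torch

def reassemble(tokens: torch.Tensor, grid_h: int, grid_w: int) -> torch.Tensor:
    """Turn ViT tokens (B, 1 + grid_h*grid_w, C) into an image-like map (B, C, grid_h, grid_w)."""
    b, n, c = tokens.shape
    assert n == 1 + grid_h * grid_w, 'expected a class token plus one token per patch'
    patches = tokens[:, 1:, :]  # drop the class token
    return patches.transpose(1, 2).reshape(b, c, grid_h, grid_w)

# A 512x512 crop with 16x16 patches yields a 32x32 token grid:
fmap = reassemble(torch.randn(2, 1 + 32 * 32, 768), 32, 32)  # -> (2, 768, 32, 32)
```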
+ + ## Usage + + To use other repositories' pre-trained models, it is necessary to convert the checkpoint keys first. + + We provide a script [`vit2mmseg.py`](../../tools/model_converters/vit2mmseg.py) in the tools directory to convert the keys of models from [timm](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py) to MMSegmentation style. + + ```shell + python tools/model_converters/vit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} + ``` + + E.g. + + ```shell + python tools/model_converters/vit2mmseg.py https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth pretrain/jx_vit_base_p16_224-80ecf9dd.pth + ``` + + This script converts the model at `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. At its core, the conversion is a renaming of checkpoint keys, as sketched below.
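A minimal sketch of that renaming, assuming a torch checkpoint whose weights may sit under a `state_dict` key; the rename rule shown is illustrative, not the exact mapping `vit2mmseg.py` applies:

```python
import torch

def convert_vit_keys(src_path: str, dst_path: str) -> None:
    # Load the source checkpoint on CPU; some checkpoints nest the
    # weights under 'state_dict', others are a flat mapping.
    src = torch.load(src_path, map_location='cpu')
    state_dict = src.get('state_dict', src)
    converted = {}
    for key, value in state_dict.items():
        # Illustrative rename only: the real script maps every timm
        # parameter name onto the matching MMSegmentation backbone name.
        converted[key.replace('blocks.', 'layers.')] = value
    torch.save(converted, dst_path)

convert_vit_keys('pretrain/jx_vit_base_p16_224-80ecf9dd.pth',
                 'pretrain/jx_vit_base_p16_224-80ecf9dd_mmseg.pth')
```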
## Results and models + + ### ADE20K + + | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | + | ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | + | DPT | ViT-B | 512x512 | 160000 | 8.09 | 10.41 | V100 | 46.97 | 48.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dpt/dpt_vit-b16_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-20210809_172025.log.json) | + + ## Citation + + ```bibtex + @article{dosovitskiy2020, + title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, + author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, + journal={arXiv preprint arXiv:2010.11929}, + year={2020} + } + + @article{Ranftl2021, + author = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun}, + title = {Vision Transformers for Dense Prediction}, + journal = {ArXiv preprint}, + year = {2021}, + } + ``` diff --git a/configs/dpt/dpt_vit-b16_8xb2-160k_ade20k-512x512.py b/configs/dpt/dpt_vit-b16_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..56b33d96b9e424f6f73640986be419a0bb271458 --- /dev/null +++ b/configs/dpt/dpt_vit-b16_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/dpt_vit-b16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader
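The `paramwise_cfg` in the config above zeroes out weight decay for any parameter whose name contains `pos_embed`, `cls_token`, or `norm`. In plain PyTorch, the effect corresponds roughly to building AdamW parameter groups by hand; a sketch under that assumption, where `model` stands for any ViT-backboned segmentor:

```python
import torch

def build_param_groups(model: torch.nn.Module) -> list:
    # Names matched by the custom_keys above get decay_mult=0.,
    # i.e. no weight decay; everything else keeps the default 0.01.
    no_decay, default = [], []
    for name, param in model.named_parameters():
        if any(key in name for key in ('pos_embed', 'cls_token', 'norm')):
            no_decay.append(param)
        else:
            default.append(param)
    return [
        dict(params=no_decay, weight_decay=0.0),
        dict(params=default, weight_decay=0.01),
    ]

# optimizer = torch.optim.AdamW(build_param_groups(model),
#                               lr=6e-5, betas=(0.9, 0.999))
```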
diff --git a/configs/dpt/metafile.yaml b/configs/dpt/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b721e041b612ab81bd69e89a7e9a956abf5276d4 --- /dev/null +++ b/configs/dpt/metafile.yaml @@ -0,0 +1,37 @@ +Collections: +- Name: DPT + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + Paper: + Title: Vision Transformers for Dense Prediction + URL: https://arxiv.org/abs/2103.13413 + README: configs/dpt/README.md + Frameworks: + - PyTorch +Models: +- Name: dpt_vit-b16_8xb2-160k_ade20k-512x512 + In Collection: DPT + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.97 + mIoU(ms+flip): 48.34 + Config: configs/dpt/dpt_vit-b16_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - DPT + Training Resources: 8x V100 GPUS + Memory (GB): 8.09 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-20210809_172025.log.json + Paper: + Title: Vision Transformers for Dense Prediction + URL: https://arxiv.org/abs/2103.13413 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dpt_head.py#L215 + Framework: PyTorch diff --git a/configs/dsdl/README.md b/configs/dsdl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e564cffdb212280e6982643c4ce6108ad16f2b9c --- /dev/null +++ b/configs/dsdl/README.md @@ -0,0 +1,103 @@ +# DSDL: Standard Description Language for DataSet + + + +## Abstract + + + +Data is the cornerstone of artificial intelligence. The efficiency of data acquisition, exchange, and application directly impacts the advances in technologies and applications. Over the long history of AI, a vast quantity of data sets have been developed and distributed. However, these datasets are defined in very different forms, which incurs significant overhead when it comes to exchange, integration, and utilization -- it is often the case that one needs to develop a new customized tool or script in order to incorporate a new dataset into a workflow. + +To overcome such difficulties, we develop **Data Set Description Language (DSDL)**. For more details, please visit our [official documents](https://opendatalab.github.io/dsdl-docs/getting_started/overview/); dsdl datasets can be downloaded from our platform [OpenDataLab](https://opendatalab.com/). + + + +## Steps + +- install dsdl and opendatalab: + + ``` + pip install dsdl + pip install opendatalab + ``` + +- install mmseg and pytorch: + please refer to this [installation document](https://mmsegmentation.readthedocs.io/en/latest/get_started.html). + +- prepare dsdl dataset (take voc2012 as an example) + + - download the dsdl dataset (you will need an OpenDataLab account to do so, [register one now](https://opendatalab.com/)) + + ``` + cd data + + odl login + odl get PASCAL_VOC2012 + ``` + + usually, datasets are compressed on the OpenDataLab platform, so the downloaded voc 2012 dataset should look like this: + + ``` + data/ + ├── PASCAL_VOC2012 + │   ├── dsdl + │   │   ├── dsdl_Det_full.zip + │   │   └── dsdl_SemSeg_full.zip + │   ├── raw + │   │   ├── VOC2012test.tar + │   │   ├── VOCdevkit_18-May-2011.tar + │   │   └── VOCtrainval_11-May-2012.tar + │   └── README.md + └── ... + ``` + + - decompress dataset + + ``` + cd dsdl + unzip dsdl_SemSeg_full.zip + ``` + + as we do not need the detection dsdl files, we only decompress the semantic segmentation files here. + + ``` + cd ../raw + tar -xvf VOCtrainval_11-May-2012.tar + tar -xvf VOC2012test.tar + + cd ../../ + ``` + +- change training config + + open the [voc config file](voc.py) and set some file paths as below: + + ``` + data_root = 'data/PASCAL_VOC2012' + img_prefix = 'raw/VOCdevkit/VOC2012' + train_ann = 'dsdl/dsdl_SemSeg_full/set-train/train.yaml' + val_ann = 'dsdl/dsdl_SemSeg_full/set-val/val.yaml' + ``` + + since each dsdl task uses a single dataloader, we can simply change these file paths to train a model on a different dataset (see the sketch after the Test Results table). + +- train: + + - using single gpu: + + ``` + python tools/train.py {config_file} + ``` + + - using slurm: + + ``` + ./tools/slurm_train.sh {partition} {job_name} {config_file} {work_dir} {gpu_nums} + ``` + +## Test Results + +| Datasets | Model | mIoU(%) | Config | + | :--------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----: | :-----------------------: | + | voc2012 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth) | 76.73 | [config](./voc.py) | + | cityscapes | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth) | 79.01 | [config](./cityscapes.py) |
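The same path switch can also be done programmatically before launching training. A minimal sketch using `mmengine`'s `Config`, where the paths are the VOC ones above and `voc_custom.py` is a hypothetical output file:

```python
from mmengine.config import Config

# Load the dsdl VOC config and edit the same paths the README changes
# by hand, then dump a new config for tools/train.py to consume.
cfg = Config.fromfile('configs/dsdl/voc.py')
cfg.train_dataloader.dataset.data_root = 'data/PASCAL_VOC2012'
cfg.train_dataloader.dataset.ann_file = 'dsdl/dsdl_SemSeg_full/set-train/train.yaml'
cfg.val_dataloader.dataset.ann_file = 'dsdl/dsdl_SemSeg_full/set-val/val.yaml'
cfg.dump('configs/dsdl/voc_custom.py')
# then: python tools/train.py configs/dsdl/voc_custom.py
```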
diff --git a/configs/dsdl/cityscapes.py b/configs/dsdl/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..94ccc068e0616720023e0cd2bdc5ee73f467a265 --- /dev/null +++ b/configs/dsdl/cityscapes.py @@ -0,0 +1,70 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] + +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +# dataset settings +dataset_type = 'DSDLSegDataset' +data_root = 'data/CityScapes' +img_prefix = 'raw/CityScapes' +train_ann = 'dsdl/dsdl_SemSeg_full/set-train/train.yaml' +val_ann = 'dsdl/dsdl_SemSeg_full/set-val/val.yaml' + +used_labels = [ + 'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'traffic_light', + 'traffic_sign', 'vegetation', 'terrain', 'sky', 'person', 'rider', 'car', + 'truck', 'bus', 'train', 'motorcycle', 'bicycle' +] + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because the ground truth + # does not need the resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict( + batch_size=2, + num_workers=2, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path=img_prefix, seg_map_path=img_prefix), + ann_file=train_ann, + used_labels=used_labels, + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path=img_prefix, seg_map_path=img_prefix), + ann_file=val_ann, + used_labels=used_labels, + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/configs/dsdl/voc.py b/configs/dsdl/voc.py new file mode 100644 index 0000000000000000000000000000000000000000..c1895f7c7d3816f602a3c4e6130a378d288c8d6a --- /dev/null +++ b/configs/dsdl/voc.py @@ -0,0 +1,65 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] + +# dataset settings +dataset_type = 'DSDLSegDataset' +data_root = 'data/PASCAL_VOC2012' +img_prefix = 'raw/VOCdevkit/VOC2012' +train_ann = 'dsdl/dsdl_SemSeg_full/set-train/train.yaml' +val_ann = 'dsdl/dsdl_SemSeg_full/set-val/val.yaml' +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + # add loading annotation after ``Resize`` because the ground truth + # does not need the resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path=img_prefix, seg_map_path=img_prefix), + ann_file=train_ann, + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path=img_prefix, seg_map_path=img_prefix), + ann_file=val_ann, + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator + +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/emanet/README.md b/configs/emanet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8ffaf471caf36187798ac6a8c861882dede66ddb --- /dev/null +++ b/configs/emanet/README.md @@ -0,0 +1,46 @@ +# EMANet + +> [Expectation-Maximization Attention Networks for Semantic 
Segmentation](https://arxiv.org/abs/1907.13426) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Self-attention mechanism has been widely used for various tasks. It is designed to compute the representation of each position by a weighted sum of the features at all positions. Thus, it can capture long-range relations for computer vision tasks. However, it is computationally expensive, since the attention maps are computed w.r.t. all other positions. In this paper, we formulate the attention mechanism into an expectation-maximization manner and iteratively estimate a much more compact set of bases upon which the attention maps are computed. By a weighted summation upon these bases, the resulting representation is low-rank and deprecates noisy information from the input. The proposed Expectation-Maximization Attention (EMA) module is robust to the variance of input and is also friendly in memory and computation. Moreover, we set up the bases maintenance and normalization methods to stabilize its training procedure. We conduct extensive experiments on popular semantic segmentation benchmarks including PASCAL VOC, PASCAL Context and COCO Stuff, on which we set new records. + + + +
+ +
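To make the E-step/M-step alternation described in the abstract concrete, here is a minimal, illustrative sketch of EMA-style attention; the maintained implementation lives in `mmseg/models/decode_heads/ema_head.py`, and details such as bases maintenance across batches are omitted:

```python
import torch
import torch.nn.functional as F

def ema_attention(x: torch.Tensor, bases: torch.Tensor, num_iters: int = 3) -> torch.Tensor:
    """x: (B, C, H, W) features; bases: (B, K, C) initial bases with K << H*W."""
    b, c, h, w = x.shape
    feats = x.view(b, c, h * w).transpose(1, 2)    # (B, N, C), N = H*W
    for _ in range(num_iters):
        # E-step: responsibilities of each pixel w.r.t. each basis.
        logits = feats @ bases.transpose(1, 2)     # (B, N, K)
        resp = F.softmax(logits, dim=2)
        # M-step: re-estimate bases as responsibility-weighted feature sums,
        # then l2-normalize them to stabilize the iteration.
        bases = F.normalize(resp.transpose(1, 2) @ feats, dim=2)  # (B, K, C)
    # Low-rank reconstruction: project features onto the compact bases.
    recon = resp @ bases                           # (B, N, C)
    return recon.transpose(1, 2).view(b, c, h, w)

x = torch.randn(2, 64, 32, 32)
bases = F.normalize(torch.randn(2, 16, 64), dim=2)
out = ema_attention(x, bases)                      # same shape as x
```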
+ + ## Results and models + + ### Cityscapes + + | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | + | ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | + | EMANet | R-50-D8 | 512x1024 | 80000 | 5.4 | 4.58 | V100 | 77.59 | 79.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | + | EMANet | R-101-D8 | 512x1024 | 80000 | 6.2 | 2.87 | V100 | 79.10 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | + | EMANet | R-50-D8 | 769x769 | 80000 | 8.9 | 1.97 | V100 | 79.33 | 80.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes-20200901_100301.log.json) | + | EMANet | R-101-D8 | 769x769 | 80000 | 10.1 | 1.22 | V100 | 79.62 | 81.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes-20200901_100301.log.json) | + + ## Citation + + ```bibtex + @inproceedings{li2019expectation, + title={Expectation-maximization attention networks for semantic segmentation}, + author={Li, Xia and Zhong, Zhisheng and Wu, Jianlong and Yang, Yibo and Lin, Zhouchen and Liu, Hong}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision}, + pages={9167--9176}, + year={2019} + } + ``` diff --git a/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 
0000000000000000000000000000000000000000..ee3a3b51672f14da083339bbf478322dc6faaba1 --- /dev/null +++ b/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './emanet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..7319a3e4b603d244f7b7723cc3237c97ae36be2a --- /dev/null +++ b/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './emanet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6198e1f9a2e8538e05d338d6f916c44743171d16 --- /dev/null +++ b/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/emanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..a8e4521b07bda3bbcdf3e39c45733eb7d8477fec --- /dev/null +++ b/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/emanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/emanet/metafile.yaml b/configs/emanet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2a6b09ed7296cf6efe0f545e17c9a90a2fc4405 --- /dev/null +++ b/configs/emanet/metafile.yaml @@ -0,0 +1,109 @@ +Collections: +- Name: EMANet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1907.13426 + README: configs/emanet/README.md + Frameworks: + - PyTorch +Models: +- Name: emanet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: EMANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.59 + mIoU(ms+flip): 79.44 + Config: configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EMANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes-20200901_100301.log.json + Paper: + Title: 
Expectation-Maximization Attention Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1907.13426 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 + Framework: PyTorch +- Name: emanet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: EMANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.1 + mIoU(ms+flip): 81.21 + Config: configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EMANet + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes-20200901_100301.log.json + Paper: + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1907.13426 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 + Framework: PyTorch +- Name: emanet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: EMANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.33 + mIoU(ms+flip): 80.49 + Config: configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EMANet + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes-20200901_100301.log.json + Paper: + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1907.13426 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 + Framework: PyTorch +- Name: emanet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: EMANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.62 + mIoU(ms+flip): 81.0 + Config: configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EMANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes-20200901_100301.log.json + Paper: + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1907.13426 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 + Framework: PyTorch diff --git a/configs/encnet/README.md b/configs/encnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ff09bc32f86df1a91dc8eb1338c384599090643b --- /dev/null +++ b/configs/encnet/README.md @@ -0,0 +1,59 @@ +# EncNet + +> [Context Encoding for 
Semantic Segmentation](https://arxiv.org/abs/1803.08904) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Recent work has made significant progress in improving spatial resolution for pixelwise labeling with the Fully Convolutional Network (FCN) framework by employing Dilated/Atrous convolution, utilizing multi-scale features and refining boundaries. In this paper, we explore the impact of global contextual information in semantic segmentation by introducing the Context Encoding Module, which captures the semantic context of scenes and selectively highlights class-dependent featuremaps. The proposed Context Encoding Module significantly improves semantic segmentation results with only marginal extra computation cost over FCN. Our approach has achieved new state-of-the-art results: 51.7% mIoU on PASCAL-Context, 85.9% mIoU on PASCAL VOC 2012. Our single model achieves a final score of 0.5567 on the ADE20K test set, which surpasses the winning entry of the COCO-Place Challenge in 2017. In addition, we also explore how the Context Encoding Module can improve the feature representation of relatively shallow networks for image classification on the CIFAR-10 dataset. Our 14-layer network has achieved an error rate of 3.45%, which is comparable with state-of-the-art approaches with over 10 times more layers. The source code for the complete system is publicly available. + + + +
+ +
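As a rough illustration of the idea in the abstract (encode the global context of a scene, then selectively highlight class-dependent featuremaps), here is a minimal sketch; the real module lives in `mmseg/models/decode_heads/enc_head.py`, and the global-pooling "encoder" below is a stand-in for EncNet's learned codebook:

```python
import torch
import torch.nn as nn

class ContextEncodingSketch(nn.Module):
    """Re-weights channels from a global scene descriptor (illustrative only)."""

    def __init__(self, channels: int):
        super().__init__()
        self.encode = nn.AdaptiveAvgPool2d(1)  # stand-in for the learned encoding layer
        self.scale = nn.Sequential(nn.Linear(channels, channels), nn.Sigmoid())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, c, _, _ = x.shape
        context = self.encode(x).view(b, c)           # global scene descriptor
        gamma = self.scale(context).view(b, c, 1, 1)  # per-channel attention
        return x * gamma                              # highlight class-dependent maps

feats = torch.randn(2, 512, 64, 64)
out = ContextEncodingSketch(512)(feats)  # same shape, channels re-weighted
```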
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EncNet | R-50-D8 | 512x1024 | 40000 | 8.6 | 4.58 | V100 | 75.67 | 77.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json) | +| EncNet | R-101-D8 | 512x1024 | 40000 | 12.1 | 2.66 | V100 | 75.81 | 77.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json) | +| EncNet | R-50-D8 | 769x769 | 40000 | 9.8 | 1.82 | V100 | 76.24 | 77.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json) | +| EncNet | R-101-D8 | 769x769 | 40000 | 13.7 | 1.26 | V100 | 74.25 | 76.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json) | +| EncNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 77.94 | 79.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json) | +| EncNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 78.55 | 
79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json) | +| EncNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 77.44 | 78.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json) | +| EncNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 76.10 | 76.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EncNet | R-50-D8 | 512x512 | 80000 | 10.1 | 22.81 | V100 | 39.53 | 41.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json) | +| EncNet | R-101-D8 | 512x512 | 80000 | 13.6 | 14.87 | V100 | 42.11 | 43.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json) | +| EncNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 40.10 | 41.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json) | +| EncNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 42.61 | 44.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json) | + +## Citation + +```bibtex +@InProceedings{Zhang_2018_CVPR, +author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, +title = {Context Encoding for Semantic Segmentation}, +booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2018} +} +``` diff --git a/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..13ab367be59d03a643e26e13e0171e5b2404802a --- /dev/null +++ b/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..7810ac440d417f839962edc2eee37fbbddb7b73b --- /dev/null +++ b/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..bec6bd907d9f7e5dcf117696f68073c41b47d85b --- /dev/null +++ b/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..e1f6409e6339c3929005ed278ef48923a911bd16 --- /dev/null +++ b/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/encnet/encnet_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9599f9c0d3d295accc7652719899eba870976ba7 --- /dev/null +++ b/configs/encnet/encnet_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = 
'./encnet_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/encnet/encnet_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a9edfc28a2315f0ee8c2c6a4836b9b9c261f99a8 --- /dev/null +++ b/configs/encnet/encnet_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/encnet/encnet_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..d2fbab59e38b9205c0cbfbda5b8b4284279e9eff --- /dev/null +++ b/configs/encnet/encnet_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/encnet/encnet_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..debe8c8331bb5aa849e07ae9503114e1129db995 --- /dev/null +++ b/configs/encnet/encnet_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..d5c3027a898b41c10b9e741031ed8a168e1a4a09 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..045d0feb0c58df1e3ceef8d4490f7c6d67ddfee7 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..4dafcd5b7db201384c0d52d525438863baf06d24 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = 
dict(data_preprocessor=data_preprocessor) diff --git a/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..e4d0b8045e2dbd3145e1cd45cdb43d46a0190b0d --- /dev/null +++ b/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/encnet/encnet_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/encnet/encnet_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b9167980627ff2fad662feddc412999fc615271b --- /dev/null +++ b/configs/encnet/encnet_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/encnet/encnet_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/encnet/encnet_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..e5c917158d6c73e9650b1bea01297de9b004c3a9 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/encnet/encnet_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/encnet/encnet_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..8ca126ab0236849feac685dbdd4649e5cef13108 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/encnet/encnet_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/encnet/encnet_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..931d6c019bff7296e0957c85c0bd187d053aa6e5 --- /dev/null +++ b/configs/encnet/encnet_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + 
decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/encnet/encnet_r50s-d8_4xb4-80k_ade20k-512x512.py b/configs/encnet/encnet_r50s-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..e98104dbafec055df9ffdddc91719eb8acfa71fb --- /dev/null +++ b/configs/encnet/encnet_r50s-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(stem_channels=128), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/encnet/metafile.yaml b/configs/encnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0dbdcfaab3f9a01d40e73b430c68d71b77b5c4b2 --- /dev/null +++ b/configs/encnet/metafile.yaml @@ -0,0 +1,296 @@ +Collections: +- Name: EncNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + README: configs/encnet/README.md + Frameworks: + - PyTorch +Models: +- Name: encnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.67 + mIoU(ms+flip): 77.08 + Config: configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.81 + mIoU(ms+flip): 77.21 + Config: configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.24 + mIoU(ms+flip): 77.85 + Config: 
configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.25 + mIoU(ms+flip): 76.25 + Config: configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.94 + mIoU(ms+flip): 79.13 + Config: configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.55 + mIoU(ms+flip): 79.47 + Config: configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: 
https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.44 + mIoU(ms+flip): 78.72 + Config: configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.1 + mIoU(ms+flip): 76.97 + Config: configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.53 + mIoU(ms+flip): 41.17 + Config: configs/encnet/encnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.11 + mIoU(ms+flip): 43.61 + Config: configs/encnet/encnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.6 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.1 + mIoU(ms+flip): 41.71 + Config: configs/encnet/encnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.61 + mIoU(ms+flip): 44.01 + Config: configs/encnet/encnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch diff --git a/configs/erfnet/README.md b/configs/erfnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..55d71973a3e08b17263b7fff861971771b0ccd52 --- /dev/null +++ b/configs/erfnet/README.md @@ -0,0 +1,54 @@ +# ERFNet + +> [ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation](http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Semantic segmentation is a challenging task that addresses most of the perception needs of intelligent vehicles (IVs) in an unified way. Deep neural networks excel at this task, as they can be trained end-to-end to accurately classify multiple object categories in an image at pixel level. However, a good tradeoff between high quality and computational resources is yet not present in the state-of-the-art semantic segmentation approaches, limiting their application in real vehicles. In this paper, we propose a deep architecture that is able to run in real time while providing accurate semantic segmentation. 
The core of our architecture is a novel layer that uses residual connections and factorized convolutions in order to remain efficient while retaining remarkable accuracy. Our approach is able to run at over 83 FPS in a single Titan X, and 7 FPS in a Jetson TX1 (embedded device). A comprehensive set of experiments on the publicly available Cityscapes data set demonstrates that our system achieves an accuracy that is similar to the state of the art, while being orders of magnitude faster to compute than other architectures that achieve top precision. The resulting tradeoff makes our model an ideal approach for scene understanding in IV applications. The code is publicly available at: https://github.com/Eromera/erfnet. + + + +
+ +
+
+## Results and models
+
+### Cityscapes
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ---: | ------------- | ------ | -------- |
+| ERFNet | ERFNet | 512x1024 | 160000 | 6.04 | 15.26 | V100 | 72.5 | 74.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145-dc90157a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145.log.json) |
+
+Note:
+
+- The model is trained from scratch.
+
+- The last deconvolution layer of the [original paper](https://github.com/Eromera/erfnet_pytorch/blob/master/train/erfnet.py#L123) is replaced by a naive `FCNHead` decoder head followed by a bilinear upsampling layer, which proved more effective and efficient.
+
+- The performance of this model is sensitive to the random seed; please refer to the log file for the exact seed setting. Training with a different seed may yield results that differ from those in the table.
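+
+A minimal inference sketch with the checkpoint above, assuming an MMSegmentation 1.x installation (`demo.png` is a placeholder for any street-scene image):
+
+```python
+from mmseg.apis import inference_model, init_model
+from mmseg.apis.inference import show_result_pyplot
+
+# Config shipped in this PR and the checkpoint linked in the table above,
+# downloaded to the working directory.
+config = 'configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py'
+checkpoint = 'erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145-dc90157a.pth'
+
+model = init_model(config, checkpoint, device='cuda:0')
+result = inference_model(model, 'demo.png')
+# Blend the prediction with the input image and write it to disk.
+show_result_pyplot(model, 'demo.png', result, show=False, out_file='demo_seg.png')
+```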
+ +## Citation + +```bibtex +@article{romera2017erfnet, + title={Erfnet: Efficient residual factorized convnet for real-time semantic segmentation}, + author={Romera, Eduardo and Alvarez, Jos{\'e} M and Bergasa, Luis M and Arroyo, Roberto}, + journal={IEEE Transactions on Intelligent Transportation Systems}, + volume={19}, + number={1}, + pages={263--272}, + year={2017}, + publisher={IEEE} +} +``` diff --git a/configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py b/configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..7d6558279818db7e3a56030d4ab6eca54f90c753 --- /dev/null +++ b/configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/erfnet_fcn.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/erfnet/metafile.yaml b/configs/erfnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf514124ee001a62109f336ca543a96e8ef6eabf --- /dev/null +++ b/configs/erfnet/metafile.yaml @@ -0,0 +1,37 @@ +Collections: +- Name: ERFNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: 'ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation' + URL: http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf + README: configs/erfnet/README.md + Frameworks: + - PyTorch +Models: +- Name: erfnet_fcn_4xb4-160k_cityscapes-512x1024 + In Collection: ERFNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.5 + mIoU(ms+flip): 74.75 + Config: configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - ERFNet + - ERFNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.04 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145-dc90157a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145.log.json + Paper: + Title: 'ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation' + URL: http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/erfnet.py#L321 + Framework: PyTorch diff --git a/configs/fastfcn/README.md b/configs/fastfcn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..48644e57e3ab2e3d65c73ebc2738ab3010f9b5d4 --- /dev/null +++ b/configs/fastfcn/README.md @@ -0,0 +1,63 @@ +# FastFCN + +> [FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation](https://arxiv.org/abs/1903.11816) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Modern approaches for semantic segmentation usually employ dilated convolutions in the backbone to extract high-resolution feature maps, which brings heavy computation complexity and memory footprint. 
To replace the time and memory consuming dilated convolutions, we propose a novel joint upsampling module named Joint Pyramid Upsampling (JPU) by formulating the task of extracting high-resolution feature maps into a joint upsampling problem. With the proposed JPU, our method reduces the computation complexity by more than three times without performance loss. Experiments show that JPU is superior to other upsampling modules, which can be plugged into many existing approaches to reduce computation complexity and improve performance. By replacing dilated convolutions with the proposed JPU module, our method achieves the state-of-the-art performance in Pascal Context dataset (mIoU of 53.13%) and ADE20K dataset (final score of 0.5584) while running 3 times faster. + + + +
+ +
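+
+In the configs added by this PR, JPU enters as a drop-in `neck` between the dilation-free R-50-D32 backbone and the decode head, which is what lets the same module back the DeepLabV3, PSPNet, and EncNet heads in the tables below. A quick way to confirm the wiring (a sketch, assuming `mmengine` is installed and the repo root is the working directory; the printed types are expectations based on the shared base model file):
+
+```python
+from mmengine.config import Config
+
+cfg = Config.fromfile(
+    'configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py')
+print(cfg.model.neck.type)         # expected: 'JPU' (mmseg/models/necks/jpu.py)
+print(cfg.model.decode_head.type)  # expected: 'PSPHead' in this base variant
+```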
+
+## Results and models
+
+### Cityscapes
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ------------------- | -------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | ------ | -------- |
+| FastFCN + DeepLabV3 | R-50-D32 | 512x1024 | 80000 | 5.67 | 2.64 | V100 | 79.12 | 80.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722.log.json) |
+| FastFCN + DeepLabV3 | R-50-D32 (4x4) | 512x1024 | 80000 | 9.79 | - | V100 | 79.52 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357.log.json) |
+| FastFCN + PSPNet | R-50-D32 | 512x1024 | 80000 | 5.67 | 4.40 | V100 | 79.26 | 80.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722.log.json) |
+| FastFCN + PSPNet | R-50-D32 (4x4) | 512x1024 | 80000 | 9.94 | - | V100 | 78.76 | 80.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841.log.json) |
+| FastFCN + EncNet | R-50-D32 | 512x1024 | 80000 | 8.15 | 4.77 | V100 | 77.97 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036.log.json) |
+| FastFCN + EncNet | R-50-D32 (4x4) | 512x1024 | 80000 | 15.45 | - | V100 | 78.6 | 80.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217.log.json) |
+
+### ADE20K
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ------------------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | ------ | -------- |
+| FastFCN + DeepLabV3 | R-50-D32 | 512x512 | 80000 | 8.46 | 12.06 | V100 | 41.88 | 42.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619-3aa40f2d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619.log.json) |
+| FastFCN + DeepLabV3 | R-50-D32 | 512x512 | 160000 | - | - | V100 | 43.58 | 44.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246-27036aee.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246.log.json) |
+| FastFCN + PSPNet | R-50-D32 | 512x512 | 80000 | 8.02 | 19.21 | V100 | 41.40 | 42.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137-993d07c8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137.log.json) |
+| FastFCN + PSPNet | R-50-D32 | 512x512 | 160000 | - | - | V100 | 42.63 | 43.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455-e8f5a2fd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455.log.json) |
+| FastFCN + EncNet | R-50-D32 | 512x512 | 80000 | 9.67 | 17.23 | V100 | 40.88 | 42.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214-65aef6dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214.log.json) |
+| FastFCN + EncNet | R-50-D32 | 512x512 | 160000 | - | - | V100 | 42.50 | 44.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456-d875ce3c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456.log.json) |
+
+Note:
+
+- `4x4` means 4 GPUs with 4 samples per GPU during training; the default setting is 4 GPUs with 2 samples per GPU.
+- Results of the plain [DeepLabV3 (mIoU: 79.32)](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3), [PSPNet (mIoU: 78.55)](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet) and [EncNet (mIoU: 77.94)](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet) baselines can be found in the linked config directories.
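+
+The ASPP and EncNet variants are expressed as overrides of the PSPNet config: they set `_delete_=True` inside `decode_head`, which tells the mmengine config loader to discard the inherited head outright instead of merging keys into it. A minimal check of that behavior (a sketch, assuming `mmengine` is installed):
+
+```python
+from mmengine.config import Config
+
+base = Config.fromfile(
+    'configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py')
+aspp = Config.fromfile(
+    'configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py')
+
+print(base.model.decode_head.type)  # 'PSPHead', from the _base_ model file
+print(aspp.model.decode_head.type)  # 'ASPPHead': _delete_=True dropped the inherited dict
+# Without _delete_=True the two dicts would be merged, and PSPHead-only keys
+# such as pool_scales would leak into the ASPPHead config.
+```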
+ +## Citation + +```bibtex +@article{wu2019fastfcn, +title={Fastfcn: Rethinking dilated convolution in the backbone for semantic segmentation}, +author={Wu, Huikai and Zhang, Junge and Huang, Kaiqi and Liang, Kongming and Yu, Yizhou}, +journal={arXiv preprint arXiv:1903.11816}, +year={2019} +} +``` diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..39e6e236b7cabd6baa72199359e5c45517c0962b --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,20 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='ASPPHead', + in_channels=2048, + in_index=2, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512.py b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1913544cfbdd640f6e32ef0b50b40b6bf2357c20 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,20 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='ASPPHead', + in_channels=2048, + in_index=2, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512.py b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..751689599dcd90c7c9a6e3ef8b2c028a9f1d9875 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,20 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='ASPPHead', + in_channels=2048, + in_index=2, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_cityscapes-512x1024.py b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..a8c5dc323205dbd2b25855835573eae7aca94187 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_cityscapes-512x1024.py @@ -0,0 +1,5 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py' +train_dataloader = 
dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..4840dd02878d2ae90b6aae6b75ef4f0b33523a96 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,24 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(0, 1, 2), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512.py b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..619d0862f17e20c5b3ce308a47e39049fd908290 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,24 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(0, 1, 2), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512.py b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a76b026b6a2574f1fd0078edfb25be3f62a68809 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,24 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(0, 1, 2), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_cityscapes-512x1024.py b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6df1527272b51a013b2f11875e33a863470fbffb --- /dev/null 
+++ b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_cityscapes-512x1024.py @@ -0,0 +1,5 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py' +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..dc5c54d5537df83ecd2bc64753e05d2660830e43 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..887ace1d8786b20065cb74a3e42b49e89662db5b --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..3981e20a470de753d20a74ab9f220f2a5ebeb221 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_cityscapes-512x1024.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..2c7d5041606bd1305bd897b10c8bcdb026b2f19e --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_cityscapes-512x1024.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/fastfcn/metafile.yaml b/configs/fastfcn/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f5fe03ca45a656cb86ffeaf53b6e1da34fc251ec --- /dev/null +++ b/configs/fastfcn/metafile.yaml @@ -0,0 +1,311 @@ +Collections: +- Name: FastFCN + 
License: Apache License 2.0
+  Metadata:
+    Training Data:
+    - Cityscapes
+    - ADE20K
+  Paper:
+    Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation'
+    URL: https://arxiv.org/abs/1903.11816
+  README: configs/fastfcn/README.md
+  Frameworks:
+  - PyTorch
+Models:
+- Name: fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024
+  In Collection: FastFCN
+  Results:
+    Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 79.12
+      mIoU(ms+flip): 80.58
+  Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py
+  Metadata:
+    Training Data: Cityscapes
+    Batch Size: 8
+    Architecture:
+    - R-50-D32
+    - FastFCN
+    - DeepLabV3
+    Training Resources: 4x V100 GPUS
+    Memory (GB): 5.67
+    Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth
+    Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722.log.json
+  Paper:
+    Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation'
+    URL: https://arxiv.org/abs/1903.11816
+  Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12
+  Framework: PyTorch
+- Name: fastfcn_r50-d32_jpu_aspp_4xb4-80k_cityscapes-512x1024
+  In Collection: FastFCN
+  Results:
+    Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 79.52
+      mIoU(ms+flip): 80.91
+  Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_cityscapes-512x1024.py
+  Metadata:
+    Training Data: Cityscapes
+    Batch Size: 16
+    Architecture:
+    - R-50-D32
+    - FastFCN
+    - DeepLabV3
+    Training Resources: 4x V100 GPUS
+    Memory (GB): 9.79
+    Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth
+    Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357.log.json
+  Paper:
+    Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation'
+    URL: https://arxiv.org/abs/1903.11816
+  Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12
+  Framework: PyTorch
+- Name: fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024
+  In Collection: FastFCN
+  Results:
+    Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 79.26
+      mIoU(ms+flip): 80.86
+  Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py
+  Metadata:
+    Training Data: Cityscapes
+    Batch Size: 8
+    Architecture:
+    - R-50-D32
+    - FastFCN
+    - PSPNet
+    Training Resources: 4x V100 GPUS
+    Memory (GB): 5.67
+    Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth
+    Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722.log.json
+  Paper:
+    Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation'
+    URL: https://arxiv.org/abs/1903.11816
+  Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12
+  Framework: PyTorch
+- Name: fastfcn_r50-d32_jpu_psp_4xb4-80k_cityscapes-512x1024
+  In Collection: FastFCN
+  Results:
+    Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 78.76
+      mIoU(ms+flip): 80.03
+  Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_cityscapes-512x1024.py
+  Metadata:
+    Training Data: Cityscapes
+    Batch Size: 16
+    Architecture:
+    - R-50-D32
+    - FastFCN
+    - PSPNet
+    Training Resources: 4x V100 GPUS
+    Memory (GB): 9.94
+    Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth
+    Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841.log.json
+  Paper:
+    Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation'
+    URL: https://arxiv.org/abs/1903.11816
+  Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12
+  Framework: PyTorch
+- Name: fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024
+  In Collection: FastFCN
+  Results:
+    Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 77.97
+      mIoU(ms+flip): 79.92
+  Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py
+  Metadata:
+    Training Data: Cityscapes
+    Batch Size: 8
+    Architecture:
+    - R-50-D32
+    - FastFCN
+    - EncNet
+    Training Resources: 4x V100 GPUS
+    Memory (GB): 8.15
+    Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth
+    Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036.log.json
+  Paper:
+    Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation'
+    URL: https://arxiv.org/abs/1903.11816
+  Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12
+  Framework: PyTorch
+- Name: fastfcn_r50-d32_jpu_enc_4xb4-80k_cityscapes-512x1024
+  In Collection: FastFCN
+  Results:
+    Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 78.6
+      mIoU(ms+flip): 80.25
+  Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_cityscapes-512x1024.py
+  Metadata:
+    Training Data: Cityscapes
+    Batch Size: 16
+    Architecture:
+    - R-50-D32
+    - FastFCN
+    - EncNet
+    Training Resources: 4x V100 GPUS
+    Memory (GB): 15.45
+    Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth
+    Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217.log.json
+  Paper:
+    Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation'
+    URL: https://arxiv.org/abs/1903.11816
+  Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12
+  Framework: PyTorch
+- Name: fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512
+  In Collection: FastFCN
+  Results:
+    Task: Semantic Segmentation
+    Dataset:
ADE20K + Metrics: + mIoU: 41.88 + mIoU(ms+flip): 42.91 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 8.46 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619-3aa40f2d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.58 + mIoU(ms+flip): 44.92 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246-27036aee.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.4 + mIoU(ms+flip): 42.12 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.02 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137-993d07c8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.63 + mIoU(ms+flip): 43.71 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - PSPNet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455-e8f5a2fd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.88 + mIoU(ms+flip): 42.36 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.67 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214-65aef6dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.5 + mIoU(ms+flip): 44.21 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456-d875ce3c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch diff --git a/configs/fastscnn/README.md b/configs/fastscnn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6be981462ab1381c03a479eab5986566145c3b98 --- /dev/null +++ b/configs/fastscnn/README.md @@ -0,0 +1,42 @@ +# Fast-SCNN + +> [Fast-SCNN for Semantic Segmentation](https://arxiv.org/abs/1902.04502) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The encoder-decoder framework is state-of-the-art for offline semantic image segmentation. Since the rise in autonomous systems, real-time computation is increasingly desirable. 
In this paper, we introduce fast segmentation convolutional neural network (Fast-SCNN), an above real-time semantic segmentation model on high resolution image data (1024x2048px) suited to efficient computation on embedded devices with low memory. Building on existing two-branch methods for fast segmentation, we introduce our 'learning to downsample' module which computes low-level features for multiple resolution branches simultaneously. Our network combines spatial detail at high resolution with deep features extracted at lower resolution, yielding an accuracy of 68.0% mean intersection over union at 123.5 frames per second on Cityscapes. We also show that large scale pre-training is unnecessary. We thoroughly validate our metric in experiments with ImageNet pre-training and the coarse labeled data of Cityscapes. Finally, we show even faster computation with competitive results on subsampled inputs, without any network modifications.
+
+
+
+ +
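+
+The table in the next section reports single-GPU inference speed (fps). A rough sketch of one way to measure such a number for this checkpoint, assuming MMSegmentation 1.x on a CUDA machine (this is not the benchmarking script behind the official figures, which exclude data preprocessing):
+
+```python
+import time
+
+import numpy as np
+import torch
+from mmseg.apis import inference_model, init_model
+
+model = init_model('configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py',
+                   'fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853-0cec9937.pth',
+                   device='cuda:0')
+frame = np.random.randint(0, 255, (1024, 2048, 3), dtype=np.uint8)  # Cityscapes-sized input
+for _ in range(10):  # warm-up iterations
+    inference_model(model, frame)
+torch.cuda.synchronize()
+tic = time.time()
+for _ in range(50):
+    inference_model(model, frame)
+torch.cuda.synchronize()
+print(f'{50 / (time.time() - tic):.2f} fps')
+```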
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| -------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FastSCNN | FastSCNN | 512x1024 | 160000 | 3.3 | 56.45 | V100 | 70.96 | 72.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853-0cec9937.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853.log.json) | + +## Citation + +```bibtex +@article{poudel2019fast, + title={Fast-scnn: Fast semantic segmentation network}, + author={Poudel, Rudra PK and Liwicki, Stephan and Cipolla, Roberto}, + journal={arXiv preprint arXiv:1902.04502}, + year={2019} +} +``` diff --git a/configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py b/configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..e7f68bfe73e9a2db67af2c8af048fdf58d4c8e58 --- /dev/null +++ b/configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py @@ -0,0 +1,15 @@ +_base_ = [ + '../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +# Re-config the data sampler. +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader + +# Re-config the optimizer. 
+optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/fastscnn/metafile.yaml b/configs/fastscnn/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e33c902db1ee8291dc13af0772ccbc1689bec10 --- /dev/null +++ b/configs/fastscnn/metafile.yaml @@ -0,0 +1,37 @@ +Collections: +- Name: FastSCNN + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: Fast-SCNN for Semantic Segmentation + URL: https://arxiv.org/abs/1902.04502 + README: configs/fastscnn/README.md + Frameworks: + - PyTorch +Models: +- Name: fast_scnn_8xb4-160k_cityscapes-512x1024 + In Collection: FastSCNN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.96 + mIoU(ms+flip): 72.65 + Config: configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 32 + Architecture: + - FastSCNN + - FastSCNN + Training Resources: 8x V100 GPUS + Memory (GB): 3.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853-0cec9937.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853.log.json + Paper: + Title: Fast-SCNN for Semantic Segmentation + URL: https://arxiv.org/abs/1902.04502 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/fast_scnn.py#L272 + Framework: PyTorch diff --git a/configs/fcn/README.md b/configs/fcn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cf7379ff3db5260d5a0d8260ab25bfdbe9aca001 --- /dev/null +++ b/configs/fcn/README.md @@ -0,0 +1,111 @@ +# FCN + +> [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Convolutional networks are powerful visual models that yield hierarchies of features. We show that convolutional networks by themselves, trained end-to-end, pixels-to-pixels, exceed the state-of-the-art in semantic segmentation. Our key insight is to build "fully convolutional" networks that take input of arbitrary size and produce correspondingly-sized output with efficient inference and learning. We define and detail the space of fully convolutional networks, explain their application to spatially dense prediction tasks, and draw connections to prior models. We adapt contemporary classification networks (AlexNet, the VGG net, and GoogLeNet) into fully convolutional networks and transfer their learned representations by fine-tuning to the segmentation task. We then define a novel architecture that combines semantic information from a deep, coarse layer with appearance information from a shallow, fine layer to produce accurate and detailed segmentations. Our fully convolutional network achieves state-of-the-art segmentation of PASCAL VOC (20% relative improvement to 62.2% mean IU on 2012), NYUDv2, and SIFT Flow, while inference takes one third of a second for a typical image. + + + +
+ +
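+
+One row in the Cityscapes table below, FCN (FP16), is trained with automatic mixed precision via the `-amp` config. In MMSegmentation 1.x this typically amounts to swapping the optimizer wrapper; a sketch of the relevant override, in the style of (but not verified against) `fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py`, assuming the standard SGD schedule of the non-AMP config:
+
+```python
+_base_ = './fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py'
+# AmpOptimWrapper runs the forward/backward pass under torch.cuda.amp autocast
+# with static loss scaling; everything else is inherited unchanged.
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005),
+    loss_scale=512.)
+```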
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | ---------- | --------- | ------: | -------- | -------------- | -------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x1024 | 40000 | 5.7 | 4.17 | V100 | 72.25 | 73.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json) | +| FCN | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.66 | V100 | 75.45 | 76.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json) | +| FCN | R-50-D8 | 769x769 | 40000 | 6.5 | 1.80 | V100 | 71.47 | 72.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json) | +| FCN | R-101-D8 | 769x769 | 40000 | 10.4 | 1.19 | V100 | 73.93 | 75.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json) | +| FCN | R-18-D8 | 512x1024 | 80000 | 1.7 | 14.65 | V100 | 71.11 | 72.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes-20201225_021327.log.json) | +| FCN | R-50-D8 | 512x1024 | 80000 | - | | V100 | 73.61 | 74.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py) 
| [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json) | +| FCN | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 75.13 | 75.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json) | +| FCN (FP16) | R-101-D8 | 512x1024 | 80000 | 5.37 | 8.64 | V100 | 76.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921-fb13e883.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921.log.json) | +| FCN | R-18-D8 | 769x769 | 80000 | 1.9 | 6.40 | V100 | 70.80 | 73.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes-20201225_021451.log.json) | +| FCN | R-50-D8 | 769x769 | 80000 | - | - | V100 | 72.64 | 73.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json) | +| FCN | R-101-D8 | 769x769 | 80000 | - | - | V100 | 75.52 | 76.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json) | +| FCN | R-18b-D8 | 512x1024 | 80000 | 1.6 | 16.74 | V100 | 70.24 | 72.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes-20201225_230143.log.json) | +| FCN | R-50b-D8 | 512x1024 | 80000 | 5.6 | 4.20 | V100 | 75.65 | 77.59 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes-20201225_094221.log.json) | +| FCN | R-101b-D8 | 512x1024 | 80000 | 9.1 | 2.73 | V100 | 77.37 | 78.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes-20201226_160213.log.json) | +| FCN | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.70 | V100 | 69.66 | 72.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes-20201226_004430.log.json) | +| FCN | R-50b-D8 | 769x769 | 80000 | 6.3 | 1.82 | V100 | 73.83 | 76.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes-20201225_094223.log.json) | +| FCN | R-101b-D8 | 769x769 | 80000 | 10.3 | 1.15 | V100 | 77.02 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes-20201226_170012.log.json) | +| FCN (D6) | R-50-D16 | 512x1024 | 40000 | 3.4 | 10.22 | TITAN Xp | 77.06 | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes_20210305_130133-98d5d1bc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes-20210305_130133.log.json) | +| FCN (D6) | R-50-D16 | 512x1024 | 80000 | - | 10.35 | TITAN Xp | 77.27 | 78.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes_20210306_115604-133c292f.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes-20210306_115604.log.json) | +| FCN (D6) | R-50-D16 | 769x769 | 40000 | 3.7 | 4.17 | TITAN Xp | 76.82 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes_20210305_185744-1aab18ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes-20210305_185744.log.json) | +| FCN (D6) | R-50-D16 | 769x769 | 80000 | - | 4.15 | TITAN Xp | 77.04 | 78.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes_20210305_200413-109d88eb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes-20210305_200413.log.json) | +| FCN (D6) | R-101-D16 | 512x1024 | 40000 | 4.5 | 8.04 | TITAN Xp | 77.36 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes_20210305_130337-9cf2b450.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes-20210305_130337.log.json) | +| FCN (D6) | R-101-D16 | 512x1024 | 80000 | - | 8.26 | TITAN Xp | 78.46 | 80.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes_20210308_102747-cb336445.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes-20210308_102747.log.json) | +| FCN (D6) | R-101-D16 | 769x769 | 40000 | 5.0 | 3.12 | TITAN Xp | 77.28 | 78.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes_20210308_102453-60b114e9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes-20210308_102453.log.json) | +| FCN (D6) | R-101-D16 | 769x769 | 80000 | - | 3.21 | TITAN Xp | 78.06 | 79.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes_20210306_120016-e33adc4f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes-20210306_120016.log.json) | +| FCN (D6) | R-50b-D16 | 512x1024 | 80000 | 3.2 | 10.16 | TITAN Xp | 76.99 | 79.03 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes/fcn_d6_r50b-d16_512x1024_80k_cityscapes_20210311_125550-6a0b62e9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_512x1024_80k_cityscapes/fcn_d6_r50b_d16_512x1024_80k_cityscapes-20210311_125550.log.json) |
+| FCN (D6) | R-50b-D16 | 769x769 | 80000 | 3.6 | 4.17 | TITAN Xp | 76.86 | 78.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes/fcn_d6_r50b-d16_769x769_80k_cityscapes_20210311_131012-d665f231.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_769x769_80k_cityscapes/fcn_d6_r50b_d16_769x769_80k_cityscapes-20210311_131012.log.json) |
+| FCN (D6) | R-101b-D16 | 512x1024 | 80000 | 4.3 | 8.46 | TITAN Xp | 77.72 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes/fcn_d6_r101b-d16_512x1024_80k_cityscapes_20210311_144305-3f2eb5b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_512x1024_80k_cityscapes/fcn_d6_r101b_d16_512x1024_80k_cityscapes-20210311_144305.log.json) |
+| FCN (D6) | R-101b-D16 | 769x769 | 80000 | 4.8 | 3.32 | TITAN Xp | 77.34 | 78.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes/fcn_d6_r101b-d16_769x769_80k_cityscapes_20210311_154527-c4d8bfbc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_769x769_80k_cityscapes/fcn_d6_r101b_d16_769x769_80k_cityscapes-20210311_154527.log.json) |
+
+### ADE20K
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| FCN | R-50-D8 | 512x512 | 80000 | 8.5 | 23.49 | V100 | 35.94 | 37.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json) |
+| FCN | R-101-D8 | 512x512 | 80000 | 12 | 14.78 | V100 | 39.61 | 40.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-80k_ade20k-512x512.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json) | +| FCN | R-50-D8 | 512x512 | 160000 | - | - | V100 | 36.10 | 38.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json) | +| FCN | R-101-D8 | 512x512 | 160000 | - | - | V100 | 39.91 | 41.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x512 | 20000 | 5.7 | 23.28 | V100 | 67.08 | 69.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json) | +| FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | V100 | 71.16 | 73.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) | +| FCN | R-50-D8 | 512x512 | 40000 | - | - | V100 | 66.97 | 69.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| FCN | R-101-D8 | 512x512 | 
40000 | - | - | V100 | 69.91 | 72.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-101-D8 | 480x480 | 40000 | - | 9.93 | V100 | 44.43 | 45.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20210421_154757-b5e97937.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20210421_154757.log.json) | +| FCN | R-101-D8 | 480x480 | 80000 | - | - | V100 | 44.13 | 45.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20210421_163310-4711813f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20210421_163310.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-101-D8 | 480x480 | 40000 | - | - | V100 | 48.42 | 50.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59_20210415_230724-8cf83682.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59-20210415_230724.log.json) |
+| FCN | R-101-D8 | 480x480 | 80000 | - | - | V100 | 49.35 | 51.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59_20210416_110804-9a6f2c94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59-20210416_110804.log.json) |
+
+Note:
+
+- `FP16` means the model is trained with mixed-precision (FP16).
+- `FCN (D6)` means the dilation rate of the convolution operators in the FCN head is set to 6.
+
+## Citation
+
+```bibtex
+@article{shelhamer2017fully,
+  title={Fully convolutional networks for semantic segmentation},
+  author={Shelhamer, Evan and Long, Jonathan and Darrell, Trevor},
+  journal={IEEE transactions on pattern analysis and machine intelligence},
+  volume={39},
+  number={4},
+  pages={640--651},
+  year={2017},
+  publisher={IEEE}
+}
+```
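+## Usage example
+
+Each row in the tables above pairs a training config (`config` column) with a trained checkpoint (`download` column). The snippet below is a minimal sketch of single-image inference with the first Cityscapes entry; it assumes mmsegmentation 1.x and its dependencies are installed, and `demo.png` is a placeholder for any local image:
+
+```python
+# Minimal inference sketch (assumes mmsegmentation 1.x). The config path
+# and checkpoint URL are copied from the first Cityscapes row above;
+# `demo.png` is a placeholder image path.
+from mmseg.apis import inference_model, init_model
+
+config = 'configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py'
+checkpoint = ('https://download.openmmlab.com/mmsegmentation/v0.5/fcn/'
+              'fcn_r50-d8_512x1024_40k_cityscapes/'
+              'fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth')
+
+# init_model builds the model from the config and loads the checkpoint
+# (mmengine fetches it automatically when given a URL).
+model = init_model(config, checkpoint, device='cuda:0')
+
+result = inference_model(model, 'demo.png')
+# result.pred_sem_seg.data is a (1, H, W) tensor of per-pixel class ids.
+print(result.pred_sem_seg.data.shape)
+```
diff --git a/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024.py b/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f2cd02b00409ce40ce24821cd98eebb89799cea
--- /dev/null
+++ b/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024.py
@@ -0,0 +1,2 @@
+_base_ = './fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769.py b/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769.py
new file mode 100644
index 0000000000000000000000000000000000000000..4782b30377121ac6159eb1b89e74957e483fc68a
--- /dev/null
+++ b/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769.py
@@ -0,0 +1,2 @@
+_base_ = './fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f654b4bbd8b7294a10e89b2af783a745cfe2a1d
--- /dev/null
+++ b/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024.py
@@ -0,0 +1,2 @@
+_base_ = './fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769.py
new file mode 100644
index 0000000000000000000000000000000000000000..91eca1c52ec24ab16ac324040314f5bca38a00c9
--- /dev/null
+++ b/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769.py
@@ -0,0 +1,2 @@
+_base_ = './fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024.py
new file mode 100644
index 0000000000000000000000000000000000000000..62e61277991478939624160310baa6bcda0a505d
--- /dev/null
+++ b/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024.py
@@ -0,0 +1,4 @@
+_base_ =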
'./fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..1b8d24799e69d2c1b404e50bbb8a5cf63c912a4f --- /dev/null +++ b/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,4 @@ +_base_ = './fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py b/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..9a1efb41d530f0d254b711b90ee6a0a5bc755637 --- /dev/null +++ b/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(dilation=6), + auxiliary_head=dict(dilation=6)) diff --git a/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py b/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..2b2a6f4537f3d7496fa8843796646c6271bcff7a --- /dev/null +++ b/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,13 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(align_corners=True, dilation=6), + auxiliary_head=dict(align_corners=True, dilation=6), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..e6cca006f3e66a64a09a021d8fa4466c0ba95cf0 --- /dev/null +++ b/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(dilation=6), + auxiliary_head=dict(dilation=6)) diff --git a/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..990ff9c58e8cb2064ff7ba2356f12203e3c9e6cd --- /dev/null +++ b/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,13 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) 
+data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(align_corners=True, dilation=6), + auxiliary_head=dict(align_corners=True, dilation=6), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..7d470a50be7e7b1b1bab8b83534defd7e605cdad --- /dev/null +++ b/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..e9093ea2dccbf5c248a3ab8fc59d190765e1225f --- /dev/null +++ b/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..b3ec0a742c33a56ae307244fcfc6fa9424cba75b --- /dev/null +++ b/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..1f83fe20788770292804c5a373e14156f5f24a20 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..4527b3b8a0ced0a32e1cb80a592dc1fc17587fda --- /dev/null +++ b/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..6ce112484dad13435d50215a5c4474c77ad4e5dc --- /dev/null +++ b/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py b/configs/fcn/fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..b4d94878c87771a6dfc2f18bc5d1c20e36df9f7d --- /dev/null +++ b/configs/fcn/fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py @@ -0,0 +1,6 @@ +_base_ = 
'./fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py'
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005),
+    loss_scale=512.)
diff --git a/configs/fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1f5c5c78510ea64028d00a5ffca3bb2b26d9f73
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_4xb4-160k_ade20k-512x512.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/fcn/fcn_r101-d8_4xb4-20k_voc12aug-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..61ee96f94ebe03a1aaf99d6e20358e682ee07a65
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_4xb4-20k_voc12aug-512x512.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_4xb4-20k_voc12aug-512x512.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-480x480.py b/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-480x480.py
new file mode 100644
index 0000000000000000000000000000000000000000..1161193adb4fdadbee0af56bfa1e1c06f1664082
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-480x480.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_4xb4-40k_pascal-context-480x480.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-59-480x480.py b/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-59-480x480.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3a6dbc9ab5bb07856abbfa3ecb1e7a815b2380c
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-59-480x480.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_4xb4-40k_pascal-context-59-480x480.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/fcn/fcn_r101-d8_4xb4-40k_voc12aug-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..b68b6e04071bfb7ce6a6773910ae34b8e28b99d6
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_4xb4-40k_voc12aug-512x512.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_4xb4-40k_voc12aug-512x512.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/fcn/fcn_r101-d8_4xb4-80k_ade20k-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..3facce30dc7e8230d3a22b0afff858ddb59432b5
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_4xb4-80k_ade20k-512x512.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_4xb4-80k_ade20k-512x512.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-480x480.py b/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-480x480.py
new file mode 100644
index 0000000000000000000000000000000000000000..1161193adb4fdadbee0af56bfa1e1c06f1664082
--- /dev/null
+++ b/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-480x480.py
@@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_4xb4-80k_pascal-context-480x480.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-59-480x480.py
b/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..cebe33082a12abe502f87b662951ab3a34c8f46f --- /dev/null +++ b/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_4xb4-80k_pascal-context-59-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..e53751b14427370001d706c57c360624f0ec86fd --- /dev/null +++ b/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,4 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..daa65026109ff465ed04cf2e2c093fb2ef5b9a62 --- /dev/null +++ b/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,4 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..4073148122e4a514cee971b36ace573245010266 --- /dev/null +++ b/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..2c1d2b6df01a38d13d5c27ffbede8c60bb47e671 --- /dev/null +++ b/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..08ab467573ac6894eedf66dd171d5426d98a3ea3 --- /dev/null +++ b/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..c591ebe9722d50cc019377a4d9aba7f8050eb314 --- /dev/null +++ b/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + 
pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..4fba72333def61347a773b9292e40c64cf6ccd20 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..d57afe1c224940398f50dc892a1273dc92e2e487 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6b1fdae80949add32835c3fcefb7da94e96437cd --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..8a713fd30937680199e23ffa8a0a83ed29275c48 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/fcn/fcn_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/fcn/fcn_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..258b9fb579fe531b120004cf5d06db767ba8dd11 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + 
decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/fcn/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/fcn/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..eac86d5389fdeeea02b078212feb031d721605eb --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/fcn/fcn_r50-d8_4xb4-40k_pascal-context-480x480.py b/configs/fcn/fcn_r50-d8_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..d99cb0dc36462461785adf50378a7083853e817a --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,13 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/fcn/fcn_r50-d8_4xb4-40k_pascal-context-59-480x480.py b/configs/fcn/fcn_r50-d8_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..64c94105213a769594a6967d560868f466fb4bea --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/fcn/fcn_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/fcn/fcn_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..42edb46e94b1a4786c7e6fa0a6928b86871c1b34 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..099f6affa56e89387034c02cc2331fe93db10ade --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py @@ 
-0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/fcn/fcn_r50-d8_4xb4-80k_pascal-context-480x480.py b/configs/fcn/fcn_r50-d8_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..1eeafb8a539b368328734c94173d465353b1ff92 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,13 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/fcn/fcn_r50-d8_4xb4-80k_pascal-context-59-480x480.py b/configs/fcn/fcn_r50-d8_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..c11a9bbb6de7ddbda781e3fb04f40348a1157097 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..44821fd7d38dcb0e68df4c196f55d2a78fd8ae1c --- /dev/null +++ b/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-769x769.py b/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..a85b39197edc35cead28bcd129c50dec0871714c --- /dev/null +++ b/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/fcn/metafile.yaml b/configs/fcn/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3d80f652ee49b3f9814c1354a5051fc33ecd249 --- /dev/null +++ b/configs/fcn/metafile.yaml @@ -0,0 +1,997 @@ +Collections: +- Name: FCN + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + Paper: + 
Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + README: configs/fcn/README.md + Frameworks: + - PyTorch +Models: +- Name: fcn_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.25 + mIoU(ms+flip): 73.36 + Config: configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 5.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.45 + mIoU(ms+flip): 76.58 + Config: configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.47 + mIoU(ms+flip): 72.54 + Config: configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.93 + mIoU(ms+flip): 75.14 + Config: configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 10.4 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r18-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.11 + mIoU(ms+flip): 72.91 + Config: configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes-20201225_021327.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.61 + mIoU(ms+flip): 74.24 + Config: configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.13 + mIoU(ms+flip): 75.94 + Config: configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: 
Cityscapes + Metrics: + mIoU: 76.8 + Config: configs/fcn/fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + - (FP16) + Training Resources: 4x V100 GPUS + Memory (GB): 5.37 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921-fb13e883.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r18-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.8 + mIoU(ms+flip): 73.16 + Config: configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes-20201225_021451.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.64 + mIoU(ms+flip): 73.32 + Config: configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.52 + mIoU(ms+flip): 76.61 + Config: configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: 
https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r18b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.24 + mIoU(ms+flip): 72.77 + Config: configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes-20201225_230143.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.65 + mIoU(ms+flip): 77.59 + Config: configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 5.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes-20201225_094221.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.37 + mIoU(ms+flip): 78.77 + Config: configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 9.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes-20201226_160213.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r18b-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 69.66 + mIoU(ms+flip): 72.07 + Config: configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes-20201226_004430.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50b-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.83 + mIoU(ms+flip): 76.6 + Config: configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes-20201225_094223.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101b-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.02 + mIoU(ms+flip): 78.67 + Config: configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 10.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes-20201226_170012.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.06 + mIoU(ms+flip): 78.85 + Config: configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 3.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes_20210305_130133-98d5d1bc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes-20210305_130133.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024 + 
In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.27 + mIoU(ms+flip): 78.88 + Config: configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes_20210306_115604-133c292f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes-20210306_115604.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.82 + mIoU(ms+flip): 78.22 + Config: configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 3.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes_20210305_185744-1aab18ed.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes-20210305_185744.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.04 + mIoU(ms+flip): 78.4 + Config: configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes_20210305_200413-109d88eb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes-20210305_200413.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.36 + mIoU(ms+flip): 79.18 + Config: configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 4.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes_20210305_130337-9cf2b450.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes-20210305_130337.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.46 + mIoU(ms+flip): 80.42 + Config: configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes_20210308_102747-cb336445.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes-20210308_102747.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.28 + mIoU(ms+flip): 78.95 + Config: configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 5.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes_20210308_102453-60b114e9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes-20210308_102453.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.06 + mIoU(ms+flip): 79.58 + Config: configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes_20210306_120016-e33adc4f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes-20210306_120016.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.99 + 
mIoU(ms+flip): 79.03 + Config: configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 3.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes/fcn_d6_r50b-d16_512x1024_80k_cityscapes_20210311_125550-6a0b62e9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_512x1024_80k_cityscapes/fcn_d6_r50b_d16_512x1024_80k_cityscapes-20210311_125550.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.86 + mIoU(ms+flip): 78.52 + Config: configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 3.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes/fcn_d6_r50b-d16_769x769_80k_cityscapes_20210311_131012-d665f231.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_769x769_80k_cityscapes/fcn_d6_r50b_d16_769x769_80k_cityscapes-20210311_131012.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.72 + mIoU(ms+flip): 79.53 + Config: configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 4.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes/fcn_d6_r101b-d16_512x1024_80k_cityscapes_20210311_144305-3f2eb5b4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_512x1024_80k_cityscapes/fcn_d6_r101b_d16_512x1024_80k_cityscapes-20210311_144305.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.34 + mIoU(ms+flip): 78.91 + Config: configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 4.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes/fcn_d6_r101b-d16_769x769_80k_cityscapes_20210311_154527-c4d8bfbc.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_769x769_80k_cityscapes/fcn_d6_r101b_d16_769x769_80k_cityscapes-20210311_154527.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 35.94 + mIoU(ms+flip): 37.94 + Config: configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 8.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.61 + mIoU(ms+flip): 40.83 + Config: configs/fcn/fcn_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 12.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.1 + mIoU(ms+flip): 38.08 + Config: configs/fcn/fcn_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.91 + mIoU(ms+flip): 41.4 + Config: configs/fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 67.08 + mIoU(ms+flip): 69.94 + Config: configs/fcn/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 5.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 71.16 + mIoU(ms+flip): 73.57 + Config: configs/fcn/fcn_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 66.97 + mIoU(ms+flip): 69.04 + Config: configs/fcn/fcn_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + 
Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 69.91 + mIoU(ms+flip): 72.38 + Config: configs/fcn/fcn_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-40k_pascal-context-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 44.43 + mIoU(ms+flip): 45.63 + Config: configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20210421_154757-b5e97937.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20210421_154757.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-80k_pascal-context-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 44.13 + mIoU(ms+flip): 45.26 + Config: configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20210421_163310-4711813f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20210421_163310.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-40k_pascal-context-59-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 48.42 + mIoU(ms+flip): 50.4 + Config: configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59_20210415_230724-8cf83682.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59-20210415_230724.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-80k_pascal-context-59-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 49.35 + mIoU(ms+flip): 51.38 + Config: configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59_20210416_110804-9a6f2c94.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59-20210416_110804.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch diff --git a/configs/gcnet/README.md b/configs/gcnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ba1a21e8512e04339b6ac2fbfd45a9291a2b4837 --- /dev/null +++ b/configs/gcnet/README.md @@ -0,0 +1,68 @@ +# GCNet + +> [GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The Non-Local Network (NLNet) presents a pioneering approach for capturing long-range dependencies, via aggregating query-specific global context to each query position. However, through a rigorous empirical analysis, we have found that the global contexts modeled by non-local network are almost the same for different query positions within an image. In this paper, we take advantage of this finding to create a simplified network based on a query-independent formulation, which maintains the accuracy of NLNet but with significantly less computation. We further observe that this simplified design shares similar structure with Squeeze-Excitation Network (SENet). Hence we unify them into a three-step general framework for global context modeling. Within the general framework, we design a better instantiation, called the global context (GC) block, which is lightweight and can effectively model the global context. The lightweight property allows us to apply it for multiple layers in a backbone network to construct a global context network (GCNet), which generally outperforms both simplified NLNet and SENet on major benchmarks for various recognition tasks. The code and configurations are released at [this https URL](https://github.com/xvjiarui/GCNet). + + + +
+ +
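+A minimal PyTorch sketch of the GC block described above may help make the three-step framework concrete: (1) context modeling via a single softmax attention map shared by every query position, (2) a squeeze-excitation-style bottleneck transform, and (3) fusion by broadcast addition. The class name, bottleneck ratio, and layer choices below are illustrative assumptions for exposition, not the exact implementation used here (mmseg's GC head builds on `ContextBlock` from mmcv).
+
+```python
+import torch
+import torch.nn as nn
+
+
+class GCBlock(nn.Module):
+    """Sketch of a global context (GC) block: NL-style context, SE-style transform."""
+
+    def __init__(self, channels: int, ratio: float = 0.25):
+        super().__init__()
+        hidden = int(channels * ratio)
+        # (1) context modeling: 1x1 conv producing one spatial attention map
+        self.attn = nn.Conv2d(channels, 1, kernel_size=1)
+        # (2) transform: bottleneck with LayerNorm, per the paper's best instantiation
+        self.transform = nn.Sequential(
+            nn.Conv2d(channels, hidden, kernel_size=1),
+            nn.LayerNorm([hidden, 1, 1]),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(hidden, channels, kernel_size=1),
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        n, c, h, w = x.shape
+        # Query-independent pooling: one softmax attention map over all HW positions
+        weights = self.attn(x).view(n, 1, h * w).softmax(dim=-1)            # [N, 1, HW]
+        context = torch.bmm(x.view(n, c, h * w), weights.transpose(1, 2))   # [N, C, 1]
+        context = context.view(n, c, 1, 1)
+        # (3) fusion: broadcast the transformed context back to every position
+        return x + self.transform(context)
+
+
+# Quick shape check on a random feature map
+feat = torch.randn(2, 64, 32, 32)
+assert GCBlock(64)(feat).shape == feat.shape
+```
+
+Because the pooling weights are shared by all query positions, the block costs O(HWC) rather than the O((HW)^2 C) of a full non-local block, which is what makes it light enough to insert at multiple backbone layers.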
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x1024 | 40000 | 5.8 | 3.93 | V100 | 77.69 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| GCNet | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.61 | V100 | 78.28 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| GCNet | R-50-D8 | 769x769 | 40000 | 6.5 | 1.67 | V100 | 78.12 | 80.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json) | +| GCNet | R-101-D8 | 769x769 | 40000 | 10.5 | 1.13 | V100 | 78.95 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json) | +| GCNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 78.48 | 80.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 79.03 | 79.84 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 78.68 | 80.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json) | +| GCNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.18 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.38 | V100 | 41.47 | 42.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json) | +| GCNet | R-101-D8 | 512x512 | 80000 | 12 | 15.20 | V100 | 42.82 | 44.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json) | +| GCNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.37 | 43.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb4-160k_ade20k-512x512.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json) | +| GCNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 43.69 | 45.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x512 | 20000 | 5.8 | 23.35 | V100 | 76.42 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json) | +| GCNet | R-101-D8 | 512x512 | 20000 | 9.2 | 14.80 | V100 | 77.41 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json) | +| GCNet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.24 | 77.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json) | +| GCNet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 77.84 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json) | + +## Citation + +```bibtex +@inproceedings{cao2019gcnet, + title={Gcnet: Non-local networks meet squeeze-excitation networks and beyond}, + author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision Workshops}, + pages={0--0}, + year={2019} +} +``` diff --git a/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..e8f7c552fbd197d646dd1266cf1a3638a8935114 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..887d17b71d8063e6600c399dc31989818b53981e --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..aa47578d1630a17a245070fba4625190589af700 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..ddf4ad7bbc27716125fac62ab6777e3daf61da23 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/gcnet/gcnet_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..45285c0183067a52af04461600a9fbe3df983055 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/gcnet/gcnet_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b466c409e8f3da34dbdc64f5c126287977638fc4 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/gcnet/gcnet_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 
0000000000000000000000000000000000000000..9c7f741f053e6b55b909f15253abf7b6c36fc6f7 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/gcnet/gcnet_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..61337dbda26be0cc55a8e17011027f78527697e6 --- /dev/null +++ b/configs/gcnet/gcnet_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..f976bd907a466edd04835db92f866de96302bfa8 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..34ce822c5921b478efbeda9a0b80a8162fb58a2e --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..50889290473a81fa511471952d3e99c1585c40f1 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..f886f170fcc7b67673aa2de8fcdb076bfbe9ca48 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), 
stride=(513, 513))) diff --git a/configs/gcnet/gcnet_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/gcnet/gcnet_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..d3f56313190f6b3e55400688a7414a39b7a63c97 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/gcnet/gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/gcnet/gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..356b088236c10cda7dd557f9efc7d0facd3fea91 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/gcnet/gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/gcnet/gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..802b7668e055f0dd1d0c0a8f2212ede4701cadc4 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/gcnet/gcnet_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/gcnet/gcnet_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..7327934289cebc8713ebbdf527f629b170f27aa3 --- /dev/null +++ b/configs/gcnet/gcnet_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/gcnet/metafile.yaml b/configs/gcnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f3c4623a034a8e6a095535f8724df5058ae927b --- /dev/null +++ b/configs/gcnet/metafile.yaml @@ -0,0 +1,391 @@ +Collections: +- Name: GCNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + README: configs/gcnet/README.md + Frameworks: + - PyTorch +Models: +- Name: gcnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.69 + mIoU(ms+flip): 78.56 + Config: 
configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 5.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.28 + mIoU(ms+flip): 79.34 + Config: configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.12 + mIoU(ms+flip): 80.09 + Config: configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.95 + mIoU(ms+flip): 80.71 + Config: configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.48 + mIoU(ms+flip): 80.01 + Config: configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.03 + mIoU(ms+flip): 79.84 + Config: configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.68 + mIoU(ms+flip): 80.66 + Config: configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.18 + mIoU(ms+flip): 80.71 + 
Config: configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.47 + mIoU(ms+flip): 42.85 + Config: configs/gcnet/gcnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.82 + mIoU(ms+flip): 44.54 + Config: configs/gcnet/gcnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.37 + mIoU(ms+flip): 43.52 + Config: configs/gcnet/gcnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: 
https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.69 + mIoU(ms+flip): 45.21 + Config: configs/gcnet/gcnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.42 + mIoU(ms+flip): 77.51 + Config: configs/gcnet/gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 5.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.41 + mIoU(ms+flip): 78.56 + Config: configs/gcnet/gcnet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.24 + mIoU(ms+flip): 77.63 + Config: configs/gcnet/gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.84 + mIoU(ms+flip): 78.59 + Config: configs/gcnet/gcnet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b529fc895ed0c6882b9af3cc450d0f1dc64a5355 --- /dev/null +++ b/configs/hrnet/README.md @@ -0,0 +1,122 @@ +# HRNet + +> [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1908.07919) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +High-resolution representations are essential for position-sensitive vision problems, such as human pose estimation, semantic segmentation, and object detection. Existing state-of-the-art frameworks first encode the input image as a low-resolution representation through a subnetwork that is formed by connecting high-to-low resolution convolutions \\emph{in series} (e.g., ResNet, VGGNet), and then recover the high-resolution representation from the encoded low-resolution representation. Instead, our proposed network, named as High-Resolution Network (HRNet), maintains high-resolution representations through the whole process. There are two key characteristics: (i) Connect the high-to-low resolution convolution streams \\emph{in parallel}; (ii) Repeatedly exchange the information across resolutions. The benefit is that the resulting representation is semantically richer and spatially more precise. We show the superiority of the proposed HRNet in a wide range of applications, including human pose estimation, semantic segmentation, and object detection, suggesting that the HRNet is a stronger backbone for computer vision problems. All the codes are available at [this https URL](https://github.com/HRNet). + + + +
+ +
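+A minimal inference sketch using the `mmseg` Python API (assuming `mmseg` is installed, the checkpoint from the Cityscapes table below has been downloaded locally, and `demo.png` stands in for a real input image):
+
+```python
+from mmseg.apis import init_model, inference_model
+
+# Build an HRNet-W18 FCN model from a config in this folder and a released checkpoint.
+config_file = 'configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py'
+checkpoint_file = 'fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth'  # assumed downloaded
+model = init_model(config_file, checkpoint_file, device='cuda:0')
+
+# Run single-image inference; the result carries the predicted segmentation map.
+result = inference_model(model, 'demo.png')  # 'demo.png' is a placeholder path
+```
+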
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x1024 | 40000 | 1.7 | 23.74 | V100 | 73.86 | 75.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 40000 | 2.9 | 12.97 | V100 | 77.19 | 78.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 40000 | 6.2 | 6.42 | V100 | 78.48 | 79.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | V100 | 75.31 | 77.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 80000 | - | - | V100 | 78.65 | 80.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 80000 | - | - | V100 | 79.93 | 80.72 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | V100 | 76.31 | 78.31 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 160000 | - | - | V100 | 78.80 | 80.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 160000 | - | - | V100 | 80.65 | 81.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 3.8 | 38.66 | V100 | 31.38 | 32.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 4.9 | 22.57 | V100 | 36.27 | 37.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910-6c9382c0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 8.2 | 21.23 | V100 | 41.90 | 43.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | V100 | 33.07 | 34.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739-f1e7c2e7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 160000 | - | - | V100 | 36.79 | 38.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 160000 | - | - | V100 | 42.02 | 43.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x512 | 20000 | 1.8 | 43.36 | V100 | 65.5 | 68.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910-0aceadb4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910.log.json) | +| FCN | HRNetV2p-W18 | 512x512 
| 20000 | 2.9 | 23.48 | V100 | 72.30 | 74.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 20000 | 6.2 | 22.05 | V100 | 75.87 | 78.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | V100 | 66.61 | 70.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | V100 | 72.90 | 75.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | V100 | 76.24 | 78.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W48 | 480x480 | 40000 | 6.1 | 8.86 | V100 | 45.14 | 47.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-480x480.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | V100 | 45.84 | 47.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W48 | 480x480 | 40000 | - | - | V100 | 50.33 | 52.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59_20210410_122738-b808b8b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59-20210410_122738.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | V100 | 51.12 | 53.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59_20210411_003240-3ae7081e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59-20210411_003240.log.json) | + +### LoveDA + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.59 | 24.87 | V100 | 49.28 | 49.42 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228-60a86a7a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 12.92 | V100 | 50.81 | 50.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952-93d9c3b3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 9.61 | V100 | 51.42 | 51.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756-67072f55.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756.log.json) | + +### Potsdam + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 36.00 | V100 | 77.64 | 78.8 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.25 | V100 | 78.26 | 79.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 16.42 | V100 | 78.39 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601.log.json) | + +### Vaihingen + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 38.11 | V100 | 71.81 | 73.1 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.55 | V100 | 72.57 | 74.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 17.25 | V100 | 72.50 | 73.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244.log.json) | + +### iSAID + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 896x896 | 80000 | 4.95 | 13.84 | V100 | 62.30 | 62.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603-3cc0769b.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603.log.json) | +| FCN | HRNetV2p-W18 | 896x896 | 80000 | 8.30 | 7.71 | V100 | 65.06 | 65.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230-49bf752e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230.log.json) | +| FCN | HRNetV2p-W48 | 896x896 | 80000 | 16.89 | 7.34 | V100 | 67.80 | 68.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643-547fc420.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643.log.json) | + +Note: + +- `896x896` is the crop size for the iSAID dataset, following the implementation of [PointFlow: Flowing Semantics Through Points for Aerial Image Segmentation](https://arxiv.org/pdf/2103.06564.pdf) + +## Citation + +```bibtex +@inproceedings{SunXLW19, + title={Deep High-Resolution Representation Learning for Human Pose Estimation}, + author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, + booktitle={CVPR}, + year={2019} +} +``` diff --git a/configs/hrnet/fcn_hr18_4xb2-160k_cityscapes-512x1024.py b/configs/hrnet/fcn_hr18_4xb2-160k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0b374632b8847ead680859e3b291749ac5a1760f --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb2-160k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py b/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..598b938a3ff3fd91b71a2f5ab325f6890d227146 --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/hrnet/fcn_hr18_4xb2-80k_cityscapes-512x1024.py b/configs/hrnet/fcn_hr18_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..eb7da49dbce21fbb83e8415e70195478b1f9393e --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/hrnet/fcn_hr18_4xb4-160k_ade20k-512x512.py b/configs/hrnet/fcn_hr18_4xb4-160k_ade20k-512x512.py
new file mode 100644 index 0000000000000000000000000000000000000000..c4f732cd894a72150dfb39a32f39ff5705f4cca1 --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, decode_head=dict(num_classes=150)) diff --git a/configs/hrnet/fcn_hr18_4xb4-20k_voc12aug-512x512.py b/configs/hrnet/fcn_hr18_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..107df6b13b2cfeade3e1b82fe103b3d2750ade3f --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, decode_head=dict(num_classes=21)) diff --git a/configs/hrnet/fcn_hr18_4xb4-40k_pascal-context-480x480.py b/configs/hrnet/fcn_hr18_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..f744baec7bc9a53ede3fb0eecafea03d00f2e2ed --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/hrnet/fcn_hr18_4xb4-40k_pascal-context-59-480x480.py b/configs/hrnet/fcn_hr18_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..0daaa35ebce1146fe9df3e938f0de86500359271 --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/hrnet/fcn_hr18_4xb4-40k_voc12aug-512x512.py b/configs/hrnet/fcn_hr18_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..2aa16b124de9885a4d828128ca6b3203224929fe --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, decode_head=dict(num_classes=21)) diff --git 
a/configs/hrnet/fcn_hr18_4xb4-80k_ade20k-512x512.py b/configs/hrnet/fcn_hr18_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..029b7d0e9acb9524e908ce9dac1ba3cdb1b695fa --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, decode_head=dict(num_classes=150)) diff --git a/configs/hrnet/fcn_hr18_4xb4-80k_isaid-896x896.py b/configs/hrnet/fcn_hr18_4xb4-80k_isaid-896x896.py new file mode 100644 index 0000000000000000000000000000000000000000..33a6ac70a646169b782d60e1f2e350899931c1c7 --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-80k_isaid-896x896.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/isaid.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (896, 896) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, decode_head=dict(num_classes=16)) diff --git a/configs/hrnet/fcn_hr18_4xb4-80k_loveda-512x512.py b/configs/hrnet/fcn_hr18_4xb4-80k_loveda-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1a918b2ecee78301c87d951faaa6eaf520828220 --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-80k_loveda-512x512.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/loveda.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, decode_head=dict(num_classes=7)) diff --git a/configs/hrnet/fcn_hr18_4xb4-80k_pascal-context-480x480.py b/configs/hrnet/fcn_hr18_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..4f37e8ade73a0cac24b218910afc6121055f0d89 --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/hrnet/fcn_hr18_4xb4-80k_pascal-context-59-480x480.py b/configs/hrnet/fcn_hr18_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..2c35cb9c329157183abf51d823047607dfdeec6c --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = 
dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/hrnet/fcn_hr18_4xb4-80k_potsdam-512x512.py b/configs/hrnet/fcn_hr18_4xb4-80k_potsdam-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..181c03d379db578716681446d8872db4846caa35 --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-80k_potsdam-512x512.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/potsdam.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, decode_head=dict(num_classes=6)) diff --git a/configs/hrnet/fcn_hr18_4xb4-80k_vaihingen-512x512.py b/configs/hrnet/fcn_hr18_4xb4-80k_vaihingen-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..6303bb65c3f182184384fad834f565bbbae89315 --- /dev/null +++ b/configs/hrnet/fcn_hr18_4xb4-80k_vaihingen-512x512.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/vaihingen.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, decode_head=dict(num_classes=6)) diff --git a/configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py b/configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6ca631cbeee25028150c16145133fbc5fde33c5a --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb2-160k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb2-40k_cityscapes-512x1024.py b/configs/hrnet/fcn_hr18s_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..ba7e9c696ea3ff88596b2f9d5e568a155a7c9537 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb2-40k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb2-80k_cityscapes-512x1024.py b/configs/hrnet/fcn_hr18s_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..26ab6210ddb2f520c71d29f517e055a62bc61c47 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py b/configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..29cbd10cbfc410039d174cd43f60cb0ae7b4128d --- /dev/null +++ 
b/configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-20k_voc12aug-512x512.py b/configs/hrnet/fcn_hr18s_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9dd1933349995fb347f305039349708327f8f413 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-20k_voc12aug-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-40k_pascal-context-480x480.py b/configs/hrnet/fcn_hr18s_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..5f88f532a3c0edb864eb41e8bf474e1eafefedf4 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-40k_pascal-context-480x480.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-40k_pascal-context-59-480x480.py b/configs/hrnet/fcn_hr18s_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..b616fad8c2a55617d86f6fff82a898e026fdc57f --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-40k_pascal-context-59-480x480.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-40k_voc12aug-512x512.py b/configs/hrnet/fcn_hr18s_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b10b282dd8a999f18e67f931567d821ad1536292 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-40k_voc12aug-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-80k_ade20k-512x512.py b/configs/hrnet/fcn_hr18s_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..f9f49360bf27a1b7fcd29ce6645a95d1298548d2 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-80k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + 
stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-80k_isaid-896x896.py b/configs/hrnet/fcn_hr18s_4xb4-80k_isaid-896x896.py new file mode 100644 index 0000000000000000000000000000000000000000..ab2d2414dd9360368e92ae9d5882b644d2ba02ec --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-80k_isaid-896x896.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-80k_isaid-896x896.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-80k_loveda-512x512.py b/configs/hrnet/fcn_hr18s_4xb4-80k_loveda-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..dd17076c3fcb0ba1d2729f3a5e189f31cec2d9ad --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-80k_loveda-512x512.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-80k_loveda-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-80k_pascal-context-480x480.py b/configs/hrnet/fcn_hr18s_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..b7b52331c77557f2956f26d7e274511ecdb8e6d5 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-80k_pascal-context-480x480.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-80k_pascal-context-59-480x480.py b/configs/hrnet/fcn_hr18s_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..ccf1040d1368ab46cede7e3d7c0d3d496436336f --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-80k_pascal-context-59-480x480.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-80k_potsdam-512x512.py b/configs/hrnet/fcn_hr18s_4xb4-80k_potsdam-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..3a5726f5d184d5ca0be19e5e4a6b84a57d6fec35 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-80k_potsdam-512x512.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-80k_potsdam-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_4xb4-80k_vaihingen-512x512.py b/configs/hrnet/fcn_hr18s_4xb4-80k_vaihingen-512x512.py new file mode 100644 
index 0000000000000000000000000000000000000000..720c1732b074cd282b9c2cc63517b7b354166e11 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_4xb4-80k_vaihingen-512x512.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4xb4-80k_vaihingen-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py b/configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..4aa5d94d1e8e30716c177c48f5571f958b7596bc --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb2-160k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb2-40k_cityscapes-512x1024.py b/configs/hrnet/fcn_hr48_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..7cb795250d027fc2a251a358fe19e5173386e934 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb2-40k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb2-80k_cityscapes-512x1024.py b/configs/hrnet/fcn_hr48_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..3e2ce034b2f23e59c5c2334bd7d6f2d6b36892c4 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py b/configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..89b1f046510a9e9cff0ee259b75fd3414f8aa9b3 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-20k_voc12aug-512x512.py b/configs/hrnet/fcn_hr48_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..7ca38a9a79ae77794befad8933fbe9282256e5d3 --- /dev/null +++ 
b/configs/hrnet/fcn_hr48_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-20k_voc12aug-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-480x480.py b/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..379be1d67e8a97c6c5a6a24b87c467e27063febd --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-40k_pascal-context-480x480.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-59-480x480.py b/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..12730dd53376bc29f30b998c98688810573fb769 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-40k_pascal-context-59-480x480.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-40k_voc12aug-512x512.py b/configs/hrnet/fcn_hr48_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..3e1b920c59c28c78e8750780359e1ecb432bb686 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-40k_voc12aug-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-80k_ade20k-512x512.py b/configs/hrnet/fcn_hr48_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..14fd663e87407cb13dc936a6c09fec7280722fd2 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-80k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-80k_isaid-896x896.py b/configs/hrnet/fcn_hr48_4xb4-80k_isaid-896x896.py new file mode 100644 index 0000000000000000000000000000000000000000..81815efa8d00dd559590abb1336a856014d93186 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-80k_isaid-896x896.py @@ -0,0 +1,10 @@ +_base_ = 
'./fcn_hr18_4xb4-80k_isaid-896x896.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-80k_loveda-512x512.py b/configs/hrnet/fcn_hr48_4xb4-80k_loveda-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..34d23af1634946f3b90911a12c0ff5733c920e64 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-80k_loveda-512x512.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-80k_loveda-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-480x480.py b/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..4d193d90423cfa7a104f8103224829049953dd33 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-80k_pascal-context-480x480.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-59-480x480.py b/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..d8b4c4aa8e26db6b8dab81448d894690a787523b --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-80k_pascal-context-59-480x480.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-80k_potsdam-512x512.py b/configs/hrnet/fcn_hr48_4xb4-80k_potsdam-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..58a650004dd96934cbaa315a0b7c624a901c3508 --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-80k_potsdam-512x512.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-80k_potsdam-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_4xb4-80k_vaihingen-512x512.py b/configs/hrnet/fcn_hr48_4xb4-80k_vaihingen-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..db91ed83ef7d6a08ded59aa8167dec82929acecc --- /dev/null +++ b/configs/hrnet/fcn_hr48_4xb4-80k_vaihingen-512x512.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4xb4-80k_vaihingen-512x512.py' +model = dict( + 
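+    # Same pattern as every fcn_hr48 config above: inherit the matching
+    # fcn_hr18 config, load the HRNetV2-W48 pretrained weights, and widen
+    # the parallel streams to (48, 96, 192, 384). The FCN head resizes and
+    # concatenates the four streams, hence in_channels=[48, 96, 192, 384]
+    # and channels=sum([48, 96, 192, 384]) = 720.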
pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/metafile.yaml b/configs/hrnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11c30165a535454bf26174b2fb800cb60f99b761 --- /dev/null +++ b/configs/hrnet/metafile.yaml @@ -0,0 +1,874 @@ +Models: +- Name: fcn_hr18s_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.86 + mIoU(ms+flip): 75.91 + Config: configs/hrnet/fcn_hr18s_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.19 + mIoU(ms+flip): 78.92 + Config: configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.48 + mIoU(ms+flip): 79.69 + Config: configs/hrnet/fcn_hr48_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb2-80k_cityscapes-512x1024 + In Collection: 
FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.31 + mIoU(ms+flip): 77.48 + Config: configs/hrnet/fcn_hr18s_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.65 + mIoU(ms+flip): 80.35 + Config: configs/hrnet/fcn_hr18_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.93 + mIoU(ms+flip): 80.72 + Config: configs/hrnet/fcn_hr48_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb2-160k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.31 + mIoU(ms+flip): 78.31 + Config: configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json + Paper: + Title: 
Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb2-160k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.8 + mIoU(ms+flip): 80.74 + Config: configs/hrnet/fcn_hr18_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb2-160k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.65 + mIoU(ms+flip): 81.92 + Config: configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 31.38 + mIoU(ms+flip): 32.45 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 3.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.27 + mIoU(ms+flip): 37.28 + Config: configs/hrnet/fcn_hr18_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 4.9 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910-6c9382c0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.9 + mIoU(ms+flip): 43.27 + Config: configs/hrnet/fcn_hr48_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 8.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 33.07 + mIoU(ms+flip): 34.56 + Config: configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739-f1e7c2e7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.79 + mIoU(ms+flip): 38.58 + Config: configs/hrnet/fcn_hr18_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.02 + mIoU(ms+flip): 43.86 + Config: 
configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 65.5 + mIoU(ms+flip): 68.89 + Config: configs/hrnet/fcn_hr18s_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910-0aceadb4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.3 + mIoU(ms+flip): 74.71 + Config: configs/hrnet/fcn_hr18_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.87 + mIoU(ms+flip): 78.58 + Config: configs/hrnet/fcn_hr48_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: 
https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 66.61 + mIoU(ms+flip): 70.0 + Config: configs/hrnet/fcn_hr18s_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.9 + mIoU(ms+flip): 75.59 + Config: configs/hrnet/fcn_hr18_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.24 + mIoU(ms+flip): 78.49 + Config: configs/hrnet/fcn_hr48_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-40k_pascal-context-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 45.14 + mIoU(ms+flip): 47.42 + Config: configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_pascal-context-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 45.84 + mIoU(ms+flip): 47.84 + Config: configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-40k_pascal-context-59-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 50.33 + mIoU(ms+flip): 52.83 + Config: configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59_20210410_122738-b808b8b2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59-20210410_122738.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_pascal-context-59-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 51.12 + mIoU(ms+flip): 53.56 + Config: configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59_20210411_003240-3ae7081e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59-20210411_003240.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_loveda-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 49.28 + mIoU(ms+flip): 49.42 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.59 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228-60a86a7a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_loveda-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.81 + mIoU(ms+flip): 50.95 + Config: configs/hrnet/fcn_hr18_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.76 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952-93d9c3b3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_loveda-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 51.42 + mIoU(ms+flip): 51.64 + Config: configs/hrnet/fcn_hr48_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756-67072f55.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_potsdam-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 77.64 + mIoU(ms+flip): 78.8 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.58 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_potsdam-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.26 + mIoU(ms+flip): 79.24 + Config: configs/hrnet/fcn_hr18_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.76 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_potsdam-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.39 + mIoU(ms+flip): 79.34 + Config: configs/hrnet/fcn_hr48_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_vaihingen-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 71.81 + mIoU(ms+flip): 73.1 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.58 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_vaihingen-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.57 + mIoU(ms+flip): 74.09 + Config: configs/hrnet/fcn_hr18_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: 
Vaihingen + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.76 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_vaihingen-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.5 + mIoU(ms+flip): 73.52 + Config: configs/hrnet/fcn_hr48_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_isaid-896x896 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 62.3 + mIoU(ms+flip): 62.97 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 4.95 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603-3cc0769b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_isaid-896x896 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 65.06 + mIoU(ms+flip): 65.6 + Config: configs/hrnet/fcn_hr18_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 8.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230-49bf752e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_isaid-896x896 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 67.8 + mIoU(ms+flip): 68.53 + Config: configs/hrnet/fcn_hr48_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 16.89 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643-547fc420.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch diff --git a/configs/icnet/README.md b/configs/icnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fa2327fc3910804bd520526dcca1fe14d4a5ae81 --- /dev/null +++ b/configs/icnet/README.md @@ -0,0 +1,56 @@ +# ICNet + +> [ICNet for Real-time Semantic Segmentation on High-resolution Images](https://arxiv.org/abs/1704.08545) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We focus on the challenging task of real-time semantic segmentation in this paper. It finds many practical applications and yet is with fundamental difficulty of reducing a large portion of computation for pixel-wise label inference. We propose an image cascade network (ICNet) that incorporates multi-resolution branches under proper label guidance to address this challenge. We provide in-depth analysis of our framework and introduce the cascade feature fusion unit to quickly achieve high-quality segmentation. Our system yields real-time inference on a single GPU card with decent quality results evaluated on challenging datasets like Cityscapes, CamVid and COCO-Stuff. + + + +
+ +
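+A minimal way to try one of the checkpoints below (a sketch, assuming mmsegmentation 1.x is installed and the checkpoint from the R-50-D8 80k row of the table has been downloaded; `demo.png` is a placeholder for any test image you supply):
+
+```python
+from mmseg.apis import inference_model, init_model
+from mmseg.apis.inference import show_result_pyplot
+
+# Pick any row from the table below; config and checkpoint must match.
+config = 'configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py'
+checkpoint = 'icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth'
+
+model = init_model(config, checkpoint, device='cuda:0')
+result = inference_model(model, 'demo.png')
+# Returns the blended prediction as an RGB array; set show=True for a window.
+vis = show_result_pyplot(model, 'demo.png', result, show=False)
+```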
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ICNet | R-18-D8 | 832x832 | 80000 | 1.70 | 27.12 | V100 | 68.14 | 70.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r18-d8_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json) | +| ICNet | R-18-D8 | 832x832 | 160000 | - | - | V100 | 71.64 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r18-d8_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json) | +| ICNet (in1k-pre) | R-18-D8 | 832x832 | 80000 | - | - | V100 | 72.51 | 74.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json) | +| ICNet (in1k-pre) | R-18-D8 | 832x832 | 160000 | - | - | V100 | 74.43 | 76.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json) | +| ICNet | R-50-D8 | 832x832 | 80000 | 2.53 | 20.08 | V100 | 68.91 | 69.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json) | +| ICNet | R-50-D8 | 832x832 | 160000 | - | - | V100 | 73.82 | 75.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r50-d8_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json) | +| ICNet (in1k-pre) | R-50-D8 | 832x832 | 80000 | - | - | V100 | 74.58 | 76.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json) | +| ICNet (in1k-pre) | R-50-D8 | 832x832 | 160000 | - | - | V100 | 76.29 | 78.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json) | +| ICNet | R-101-D8 | 832x832 | 80000 | 3.08 | 16.95 | V100 | 70.28 | 71.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r101-d8_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json) | +| ICNet | R-101-D8 | 832x832 | 160000 | - | - | V100 | 73.80 | 76.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r101-d8_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json) | +| ICNet (in1k-pre) | R-101-D8 | 832x832 | 80000 | - | - | V100 | 75.57 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json) | +| ICNet 
(in1k-pre) | R-101-D8 | 832x832 | 160000 | - | - | V100 | 76.15 | 77.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json) | + +Note: `in1k-pre` means the backbone is initialized from an ImageNet-1k pretrained checkpoint. + +## Citation + +```bibtex +@inproceedings{zhao2018icnet, + title={Icnet for real-time semantic segmentation on high-resolution images}, + author={Zhao, Hengshuang and Qi, Xiaojuan and Shen, Xiaoyong and Shi, Jianping and Jia, Jiaya}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={405--420}, + year={2018} +} +``` diff --git a/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py b/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..a6840a1155ff713a58f51435284b77a3b6d502f9 --- /dev/null +++ b/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py @@ -0,0 +1,7 @@ +_base_ = './icnet_r50-d8_4xb2-160k_cityscapes-832x832.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c')))) diff --git a/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py b/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..ca81df8c7b51e737b740f703998b0ced35028be2 --- /dev/null +++ b/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py @@ -0,0 +1,7 @@ +_base_ = './icnet_r50-d8_4xb2-80k_cityscapes-832x832.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c')))) diff --git a/configs/icnet/icnet_r101-d8_4xb2-160k_cityscapes-832x832.py b/configs/icnet/icnet_r101-d8_4xb2-160k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..ef60446bc57cd27810f6f1628c47d667cbaddb1a --- /dev/null +++ b/configs/icnet/icnet_r101-d8_4xb2-160k_cityscapes-832x832.py @@ -0,0 +1,2 @@ +_base_ = './icnet_r50-d8_4xb2-160k_cityscapes-832x832.py' +model = dict(backbone=dict(backbone_cfg=dict(depth=101))) diff --git a/configs/icnet/icnet_r101-d8_4xb2-80k_cityscapes-832x832.py b/configs/icnet/icnet_r101-d8_4xb2-80k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..5173d2d6f820ff7c9c479f4069d24b87504d5bed --- /dev/null +++ b/configs/icnet/icnet_r101-d8_4xb2-80k_cityscapes-832x832.py @@ -0,0 +1,2 @@ +_base_ = './icnet_r50-d8_4xb2-80k_cityscapes-832x832.py' +model = dict(backbone=dict(backbone_cfg=dict(depth=101))) diff --git a/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py b/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..5f72daab65727489639df7401d4cb5dbfee8fec1 --- /dev/null +++ b/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py @@ -0,0 +1,8 @@ +_base_ = './icnet_r50-d8_4xb2-160k_cityscapes-832x832.py' +model = dict( + backbone=dict( + layer_channels=(128, 512), + 
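+        # ResNet-18's stage-2 and stage-4 feature maps are 128- and
+        # 512-channel, narrower than ResNet-50's, so the ICNet encoder's
+        # layer_channels is shrunk to match before the ImageNet-1k
+        # pretrained weights are loaded into the backbone.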
backbone_cfg=dict( + depth=18, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')))) diff --git a/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py b/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..2fc79ab1977b0c4c545f91d83928a5894c3369e6 --- /dev/null +++ b/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py @@ -0,0 +1,8 @@ +_base_ = './icnet_r50-d8_4xb2-80k_cityscapes-832x832.py' +model = dict( + backbone=dict( + layer_channels=(128, 512), + backbone_cfg=dict( + depth=18, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')))) diff --git a/configs/icnet/icnet_r18-d8_4xb2-160k_cityscapes-832x832.py b/configs/icnet/icnet_r18-d8_4xb2-160k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..2c70e948106250c9a9d5604393a3ee9d8333db6f --- /dev/null +++ b/configs/icnet/icnet_r18-d8_4xb2-160k_cityscapes-832x832.py @@ -0,0 +1,3 @@ +_base_ = './icnet_r50-d8_4xb2-160k_cityscapes-832x832.py' +model = dict( + backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18))) diff --git a/configs/icnet/icnet_r18-d8_4xb2-80k_cityscapes-832x832.py b/configs/icnet/icnet_r18-d8_4xb2-80k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..23c7ac29900bf3991b7b54da1ec6fff13acacf02 --- /dev/null +++ b/configs/icnet/icnet_r18-d8_4xb2-80k_cityscapes-832x832.py @@ -0,0 +1,3 @@ +_base_ = './icnet_r50-d8_4xb2-80k_cityscapes-832x832.py' +model = dict( + backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18))) diff --git a/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py b/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..f9ab863402a3c5a6d99176327618c9a188cc3faf --- /dev/null +++ b/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py @@ -0,0 +1,6 @@ +_base_ = './icnet_r50-d8_4xb2-160k_cityscapes-832x832.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py b/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..9a085d4f6120711c0e0da968a01a97fe1d1240ef --- /dev/null +++ b/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py @@ -0,0 +1,6 @@ +_base_ = './icnet_r50-d8_4xb2-80k_cityscapes-832x832.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/configs/icnet/icnet_r50-d8_4xb2-160k_cityscapes-832x832.py b/configs/icnet/icnet_r50-d8_4xb2-160k_cityscapes-832x832.py new file mode 100644 index 0000000000000000000000000000000000000000..1b7b1884f79520a03eb277a50c70c7a1f4c0c755 --- /dev/null +++ b/configs/icnet/icnet_r50-d8_4xb2-160k_cityscapes-832x832.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/icnet_r50-d8.py', + '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (832, 832) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py b/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py new file 
mode 100644 index 0000000000000000000000000000000000000000..001dbcaf7f6ee0a78f5832b4fa28887d63fa57a3 --- /dev/null +++ b/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/icnet_r50-d8.py', + '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (832, 832) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/icnet/metafile.yaml b/configs/icnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d843ee4b66ceffd05fc3dc1c67c7a45b50f626f --- /dev/null +++ b/configs/icnet/metafile.yaml @@ -0,0 +1,298 @@ +Collections: +- Name: ICNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + README: configs/icnet/README.md + Frameworks: + - PyTorch +Models: +- Name: icnet_r18-d8_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 68.14 + mIoU(ms+flip): 70.16 + Config: configs/icnet/icnet_r18-d8_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - ICNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r18-d8_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.64 + mIoU(ms+flip): 74.18 + Config: configs/icnet/icnet_r18-d8_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - ICNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.51 + mIoU(ms+flip): 74.78 + Config: configs/icnet/icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.43 + mIoU(ms+flip): 76.72 + Config: configs/icnet/icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r50-d8_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 68.91 + mIoU(ms+flip): 69.72 + Config: configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ICNet + Training Resources: 4x V100 GPUS + Memory (GB): 2.53 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r50-d8_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.82 + mIoU(ms+flip): 75.67 + Config: configs/icnet/icnet_r50-d8_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ICNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.58 + mIoU(ms+flip): 76.41 + Config: configs/icnet/icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.29 + mIoU(ms+flip): 78.09 + Config: configs/icnet/icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r101-d8_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.28 + mIoU(ms+flip): 71.95 + Config: configs/icnet/icnet_r101-d8_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ICNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.08 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r101-d8_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.8 + mIoU(ms+flip): 76.1 + Config: configs/icnet/icnet_r101-d8_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ICNet + Training 
Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.57 + mIoU(ms+flip): 77.86 + Config: configs/icnet/icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.15 + mIoU(ms+flip): 77.98 + Config: configs/icnet/icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch diff --git a/configs/isanet/README.md b/configs/isanet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c11744ffef8c6161ef2d721e98a81067ff4cf9b0 --- /dev/null +++ b/configs/isanet/README.md @@ -0,0 +1,80 @@ +# ISANet + +> [Interlaced Sparse Self-Attention for Semantic Segmentation](https://arxiv.org/abs/1907.12273) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +In this paper, we present a so-called interlaced sparse self-attention approach to improve the efficiency of the *self-attention* mechanism for semantic segmentation. The main idea is that we factorize the dense affinity matrix as the product of two sparse affinity matrices. There are two successive attention modules each estimating a sparse affinity matrix.
The first attention module is used to estimate the affinities within a subset of positions that have long spatial interval distances and the second attention module is used to estimate the affinities within a subset of positions that have short spatial interval distances. These two attention modules are designed so that each position is able to receive the information from all the other positions. In contrast to the original self-attention module, our approach decreases the computation and memory complexity substantially especially when processing high-resolution feature maps. We empirically verify the effectiveness of our approach on six challenging semantic segmentation benchmarks. + + + +
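To make the factorization above concrete, here is a minimal PyTorch sketch of the interlace-then-attend idea. It is an illustration rather than the `ISAHead` code added in this diff: the helper name `interlaced_sparse_attention` is hypothetical, `long_attn` and `short_attn` stand in for any dense self-attention modules that map `(N, C, H, W)` to `(N, C, H, W)`, and the spatial size is assumed divisible by the block size.

```python
import torch

def interlaced_sparse_attention(x, long_attn, short_attn, block=(8, 8)):
    """One interlaced pass: long-range attention over strided position
    subsets, then short-range attention within local blocks, so the
    composition connects every position to every other position."""
    n, c, h, w = x.shape
    ph, pw = block              # block (partition) size
    qh, qw = h // ph, w // pw   # number of blocks along each axis

    # Long-range: gather positions that share the same offset inside a
    # block (they are spatially far apart) and attend within each group.
    t = x.reshape(n, c, qh, ph, qw, pw)
    t = t.permute(0, 3, 5, 1, 2, 4).reshape(n * ph * pw, c, qh, qw)
    t = long_attn(t)

    # Short-range: regroup into the original ph x pw blocks and attend
    # within each block (spatially close positions).
    t = t.reshape(n, ph, pw, c, qh, qw).permute(0, 4, 5, 3, 1, 2)
    t = t.reshape(n * qh * qw, c, ph, pw)
    t = short_attn(t)

    # Restore the (N, C, H, W) layout.
    t = t.reshape(n, qh, qw, c, ph, pw).permute(0, 3, 1, 4, 2, 5)
    return t.reshape(n, c, h, w)

# Shape check with identity "attention" (a pure reshape round-trip):
# out = interlaced_sparse_attention(torch.randn(2, 64, 32, 32),
#                                   torch.nn.Identity(), torch.nn.Identity())
```

Each dense attention now runs over only `qh * qw` or `ph * pw` positions instead of `H * W`, which is where the claimed computation and memory savings on high-resolution feature maps come from.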
+ +
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | -----------------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ISANet | R-50-D8 | 512x1024 | 40000 | 5.869 | 2.91 | V100 | 78.49 | 79.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739.log.json) | +| ISANet | R-50-D8 | 512x1024 | 80000 | 5.869 | 2.91 | V100 | 78.68 | 80.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202.log.json) | +| ISANet | R-50-D8 | 769x769 | 40000 | 6.759 | 1.54 | V100 | 78.70 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200.log.json) | +| ISANet | R-50-D8 | 769x769 | 80000 | 6.759 | 1.54 | V100 | 79.29 | 80.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126.log.json) | +| ISANet | R-101-D8 | 512x1024 | 40000 | 9.425 | 2.35 | V100 | 79.58 | 81.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553.log.json) | +| ISANet | R-101-D8 | 512x1024 | 80000 | 9.425 | 2.35 | 
V100 | 80.32 | 81.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243.log.json) | +| ISANet | R-101-D8 | 769x769 | 40000 | 10.815 | 0.92 | V100 | 79.68 | 80.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320.log.json) | +| ISANet | R-101-D8 | 769x769 | 80000 | 10.815 | 0.92 | V100 | 80.61 | 81.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | -------------------------------------------------------------------------------------------------------------------------: | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ISANet | R-50-D8 | 512x512 | 80000 | 9.0 | 22.55 | V100 | 41.12 | 42.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557.log.json) | +| ISANet | R-50-D8 | 512x512 | 160000 | 9.0 | 22.55 | V100 | 42.59 | 43.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850.log.json) | +| ISANet | R-101-D8 | 512x512 | 80000 | 12.562 | 10.56 | V100 | 43.51 | 44.38 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056.log.json) | +| ISANet | R-101-D8 | 512x512 | 160000 | 12.562 | 10.56 | V100 | 43.80 | 45.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ISANet | R-50-D8 | 512x512 | 20000 | 5.9 | 23.08 | V100 | 76.78 | 77.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838.log.json) | +| ISANet | R-50-D8 | 512x512 | 40000 | 5.9 | 23.08 | V100 | 76.20 | 77.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349.log.json) | +| ISANet | R-101-D8 | 512x512 | 20000 | 9.465 | 7.42 | V100 | 78.46 | 79.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805.log.json) | +| ISANet | R-101-D8 | 512x512 | 40000 | 9.465 | 7.42 | V100 | 78.12 | 79.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb4-40k_voc12aug-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814.log.json) | + +## Citation + +```bibtex +@article{huang2019isa, + title={Interlaced Sparse Self-Attention for Semantic Segmentation}, + author={Huang, Lang and Yuan, Yuhui and Guo, Jianyuan and Zhang, Chao and Chen, Xilin and Wang, Jingdong}, + journal={arXiv preprint arXiv:1907.12273}, + year={2019} +} +``` + +The technical report above is also presented in: + +```bibtex +@article{yuan2021ocnet, + title={OCNet: Object Context for Semantic Segmentation}, + author={Yuan, Yuhui and Huang, Lang and Guo, Jianyuan and Zhang, Chao and Chen, Xilin and Wang, Jingdong}, + journal={International Journal of Computer Vision}, + pages={1--24}, + year={2021}, + publisher={Springer} +} +``` diff --git a/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6093aeb4f70b7be3d72557180e36901dfe737892 --- /dev/null +++ b/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..dc14c76dfb682ee85bfcbee26bb17702eeade400 --- /dev/null +++ b/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..1735f89d4183f1448bd360fde2dc6ec6a3d2dbb2 --- /dev/null +++ b/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..b1a6371b7678829a69fe489a522966a64ddf117e --- /dev/null +++ b/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/isanet/isanet_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/isanet/isanet_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c2fb09e374bb6e0a99279e80ef750df639ec70b3 --- /dev/null +++ b/configs/isanet/isanet_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/isanet/isanet_r101-d8_4xb4-20k_voc12aug-512x512.py
b/configs/isanet/isanet_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..7c225cfe3a879fd41ea711300da61c8b8219fd32 --- /dev/null +++ b/configs/isanet/isanet_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/isanet/isanet_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/isanet/isanet_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5e86ee584f439b1fc07fc8a1bbeab7c69cdac52f --- /dev/null +++ b/configs/isanet/isanet_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/isanet/isanet_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/isanet/isanet_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..090e86f243cc140ceaaf6fc50ce4f33614baeaba --- /dev/null +++ b/configs/isanet/isanet_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..f03365e22405022ae94418da79a39c68bcaa870a --- /dev/null +++ b/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..f073a7b691c76a2b0c1d1de18d97cecaa975806f --- /dev/null +++ b/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..4be445d5cf91436796aa5e274c57da113f7f9a06 --- /dev/null +++ b/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 
0000000000000000000000000000000000000000..0278ad852a262d9f6eacfea309661327074535c5 --- /dev/null +++ b/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/isanet/isanet_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/isanet/isanet_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1f4af8d0ba41367fc17fd5fec0d26a388a9a0afc --- /dev/null +++ b/configs/isanet/isanet_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/isanet/isanet_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/isanet/isanet_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..591df42a87ba73cbd6bfc49224a223e131bc1021 --- /dev/null +++ b/configs/isanet/isanet_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/isanet/isanet_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/isanet/isanet_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a59879b0fc8fbf71b3200b3997a131c0ff84f9df --- /dev/null +++ b/configs/isanet/isanet_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/isanet/isanet_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/isanet/isanet_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..7df05c37814201baab2d115833ff84790180fb14 --- /dev/null +++ b/configs/isanet/isanet_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/isanet/metafile.yaml b/configs/isanet/metafile.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ad394eabb2d8ab71395da265cabecce52ac5c225 --- /dev/null +++ b/configs/isanet/metafile.yaml @@ -0,0 +1,399 @@ +Collections: +- Name: ISANet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + README: configs/isanet/README.md + Frameworks: + - PyTorch +Models: +- Name: isanet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.49 + mIoU(ms+flip): 79.44 + Config: configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.869 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.68 + mIoU(ms+flip): 80.25 + Config: configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.869 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.7 + mIoU(ms+flip): 80.28 + Config: configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 6.759 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: 
ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.29 + mIoU(ms+flip): 80.53 + Config: configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 6.759 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.58 + mIoU(ms+flip): 81.05 + Config: configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.425 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.32 + mIoU(ms+flip): 81.58 + Config: configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.425 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.68 + mIoU(ms+flip): 80.95 + Config: configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.815 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.61 + mIoU(ms+flip): 81.59 + Config: configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.815 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.12 + mIoU(ms+flip): 42.35 + Config: configs/isanet/isanet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.59 + mIoU(ms+flip): 43.07 + Config: configs/isanet/isanet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.51 + mIoU(ms+flip): 44.38 + Config: 
configs/isanet/isanet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 12.562 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.8 + mIoU(ms+flip): 45.4 + Config: configs/isanet/isanet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 12.562 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.78 + mIoU(ms+flip): 77.79 + Config: configs/isanet/isanet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.2 + mIoU(ms+flip): 77.22 + Config: configs/isanet/isanet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349.log.json + Paper: + Title: 
Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.46 + mIoU(ms+flip): 79.16 + Config: configs/isanet/isanet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.465 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.12 + mIoU(ms+flip): 79.04 + Config: configs/isanet/isanet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.465 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch diff --git a/configs/knet/README.md b/configs/knet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1f3f2ae268ecda8fbc70579b77b5296b64433917 --- /dev/null +++ b/configs/knet/README.md @@ -0,0 +1,52 @@ +# K-Net + +> [K-Net: Towards Unified Image Segmentation](https://arxiv.org/abs/2106.14855) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Semantic, instance, and panoptic segmentations have been addressed using different and specialized frameworks despite their underlying connections. This paper presents a unified, simple, and effective framework for these essentially similar tasks. The framework, named K-Net, segments both instances and semantic categories consistently by a group of learnable kernels, where each kernel is responsible for generating a mask for either a potential instance or a stuff class. To remedy the difficulties of distinguishing various instances, we propose a kernel update strategy that enables each kernel dynamic and conditional on its meaningful group in the input image. K-Net can be trained in an end-to-end manner with bipartite matching, and its training and inference are naturally NMS-free and box-free. 
Without bells and whistles, K-Net surpasses all previous published state-of-the-art single-model results of panoptic segmentation on MS COCO test-dev split and semantic segmentation on ADE20K val split with 55.2% PQ and 54.3% mIoU, respectively. Its instance segmentation performance is also on par with Cascade Mask R-CNN on MS COCO with 60%-90% faster inference speeds. Code and models will be released at [this https URL](https://github.com/ZwwWayne/K-Net/). + + + +
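The kernel idea above can be sketched in a few lines. The snippet below is a simplified illustration, not the `IterativeDecodeHead`/`KernelUpdateHead` implementation added further down in this diff: the `knet_stage` helper, its shapes, and the `update` module (any layer mapping `(N, K, C)` to `(N, K, C)`; the actual `KernelUpdator` is more elaborate) are assumptions for the sketch.

```python
import torch

def knet_stage(feats, kernels, update):
    """One K-Net stage: predict masks with the current kernels, then make
    each kernel conditional on the content its mask currently groups.
    feats: (N, C, H, W); kernels: (N, K, C); returns refined masks/kernels."""
    n, c, h, w = feats.shape
    # Each kernel acts as a 1x1 conv over the feature map -> mask logits.
    masks = torch.einsum('nkc,nchw->nkhw', kernels, feats)  # (N, K, H, W)
    # Mask-weighted pooling: the "group feature" each kernel is responsible for.
    group = torch.einsum('nkhw,nchw->nkc', masks.sigmoid(), feats) / (h * w)
    # Kernel update: condition each kernel on its group feature, making the
    # kernels dynamic (input-dependent) rather than static weights.
    kernels = update(kernels + group)
    # Re-predict the masks with the refined kernels.
    masks = torch.einsum('nkc,nchw->nkhw', kernels, feats)
    return masks, kernels

# e.g. masks, kernels = knet_stage(torch.randn(2, 256, 64, 64),
#                                  torch.randn(2, 150, 256),
#                                  torch.nn.Linear(256, 256))
```

In the configs below, this refinement is stacked `num_stages = 3` times on top of a conventional kernel-generating head (FCN, PSPNet, DeepLabV3 or UperNet), which is where the `KNet + X` naming in the results table comes from.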
+ +
+ +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | -------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | --------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| KNet + FCN | R-50-D8 | 512x512 | 80000 | 7.01 | 19.24 | V100 | 43.60 | 45.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751-abcab920.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751.log.json) | +| KNet + PSPNet | R-50-D8 | 512x512 | 80000 | 6.98 | 20.04 | V100 | 44.18 | 45.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634-d2c72240.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634.log.json) | +| KNet + DeepLabV3 | R-50-D8 | 512x512 | 80000 | 7.42 | 12.10 | V100 | 45.06 | 46.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642-00c8fbeb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642.log.json) | +| KNet + UperNet | R-50-D8 | 512x512 | 80000 | 7.34 | 17.11 | V100 | 43.45 | 44.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657-215753b0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657.log.json) | +| KNet + UperNet | Swin-T | 512x512 | 80000 | 7.57 | 15.56 | V100 | 45.84 | 46.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059-7545e1dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059.log.json) | +| KNet + UperNet | Swin-L | 512x512 | 80000 | 13.5 | 8.29 | V100 | 52.05 | 53.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559-d8da9a90.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559.log.json) | +| KNet + UperNet | Swin-L | 640x640 | 80000 | 13.54 | 8.29 | V100 | 52.21 | 53.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747-8787fc71.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747.log.json) | + +Note: + +- All experiments of K-Net are implemented with 8 V100 (32G) GPUs and 2 samples per GPU (a total batch size of 16). + +## Citation + +```bibtex +@inproceedings{zhang2021knet, + title={{K-Net: Towards} Unified Image Segmentation}, + author={Wenwei Zhang and Jiangmiao Pang and Kai Chen and Chen Change Loy}, + year={2021}, + booktitle={NeurIPS}, +} +``` diff --git a/configs/knet/knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512.py b/configs/knet/knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..7946cca0673ff3b3da4bad21472ebf6c07b6fa4d --- /dev/null +++ b/configs/knet/knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512.py @@ -0,0 +1,111 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + size=crop_size, + seg_pad_val=255) +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='IterativeDecodeHead', + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=2048, + in_channels=512, + out_channels=512, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict(
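+                    # feature-transform conv applied to the input feature map before each kernel update (activation disabled via act_cfg=None below)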
conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_updator_cfg=dict( + type='KernelUpdator', + in_channels=256, + feat_channels=256, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))) for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005), + clip_grad=dict(max_norm=1, norm_type=2)) +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=1000, + end=80000, + milestones=[60000, 72000], + by_epoch=False, + ) +] +# In K-Net implementation we use batch size 2 per GPU as default +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/knet/knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512.py b/configs/knet/knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..497cd04bf56cc85ba2da5eccb072b2b5a8883c82 --- /dev/null +++ b/configs/knet/knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512.py @@ -0,0 +1,112 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + size=crop_size, + seg_pad_val=255) +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='IterativeDecodeHead', + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=2048, + in_channels=512, + out_channels=512, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict( + conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_updator_cfg=dict( + type='KernelUpdator', + in_channels=256, + feat_channels=256, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))) for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + 
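+        # ADE20K has 150 semantic classes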
num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005), + clip_grad=dict(max_norm=1, norm_type=2)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=1000, + end=80000, + milestones=[60000, 72000], + by_epoch=False, + ) +] +# In K-Net implementation we use batch size 2 per GPU as default +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/knet/knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512.py b/configs/knet/knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b918671bfcf28fd2e26a0d70aa0d3ccc3a5b60a3 --- /dev/null +++ b/configs/knet/knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512.py @@ -0,0 +1,110 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + size=crop_size, + seg_pad_val=255) +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='IterativeDecodeHead', + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=2048, + in_channels=512, + out_channels=512, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict( + conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_updator_cfg=dict( + type='KernelUpdator', + in_channels=256, + feat_channels=256, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))) for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', 
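+            # auxiliary FCN head loss, down-weighted to 0.4 vs. 1.0 for the decode head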
use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005), + clip_grad=dict(max_norm=1, norm_type=2)) +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=1000, + end=80000, + milestones=[60000, 72000], + by_epoch=False, + ) +] +# In K-Net implementation we use batch size 2 per GPU as default +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/knet/knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py b/configs/knet/knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a0a66c57ddb970cbd4a47728319171534acd8093 --- /dev/null +++ b/configs/knet/knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py @@ -0,0 +1,111 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + size=crop_size, + seg_pad_val=255) +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 + +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='IterativeDecodeHead', + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=2048, + in_channels=512, + out_channels=512, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict( + conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_updator_cfg=dict( + type='KernelUpdator', + in_channels=256, + feat_channels=256, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))) for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005), + clip_grad=dict(max_norm=1, norm_type=2)) +# learning policy +param_scheduler = [ + dict( + 
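+        # 1k iterations of linear warm-up, then step decay at 60k and 72k iterations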
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=1000, + end=80000, + milestones=[60000, 72000], + by_epoch=False, + ) +] +# In K-Net implementation we use batch size 2 per GPU as default +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512.py b/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c6f4eb6ae2764c1961f1be3ad1cbaa70d63067aa --- /dev/null +++ b/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512.py @@ -0,0 +1,21 @@ +_base_ = 'knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py' + +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220308-d5bdebaf.pth' # noqa +# model settings +model = dict( + pretrained=checkpoint_file, + backbone=dict( + embed_dims=192, + depths=[2, 2, 18, 2], + num_heads=[6, 12, 24, 48], + window_size=7, + use_abs_pos_embed=False, + drop_path_rate=0.3, + patch_norm=True), + decode_head=dict( + kernel_generate_head=dict(in_channels=[192, 384, 768, 1536])), + auxiliary_head=dict(in_channels=768)) +# In K-Net implementation we use batch size 2 per GPU as default +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640.py b/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..84c3d8cc6a57e8f72e5b6ba27bb648d1fe53846e --- /dev/null +++ b/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640.py @@ -0,0 +1,57 @@ +_base_ = 'knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py' + +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220308-d5bdebaf.pth' # noqa +# model settings +crop_size = (640, 640) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + size=crop_size, + seg_pad_val=255) +model = dict( + data_preprocessor=data_preprocessor, + pretrained=checkpoint_file, + backbone=dict( + embed_dims=192, + depths=[2, 2, 18, 2], + num_heads=[6, 12, 24, 48], + window_size=7, + use_abs_pos_embed=False, + drop_path_rate=0.4, + patch_norm=True), + decode_head=dict( + kernel_generate_head=dict(in_channels=[192, 384, 768, 1536])), + auxiliary_head=dict(in_channels=768)) + +crop_size = (640, 640) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=(2048, 640), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 640), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +val_dataloader = 
dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader +# In K-Net implementation we use batch size 2 per GPU as default. +# Merge the batch size/num_workers overrides into the dataset overrides above: +# re-assigning the dataloader variables without `dataset` would silently drop +# the 640x640 pipelines defined in this file. +train_dataloader = dict(batch_size=2, num_workers=2, dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict(batch_size=1, num_workers=4, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/knet/knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py b/configs/knet/knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a7acec49963570ec662e9de0b2834a384ddd50c3 --- /dev/null +++ b/configs/knet/knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py @@ -0,0 +1,63 @@ +_base_ = 'knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py' + +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220308-f41b89d3.pth'  # noqa + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 + +model = dict( + type='EncoderDecoder', + pretrained=checkpoint_file, + backbone=dict( + _delete_=True, + type='SwinTransformer', + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + use_abs_pos_embed=False, + patch_norm=True, + out_indices=(0, 1, 2, 3)), + decode_head=dict( + kernel_generate_head=dict(in_channels=[96, 192, 384, 768])), + auxiliary_head=dict(in_channels=384)) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + # modify learning rate following the official implementation of Swin Transformer # noqa + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.0005), + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)
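+            # zero weight decay for position-embedding tables and norm layers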
+ }), + clip_grad=dict(max_norm=1, norm_type=2)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=1000, + end=80000, + milestones=[60000, 72000], + by_epoch=False, + ) +] +# In K-Net implementation we use batch size 2 per GPU as default +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/knet/metafile.yaml b/configs/knet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f4ab796096f545d7a47a84b2568c4393004caed --- /dev/null +++ b/configs/knet/metafile.yaml @@ -0,0 +1,188 @@ +Collections: +- Name: KNet + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + README: configs/knet/README.md + Frameworks: + - PyTorch +Models: +- Name: knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.6 + mIoU(ms+flip): 45.12 + Config: configs/knet/knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - KNet + - FCN + Training Resources: 8x V100 GPUS + Memory (GB): 7.01 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751-abcab920.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.18 + mIoU(ms+flip): 45.58 + Config: configs/knet/knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - KNet + - PSPNet + Training Resources: 8x V100 GPUS + Memory (GB): 6.98 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634-d2c72240.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.06 + mIoU(ms+flip): 46.11 + Config: configs/knet/knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - KNet + - DeepLabV3 + Training Resources: 8x V100 GPUS + Memory (GB): 7.42 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642-00c8fbeb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.45 + mIoU(ms+flip): 44.07 + Config: configs/knet/knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - KNet + - UperNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.34 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657-215753b0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.84 + mIoU(ms+flip): 46.27 + Config: configs/knet/knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-T + - KNet + - UperNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.57 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059-7545e1dc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.05 + mIoU(ms+flip): 53.24 + Config: configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-L + - KNet + - UperNet + Training Resources: 8x V100 GPUS + Memory (GB): 13.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559-d8da9a90.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.21 + mIoU(ms+flip): 53.34 + Config: configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-L + - KNet + - UperNet + Training Resources: 8x V100 GPUS + Memory (GB): 13.54 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747-8787fc71.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch diff --git a/configs/mae/README.md b/configs/mae/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d14e3830be248b4556f0e0c2496bc3a94b5d6974 --- /dev/null +++ b/configs/mae/README.md @@ -0,0 +1,82 @@ +# MAE + +> [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +This paper shows that masked autoencoders (MAE) are scalable self-supervised learners for computer vision. Our MAE approach is simple: we mask random patches of the input image and reconstruct the missing pixels. It is based on two core designs. First, we develop an asymmetric encoder-decoder architecture, with an encoder that operates only on the visible subset of patches (without mask tokens), along with a lightweight decoder that reconstructs the original image from the latent representation and mask tokens. Second, we find that masking a high proportion of the input image, e.g., 75%, yields a nontrivial and meaningful self-supervisory task. Coupling these two designs enables us to train large models efficiently and effectively: we accelerate training (by 3x or more) and improve accuracy. Our scalable approach allows for learning high-capacity models that generalize well: e.g., a vanilla ViT-Huge model achieves the best accuracy (87.8%) among methods that use only ImageNet-1K data. Transfer performance in downstream tasks outperforms supervised pre-training and shows promising scaling behavior. + + + +
+ +
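+As a concrete illustration of the random masking described above, here is a minimal PyTorch-style sketch (tensor names and shapes are our own; only the 75% masking ratio follows the paper):
+
+```python
+import torch
+
+
+def random_masking(patches: torch.Tensor, mask_ratio: float = 0.75):
+    """Keep a random subset of patch tokens, as in MAE pre-training.
+
+    patches: (B, N, D) patch embeddings. Returns the visible tokens and
+    the permutation needed to restore the original token order.
+    """
+    B, N, D = patches.shape
+    num_keep = int(N * (1 - mask_ratio))
+    noise = torch.rand(B, N)                  # one uniform sample per token
+    ids_shuffle = noise.argsort(dim=1)        # random permutation per image
+    ids_restore = ids_shuffle.argsort(dim=1)  # inverse permutation
+    ids_keep = ids_shuffle[:, :num_keep]
+    visible = torch.gather(
+        patches, 1, ids_keep.unsqueeze(-1).expand(-1, -1, D))
+    return visible, ids_restore
+```
+
+The encoder then runs only on `visible`, and the lightweight decoder re-inserts mask tokens via `ids_restore` before reconstructing the missing pixels.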
+ +## Usage + +To use pre-trained models from other repositories, it is necessary to convert the checkpoint keys first. + +We provide a script [`beit2mmseg.py`](../../tools/model_converters/beit2mmseg.py) in the tools directory to convert the keys of a MAE model from [the official repo](https://github.com/facebookresearch/mae) to MMSegmentation style. + +```shell +python tools/model_converters/beit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/beit2mmseg.py https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth pretrain/mae_pretrain_vit_base_mmcls.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +In our default setting, pretrained models are defined as below: + +| pretrained models | original models | +| ------------------------------- | ------------------------------------------------------------------------------------------------ | +| mae_pretrain_vit_base_mmcls.pth | ['mae_pretrain_vit_base'](https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth) | + +Verify the single-scale results of the model: + +```shell +sh tools/dist_test.sh \ +configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py \ +upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth $GPUS +``` + +Since the relative position embedding requires the input height and width to be equal, a sliding window is adopted for multi-scale inference, with `min_size=512` so that the shortest edge is 512. Multi-scale inference is therefore run with a separate config instead of `--aug-test`: + +```shell +sh tools/dist_test.sh \ +configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512-ms.py \ +upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth $GPUS +``` + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ----------- | ----------------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UPerNet | ViT-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 9.96 | 7.14 | V100 | 48.13 | 48.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752.log.json) | + +## Citation + +```bibtex +@article{he2021masked, + title={Masked autoencoders are scalable vision learners}, + author={He, Kaiming and Chen, Xinlei and Xie, Saining and Li, Yanghao and Doll{\'a}r, Piotr and Girshick, Ross}, + journal={arXiv 
preprint arXiv:2111.06377}, + year={2021} +} +``` diff --git a/configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512-ms.py b/configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512-ms.py new file mode 100644 index 0000000000000000000000000000000000000000..ec32fea54b00a26cb9b045d57ae42208f958a8b2 --- /dev/null +++ b/configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512-ms.py @@ -0,0 +1,16 @@ +_base_ = './mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py' + +test_pipeline = [ + dict(type='LoadImageFromFile'), + # TODO: Refactor 'MultiScaleFlipAug' which supports + # `min_size` feature in `Resize` class + # img_ratios is [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] + # original image scale is (2048, 512) + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py b/configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b8eae174e9b48f39519fa3aad8cf2a044d7e7c7f --- /dev/null +++ b/configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py @@ -0,0 +1,54 @@ +_base_ = [ + '../_base_/models/upernet_mae.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='./pretrain/mae_pretrain_vit_base_mmcls.pth', + backbone=dict( + type='MAE', + img_size=(512, 512), + patch_size=16, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + init_values=1.0, + drop_path_rate=0.1, + out_indices=[3, 5, 7, 11]), + neck=dict(embed_dim=768, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + in_channels=[768, 768, 768, 768], num_classes=150, channels=768), + auxiliary_head=dict(in_channels=768, num_classes=150), + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341))) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=1e-4, betas=(0.9, 0.999), weight_decay=0.05), + paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65), + constructor='LayerDecayOptimizerConstructor') + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] + +# mixed precision +fp16 = dict(loss_scale='dynamic') + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/mae/metafile.yaml b/configs/mae/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..567eafe1317055339b49ddf49d2e98fe77f488a4 --- /dev/null +++ b/configs/mae/metafile.yaml @@ -0,0 +1,25 @@ +Models: +- Name: mae-base_upernet_8xb2-amp-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.13 + mIoU(ms+flip): 48.7 + Config: configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - UPerNet + Training Resources: 
8x V100 GPUS + Memory (GB): 9.96 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752.log.json + Paper: + Title: Masked Autoencoders Are Scalable Vision Learners + URL: https://arxiv.org/abs/2111.06377 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.24.0/mmseg/models/backbones/mae.py#L46 + Framework: PyTorch diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c21ab0d0c6009e0786fa659e85323a0850064ba2 --- /dev/null +++ b/configs/mask2former/README.md @@ -0,0 +1,74 @@ +# Mask2Former + +> [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Image segmentation is about grouping pixels with different semantics, e.g., category or instance membership, where each choice of semantics defines a task. While only the semantics of each task differ, current research focuses on designing specialized architectures for each task. We present Masked-attention Mask Transformer (Mask2Former), a new architecture capable of addressing any image segmentation task (panoptic, instance or semantic). Its key components include masked attention, which extracts localized features by constraining cross-attention within predicted mask regions. In addition to reducing the research effort by at least three times, it outperforms the best specialized architectures by a significant margin on four popular datasets. Most notably, Mask2Former sets a new state-of-the-art for panoptic segmentation (57.8 PQ on COCO), instance segmentation (50.1 AP on COCO) and semantic segmentation (57.7 mIoU on ADE20K). + +### Usage + +- The Mask2Former model requires [MMDetection](https://github.com/open-mmlab/mmdetection) to be installed first:
+ +```shell +pip install "mmdet>=3.0.0rc4" +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Mask2Former | R-50-D32 | 512x1024 | 90000 | 5.67 | 9.17 | A100 | 80.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802.json) | +| Mask2Former | R-101-D32 | 512x1024 | 90000 | 6.81 | 7.11 | A100 | 80.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628.json)) | +| Mask2Former | Swin-T | 512x1024 | 90000 | 6.36 | 7.18 | A100 | 81.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501.json)) | +| Mask2Former | Swin-S | 512x1024 | 90000 | 8.09 | 5.57 | A100 | 82.57 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802.json)) | +| Mask2Former | Swin-B (in22k) | 512x1024 | 90000 | 10.89 | 4.32 | A100 | 83.52 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030.json)) | +| Mask2Former | Swin-L (in22k) | 512x1024 | 90000 | 15.83 | 2.86 | A100 | 83.65 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901.json)) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Mask2Former | R-50-D32 | 512x512 | 160000 | 3.31 | 26.59 | A100 | 47.87 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.json)) | +| Mask2Former | R-101-D32 | 512x512 | 160000 | 4.09 | 22.97 | A100 | 48.60 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905.json)) | +| Mask2Former | Swin-T | 512x512 | 160000 | 3826 | 23.82 | A100 | 48.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230.json)) | +| Mask2Former | Swin-S | 512x512 | 160000 | 3.74 | 19.69 | A100 | 51.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905.json)) | +| Mask2Former | Swin-B | 640x640 | 160000 | 5.66 | 12.48 | A100 | 52.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118.json)) | +| Mask2Former | Swin-B (in22k) | 640x640 | 160000 | 5.66 | 12.43 | A100 | 53.90 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230.json)) | +| Mask2Former | Swin-L (in22k) | 640x640 | 160000 | 8.86 | 8.81 | A100 | 56.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933.json)) | + +Note: + +- All experiments of Mask2Former are implemented with 8 A100 GPUs and 2 samples per GPU. +- As mentioned in [the official repo](https://github.com/facebookresearch/Mask2Former/issues/5), the results of Mask2Former are relatively unstable; the Mask2Former (Swin-S) result on ADE20K in the table is the median of five training runs, following the authors' suggestion. +- The ResNet backbones utilized in Mask2Former models are standard `ResNet` rather than `ResNetV1c`.
+- Test-time augmentation is not supported in MMSegmentation 1.x yet; we will add "ms+flip" results as soon as possible. + +## Citation + +```bibtex +@inproceedings{cheng2021mask2former, + title={Masked-attention Mask Transformer for Universal Image Segmentation}, + author={Bowen Cheng and Ishan Misra and Alexander G. Schwing and Alexander Kirillov and Rohit Girdhar}, + booktitle={CVPR}, + year={2022} +} +@inproceedings{cheng2021maskformer, + title={Per-Pixel Classification is Not All You Need for Semantic Segmentation}, + author={Bowen Cheng and Alexander G. Schwing and Alexander Kirillov}, + booktitle={NeurIPS}, + year={2021} +} +``` diff --git a/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py b/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..48f6c12d13569cdc3a9419a40f2e052891e76cdc --- /dev/null +++ b/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,7 @@ +_base_ = ['./mask2former_r50_8xb2-160k_ade20k-512x512.py'] + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py b/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..275a7dab52c42cecbed66d7e1a9b8e4e97b61170 --- /dev/null +++ b/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = ['./mask2former_r50_8xb2-90k_cityscapes-512x1024.py'] + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py b/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..78cf60510c7007c72c8b1440781a3aeaed255d83 --- /dev/null +++ b/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,200 @@ +_base_ = ['../_base_/default_runtime.py', '../_base_/datasets/ade20k.py'] + +custom_imports = dict(imports='mmdet.models', allow_failed_imports=False) + +crop_size = (512, 512) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=crop_size, + test_cfg=dict(size_divisor=32)) +num_classes = 150 +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='ResNet', + depth=50, + deep_stem=False, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='SyncBN', requires_grad=False), + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + decode_head=dict( + type='Mask2FormerHead', + in_channels=[256, 512, 1024, 2048], + strides=[4, 8, 16, 32], + feat_channels=256, + out_channels=256, + num_classes=num_classes, + num_queries=100, + num_transformer_feat_level=3, + align_corners=False, + pixel_decoder=dict( + type='mmdet.MSDeformAttnPixelDecoder', + num_outs=3, + norm_cfg=dict(type='GN', num_groups=32), + act_cfg=dict(type='ReLU'), + encoder=dict( # DeformableDetrTransformerEncoder + num_layers=6, + layer_cfg=dict( # DeformableDetrTransformerEncoderLayer + self_attn_cfg=dict( # MultiScaleDeformableAttention + embed_dims=256, + num_heads=8, + num_levels=3, + num_points=4, + im2col_step=64, + dropout=0.0,
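+                        # 4 sampling points on each of 3 feature levels per attention head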
+ batch_first=True, + norm_cfg=None, + init_cfg=None), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0.0, + act_cfg=dict(type='ReLU', inplace=True))), + init_cfg=None), + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + init_cfg=None), + enforce_decoder_input_project=False, + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # Mask2FormerTransformerDecoder + return_intermediate=True, + num_layers=9, + layer_cfg=dict( # Mask2FormerTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=2048, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.0, + dropout_layer=None, + add_identity=True)), + init_cfg=None), + loss_cls=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=False, + loss_weight=2.0, + reduction='mean', + class_weight=[1.0] * num_classes + [0.1]), + loss_mask=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=True, + reduction='mean', + loss_weight=5.0), + loss_dice=dict( + type='mmdet.DiceLoss', + use_sigmoid=True, + activate=True, + reduction='mean', + naive_dice=True, + eps=1.0, + loss_weight=5.0), + train_cfg=dict( + num_points=12544, + oversample_ratio=3.0, + importance_sample_ratio=0.75, + assigner=dict( + type='mmdet.HungarianAssigner', + match_costs=[ + dict(type='mmdet.ClassificationCost', weight=2.0), + dict( + type='mmdet.CrossEntropyLossCost', + weight=5.0, + use_sigmoid=True), + dict( + type='mmdet.DiceCost', + weight=5.0, + pred_act=True, + eps=1.0) + ]), + sampler=dict(type='mmdet.MaskPseudoSampler'))), + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# dataset config +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomChoiceResize', + scales=[int(512 * x * 0.1) for x in range(5, 21)], + resize_type='ResizeShortestEdge', + max_size=2048), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +train_dataloader = dict(batch_size=2, dataset=dict(pipeline=train_pipeline)) + +# optimizer +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +optimizer = dict( + type='AdamW', lr=0.0001, weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999)) +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + clip_grad=dict(max_norm=0.01, norm_type=2), + paramwise_cfg=dict( + custom_keys={ + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi, + }, + norm_decay_mult=0.0)) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=160000, + by_epoch=False) +] + +# training schedule for 160k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=160000, val_interval=5000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', 
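+        # save every 5k iterations and keep the checkpoint with the best mIoU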
by_epoch=False, interval=5000, + save_best='mIoU'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). +auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py b/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..d2211b66a3d5279462cb8047165e67c85c689039 --- /dev/null +++ b/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py @@ -0,0 +1,197 @@ +_base_ = ['../_base_/default_runtime.py', '../_base_/datasets/cityscapes.py'] + +crop_size = (512, 1024) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=crop_size, + test_cfg=dict(size_divisor=32)) +num_classes = 19 +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='ResNet', + depth=50, + deep_stem=False, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='SyncBN', requires_grad=False), + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + decode_head=dict( + type='Mask2FormerHead', + in_channels=[256, 512, 1024, 2048], + strides=[4, 8, 16, 32], + feat_channels=256, + out_channels=256, + num_classes=num_classes, + num_queries=100, + num_transformer_feat_level=3, + align_corners=False, + pixel_decoder=dict( + type='mmdet.MSDeformAttnPixelDecoder', + num_outs=3, + norm_cfg=dict(type='GN', num_groups=32), + act_cfg=dict(type='ReLU'), + encoder=dict( # DeformableDetrTransformerEncoder + num_layers=6, + layer_cfg=dict( # DeformableDetrTransformerEncoderLayer + self_attn_cfg=dict( # MultiScaleDeformableAttention + embed_dims=256, + num_heads=8, + num_levels=3, + num_points=4, + im2col_step=64, + dropout=0.0, + batch_first=True, + norm_cfg=None, + init_cfg=None), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0.0, + act_cfg=dict(type='ReLU', inplace=True))), + init_cfg=None), + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + init_cfg=None), + enforce_decoder_input_project=False, + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # Mask2FormerTransformerDecoder + return_intermediate=True, + num_layers=9, + layer_cfg=dict( # Mask2FormerTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=2048, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.0, + dropout_layer=None, + add_identity=True)), + init_cfg=None), + loss_cls=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=False, + loss_weight=2.0, + reduction='mean', + class_weight=[1.0] * num_classes + [0.1]), + loss_mask=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=True, + reduction='mean', + loss_weight=5.0), + loss_dice=dict( + type='mmdet.DiceLoss', + 
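+            # Dice loss complements the sigmoid cross-entropy mask loss; both are weighted 5.0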
use_sigmoid=True, + activate=True, + reduction='mean', + naive_dice=True, + eps=1.0, + loss_weight=5.0), + train_cfg=dict( + num_points=12544, + oversample_ratio=3.0, + importance_sample_ratio=0.75, + assigner=dict( + type='mmdet.HungarianAssigner', + match_costs=[ + dict(type='mmdet.ClassificationCost', weight=2.0), + dict( + type='mmdet.CrossEntropyLossCost', + weight=5.0, + use_sigmoid=True), + dict( + type='mmdet.DiceCost', + weight=5.0, + pred_act=True, + eps=1.0) + ]), + sampler=dict(type='mmdet.MaskPseudoSampler'))), + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# dataset config +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomChoiceResize', + scales=[int(1024 * x * 0.1) for x in range(5, 21)], + resize_type='ResizeShortestEdge', + max_size=4096), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +# optimizer +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +optimizer = dict( + type='AdamW', lr=0.0001, weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999)) +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + clip_grad=dict(max_norm=0.01, norm_type=2), + paramwise_cfg=dict( + custom_keys={ + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi, + }, + norm_decay_mult=0.0)) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=90000, + by_epoch=False) +] + +# training schedule for 90k +train_cfg = dict(type='IterBasedTrainLoop', max_iters=90000, val_interval=5000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', by_epoch=False, interval=5000, + save_best='mIoU'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
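+# When `enable` is True, the LR is scaled linearly by
+# (actual total batch size) / `base_batch_size`.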
+auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py b/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..b8b1d6cfff0274cd29e358f87caa73663340093d --- /dev/null +++ b/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py @@ -0,0 +1,229 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/ade20k_640x640.py' +] + +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth' # noqa + +crop_size = (640, 640) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=crop_size) +num_classes = 150 + +depths = [2, 2, 18, 2] +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='SwinTransformer', + pretrain_img_size=384, + embed_dims=128, + depths=depths, + num_heads=[4, 8, 16, 32], + window_size=12, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + patch_norm=True, + out_indices=(0, 1, 2, 3), + with_cp=False, + frozen_stages=-1, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + decode_head=dict( + type='Mask2FormerHead', + in_channels=[128, 256, 512, 1024], + strides=[4, 8, 16, 32], + feat_channels=256, + out_channels=256, + num_classes=num_classes, + num_queries=100, + num_transformer_feat_level=3, + align_corners=False, + pixel_decoder=dict( + type='mmdet.MSDeformAttnPixelDecoder', + num_outs=3, + norm_cfg=dict(type='GN', num_groups=32), + act_cfg=dict(type='ReLU'), + encoder=dict( # DeformableDetrTransformerEncoder + num_layers=6, + layer_cfg=dict( # DeformableDetrTransformerEncoderLayer + self_attn_cfg=dict( # MultiScaleDeformableAttention + embed_dims=256, + num_heads=8, + num_levels=3, + num_points=4, + im2col_step=64, + dropout=0.0, + batch_first=True, + norm_cfg=None, + init_cfg=None), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0.0, + act_cfg=dict(type='ReLU', inplace=True))), + init_cfg=None), + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + init_cfg=None), + enforce_decoder_input_project=False, + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # Mask2FormerTransformerDecoder + return_intermediate=True, + num_layers=9, + layer_cfg=dict( # Mask2FormerTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=2048, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.0, + dropout_layer=None, + add_identity=True)), + init_cfg=None), + loss_cls=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=False, + loss_weight=2.0, + reduction='mean', + class_weight=[1.0] * num_classes + [0.1]), + loss_mask=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=True, + reduction='mean', + loss_weight=5.0), + loss_dice=dict( + type='mmdet.DiceLoss', + use_sigmoid=True, 
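+ # `activate=True` makes DiceLoss apply the sigmoid itself before
+ # measuring the overlap between predicted and target masks.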
+ activate=True, + reduction='mean', + naive_dice=True, + eps=1.0, + loss_weight=5.0), + train_cfg=dict( + num_points=12544, + oversample_ratio=3.0, + importance_sample_ratio=0.75, + assigner=dict( + type='mmdet.HungarianAssigner', + match_costs=[ + dict(type='mmdet.ClassificationCost', weight=2.0), + dict( + type='mmdet.CrossEntropyLossCost', + weight=5.0, + use_sigmoid=True), + dict( + type='mmdet.DiceCost', + weight=5.0, + pred_act=True, + eps=1.0) + ]), + sampler=dict(type='mmdet.MaskPseudoSampler'))), + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# dataset config +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomChoiceResize', + scales=[int(x * 0.1 * 640) for x in range(5, 21)], + resize_type='ResizeShortestEdge', + max_size=2560), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +train_dataloader = dict(batch_size=2, dataset=dict(pipeline=train_pipeline)) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optimizer = dict( + type='AdamW', lr=0.0001, weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999)) +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + clip_grad=dict(max_norm=0.01, norm_type=2), + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) + +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=160000, + by_epoch=False) +] + +# training schedule for 160k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=160000, val_interval=5000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', by_epoch=False, interval=5000, + save_best='mIoU'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
+auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py b/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..f39a3c59066f381575ea74d0e977b841d371827f --- /dev/null +++ b/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py @@ -0,0 +1,5 @@ +_base_ = ['./mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py'] + +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_22k_20220317-e5c09f74.pth' # noqa +model = dict( + backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=pretrained))) diff --git a/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py b/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0c229c145d9993f0ed07c2efc977fed9c5ebab65 --- /dev/null +++ b/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py @@ -0,0 +1,42 @@ +_base_ = ['./mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py'] +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_22k_20220317-e5c09f74.pth' # noqa + +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + pretrain_img_size=384, + embed_dims=128, + depths=depths, + num_heads=[4, 8, 16, 32], + window_size=12, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + decode_head=dict(in_channels=[128, 256, 512, 1024])) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optim_wrapper = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py b/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..f2657e884253f993135384fd9848fbab7fdb733e --- /dev/null +++ b/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py @@ -0,0 +1,9 @@ +_base_ = ['./mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py'] +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth' # noqa + +model = dict( + backbone=dict( + embed_dims=192, + num_heads=[6, 12, 24, 48], + init_cfg=dict(type='Pretrained', 
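+ # Swin-L weights pretrained on ImageNet-22k at 384x384 (`pretrained` URL above).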
checkpoint=pretrained)), + decode_head=dict(num_queries=100, in_channels=[192, 384, 768, 1536])) diff --git a/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py b/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..01a7b9988fc011c1398976f5484830f62426924a --- /dev/null +++ b/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py @@ -0,0 +1,42 @@ +_base_ = ['./mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py'] +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth' # noqa + +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + pretrain_img_size=384, + embed_dims=192, + depths=depths, + num_heads=[6, 12, 24, 48], + window_size=12, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + decode_head=dict(in_channels=[192, 384, 768, 1536])) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optim_wrapper = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py b/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a7796d5693b3305fc2fb2feb3f9383f81891104e --- /dev/null +++ b/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,37 @@ +_base_ = ['./mask2former_swin-t_8xb2-160k_ade20k-512x512.py'] +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth' # noqa + +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + depths=depths, init_cfg=dict(type='Pretrained', + checkpoint=pretrained))) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': 
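+ # every norm layer inside every Swin block: lr_mult=0.1, decay_mult=0.0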
backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optim_wrapper = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py b/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..5f75544b1a560c7426d0fa9802fb5d0b072e393a --- /dev/null +++ b/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py @@ -0,0 +1,37 @@ +_base_ = ['./mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py'] +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth' # noqa + +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + depths=depths, init_cfg=dict(type='Pretrained', + checkpoint=pretrained))) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optim_wrapper = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py b/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9de3d242ebaa6838004f2613351176c39d4d01f7 --- /dev/null +++ b/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,52 @@ +_base_ = ['./mask2former_r50_8xb2-160k_ade20k-512x512.py'] +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa +depths = [2, 2, 6, 2] +model = dict( + backbone=dict( + _delete_=True, + type='SwinTransformer', + embed_dims=96, + depths=depths, + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + patch_norm=True, + out_indices=(0, 1, 2, 3), + with_cp=False, + frozen_stages=-1, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + decode_head=dict(in_channels=[96, 192, 384, 768])) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, 
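+ # the pretrained backbone trains at 10% of the head learning rate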
decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optim_wrapper = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py b/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0abda6430ca639b022758eac4329cdbcbade4856 --- /dev/null +++ b/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py @@ -0,0 +1,52 @@ +_base_ = ['./mask2former_r50_8xb2-90k_cityscapes-512x1024.py'] +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa +depths = [2, 2, 6, 2] +model = dict( + backbone=dict( + _delete_=True, + type='SwinTransformer', + embed_dims=96, + depths=depths, + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + patch_norm=True, + out_indices=(0, 1, 2, 3), + with_cp=False, + frozen_stages=-1, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + decode_head=dict(in_channels=[96, 192, 384, 768])) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optim_wrapper = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/metafile.yaml b/configs/mask2former/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..090c95e7cfee4a704559f98e5e54f96afa6294cc --- /dev/null +++ b/configs/mask2former/metafile.yaml @@ -0,0 +1,314 @@ +Collections: +- Name: Mask2Former + License: Apache License 2.0 + Metadata: + Training Data: + - Usage + - Cityscapes + - ADE20K + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + README: configs/mask2former/README.md + Frameworks: + - PyTorch +Models: +- Name: mask2former_r50_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + 
Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.44 + Config: configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-50-D32 + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 5.67 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_r101_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.8 + Config: configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-101-D32 + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 6.81 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-t_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.71 + Config: configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - Swin-T + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 6.36 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-s_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 82.57 + Config: configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - Swin-S + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 8.09 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 83.52 + Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - Swin-B + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 10.89 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 83.65 + Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - Swin-L + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 15.83 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_r50_8xb2-160k_ade20k-512x512 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.87 + Config: configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 3.31 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_r101_8xb2-160k_ade20k-512x512 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.6 + Config: configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D32 + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 4.09 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-t_8xb2-160k_ade20k-512x512 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.66 + Config: configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-T + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 3826.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-s_8xb2-160k_ade20k-512x512 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 51.24 + Config: configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-S + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 3.74 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640 + In 
Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.44 + Config: configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 5.66 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 53.9 + Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 5.66 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 56.01 + Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-L + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 8.86 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch diff --git a/configs/maskformer/README.md b/configs/maskformer/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a899bac0909b3000f1e72a53480e7cd7322ad3ba --- /dev/null +++ b/configs/maskformer/README.md @@ -0,0 
+1,62 @@ +# MaskFormer + +> [MaskFormer: Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Modern approaches typically formulate semantic segmentation as a per-pixel classification task, while instance-level segmentation is handled with an alternative mask classification. Our key insight: mask classification is sufficiently general to solve both semantic- and instance-level segmentation tasks in a unified manner using the exact same model, loss, and training procedure. Following this observation, we propose MaskFormer, a simple mask classification model which predicts a set of binary masks, each associated with a single global class label prediction. Overall, the proposed mask classification-based method simplifies the landscape of effective approaches to semantic and panoptic segmentation tasks and shows excellent empirical results. In particular, we observe that MaskFormer outperforms per-pixel classification baselines when the number of classes is large. Our mask classification-based method outperforms both current state-of-the-art semantic (55.6 mIoU on ADE20K) and panoptic segmentation (52.7 PQ on COCO) models. + + + +
+ +
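+Once MMSegmentation and MMDetection are installed (see *Usage* below), the models in this folder can be tried through MMSegmentation's high-level inference API. A minimal sketch, assuming the R-50-D32 checkpoint from the results table has been downloaded locally and that `demo.png` is any test image (both paths are placeholders):
+
+```python
+from mmseg.apis import inference_model, init_model
+from mmseg.apis.inference import show_result_pyplot
+
+# Placeholder paths: the config ships with this repo, the checkpoint comes
+# from the "download" column of the results table below.
+config = 'configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py'
+checkpoint = 'maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth'
+
+model = init_model(config, checkpoint, device='cuda:0')
+result = inference_model(model, 'demo.png')  # image path or loaded ndarray
+# Blend the predicted segmentation map over the input image.
+show_result_pyplot(model, 'demo.png', result, opacity=0.5)
+```
+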
+
+### Usage
+
+- MaskFormer requires [MMDetection](https://github.com/open-mmlab/mmdetection) to be installed first.
+
+```shell
+pip install "mmdet>=3.0.0rc4"
+```
+
+## Results and models
+
+### ADE20K
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ---------- | --------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ------ | -------- |
+| MaskFormer | R-50-D32 | 512x512 | 160000 | 3.29 | 42.20 | A100 | 44.29 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.json) |
+| MaskFormer | R-101-D32 | 512x512 | 160000 | 4.12 | 34.90 | A100 | 45.11 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053.json) |
+| MaskFormer | Swin-T | 512x512 | 160000 | 3.73 | 40.53 | A100 | 46.69 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813.json) |
+| MaskFormer | Swin-S | 512x512 | 160000 | 5.33 | 26.98 | A100 | 49.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710.json) |
+
+Note:
+
+- All experiments of MaskFormer are implemented with 8 V100 (32G) GPUs with 2 samples per GPU.
+- The results of MaskFormer are relatively unstable.
The accuracy (mIoU) of model with `R-101-D32` is from 44.7 to 46.0, and with `Swin-S` is from 49.0 to 49.8. +- The ResNet backbones utilized in MaskFormer models are standard `ResNet` rather than `ResNetV1c`. +- Test time augmentation is not supported in MMSegmentation 1.x version yet, we would add "ms+flip" results as soon as possible. + +## Citation + +```bibtex +@article{cheng2021per, + title={Per-pixel classification is not all you need for semantic segmentation}, + author={Cheng, Bowen and Schwing, Alex and Kirillov, Alexander}, + journal={Advances in Neural Information Processing Systems}, + volume={34}, + pages={17864--17875}, + year={2021} +} +``` diff --git a/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py b/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..04bd37546abfe979d83a4bf488f249b16aba79e9 --- /dev/null +++ b/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,7 @@ +_base_ = './maskformer_r50-d32_8xb2-160k_ade20k-512x512.py' + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py b/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..2a83746171072fb266e4d33480943de85426d4b7 --- /dev/null +++ b/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,141 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +crop_size = (512, 512) +data_preprocessor = dict( + type='SegDataPreProcessor', + size=crop_size, + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +# model_cfg +num_classes = 150 +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=True, + style='pytorch', + contract_dilation=True, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + decode_head=dict( + type='MaskFormerHead', + in_channels=[256, 512, 1024, + 2048], # input channels of pixel_decoder modules + feat_channels=256, + in_index=[0, 1, 2, 3], + num_classes=150, + out_channels=256, + num_queries=100, + pixel_decoder=dict( + type='mmdet.PixelDecoder', + norm_cfg=dict(type='GN', num_groups=32), + act_cfg=dict(type='ReLU')), + enforce_decoder_input_project=False, + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # DetrTransformerDecoder + return_intermediate=True, + num_layers=6, + layer_cfg=dict( # DetrTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.1, + proj_drop=0.1, + dropout_layer=None, + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.1, + proj_drop=0.1, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=2048, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.1, + dropout_layer=None, + add_identity=True)), + init_cfg=None), + loss_cls=dict( + type='mmdet.CrossEntropyLoss', + 
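+ # Softmax CE over num_classes + 1 logits; the trailing 0.1 in
+ # `class_weight` down-weights the extra "no object" class.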
use_sigmoid=False, + loss_weight=1.0, + reduction='mean', + class_weight=[1.0] * num_classes + [0.1]), + loss_mask=dict( + type='mmdet.FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + reduction='mean', + loss_weight=20.0), + loss_dice=dict( + type='mmdet.DiceLoss', + use_sigmoid=True, + activate=True, + reduction='mean', + naive_dice=True, + eps=1.0, + loss_weight=1.0), + train_cfg=dict( + assigner=dict( + type='mmdet.HungarianAssigner', + match_costs=[ + dict(type='mmdet.ClassificationCost', weight=1.0), + dict( + type='mmdet.FocalLossCost', + weight=20.0, + binary_input=True), + dict( + type='mmdet.DiceCost', + weight=1.0, + pred_act=True, + eps=1.0) + ]), + sampler=dict(type='mmdet.MaskPseudoSampler'))), + # training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), +) +# optimizer +optimizer = dict( + type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.0001) +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=optimizer, + clip_grad=dict(max_norm=0.01, norm_type=2), + paramwise_cfg=dict(custom_keys={ + 'backbone': dict(lr_mult=0.1), + })) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=160000, + by_epoch=False) +] + +# In MaskFormer implementation we use batch size 2 per GPU as default +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py b/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..2cbc038ac244a4984faece3455c0f3b905ea81c1 --- /dev/null +++ b/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,79 @@ +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth' # noqa +_base_ = './maskformer_r50-d32_8xb2-160k_ade20k-512x512.py' +backbone_norm_cfg = dict(type='LN', requires_grad=True) +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + _delete_=True, + type='SwinTransformer', + pretrain_img_size=224, + embed_dims=96, + patch_size=4, + window_size=7, + mlp_ratio=4, + depths=depths, + num_heads=[3, 6, 12, 24], + strides=(4, 2, 2, 2), + out_indices=(0, 1, 2, 3), + qkv_bias=True, + qk_scale=None, + patch_norm=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + use_abs_pos_embed=False, + act_cfg=dict(type='GELU'), + norm_cfg=backbone_norm_cfg, + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), + decode_head=dict( + type='MaskFormerHead', + in_channels=[96, 192, 384, + 768], # input channels of pixel_decoder modules + )) + +# optimizer +optimizer = dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01) +# set all layers in backbone to lr_mult=1.0 +# set all norm layers, position_embeding, +# query_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=1.0, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +embed_multi = dict(decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for 
block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=optimizer, + clip_grad=dict(max_norm=0.01, norm_type=2), + paramwise_cfg=dict(custom_keys=custom_keys)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] diff --git a/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py b/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..aa242dbe31f1cfaff17b6d57b9d5f5dab5695aea --- /dev/null +++ b/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,81 @@ +_base_ = './maskformer_r50-d32_8xb2-160k_ade20k-512x512.py' + +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa +backbone_norm_cfg = dict(type='LN', requires_grad=True) +depths = [2, 2, 6, 2] +model = dict( + backbone=dict( + _delete_=True, + type='SwinTransformer', + pretrain_img_size=224, + embed_dims=96, + patch_size=4, + window_size=7, + mlp_ratio=4, + depths=depths, + num_heads=[3, 6, 12, 24], + strides=(4, 2, 2, 2), + out_indices=(0, 1, 2, 3), + qkv_bias=True, + qk_scale=None, + patch_norm=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + use_abs_pos_embed=False, + act_cfg=dict(type='GELU'), + norm_cfg=backbone_norm_cfg, + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), + decode_head=dict( + type='MaskFormerHead', + in_channels=[96, 192, 384, + 768], # input channels of pixel_decoder modules + )) + +# optimizer +optimizer = dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01) + +# set all layers in backbone to lr_mult=1.0 +# set all norm layers, position_embeding, +# query_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=1.0, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +embed_multi = dict(decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=optimizer, + clip_grad=dict(max_norm=0.01, norm_type=2), + paramwise_cfg=dict(custom_keys=custom_keys)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] diff --git a/configs/maskformer/metafile.yaml b/configs/maskformer/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9853e131f6968deddcea14840f277b682696da0 --- /dev/null +++ b/configs/maskformer/metafile.yaml @@ -0,0 +1,111 @@ +Collections: +- Name: 
MaskFormer
+  License: Apache License 2.0
+  Metadata:
+    Training Data:
+    - ADE20K
+  Paper:
+    Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic
+      Segmentation'
+    URL: https://arxiv.org/abs/2107.06278
+  README: configs/maskformer/README.md
+  Frameworks:
+  - PyTorch
+Models:
+- Name: maskformer_r50-d32_8xb2-160k_ade20k-512x512
+  In Collection: MaskFormer
+  Results:
+    Task: Semantic Segmentation
+    Dataset: ADE20K
+    Metrics:
+      mIoU: 44.29
+  Config: configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py
+  Metadata:
+    Training Data: ADE20K
+    Batch Size: 16
+    Architecture:
+    - R-50-D32
+    - MaskFormer
+    Training Resources: 8x A100 GPUS
+    Memory (GB): 3.29
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth
+  Training log: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.json
+  Paper:
+    Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic
+      Segmentation'
+    URL: https://arxiv.org/abs/2107.06278
+  Code: https://github.com/open-mmlab/mmdetection/blob/dev-3.x/mmdet/models/dense_heads/maskformer_head.py#L21
+  Framework: PyTorch
+- Name: maskformer_r101-d32_8xb2-160k_ade20k-512x512
+  In Collection: MaskFormer
+  Results:
+    Task: Semantic Segmentation
+    Dataset: ADE20K
+    Metrics:
+      mIoU: 45.11
+  Config: configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py
+  Metadata:
+    Training Data: ADE20K
+    Batch Size: 16
+    Architecture:
+    - R-101-D32
+    - MaskFormer
+    Training Resources: 8x A100 GPUS
+    Memory (GB): 4.12
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth
+  Training log: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053.json
+  Paper:
+    Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic
+      Segmentation'
+    URL: https://arxiv.org/abs/2107.06278
+  Code: https://github.com/open-mmlab/mmdetection/blob/dev-3.x/mmdet/models/dense_heads/maskformer_head.py#L21
+  Framework: PyTorch
+- Name: maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512
+  In Collection: MaskFormer
+  Results:
+    Task: Semantic Segmentation
+    Dataset: ADE20K
+    Metrics:
+      mIoU: 46.69
+  Config: configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py
+  Metadata:
+    Training Data: ADE20K
+    Batch Size: 16
+    Architecture:
+    - Swin-T
+    - MaskFormer
+    Training Resources: 8x A100 GPUS
+    Memory (GB): 3.73
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth
+  Training log: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813.json
+  Paper:
+    Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic
+      Segmentation'
+    URL: https://arxiv.org/abs/2107.06278
+  Code: https://github.com/open-mmlab/mmdetection/blob/dev-3.x/mmdet/models/dense_heads/maskformer_head.py#L21
+  Framework: PyTorch
+- Name: maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512
+  In Collection: MaskFormer
+  Results:
+    Task: Semantic Segmentation
+    Dataset: ADE20K
+    Metrics:
+      mIoU: 49.36
+  Config: configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py
+  Metadata:
+    Training Data: ADE20K
+    Batch Size: 16
+    Architecture:
+    - Swin-S
+    - MaskFormer
+    Training Resources: 8x A100 GPUS
+    Memory (GB): 5.33
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth
+  Training log: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710.json
+  Paper:
+    Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic
+      Segmentation'
+    URL: https://arxiv.org/abs/2107.06278
+  Code: https://github.com/open-mmlab/mmdetection/blob/dev-3.x/mmdet/models/dense_heads/maskformer_head.py#L21
+  Framework: PyTorch
diff --git a/configs/mobilenet_v2/README.md b/configs/mobilenet_v2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bff5259129cd22a4f672e2ba24ec5bd7e746e039
--- /dev/null
+++ b/configs/mobilenet_v2/README.md
@@ -0,0 +1,56 @@
+# MobileNetV2
+
+> [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381)
+
+## Introduction
+
+
+Official Repo
+
+Code Snippet
+
+## Abstract
+
+
+In this paper we describe a new mobile architecture, MobileNetV2, that improves the state of the art performance of mobile models on multiple tasks and benchmarks as well as across a spectrum of different model sizes. We also describe efficient ways of applying these mobile models to object detection in a novel framework we call SSDLite. Additionally, we demonstrate how to build mobile semantic segmentation models through a reduced form of DeepLabv3 which we call Mobile DeepLabv3.
+The MobileNetV2 architecture is based on an inverted residual structure where the input and output of the residual block are thin bottleneck layers opposite to traditional residual models which use expanded representations in the input. MobileNetV2 uses lightweight depthwise convolutions to filter features in the intermediate expansion layer. Additionally, we find that it is important to remove non-linearities in the narrow layers in order to maintain representational power. We demonstrate that this improves performance and provide an intuition that led to this design. Finally, our approach allows decoupling of the input/output domains from the expressiveness of the transformation, which provides a convenient framework for further analysis. We measure our performance on Imagenet classification, COCO object detection, VOC image segmentation. We evaluate the trade-offs between accuracy, and number of operations measured by multiply-adds (MAdd), as well as the number of parameters.
+
+
+ +
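The building block described in the abstract is compact enough to sketch directly. Below is a minimal PyTorch rendition of an inverted residual block, assuming the standard expand, depthwise-filter, linear-project structure from the paper; it is an illustration only, not the `MobileNetV2` backbone registered in `mmseg.models.backbones`. The `M-V2-D8` configs in this folder additionally keep the output stride at 8 by trading the last strides for dilations (the `strides` and `dilations` tuples in each config).

```python
# Minimal sketch of MobileNetV2's inverted residual block (illustrative only;
# mmseg's actual backbone lives in mmseg/models/backbones/mobilenet_v2.py).
import torch
import torch.nn as nn


class InvertedResidual(nn.Module):
    def __init__(self, in_ch, out_ch, stride=1, expand_ratio=6):
        super().__init__()
        hidden = in_ch * expand_ratio
        # Identity shortcut only when the block preserves the feature shape.
        self.use_res = stride == 1 and in_ch == out_ch
        self.block = nn.Sequential(
            nn.Conv2d(in_ch, hidden, 1, bias=False),   # 1x1 expansion
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
            nn.Conv2d(hidden, hidden, 3, stride, 1,    # 3x3 depthwise filter
                      groups=hidden, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
            nn.Conv2d(hidden, out_ch, 1, bias=False),  # linear bottleneck:
            nn.BatchNorm2d(out_ch),                    # no activation here
        )

    def forward(self, x):
        out = self.block(x)
        return x + out if self.use_res else out


x = torch.randn(1, 32, 64, 128)
assert InvertedResidual(32, 32)(x).shape == x.shape
```

The absence of an activation after the final projection is the "linear bottleneck" the abstract argues for: a non-linearity in the narrow layer would destroy information the thin representation cannot afford to lose.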
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 | 14.2 | A100 | 71.19 | 73.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024_20230224_185436.json) | +| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | V100 | 70.23 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | +| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | V100 | 73.84 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | V100 | 75.20 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | 
----------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | M-V2-D8 | 512x512 | 160000 | 6.5 | 64.4 | V100 | 19.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| PSPNet | M-V2-D8 | 512x512 | 160000 | 6.5 | 57.7 | V100 | 29.68 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| DeepLabV3 | M-V2-D8 | 512x512 | 160000 | 6.8 | 39.9 | V100 | 34.08 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x512 | 160000 | 8.2 | 43.1 | V100 | 34.02 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | + +## Citation + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` diff --git a/configs/mobilenet_v2/metafile.yaml b/configs/mobilenet_v2/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..119c9ae7d663c31212ee08cd43684b743d6086e0 --- /dev/null +++ b/configs/mobilenet_v2/metafile.yaml @@ -0,0 +1,186 @@ +Models: +- Name: mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + 
Dataset: Cityscapes + Metrics: + mIoU: 71.19 + mIoU(ms+flip): 73.34 + Config: configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - M-V2-D8 + - FCN + Training Resources: 4x A100 GPUS + Memory (GB): 3.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024_20230224_185436.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.23 + Config: configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - M-V2-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.84 + Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - M-V2-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 3.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.2 + Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - M-V2-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 5.1 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 19.71 + Config: configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - M-V2-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 29.68 + Config: configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - M-V2-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 34.08 + Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - M-V2-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 34.02 + Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - M-V2-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 8.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..ece9b0bf8fe3d96043f1dc86209959f8c14e7524 --- /dev/null +++ b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,13 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6), + norm_cfg=dict(type='SyncBN', requires_grad=True)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..86eec0d9489cbbf75240da394522ee41d3dd2754 --- /dev/null +++ b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,13 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6), + norm_cfg=dict(type='SyncBN', requires_grad=True)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..195046edc497e96df5542e0767ac13d653dd93ae --- /dev/null +++ b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,15 @@ +_base_ = [ + '../deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py' +] +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6), + norm_cfg=dict(type='SyncBN', 
requires_grad=True)), + decode_head=dict(in_channels=320, c1_in_channels=24), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..d4f669f163fbdd07de4a1648c04ba994c8d496bf --- /dev/null +++ b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,13 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6), + norm_cfg=dict(type='SyncBN', requires_grad=True)), + decode_head=dict(in_channels=320, c1_in_channels=24), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py b/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0829f438a76839fb045d40ffaef70b8845ba7e93 --- /dev/null +++ b/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,13 @@ +_base_ = '../fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6), + norm_cfg=dict(type='SyncBN', requires_grad=True)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py b/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..015fa6f2011a7c2a8991923e1da34ffef7cb5737 --- /dev/null +++ b/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,13 @@ +_base_ = '../fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6), + norm_cfg=dict(type='SyncBN', requires_grad=True)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py b/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..8542e0266905d127c66d4dbbd2d7189c9bb32e7f --- /dev/null +++ b/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,13 @@ +_base_ = '../pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6), + norm_cfg=dict(type='SyncBN', requires_grad=True)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py b/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py new file mode 100644 index 
0000000000000000000000000000000000000000..73db59beaea2d6b005ecdea9053b9caa448b7100 --- /dev/null +++ b/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,13 @@ +_base_ = '../pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6), + norm_cfg=dict(type='SyncBN', requires_grad=True)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/configs/mobilenet_v3/README.md b/configs/mobilenet_v3/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8ed0a5692a13d0b172417869eb2df0953eec3d21 --- /dev/null +++ b/configs/mobilenet_v3/README.md @@ -0,0 +1,50 @@ +# MobileNetV3 + +> [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) + +## Introduction + + + + + +Official Repo + +Code Snippet + +## Abstract + + + +We present the next generation of MobileNets based on a combination of complementary search techniques as well as a novel architecture design. MobileNetV3 is tuned to mobile phone CPUs through a combination of hardware-aware network architecture search (NAS) complemented by the NetAdapt algorithm and then subsequently improved through novel architecture advances. This paper starts the exploration of how automated search algorithms and network design can work together to harness complementary approaches improving the overall state of the art. Through this process we create two new MobileNet models for release: MobileNetV3-Large and MobileNetV3-Small which are targeted for high and low resource use cases. These models are then adapted and applied to the tasks of object detection and semantic segmentation. For the task of semantic segmentation (or any dense pixel prediction), we propose a new efficient segmentation decoder Lite Reduced Atrous Spatial Pyramid Pooling (LR-ASPP). We achieve new state of the art results for mobile classification, detection and segmentation. MobileNetV3-Large is 3.2% more accurate on ImageNet classification while reducing latency by 15% compared to MobileNetV2. MobileNetV3-Small is 4.6% more accurate while reducing latency by 5% compared to MobileNetV2. MobileNetV3-Large detection is 25% faster at roughly the same accuracy as MobileNetV2 on COCO detection. MobileNetV3-Large LR-ASPP is 30% faster than MobileNetV2 R-ASPP at similar accuracy for Cityscapes segmentation. + + + +
+ +
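The LR-ASPP decoder mentioned in the abstract is equally small. The sketch below condenses the idea into a few lines of PyTorch, assuming channel sizes matching the configs in this folder (`in_channels=(16, 16, 576)`, `channels=128`): a 1x1-convolution branch over the high-level features is gated by a global-average-pooling branch, upsampled, and fused with a projection of a low-level feature map. It is a simplified illustration, not mmseg's `LRASPPHead`, which among other differences consumes three feature maps.

```python
# Simplified sketch of Lite R-ASPP (illustrative; mmseg's LRASPPHead differs
# in detail, per the configs below).
import torch
import torch.nn as nn
import torch.nn.functional as F


class LiteRASPP(nn.Module):
    def __init__(self, low_ch=16, high_ch=576, mid_ch=128, num_classes=19):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(high_ch, mid_ch, 1, bias=False),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=True),
        )
        # Global-context gate: pool -> 1x1 conv -> sigmoid, broadcast-multiplied.
        self.gate = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(high_ch, mid_ch, 1),
            nn.Sigmoid(),
        )
        self.low_cls = nn.Conv2d(low_ch, num_classes, 1)
        self.high_cls = nn.Conv2d(mid_ch, num_classes, 1)

    def forward(self, low, high):
        x = self.conv(high) * self.gate(high)  # gated high-level features
        x = F.interpolate(x, size=low.shape[2:], mode='bilinear',
                          align_corners=False)
        return self.low_cls(low) + self.high_cls(x)  # fuse the two branches


low, high = torch.randn(1, 16, 128, 256), torch.randn(1, 576, 16, 32)
assert LiteRASPP()(low, high).shape == (1, 19, 128, 256)
```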
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| LRASPP | M-V3-D8 | 512x1024 | 320000 | 8.9 | 15.22 | V100 | 69.54 | 70.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3/mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes-20201224_220337.log.json) | +| LRASPP | M-V3-D8 (scratch) | 512x1024 | 320000 | 8.9 | 14.77 | V100 | 67.87 | 69.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3/mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes-20201224_220337.log.json) | +| LRASPP | M-V3s-D8 | 512x1024 | 320000 | 5.3 | 23.64 | V100 | 64.11 | 66.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3/mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes-20201224_223935.log.json) | +| LRASPP | M-V3s-D8 (scratch) | 512x1024 | 320000 | 5.3 | 24.50 | V100 | 62.74 | 65.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3/mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes-20201224_223935.log.json) | + +## Citation + +```bibtex +@inproceedings{Howard_2019_ICCV, + title={Searching for MobileNetV3}, + author={Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, 
Ruoming and Vasudevan, Vijay and Le, Quoc V. and Adam, Hartwig}, + booktitle={The IEEE International Conference on Computer Vision (ICCV)}, + pages={1314-1324}, + month={October}, + year={2019}, + doi={10.1109/ICCV.2019.00140} +} +``` diff --git a/configs/mobilenet_v3/metafile.yaml b/configs/mobilenet_v3/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0351d3b8e459430ee8c3bb614a482b894fee84e9 --- /dev/null +++ b/configs/mobilenet_v3/metafile.yaml @@ -0,0 +1,109 @@ +Collections: +- Name: LRASPP + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: Searching for MobileNetV3 + URL: https://arxiv.org/abs/1905.02244 + README: configs/mobilenet_v3/README.md + Frameworks: + - PyTorch +Models: +- Name: mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024 + In Collection: LRASPP + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 69.54 + mIoU(ms+flip): 70.89 + Config: configs/mobilenet_v3/mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - M-V3-D8 + - LRASPP + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes-20201224_220337.log.json + Paper: + Title: Searching for MobileNetV3 + URL: https://arxiv.org/abs/1905.02244 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 + Framework: PyTorch +- Name: mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024 + In Collection: LRASPP + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 67.87 + mIoU(ms+flip): 69.78 + Config: configs/mobilenet_v3/mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - M-V3-D8 + - LRASPP + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes-20201224_220337.log.json + Paper: + Title: Searching for MobileNetV3 + URL: https://arxiv.org/abs/1905.02244 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 + Framework: PyTorch +- Name: mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024 + In Collection: LRASPP + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 64.11 + mIoU(ms+flip): 66.42 + Config: configs/mobilenet_v3/mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - M-V3s-D8 + - LRASPP + Training Resources: 4x V100 GPUS + Memory (GB): 5.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes-20201224_223935.log.json + Paper: + Title: Searching for MobileNetV3 + URL: https://arxiv.org/abs/1905.02244 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 + Framework: PyTorch +- Name: mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024 + In Collection: LRASPP + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 62.74 + mIoU(ms+flip): 65.01 + Config: configs/mobilenet_v3/mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - M-V3s-D8 + - LRASPP + Training Resources: 4x V100 GPUS + Memory (GB): 5.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes-20201224_223935.log.json + Paper: + Title: Searching for MobileNetV3 + URL: https://arxiv.org/abs/1905.02244 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 + Framework: PyTorch diff --git a/configs/mobilenet_v3/mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024.py b/configs/mobilenet_v3/mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..bc6322fe40b3b7bc8b32f8e2a92c46ee4322f672 --- /dev/null +++ b/configs/mobilenet_v3/mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024.py @@ -0,0 +1,23 @@ +_base_ = './mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py' +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://contrib/mobilenet_v3_small', + backbone=dict( + type='MobileNetV3', + arch='small', + out_indices=(0, 1, 12), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 16, 576), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) diff --git a/configs/mobilenet_v3/mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024.py b/configs/mobilenet_v3/mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..7260936e60a3b5394c0092e7fefd7255385ba862 --- /dev/null +++ b/configs/mobilenet_v3/mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024.py @@ -0,0 +1,22 @@ +_base_ = './mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py' +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='small', + out_indices=(0, 1, 12), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 16, 576), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', 
use_sigmoid=False, loss_weight=1.0))) diff --git a/configs/mobilenet_v3/mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py b/configs/mobilenet_v3/mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..8dcbc3395f13ad07f501be74de9667f7d5697f72 --- /dev/null +++ b/configs/mobilenet_v3/mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py @@ -0,0 +1,13 @@ +_base_ = [ + '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +# Re-config the data sampler. +model = dict(data_preprocessor=data_preprocessor) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader + +runner = dict(type='IterBasedRunner', max_iters=320000) diff --git a/configs/mobilenet_v3/mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py b/configs/mobilenet_v3/mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..cd84265f32f67632eaf9882f4dfc9480d4e89a12 --- /dev/null +++ b/configs/mobilenet_v3/mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py @@ -0,0 +1,16 @@ +_base_ = [ + '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://contrib/mobilenet_v3_large') + +# Re-config the data sampler. +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader + +runner = dict(type='IterBasedRunner', max_iters=320000) diff --git a/configs/nonlocal_net/README.md b/configs/nonlocal_net/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4c3f49f981e209950188b4e2c4f89e7825f412dc --- /dev/null +++ b/configs/nonlocal_net/README.md @@ -0,0 +1,68 @@ +# NonLocal Net + +> [Non-local Neural Networks](https://arxiv.org/abs/1711.07971) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Both convolutional and recurrent operations are building blocks that process one local neighborhood at a time. In this paper, we present non-local operations as a generic family of building blocks for capturing long-range dependencies. Inspired by the classical non-local means method in computer vision, our non-local operation computes the response at a position as a weighted sum of the features at all positions. This building block can be plugged into many computer vision architectures. On the task of video classification, even without any bells and whistles, our non-local models can compete or outperform current competition winners on both Kinetics and Charades datasets. In static image recognition, our non-local models improve object detection/segmentation and pose estimation on the COCO suite of tasks. Code is available at [this https URL](https://github.com/facebookresearch/video-nonlocal-net). + + + +
+ +
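Concretely, the non-local operation computes the response at position i as y_i = (1/C(x)) * sum_j f(x_i, x_j) g(x_j), a normalized weighted sum over all positions j. The sketch below is a minimal embedded-Gaussian instantiation of a 2D non-local block, where f is a softmax over dot products in an embedding space; it is an illustration only, while the configs in this folder rely on mmseg's `NLHead` decode head (see the `Code` links in the metafile below).

```python
# Minimal embedded-Gaussian non-local block (sketch; not mmseg's NLHead).
import torch
import torch.nn as nn


class NonLocalBlock2d(nn.Module):
    def __init__(self, in_ch):
        super().__init__()
        self.inter = in_ch // 2
        self.theta = nn.Conv2d(in_ch, self.inter, 1)  # query embedding
        self.phi = nn.Conv2d(in_ch, self.inter, 1)    # key embedding
        self.g = nn.Conv2d(in_ch, self.inter, 1)      # value embedding
        self.out = nn.Conv2d(self.inter, in_ch, 1)

    def forward(self, x):
        n, _, h, w = x.shape
        q = self.theta(x).flatten(2).transpose(1, 2)  # (N, HW, C')
        k = self.phi(x).flatten(2)                    # (N, C', HW)
        v = self.g(x).flatten(2).transpose(1, 2)      # (N, HW, C')
        # The response at each position is a softmax-weighted sum of the
        # embedded features at *all* positions: long-range by construction.
        attn = torch.softmax(q @ k, dim=-1)           # (N, HW, HW)
        y = (attn @ v).transpose(1, 2).reshape(n, self.inter, h, w)
        return x + self.out(y)                        # residual connection


x = torch.randn(2, 64, 32, 32)
assert NonLocalBlock2d(64)(x).shape == x.shape
```

The HW x HW attention map is why the benchmark tables below report comparatively low inference fps at large crop sizes: the cost of the block grows quadratically with the number of spatial positions.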
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| NonLocalNet | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.72 | V100 | 78.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocalNet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.95 | V100 | 78.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocalNet | R-50-D8 | 769x769 | 40000 | 8.9 | 1.52 | V100 | 78.33 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json) | +| NonLocalNet | R-101-D8 | 769x769 | 40000 | 12.8 | 1.05 | V100 | 78.57 | 80.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json) | +| NonLocalNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 78.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json) | +| NonLocalNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 78.93 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json) | +| NonLocalNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.05 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json) | +| NonLocalNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.40 | 80.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| NonLocalNet | R-50-D8 | 512x512 | 80000 | 9.1 | 21.37 | V100 | 40.75 | 42.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 80000 | 12.6 | 13.97 | V100 | 42.90 | 44.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json) | +| NonLocalNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.03 | 43.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 44.63 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502-7881aa1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| NonLocalNet | R-50-D8 | 512x512 | 20000 | 6.4 | 21.21 | V100 | 76.20 | 77.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 20000 | 9.8 | 14.01 | V100 | 78.15 | 78.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json) | +| NonLocalNet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.65 | 77.47 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 78.27 | 79.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json) | + +## Citation + +```bibtex +@inproceedings{wang2018non, + title={Non-local neural networks}, + author={Wang, Xiaolong and Girshick, Ross and Gupta, Abhinav and He, Kaiming}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={7794--7803}, + year={2018} +} +``` diff --git a/configs/nonlocal_net/metafile.yaml b/configs/nonlocal_net/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69bd72570b8d45da20d641f19d7eb2f2a29588d3 --- /dev/null +++ b/configs/nonlocal_net/metafile.yaml @@ -0,0 +1,387 @@ +Collections: +- Name: NonLocalNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + README: configs/nonlocal_net/README.md + Frameworks: + - PyTorch +Models: +- Name: nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.24 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.66 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.33 + mIoU(ms+flip): 79.92 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.57 + mIoU(ms+flip): 80.29 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.01 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.93 + Config: 
configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.05 + mIoU(ms+flip): 80.68 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.4 + mIoU(ms+flip): 80.85 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.75 + mIoU(ms+flip): 42.05 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json + Paper: + Title: Non-local 
Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.9 + mIoU(ms+flip): 44.27 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.03 + mIoU(ms+flip): 43.04 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.63 + mIoU(ms+flip): 45.79 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502-7881aa1a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.2 + mIoU(ms+flip): 77.12 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.4 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.15 + mIoU(ms+flip): 78.86 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.65 + mIoU(ms+flip): 77.47 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.27 + mIoU(ms+flip): 79.12 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + 
Framework: PyTorch diff --git a/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..5fcf7bcb16016949a49dc6a7f21221cbf87c495f --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..ee984c2bbd6407907f15fc19c289b054ac814865 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..aca80d676a2f82b210eacbfeb7d32f89fc8b70a0 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..8a7aeea7f6b755774c19124c99a91b0ca6c29e58 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/nonlocal_net/nonlocal_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/nonlocal_net/nonlocal_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..0cdb3caaf33376bba84bc5cb8f6fafaa6f0b9a3f --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/nonlocal_net/nonlocal_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/nonlocal_net/nonlocal_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a7cacea5178f22adfad95c22c2e1890881cda448 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/nonlocal_net/nonlocal_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/nonlocal_net/nonlocal_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ec475443e8275de210e314a65e5e434d73402354 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', 
backbone=dict(depth=101)) diff --git a/configs/nonlocal_net/nonlocal_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/nonlocal_net/nonlocal_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ca79f6fdc0630806a17fe1bd42aa8cd6b0dc0352 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..f4d5fd22f956b3cb876c3bf8830305c92a18dd77 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..17423f2658f84fce535d49fb4e3f5b6ffdb5ec39 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc752c2c7293e66eb0ce59ea4568df215495f30 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..f855a814e507dea59ba17ed1272e4679cf140a66 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py 
b/configs/nonlocal_net/nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..848be4a233d19b449fff49a961be4bdf4f6a9e4e --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/nonlocal_net/nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..cd840a03d1c0491ea9c41691b5e7544c39ccb522 --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/nonlocal_net/nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..0efb9d096917ba901358f2cce10bd216291d021f --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/nonlocal_net/nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/nonlocal_net/nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..52783bcc9881ef59fe72d5a29b69faf77e52accd --- /dev/null +++ b/configs/nonlocal_net/nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/ocrnet/README.md b/configs/ocrnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..628a3b1597cdac03a875da6437cf1f5c88454fcb --- /dev/null +++ b/configs/ocrnet/README.md @@ -0,0 +1,89 @@ +# OCRNet + +> [Object-Contextual Representations for Semantic Segmentation](https://arxiv.org/abs/1909.11065) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +In this paper, we address the problem of semantic segmentation and focus on the context aggregation strategy for robust segmentation. Our motivation is that the label of a pixel is the category of the object that the pixel belongs to. 
We present a simple yet effective approach, object-contextual representations, characterizing a pixel by exploiting the representation of the corresponding object class. First, we construct object regions based on a feature map supervised by the ground-truth segmentation, and then compute the object region representations. Second, we compute the representation similarity between each pixel and each object region, and augment the representation of each pixel with an object contextual representation, which is a weighted aggregation of all the object region representations according to their similarities with the pixel. We empirically demonstrate that the proposed approach achieves competitive performance on six challenging semantic segmentation benchmarks: Cityscapes, ADE20K, LIP, PASCAL VOC 2012, PASCAL-Context and COCO-Stuff. Notably, we achieved the 2nd place on the Cityscapes leader-board with a single model. + + + +
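The aggregation described above is easy to state in code. Below is a minimal, self-contained PyTorch sketch of the three steps (soft object regions from a coarse prediction, object region representations, pixel-region weighted aggregation). It is an illustration only, not the mmsegmentation `OCRHead`, which wraps the same idea in learned 1x1-conv projections; the tensor names and toy shapes are assumptions.

```python
# Minimal sketch of object-contextual aggregation (illustration only).
import torch


def object_contextual_representations(pixel_feats: torch.Tensor,
                                      coarse_logits: torch.Tensor) -> torch.Tensor:
    """pixel_feats: (B, C, H, W) backbone features.
    coarse_logits: (B, K, H, W) coarse per-class scores (soft object regions).
    Returns (B, C, H, W) object-contextual features."""
    b, c, h, w = pixel_feats.shape
    feats = pixel_feats.flatten(2)                  # (B, C, HW)
    # Step 1: each soft object region -> one representation, a spatially
    # weighted average of the pixel features.
    regions = coarse_logits.flatten(2).softmax(-1)  # (B, K, HW), weights over pixels
    region_reprs = regions @ feats.transpose(1, 2)  # (B, K, C)
    # Step 2: similarity between every pixel and every object region,
    # normalized over regions.
    sim = feats.transpose(1, 2) @ region_reprs.transpose(1, 2)  # (B, HW, K)
    attn = (sim / c**0.5).softmax(-1)
    # Step 3: augment each pixel with the similarity-weighted aggregation of
    # all object region representations.
    context = attn @ region_reprs                   # (B, HW, C)
    return context.transpose(1, 2).reshape(b, c, h, w)


# Toy shapes: batch of 2, 512-dim features, 19 Cityscapes-like classes.
out = object_contextual_representations(torch.randn(2, 512, 64, 64),
                                        torch.randn(2, 19, 64, 64))
print(out.shape)  # torch.Size([2, 512, 64, 64])
```

In the full model the coarse prediction comes from the auxiliary `FCNHead`, and the resulting context is fused with the pixel features before classification, as in the cascade `decode_head` lists of the OCRNet configs further below.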
+ +
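The tables below pair each config with its released checkpoint. For orientation, here is a minimal inference sketch using the same mmseg Python API calls (`init_model`, `inference_model`) that `app.py` in this repo already uses; the file paths are illustrative placeholders, and the `SegDataSample` attribute layout assumes mmseg 1.x.

```python
from mmseg.apis import inference_model, init_model

# Illustrative pair from the Cityscapes table below; any config in these
# tables should work with its matching checkpoint.
config = 'configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py'
checkpoint = 'ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth'

model = init_model(config, checkpoint, device='cuda:0')  # or device='cpu'
result = inference_model(model, 'demo.png')  # image path or loaded array
# `result` is a SegDataSample; the predicted label map is in pred_sem_seg.
print(result.pred_sem_seg.data.shape)
```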
+ +## Results and models + +### Cityscapes + +#### HRNet backbone + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | A100 | 76.61 | 78.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | V100 | 77.72 | 79.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | V100 | 80.58 | 81.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | V100 | 77.16 | 78.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | V100 | 78.57 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) | +| OCRNet | 
HRNetV2p-W48 | 512x1024 | 80000 | - | - | V100 | 80.70 | 81.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | V100 | 78.45 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | V100 | 79.47 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | V100 | 81.35 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) | + +#### ResNet backbone + +| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| OCRNet | R-101-D8 | 512x1024 | 8 | 40000 | - | - | V100 | 80.09 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721-02ac0f13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 40000 | 
8.8 | 3.02 | V100 | 80.30 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726-db500f80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 80000 | 8.8 | 3.02 | V100 | 80.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421-78688424.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 80000 | 6.7 | 28.98 | V100 | 35.06 | 35.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 80000 | 7.9 | 18.93 | V100 | 37.79 | 39.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 80000 | 11.2 | 16.99 | V100 | 43.00 | 44.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | V100 | 37.19 | 38.40 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 160000 | - | - | V100 | 39.32 | 40.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 160000 | - | - | V100 | 43.25 | 44.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 20000 | 3.5 | 31.55 | V100 | 71.70 | 73.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 20000 | 4.7 | 19.91 | V100 | 74.75 | 77.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 20000 | 8.1 | 17.83 | V100 | 77.72 | 79.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb4-20k_voc12aug-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | V100 | 72.76 | 74.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 40000 | - | - | V100 | 74.98 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 40000 | - | - | V100 | 77.14 | 79.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json) | + +## Citation + +```bibtex +@article{YuanW18, + title={OCNet: Object context network for scene parsing}, + author={Yuhui Yuan and Jingdong Wang}, + journal={arXiv preprint arXiv:1809.00916}, + year={2018} +} + +@inproceedings{YuanCW20, + title={Object-Contextual Representations for Semantic Segmentation}, + author={Yuhui Yuan and Xilin Chen and Jingdong Wang}, + booktitle={ECCV}, + year={2020} +} +``` diff --git a/configs/ocrnet/metafile.yaml b/configs/ocrnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5467feb9753c5dec1033e526ad3d993bf00d8f14 --- /dev/null +++ b/configs/ocrnet/metafile.yaml @@ -0,0 +1,577 @@ +Collections: +- Name: OCRNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + README: configs/ocrnet/README.md + Frameworks: + - PyTorch +Models: +- Name: ocrnet_hr18s_4xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.61 + mIoU(ms+flip): 78.01 + Config: configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x A100 GPUS + Memory (GB): 3.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth + Training 
log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18_4xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.72 + mIoU(ms+flip): 79.49 + Config: configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 4.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr48_4xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.58 + mIoU(ms+flip): 81.79 + Config: configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18s_4xb2-80k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.16 + mIoU(ms+flip): 78.66 + Config: configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18_4xb2-80k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic 
Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.57 + mIoU(ms+flip): 80.46 + Config: configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr48_4xb2-80k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.7 + mIoU(ms+flip): 81.87 + Config: configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18s_4xb2-160k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.45 + mIoU(ms+flip): 79.97 + Config: configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18_4xb2-160k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.47 + mIoU(ms+flip): 80.91 + Config: configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr48_4xb2-160k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.35 + mIoU(ms+flip): 82.7 + Config: configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.09 + Config: configs/ocrnet/ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721-02ac0f13.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.3 + Config: configs/ocrnet/ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-101-D8 + - OCRNet + Training Resources: 8x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726-db500f80.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.81 + Config: configs/ocrnet/ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-101-D8 + - OCRNet + Training Resources: 8x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421-78688424.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18s_4xb4-80k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 35.06 + mIoU(ms+flip): 35.8 + Config: configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18_4xb4-80k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.79 + mIoU(ms+flip): 39.16 + Config: configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr48_4xb4-80k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.0 + mIoU(ms+flip): 44.3 + Config: configs/ocrnet/ocrnet_hr48_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 11.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json + Paper: + Title: 
Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18s_4xb4-160k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.19 + mIoU(ms+flip): 38.4 + Config: configs/ocrnet/ocrnet_hr18s_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18_4xb4-160k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.32 + mIoU(ms+flip): 40.8 + Config: configs/ocrnet/ocrnet_hr18_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr48_4xb4-160k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.25 + mIoU(ms+flip): 44.88 + Config: configs/ocrnet/ocrnet_hr48_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18s_4xb4-20k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 71.7 + mIoU(ms+flip): 73.84 + Config: configs/ocrnet/ocrnet_hr18s_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.5 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18_4xb4-20k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.75 + mIoU(ms+flip): 77.11 + Config: configs/ocrnet/ocrnet_hr18_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 4.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr48_4xb4-20k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.72 + mIoU(ms+flip): 79.87 + Config: configs/ocrnet/ocrnet_hr48_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18s_4xb4-40k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.76 + mIoU(ms+flip): 74.6 + Config: configs/ocrnet/ocrnet_hr18s_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + 
Framework: PyTorch +- Name: ocrnet_hr18_4xb4-40k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.98 + mIoU(ms+flip): 77.4 + Config: configs/ocrnet/ocrnet_hr18_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr48_4xb4-40k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.14 + mIoU(ms+flip): 79.71 + Config: configs/ocrnet/ocrnet_hr48_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch diff --git a/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..659217cf69d55b12492051ccac177da4f5733452 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..d401c4b1e77ecdc71dedc2b6316dbcf8c5f75bf9 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..44426a28e161dc68c56008302d34f5e17def52b9 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 
@@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/ocrnet/ocrnet_hr18_4xb4-160k_ade20k-512x512.py b/configs/ocrnet/ocrnet_hr18_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..353005b57bd46beb7eabdf93249cff5362b0e4c8 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) diff --git a/configs/ocrnet/ocrnet_hr18_4xb4-20k_voc12aug-512x512.py b/configs/ocrnet/ocrnet_hr18_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c696c21e966c209ee1cf80678bb74e579d2c7eda --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) diff --git a/configs/ocrnet/ocrnet_hr18_4xb4-40k_voc12aug-512x512.py b/configs/ocrnet/ocrnet_hr18_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c6b69ea632da380287cbb3c9dde264395b568147 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size 
= (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) diff --git a/configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py b/configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ceca8df6965748b8a762fdd4463fb34d1e686296 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ]) diff --git a/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..c5388fb75177e266c9ba351c8c31cea488a0a8c2 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..2335f3b7620652fbe9a768fe77961d460c596fae --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + 
stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..b2d1a8fa8481c85c1e3844a257a36e8083d2e168 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_4xb4-160k_ade20k-512x512.py b/configs/ocrnet/ocrnet_hr18s_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..fabf5826cd6081a1d80ab01fec6351b8137a62b3 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_4xb4-20k_voc12aug-512x512.py b/configs/ocrnet/ocrnet_hr18s_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..0eca655cfce554160171c1e50582fc90db966832 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_4xb4-20k_voc12aug-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_4xb4-40k_voc12aug-512x512.py b/configs/ocrnet/ocrnet_hr18s_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..13b02b9df6b452fae559f3d7676221a942644a69 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_4xb4-40k_voc12aug-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py b/configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..60c79c2dc5f2035acd9b6ad55e22b3a6ea08f1e6 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_4xb4-80k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py new file mode 100644 index 
0000000000000000000000000000000000000000..184d38dd2cb8c961f05b66e76f12803d0e9e7614 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..7025ee9e77139e24d3b2246d3976f4b903ee9412 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..9c68a15fc5e657b14efc27eab592f0668657eefb --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + 
dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_4xb4-160k_ade20k-512x512.py b/configs/ocrnet/ocrnet_hr48_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..e74976c80510d0363b8335425a3459d53d39fc7d --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_4xb4-160k_ade20k-512x512.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_4xb4-20k_voc12aug-512x512.py b/configs/ocrnet/ocrnet_hr48_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..f015b920e1dcc4afae22921991650e614d685647 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_4xb4-20k_voc12aug-512x512.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_4xb4-40k_voc12aug-512x512.py b/configs/ocrnet/ocrnet_hr48_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..baafa380d41ccb89b52785d2f00cc6de945467c6 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_4xb4-40k_voc12aug-512x512.py @@ -0,0 
+1,39 @@ +_base_ = './ocrnet_hr18_4xb4-40k_voc12aug-512x512.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_hr48_4xb4-80k_ade20k-512x512.py b/configs/ocrnet/ocrnet_hr48_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..85514b9d7edc9d0a160ca26a1f883f6054411da8 --- /dev/null +++ b/configs/ocrnet/ocrnet_hr48_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_4xb4-80k_ade20k-512x512.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/configs/ocrnet/ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..a94597bf35347017f137aa0d0528bccec4d48f39 --- /dev/null +++ b/configs/ocrnet/ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101)) diff --git a/configs/ocrnet/ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..88e5ad08fd5989ddd8efc6640756b7aef79595a8 --- /dev/null +++ b/configs/ocrnet/ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,21 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', 
'../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +param_scheduler = [ + dict( + type='PolyLR', + eta_min=2e-4, + power=0.9, + begin=0, + end=40000, + by_epoch=False) +] diff --git a/configs/ocrnet/ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024.py b/configs/ocrnet/ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..a3b420909cef60a559e83a8b047bc34f658ae8d5 --- /dev/null +++ b/configs/ocrnet/ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,21 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +param_scheduler = [ + dict( + type='PolyLR', + eta_min=2e-4, + power=0.9, + begin=0, + end=80000, + by_epoch=False) +] diff --git a/configs/pidnet/README.md b/configs/pidnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e23efbd3f3aaab795880d3532535ad3e64b712cc --- /dev/null +++ b/configs/pidnet/README.md @@ -0,0 +1,50 @@ +# PIDNet + +> [PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller](https://arxiv.org/pdf/2206.02066.pdf) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Two-branch network architecture has shown its efficiency and effectiveness for real-time semantic segmentation tasks. However, direct fusion of low-level details and high-level semantics will lead to a phenomenon that the detailed features are easily overwhelmed by surrounding contextual information, namely overshoot in this paper, which limits the improvement of the accuracy of existing two-branch models. In this paper, we bridge a connection between Convolutional Neural Network (CNN) and Proportional-Integral-Derivative (PID) controller and reveal that the two-branch network is nothing but a Proportional-Integral (PI) controller, which inherently suffers from a similar overshoot issue. To alleviate this issue, we propose a novel three-branch network architecture: PIDNet, which possesses three branches to parse the detailed, context and boundary information (derivative of semantics), respectively, and employs boundary attention to guide the fusion of detailed and context branches in the final stage. The family of PIDNets achieve the best trade-off between inference speed and accuracy and their test accuracy surpasses all the existing models with similar inference speed on Cityscapes, CamVid and COCO-Stuff datasets. Especially, PIDNet-S achieves 78.6% mIOU with inference speed of 93.2 FPS on Cityscapes test set and 80.1% mIOU with speed of 153.7 FPS on CamVid test set. + + + +
+ +
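The PID reading of the architecture maps directly onto the textbook discrete-time controller: the proportional term reacts to the current input (spatial detail), the integral term accumulates history (context, which overshoots on its own), and the derivative term responds to change (boundaries). A minimal, purely illustrative sketch of that correspondence (not code from this repo):

```python
def pid_step(error, error_sum, prev_error, kp=1.0, ki=0.1, kd=0.5):
    """One discrete PID update; each term mirrors one PIDNet branch."""
    p = kp * error                 # P: current value ~ detail branch
    error_sum = error_sum + error
    i = ki * error_sum             # I: accumulated history ~ context branch
    d = kd * (error - prev_error)  # D: rate of change ~ boundary branch
    return p + i + d, error_sum
```

A plain two-branch network keeps only the P and I terms, which is exactly the overshoot argument the abstract makes; the D (boundary) branch is what PIDNet adds.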
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PIDNet | PIDNet-S | 1024x1024 | 120000 | 3.38 | 80.82 | A100 | 78.74 | 80.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes/pidnet-s_2xb6-120k_1024x1024-cityscapes_20230302_191700-bb8e3bcc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes/pidnet-s_2xb6-120k_1024x1024-cityscapes_20230302_191700.json) | +| PIDNet | PIDNet-M | 1024x1024 | 120000 | 5.14 | 71.98 | A100 | 80.22 | 82.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes/pidnet-m_2xb6-120k_1024x1024-cityscapes_20230301_143452-f9bcdbf3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes/pidnet-m_2xb6-120k_1024x1024-cityscapes_20230301_143452.json) | +| PIDNet | PIDNet-L | 1024x1024 | 120000 | 5.83 | 60.06 | A100 | 80.89 | 82.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes/pidnet-l_2xb6-120k_1024x1024-cityscapes_20230303_114514-0783ca6b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes/pidnet-l_2xb6-120k_1024x1024-cityscapes_20230303_114514.json) | + +## Notes + +The pretrained weights in config files are converted from [the official repo](https://github.com/XuJiacong/PIDNet#models). + +## Citation + +```bibtex +@misc{xu2022pidnet, + title={PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller}, + author={Jiacong Xu and Zixiang Xiong and Shankar P. 
Bhattacharyya}, + year={2022}, + eprint={2206.02066}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/configs/pidnet/metafile.yaml b/configs/pidnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51b514a487d194996674a3de112ccdd25a11982a --- /dev/null +++ b/configs/pidnet/metafile.yaml @@ -0,0 +1,85 @@ +Collections: +- Name: PIDNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: 'PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller' + URL: https://arxiv.org/pdf/2206.02066.pdf + README: configs/pidnet/README.md + Frameworks: + - PyTorch +Models: +- Name: pidnet-s_2xb6-120k_1024x1024-cityscapes + In Collection: PIDNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.74 + mIoU(ms+flip): 80.87 + Config: configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py + Metadata: + Training Data: Cityscapes + Batch Size: 12 + Architecture: + - PIDNet-S + - PIDNet + Training Resources: 2x A100 GPUS + Memory (GB): 3.38 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes/pidnet-s_2xb6-120k_1024x1024-cityscapes_20230302_191700-bb8e3bcc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes/pidnet-s_2xb6-120k_1024x1024-cityscapes_20230302_191700.json + Paper: + Title: 'PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller' + URL: https://arxiv.org/pdf/2206.02066.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/pidnet.py + Framework: PyTorch +- Name: pidnet-m_2xb6-120k_1024x1024-cityscapes + In Collection: PIDNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.22 + mIoU(ms+flip): 82.05 + Config: configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py + Metadata: + Training Data: Cityscapes + Batch Size: 12 + Architecture: + - PIDNet-M + - PIDNet + Training Resources: 2x A100 GPUS + Memory (GB): 5.14 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes/pidnet-m_2xb6-120k_1024x1024-cityscapes_20230301_143452-f9bcdbf3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes/pidnet-m_2xb6-120k_1024x1024-cityscapes_20230301_143452.json + Paper: + Title: 'PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller' + URL: https://arxiv.org/pdf/2206.02066.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/pidnet.py + Framework: PyTorch +- Name: pidnet-l_2xb6-120k_1024x1024-cityscapes + In Collection: PIDNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.89 + mIoU(ms+flip): 82.37 + Config: configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py + Metadata: + Training Data: Cityscapes + Batch Size: 12 + Architecture: + - PIDNet-L + - PIDNet + Training Resources: 2x A100 GPUS + Memory (GB): 5.83 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes/pidnet-l_2xb6-120k_1024x1024-cityscapes_20230303_114514-0783ca6b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes/pidnet-l_2xb6-120k_1024x1024-cityscapes_20230303_114514.json + Paper: + Title: 'PIDNet: A Real-time Semantic Segmentation Network Inspired from PID 
Controller' + URL: https://arxiv.org/pdf/2206.02066.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/pidnet.py + Framework: PyTorch diff --git a/configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py b/configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1955c91e051ec50ea3d6e7b9e663dff1e4e2d5c7 --- /dev/null +++ b/configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './pidnet-s_2xb6-120k_1024x1024-cityscapes.py' +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/pidnet/pidnet-l_imagenet1k_20230306-67889109.pth' # noqa +model = dict( + backbone=dict( + channels=64, + ppm_channels=112, + num_stem_blocks=3, + num_branch_blocks=4, + init_cfg=dict(checkpoint=checkpoint_file)), + decode_head=dict(in_channels=256, channels=256)) diff --git a/configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py b/configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..38a69c1c45a22366250ccda60c769cdfa32b8b94 --- /dev/null +++ b/configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py @@ -0,0 +1,5 @@ +_base_ = './pidnet-s_2xb6-120k_1024x1024-cityscapes.py' +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/pidnet/pidnet-m_imagenet1k_20230306-39893c52.pth' # noqa +model = dict( + backbone=dict(channels=64, init_cfg=dict(checkpoint=checkpoint_file)), + decode_head=dict(in_channels=256)) diff --git a/configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py b/configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f70ca4287ab2d3266e69951cce957875853be06d --- /dev/null +++ b/configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py @@ -0,0 +1,113 @@ +_base_ = [ + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py' +] + +# The class_weight is borrowed from https://github.com/openseg-group/OCNet.pytorch/issues/14 # noqa +# Licensed under the MIT License +class_weight = [ + 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786, + 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 1.0865, 1.1529, + 1.0507 +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/pidnet/pidnet-s_imagenet1k_20230306-715e6273.pth' # noqa +crop_size = (1024, 1024) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='PIDNet', + in_channels=3, + channels=32, + ppm_channels=96, + num_stem_blocks=2, + num_branch_blocks=3, + align_corners=False, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU', inplace=True), + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), + decode_head=dict( + type='PIDHead', + in_channels=128, + channels=128, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU', inplace=True), + align_corners=True, + loss_decode=[ + dict( + type='CrossEntropyLoss', + use_sigmoid=False, + class_weight=class_weight, + loss_weight=0.4), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=1.0), + dict(type='BoundaryLoss', loss_weight=20.0), + dict( + 
type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=1.0) + ]), + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='GenerateEdge', edge_width=4), + dict(type='PackSegInputs') +] +train_dataloader = dict(batch_size=6, dataset=dict(pipeline=train_pipeline)) + +iters = 120000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] +# training schedule for 120k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=iters, val_interval=iters // 10) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', by_epoch=False, interval=iters // 10), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/point_rend/README.md b/configs/point_rend/README.md new file mode 100644 index 0000000000000000000000000000000000000000..487d3bcc7f9506ce010c296cbcf1ba4cf0392d43 --- /dev/null +++ b/configs/point_rend/README.md @@ -0,0 +1,51 @@ +# PointRend + +> [PointRend: Image Segmentation as Rendering](https://arxiv.org/abs/1912.08193) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We present a new method for efficient high-quality image segmentation of objects and scenes. By analogizing classical computer graphics methods for efficient rendering with over- and undersampling challenges faced in pixel labeling tasks, we develop a unique perspective of image segmentation as a rendering problem. From this vantage, we present the PointRend (Point-based Rendering) neural network module: a module that performs point-based segmentation predictions at adaptively selected locations based on an iterative subdivision algorithm. PointRend can be flexibly applied to both instance and semantic segmentation tasks by building on top of existing state-of-the-art models. While many concrete implementations of the general idea are possible, we show that a simple design already achieves excellent results. Qualitatively, PointRend outputs crisp object boundaries in regions that are over-smoothed by previous methods. Quantitatively, PointRend yields significant gains on COCO and Cityscapes, for both instance and semantic segmentation. PointRend's efficiency enables output resolutions that are otherwise impractical in terms of memory or computation compared to existing approaches. Code has been made available at [this https URL](https://github.com/facebookresearch/detectron2/tree/main/projects/PointRend). + + + +
+ +
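The "adaptively selected locations" come from an uncertainty criterion on the coarse prediction: at inference, PointRend upsamples step by step and re-labels only the points whose two highest class scores are nearly tied. Below is a minimal sketch of that selection step, with a function name of our own; the full version in mmseg's `point_head.py` (linked in the metafile below) also mixes in random points during training:

```python
import torch

def top_uncertain_points(seg_logits: torch.Tensor, num_points: int) -> torch.Tensor:
    """Normalized (x, y) coords of the `num_points` most ambiguous pixels.

    seg_logits: coarse prediction of shape (B, C, H, W);
    assumes num_points <= H * W.
    """
    b, _, h, w = seg_logits.shape
    top2 = seg_logits.topk(2, dim=1).values   # two highest class scores per pixel
    uncertainty = top2[:, 1] - top2[:, 0]     # margin is <= 0; near 0 means ambiguous
    idx = uncertainty.view(b, -1).topk(num_points, dim=1).indices
    ys = (idx // w).float() / max(h - 1, 1)   # row index -> y in [0, 1]
    xs = (idx % w).float() / max(w - 1, 1)    # column index -> x in [0, 1]
    return torch.stack([xs, ys], dim=-1)      # (B, num_points, 2)
```

A small point-wise MLP then re-classifies just these points from fine-grained plus coarse features, which is why the crisp boundaries come at little extra cost.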
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PointRend | R-50 | 512x1024 | 80000 | 3.1 | 8.48 | V100 | 76.47 | 78.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend/pointrend_r50_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes-20200715_214714.log.json) | +| PointRend | R-101 | 512x1024 | 80000 | 4.2 | 7.00 | V100 | 78.30 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend/pointrend_r101_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes-20200715_214824.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PointRend | R-50 | 512x512 | 160000 | 5.1 | 17.31 | V100 | 37.64 | 39.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend/pointrend_r50_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k-20200807_232644.log.json) | +| PointRend | R-101 | 512x512 | 160000 | 6.1 | 15.50 | V100 | 40.02 | 41.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k-20200808_030852.log.json) | + +## Citation + +```bibtex +@inproceedings{kirillov2020pointrend, + title={Pointrend: Image segmentation as rendering}, + author={Kirillov, Alexander and Wu, Yuxin and He, Kaiming and Girshick, Ross}, + booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, + pages={9799--9808}, + year={2020} +} +``` diff --git a/configs/point_rend/metafile.yaml b/configs/point_rend/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..064717c9df61fc77425c789105f7f946ebe8084e --- /dev/null +++ b/configs/point_rend/metafile.yaml @@ -0,0 +1,110 @@ +Collections: +- Name: PointRend + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + README: configs/point_rend/README.md + Frameworks: + - PyTorch +Models: +- Name: pointrend_r50_4xb2-80k_cityscapes-512x1024 + In Collection: PointRend + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.47 + mIoU(ms+flip): 78.13 + Config: configs/point_rend/pointrend_r50_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - PointRend + Training Resources: 4x V100 GPUS + Memory (GB): 3.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes-20200715_214714.log.json + Paper: + Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 + Framework: PyTorch +- Name: pointrend_r101_4xb2-80k_cityscapes-512x1024 + In Collection: PointRend + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.3 + mIoU(ms+flip): 79.97 + Config: configs/point_rend/pointrend_r101_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - PointRend + Training Resources: 4x V100 GPUS + Memory (GB): 4.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes-20200715_214824.log.json + Paper: + Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 + Framework: PyTorch +- Name: pointrend_r50_4xb4-160k_ade20k-512x512 + In Collection: PointRend + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.64 + mIoU(ms+flip): 39.17 + Config: configs/point_rend/pointrend_r50_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50 + - PointRend + Training Resources: 4x V100 GPUS + Memory (GB): 5.1 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k-20200807_232644.log.json + Paper: + Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 + Framework: PyTorch +- Name: pointrend_r101_4xb4-160k_ade20k-512x512 + In Collection: PointRend + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.02 + mIoU(ms+flip): 41.6 + Config: configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101 + - PointRend + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k-20200808_030852.log.json + Paper: + Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 + Framework: PyTorch diff --git a/configs/point_rend/pointrend_r101_4xb2-80k_cityscapes-512x1024.py b/configs/point_rend/pointrend_r101_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..ca2a19a196e868cbbbfb9de88c91cc2a128ef7c8 --- /dev/null +++ b/configs/point_rend/pointrend_r101_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './pointrend_r50_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py b/configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..6729d3b672eb6116cb25d6ae6c4bb30d22f7208a --- /dev/null +++ b/configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pointrend_r50_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/point_rend/pointrend_r50_4xb2-80k_cityscapes-512x1024.py b/configs/point_rend/pointrend_r50_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..fb005d8bee4dd141e579409ae78b2e1269fd2849 --- /dev/null +++ b/configs/point_rend/pointrend_r50_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/pointrend_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=200), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=200, + end=80000, + by_epoch=False, + ) +] diff --git a/configs/point_rend/pointrend_r50_4xb4-160k_ade20k-512x512.py b/configs/point_rend/pointrend_r50_4xb4-160k_ade20k-512x512.py new file mode 100644 index 
0000000000000000000000000000000000000000..d350fa686b3f2d2f7ba64caaf06120dc6e94541b --- /dev/null +++ b/configs/point_rend/pointrend_r50_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,46 @@ +_base_ = [ + '../_base_/models/pointrend_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=200), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=200, + end=160000, + by_epoch=False, + ) +] diff --git a/configs/poolformer/README.md b/configs/poolformer/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e6e2eac210ea68d2c0ba799bffddc9608d72088f --- /dev/null +++ b/configs/poolformer/README.md @@ -0,0 +1,65 @@ +# PoolFormer + +> [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Transformers have shown great potential in computer vision tasks. A common belief is their attention-based token mixer module contributes most to their competence. However, recent works show the attention-based module in transformers can be replaced by spatial MLPs and the resulted models still perform quite well. Based on this observation, we hypothesize that the general architecture of the transformers, instead of the specific token mixer module, is more essential to the model's performance. To verify this, we deliberately replace the attention module in transformers with an embarrassingly simple spatial pooling operator to conduct only the most basic token mixing. Surprisingly, we observe that the derived model, termed as PoolFormer, achieves competitive performance on multiple computer vision tasks. For example, on ImageNet-1K, PoolFormer achieves 82.1% top-1 accuracy, surpassing well-tuned vision transformer/MLP-like baselines DeiT-B/ResMLP-B24 by 0.3%/1.1% accuracy with 35%/52% fewer parameters and 48%/60% fewer MACs. The effectiveness of PoolFormer verifies our hypothesis and urges us to initiate the concept of "MetaFormer", a general architecture abstracted from transformers without specifying the token mixer. Based on the extensive experiments, we argue that MetaFormer is the key player in achieving superior results for recent transformer and MLP-like models on vision tasks. This work calls for more future research dedicated to improving MetaFormer instead of focusing on the token mixer modules. Additionally, our proposed PoolFormer could serve as a starting baseline for future MetaFormer architecture design. Code is available at [this https URL](https://github.com/sail-sg/poolformer) + + + +
+ +
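Concretely, the "embarrassingly simple spatial pooling operator" is a stride-1 average pool placed where self-attention would sit; in the official repo's `Pooling` module the input is also subtracted, so the residual branch contributes pure neighbor mixing. A minimal PyTorch sketch of that token mixer:

```python
import torch.nn as nn

class PoolMixer(nn.Module):
    """PoolFormer token mixer: stride-1 average pooling minus the identity."""

    def __init__(self, pool_size: int = 3):
        super().__init__()
        self.pool = nn.AvgPool2d(
            pool_size, stride=1, padding=pool_size // 2, count_include_pad=False)

    def forward(self, x):        # x: (B, C, H, W)
        return self.pool(x) - x  # subtracting x cancels each token's own value
```

Swapping this in for attention while keeping patch embedding, normalization, MLP and residual connections unchanged is the whole PoolFormer recipe, which is the point of the MetaFormer argument.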
+ +## Citation + +```bibtex +@inproceedings{yu2022metaformer, + title={Metaformer is actually what you need for vision}, + author={Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={10819--10829}, + year={2022} +} +``` + +### Usage + +- PoolFormer backbone needs to install [MMClassification](https://github.com/open-mmlab/mmclassification) first, which has abundant backbones for downstream tasks. + +```shell +pip install "mmpretrain>=1.0.0rc7" +``` + +- The pretrained models could also be downloaded from [PoolFormer config of MMClassification](https://github.com/open-mmlab/mmclassification/tree/master/configs/poolformer). + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | pretrain | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | mIoU\* | mIoU\*(ms+flip) | config | download | +| ------ | -------------- | --------- | ----------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | ------ | --------------: | --------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FPN | PoolFormer-S12 | 512x512 | ImageNet-1K | 32 | 40000 | 4.17 | 23.48 | V100 | 36.68 | - | 37.07 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154.log.json) | +| FPN | PoolFormer-S24 | 512x512 | ImageNet-1K | 32 | 40000 | 5.47 | 15.74 | V100 | 40.12 | - | 40.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049.log.json) | +| FPN | PoolFormer-S36 | 512x512 | ImageNet-1K | 32 | 40000 | 6.77 | 11.34 | V100 | 41.61 | - | 41.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122-b47e607d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122.log.json) | +| FPN | PoolFormer-M36 | 512x512 | 
ImageNet-1K | 32 | 40000 | 8.59 | 8.97 | V100 | 41.95 | - | 42.35 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230.log.json) | +| FPN | PoolFormer-M48 | 512x512 | ImageNet-1K | 32 | 40000 | 10.48 | 6.69 | V100 | 42.43 | - | 42.76 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923.log.json) | + +Note: + +- We replace `AlignedResize` in the original PoolFormer implementation with `Resize + ResizeToMultiple`. + +- `mIoU` marked with \* is collected when `Resize + ResizeToMultiple` is adopted in the `test_pipeline`, and so is the `mIoU` reported in the logs. + +- Test-time augmentation, i.e. "ms+flip", is still under development in MMSegmentation v1.x; stay tuned! diff --git a/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py b/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..4100eb99233284f465a7cd4d4941b6debf90c176 --- /dev/null +++ b/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py @@ -0,0 +1,11 @@ +_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py' +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m36_3rdparty_32xb128_in1k_20220414-c55e0949.pth' # noqa + +# model settings +model = dict( + backbone=dict( + arch='m36', + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + neck=dict(in_channels=[96, 192, 384, 768])) diff --git a/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py b/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..cfc49ccbdb9edd29d94e90b32bef0e18364ce34d --- /dev/null +++ b/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py @@ -0,0 +1,11 @@ +_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py' +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m48_3rdparty_32xb128_in1k_20220414-9378f3eb.pth' # noqa + +# model settings +model = dict( + backbone=dict( + arch='m48', + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + neck=dict(in_channels=[96, 192, 384, 768])) diff --git a/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py b/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c0b15312fed42b29a7b72d2be03bb571e0bd6119 --- /dev/null +++ b/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py @@ -0,0 +1,91 @@ +_base_ = [ + '../_base_/models/fpn_poolformer_s12.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] + +# dataset settings 
+dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=(2048, 512), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + dict(type='ResizeToMultiple', size_divisor=32), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] + +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type='RepeatDataset', + times=50, + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/training', + seg_map_path='annotations/training'), + pipeline=train_pipeline))) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator + +# model settings +model = dict( + data_preprocessor=data_preprocessor, + neck=dict(in_channels=[64, 128, 320, 512]), + decode_head=dict(num_classes=150)) + +# optimizer +# optimizer = dict(_delete_=True, type='AdamW', lr=0.0002, weight_decay=0.0001) +# optimizer_config = dict() +# # learning policy +# lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False) +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=dict(type='AdamW', lr=0.0002, weight_decay=0.0001)) +param_scheduler = [ + dict( + type='PolyLR', + power=0.9, + begin=0, + end=40000, + eta_min=0.0, + by_epoch=False, + ) +] diff --git a/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py b/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1f9d24cd41bc32d641f99585220d90ee2e0be56f --- /dev/null +++ b/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py' +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s24_3rdparty_32xb128_in1k_20220414-d7055904.pth' # noqa +# model settings +model = dict( + backbone=dict( + arch='s24', + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.'))) diff --git a/configs/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k.py b/configs/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..231dcf6c20e52820b3d09721ff2bcdcd4c21efdf --- /dev/null +++ b/configs/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = 
'./fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py' +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s36_3rdparty_32xb128_in1k_20220414-d78ff3e8.pth' # noqa + +# model settings +model = dict( + backbone=dict( + arch='s36', + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.'))) diff --git a/configs/poolformer/metafile.yaml b/configs/poolformer/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12f402be65727b7378e3f9342111031f34e600fc --- /dev/null +++ b/configs/poolformer/metafile.yaml @@ -0,0 +1,116 @@ +Models: +- Name: fpn_poolformer_s12_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.68 + Config: configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-S12 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 4.17 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch +- Name: fpn_poolformer_s24_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.12 + Config: configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-S24 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 5.47 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch +- Name: fpn_poolformer_s36_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.61 + Config: configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-S36 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 6.77 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122-b47e607d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: 
https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch +- Name: fpn_poolformer_m36_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.95 + Config: configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-M36 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 8.59 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch +- Name: fpn_poolformer_m48_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.43 + Config: configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-M48 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 10.48 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch diff --git a/configs/psanet/README.md b/configs/psanet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1f5680fbabcc7c537e2d529300ce656bce5d0f47 --- /dev/null +++ b/configs/psanet/README.md @@ -0,0 +1,68 @@ +# PSANet + +> [PSANet: Point-wise Spatial Attention Network for Scene Parsing](https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We notice information flow in convolutional neural networks is restricted inside local neighborhood regions due to the physical design of convolutional filters, which limits the overall understanding of complex scenes. In this paper, we propose the point-wise spatial attention network (PSANet) to relax the local neighborhood constraint. Each position on the feature map is connected to all the other ones through a self-adaptively learned attention mask. Moreover, information propagation in bi-direction for scene parsing is enabled. Information at other positions can be collected to help the prediction of the current position and vice versa, information at the current position can be distributed to assist the prediction of other ones. Our proposed approach achieves top performance on various competitive scene parsing datasets, including ADE20K, PASCAL VOC 2012 and Cityscapes, demonstrating its effectiveness and generality. 
+ + + +
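+The collect/distribute mechanism described in the abstract can be sketched in a few lines of PyTorch. The toy module below is illustrative only (it is not the `PSAHead` these configs actually use, and every name in it is made up): a 1x1 convolution predicts, for each target position, an attention map over all H*W source positions, and features are aggregated accordingly.
+
+```python
+import torch
+import torch.nn as nn
+
+
+class PSACollect(nn.Module):
+    """Toy 'collect' branch of point-wise spatial attention."""
+
+    def __init__(self, in_channels, mid_channels, feat_size):
+        super().__init__()
+        h, w = feat_size
+        self.reduce = nn.Conv2d(in_channels, mid_channels, 1)
+        # One attention logit per (source, target) pair of positions.
+        self.attention = nn.Conv2d(mid_channels, h * w, 1)
+
+    def forward(self, x):
+        b, _, h, w = x.shape
+        feat = self.reduce(x)                    # (B, C', H, W)
+        mask = self.attention(feat)              # (B, H*W, H, W)
+        mask = mask.flatten(2).softmax(dim=1)    # normalize over source positions
+        value = feat.flatten(2)                  # (B, C', H*W)
+        out = torch.bmm(value, mask)             # each position collects from all others
+        return out.view(b, -1, h, w)             # (B, C', H, W)
+
+
+# The (66, 66) grid mirrors the `mask_size=(66, 66)` set in the ADE20K
+# configs in this folder; the channel widths here are arbitrary.
+y = PSACollect(512, 64, (66, 66))(torch.randn(1, 512, 66, 66))
+```
+
+The distribute branch is symmetric, with the attention map predicted from the sender's rather than the receiver's point of view; PSANet combines both directions.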
+ +
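+As with other MMSegmentation models, any config/checkpoint pair from the tables below can be loaded through the high-level Python API. A minimal sketch (the file paths are placeholders; download the checkpoint from the table first):
+
+```python
+from mmseg.apis import inference_model, init_model
+
+# Placeholder paths: substitute any config and matching checkpoint
+# from the "Results and models" tables below.
+config = 'configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
+checkpoint = 'psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth'
+
+model = init_model(config, checkpoint, device='cuda:0')   # build model + load weights
+result = inference_model(model, 'demo.png')               # segment a single image
+```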
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x1024 | 40000 | 7 | 3.17 | V100 | 77.63 | 79.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json) | +| PSANet | R-101-D8 | 512x1024 | 40000 | 10.5 | 2.20 | V100 | 79.14 | 80.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json) | +| PSANet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.40 | V100 | 77.99 | 79.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json) | +| PSANet | R-101-D8 | 769x769 | 40000 | 11.9 | 0.98 | V100 | 78.43 | 80.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json) | +| PSANet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 77.24 | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json) | +| PSANet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 79.31 | 
80.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json) | +| PSANet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.31 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json) | +| PSANet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.69 | 80.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x512 | 80000 | 9 | 18.91 | V100 | 41.14 | 41.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json) | +| PSANet | R-101-D8 | 512x512 | 80000 | 12.5 | 13.13 | V100 | 43.80 | 44.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json) | +| PSANet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 41.67 | 42.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json) | +| PSANet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 43.74 | 45.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x512 | 20000 | 6.9 | 18.24 | V100 | 76.39 | 77.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json) | +| PSANet | R-101-D8 | 512x512 | 20000 | 10.4 | 12.63 | V100 | 77.91 | 79.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json) | +| PSANet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.30 | 77.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json) | +| PSANet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 77.73 | 79.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json) | + +## Citation + +```bibtex +@inproceedings{zhao2018psanet, + title={Psanet: Point-wise spatial attention network for scene parsing}, + author={Zhao, Hengshuang and Zhang, Yi and Liu, Shu and Shi, Jianping and Change Loy, Chen and Lin, Dahua and Jia, Jiaya}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + pages={267--283}, + year={2018} +} +``` diff --git a/configs/psanet/metafile.yaml b/configs/psanet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3fbe6f6d3e9fde439d7946537ef820b70a6bd27a --- /dev/null +++ b/configs/psanet/metafile.yaml @@ -0,0 +1,391 @@ +Collections: +- Name: PSANet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + README: configs/psanet/README.md + Frameworks: + - PyTorch +Models: +- Name: psanet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.63 + mIoU(ms+flip): 79.04 + Config: configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 7.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.14 + mIoU(ms+flip): 80.19 + Config: configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: 
+ mIoU: 77.99 + mIoU(ms+flip): 79.64 + Config: configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 7.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.43 + mIoU(ms+flip): 80.26 + Config: configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 11.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.24 + mIoU(ms+flip): 78.69 + Config: configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.31 + mIoU(ms+flip): 80.53 + Config: configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.31 + mIoU(ms+flip): 80.91 + Config: configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.69 + mIoU(ms+flip): 80.89 + Config: configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.14 + mIoU(ms+flip): 41.91 + Config: configs/psanet/psanet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json + Paper: + 
Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.8 + mIoU(ms+flip): 44.75 + Config: configs/psanet/psanet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 12.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.67 + mIoU(ms+flip): 42.95 + Config: configs/psanet/psanet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.74 + mIoU(ms+flip): 45.38 + Config: configs/psanet/psanet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: PSANet + 
Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.39 + mIoU(ms+flip): 77.34 + Config: configs/psanet/psanet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 6.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.91 + mIoU(ms+flip): 79.3 + Config: configs/psanet/psanet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.3 + mIoU(ms+flip): 77.35 + Config: configs/psanet/psanet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.73 + mIoU(ms+flip): 79.05 + Config: configs/psanet/psanet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + 
- R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch diff --git a/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..e69cf4270353a17bc4c375280a06d4409b6c2faf --- /dev/null +++ b/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..e543099842bd7af2dac9d8679ad0adb8e120e5e4 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..b8636384d0a871740ec1b9c1c04238fd252c2b78 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..097b1c58cea777c94ad7bd02dc49440ef0b1be80 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/psanet/psanet_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ac86306cb6089dc6f3c04bd02872b501090bb63b --- /dev/null +++ b/configs/psanet/psanet_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/psanet/psanet_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..abd8e56512ac6846817455d36800bb2c6c58ca4b --- /dev/null +++ b/configs/psanet/psanet_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/psanet/psanet_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..d3154a8f147a4edb7f2061b1402460f8a973cc84 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/psanet/psanet_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b34d4248e86d802927e03d7ee7950a71f723b0c7 --- /dev/null +++ b/configs/psanet/psanet_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..82463aaebaeeaee28cac8eb079f4ecee3a7180a1 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..af44b3080f0dcdee32fce7e64a40f5f962ba6681 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..5e5052f8c5361d685b61dfa5b71982730466113d --- /dev/null +++ b/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..0eaf83070354f722b9bf1ad98187de787208273f --- /dev/null +++ b/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + 
'../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/psanet/psanet_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/psanet/psanet_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..de13296afff122be25ac1a48967be2e2d014aabe --- /dev/null +++ b/configs/psanet/psanet_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(mask_size=(66, 66), num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/psanet/psanet_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/psanet/psanet_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..45d8762a090de5b5d7ca929c2136778c47009d56 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/psanet/psanet_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/psanet/psanet_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b5d99d1000e9ac608ab3bc026b013841739a4c48 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/psanet/psanet_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/psanet/psanet_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c3b65287e57057540042f1adca4bb1d7e3011846 --- /dev/null +++ b/configs/psanet/psanet_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(mask_size=(66, 66), num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/pspnet/README.md b/configs/pspnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4209d259b7e7c1238871cb10137ce02e46b995e3 --- /dev/null +++ b/configs/pspnet/README.md @@ -0,0 +1,182 @@ +# PSPNet + +> [Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Scene parsing is 
challenging for unrestricted open vocabulary and diverse scenes. In this paper, we exploit the capability of global context information by different-region-based context aggregation through our pyramid pooling module together with the proposed pyramid scene parsing network (PSPNet). Our global prior representation is effective to produce good quality results on the scene parsing task, while PSPNet provides a superior framework for pixel-level prediction tasks. The proposed approach achieves state-of-the-art performance on various datasets. It came first in ImageNet scene parsing challenge 2016, PASCAL VOC 2012 benchmark and Cityscapes benchmark. A single PSPNet yields the new record of mIoU accuracy 85.4% on PASCAL VOC 2012 and accuracy 80.2% on Cityscapes. + + +
+ +
+ +
+ +PSPNet-R50-D8 model structure +
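+The pyramid pooling module described in the abstract is easy to sketch. The module below is a minimal, hedged illustration (mmseg's actual `PSPHead` adds normalization, activations and a fusion bottleneck): the input is average-pooled to several grid sizes, each pooled map is projected by a 1x1 convolution, upsampled back to the input resolution, and concatenated with the input as a global prior.
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class PyramidPooling(nn.Module):
+    """Toy pyramid pooling module (PPM) from PSPNet."""
+
+    def __init__(self, in_channels, branch_channels, scales=(1, 2, 3, 6)):
+        super().__init__()
+        self.stages = nn.ModuleList(
+            nn.Sequential(
+                nn.AdaptiveAvgPool2d(scale),                 # pool to scale x scale
+                nn.Conv2d(in_channels, branch_channels, 1),  # project each prior
+            )
+            for scale in scales
+        )
+
+    def forward(self, x):
+        h, w = x.shape[2:]
+        priors = [
+            F.interpolate(stage(x), size=(h, w), mode='bilinear',
+                          align_corners=False)
+            for stage in self.stages
+        ]
+        return torch.cat([x] + priors, dim=1)  # input + multi-scale global priors
+
+
+# 2048-channel backbone features; the (1, 2, 3, 6) scales follow the paper.
+out = PyramidPooling(2048, 512)(torch.randn(1, 2048, 64, 64))
+```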
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------------- | ------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-50-D8 | 512x1024 | 40000 | 6.1 | 4.07 | V100 | 77.85 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 40000 | 9.6 | 2.68 | V100 | 78.34 | 79.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-50-D8 | 769x769 | 40000 | 6.9 | 1.76 | V100 | 78.26 | 79.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json) | +| PSPNet | R-101-D8 | 769x769 | 40000 | 10.9 | 1.15 | V100 | 79.08 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json) | +| PSPNet | R-18-D8 | 512x1024 | 80000 | 1.7 | 15.71 | V100 | 74.87 | 76.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 78.55 | 79.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json) | +| PSPNet | R-50b-D8 rsb | 512x1024 | 80000 | 6.2 | 3.82 | V100 | 78.47 | 79.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238-588c30be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 79.76 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json) | +| PSPNet (FP16) | R-101-D8 | 512x1024 | 80000 | 5.34 | 8.77 | V100 | 79.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919-a0875e5c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919.log.json) | +| PSPNet | R-18-D8 | 769x769 | 80000 | 1.9 | 6.20 | V100 | 75.90 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.59 | 80.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json) | +| PSPNet | R-101-D8 | 769x769 | 
80000 | - | - | V100 | 79.77 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json) | +| PSPNet | R-18b-D8 | 512x1024 | 80000 | 1.5 | 16.28 | V100 | 74.23 | 75.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes-20201226_063116.log.json) | +| PSPNet | R-50b-D8 | 512x1024 | 80000 | 6.0 | 4.30 | V100 | 78.22 | 79.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes-20201225_094315.log.json) | +| PSPNet | R-101b-D8 | 512x1024 | 80000 | 9.5 | 2.76 | V100 | 79.69 | 80.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.41 | V100 | 74.92 | 76.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes-20201226_080942.log.json) | +| PSPNet | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.88 | V100 | 78.50 | 79.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes-20201225_094316.log.json) | +| PSPNet | R-101b-D8 | 769x769 | 80000 | 10.8 | 1.17 | V100 | 78.87 | 80.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-769x769.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes-20201226_171823.log.json) | +| PSPNet | R-50-D32 | 512x1024 | 80000 | 3.0 | 15.21 | V100 | 73.88 | 76.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840-9092b254.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840.log.json) | +| PSPNet | R-50b-D32 rsb | 512x1024 | 80000 | 3.1 | 16.08 | V100 | 74.09 | 77.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229-dd9c9610.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229.log.json) | +| PSPNet | R-50b-D32 | 512x1024 | 80000 | 2.9 | 15.41 | V100 | 72.61 | 75.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152-23bcaf8c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.53 | V100 | 41.13 | 41.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 12 | 15.30 | V100 | 43.57 | 44.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json) | +| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.48 | 43.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json) | +| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 44.39 | 45.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 20000 | 6.1 | 23.59 | V100 | 76.78 | 77.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json) | +| PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | V100 | 78.47 | 79.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) | +| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 77.29 | 78.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 78.52 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-101-D8 | 480x480 | 40000 | 8.8 | 9.68 | V100 | 46.60 | 47.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) | +| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | V100 | 46.03 | 47.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-101-D8 | 480x480 | 40000 | - | - | V100 | 52.02 | 53.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59_20210416_114524-86d44cd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59-20210416_114524.log.json) | +| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | V100 | 52.47 | 53.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59_20210416_114418-fa6caaa2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59-20210416_114418.log.json) | + +### Dark Zurich and Nighttime Driving + +We support evaluation on these two datasets using the models above, which are trained on the Cityscapes training set. + +| Method | Backbone | Training Dataset | Test Dataset | mIoU | config | evaluation checkpoint | +| ------ | --------- | ----------------------- | ------------------------- | ----- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | Cityscapes Training set | Dark Zurich | 10.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-50-D8 | Cityscapes Training set | Nighttime Driving | 23.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-50-D8 | Cityscapes Training set | Cityscapes Validation set | 77.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-101-D8 | Cityscapes Training set | Dark Zurich | 10.16 |
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-101-D8 | Cityscapes Training set | Nighttime Driving | 20.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-101-D8 | Cityscapes Training set | Cityscapes Validation set | 78.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-101b-D8 | Cityscapes Training set | Dark Zurich | 15.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-101b-D8 | Cityscapes Training set | Nighttime Driving | 22.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-101b-D8 | Cityscapes Training set | Cityscapes Validation set | 79.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | + +### COCO-Stuff 10k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | 
-------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 20000 | 9.6 | 20.5 | V100 | 35.69 | 36.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258-b88df27f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258.log.json) | +| PSPNet | R-101-D8 | 512x512 | 20000 | 13.2 | 11.1 | V100 | 37.26 | 38.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135-76aae482.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135.log.json) | +| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 36.33 | 37.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857-92e2902b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857.log.json) | +| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 37.76 | 38.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022-831aec95.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022.log.json) | + +### COCO-Stuff 164k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 80000 | 9.6 | 20.5 | V100 | 38.80 | 39.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-0e41b2db.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 13.2 | 11.1 | V100 | 40.34 | 40.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-7eb41789.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json) | +| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 39.64 | 39.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-51276a57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json) | +| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 41.28 | 41.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-4af9621b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json) | +| PSPNet | R-50-D8 | 512x512 | 320000 | - | - | V100 | 40.53 | 40.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-be9610cc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json) | +| PSPNet | R-101-D8 | 512x512 | 320000 | - | - | V100 | 41.95 | 42.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-72220c60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json) | + +### LoveDA + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-18-D8 | 512x512 | 80000 | 1.45 | 26.87 | V100 | 48.62 | 47.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100-b97697f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100.log.json) | +| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 6.60 | V100 | 50.46 | 50.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 4.58 | V100 | 51.86 | 51.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212-1c06c6a8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212.log.json) | + +### Potsdam + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-18-D8 | 512x512 | 80000 | 1.50 | 85.12 | V100 | 77.09 | 78.30 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612-7cd046e1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) | +| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.21 | V100 | 78.12 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.40 | V100 | 78.62 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) | + +### Vaihingen + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-18-D8 | 512x512 | 80000 | 1.45 | 85.06 | V100 | 71.46 | 73.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) | +| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.29 | V100 | 72.36 | 73.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.97 | V100 | 72.61 | 74.18 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806.log.json) | + +### iSAID + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-18-D8 | 896x896 | 80000 | 4.52 | 26.91 | V100 | 60.22 | 61.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526-e84c0b6a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | +| PSPNet | R-50-D8 | 896x896 | 80000 | 16.58 | 8.88 | V100 | 65.36 | 66.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629-1f21dc32.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629.log.json) | + +Note: + +- `FP16` means mixed-precision (FP16) training is adopted. +- `896x896` is the crop size for the iSAID dataset, following the implementation of [PointFlow: Flowing Semantics Through Points for Aerial Image Segmentation](https://arxiv.org/pdf/2103.06564.pdf). +- `rsb` is short for 'ResNet strikes back'. +- The `b` in `R-50b` means ResNetV1b, a standard ResNet backbone. In MMSegmentation, the default backbone is ResNetV1c, which usually performs better on semantic segmentation tasks.
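+
+Each row above pairs a config file with a downloadable checkpoint, and `init_model` from `mmseg.apis` accepts the checkpoint URL directly and fetches the weights on first use. Below is a minimal sketch along these lines (the same `init_model`/`inference_model`/`show_result_pyplot` calls used by `app.py` in this Space), assuming mmsegmentation 1.x is installed and the script runs from the repository root; `demo.png` is a placeholder for any local image, and `device='cpu'` works when no GPU is available.
+
+```python
+from mmseg.apis import inference_model, init_model
+from mmseg.apis.inference import show_result_pyplot
+
+# Config path and checkpoint URL copied verbatim from the Cityscapes table above.
+config = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
+checkpoint = ('https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/'
+              'pspnet_r50-d8_512x1024_40k_cityscapes/'
+              'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth')
+
+model = init_model(config, checkpoint, device='cuda:0')  # downloads the weights
+result = inference_model(model, 'demo.png')              # single-image inference
+# Blend the prediction over the input image; returns the drawn image array.
+vis = show_result_pyplot(model, 'demo.png', result, show=False, opacity=0.5)
+```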
+ +## Citation + +```bibtex +@inproceedings{zhao2017pspnet, + title={Pyramid Scene Parsing Network}, + author={Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya}, + booktitle={CVPR}, + year={2017} +} +``` + +```bibtex +@article{wightman2021resnet, + title={Resnet strikes back: An improved training procedure in timm}, + author={Wightman, Ross and Touvron, Hugo and J{\'e}gou, Herv{\'e}}, + journal={arXiv preprint arXiv:2110.00476}, + year={2021} +} +``` diff --git a/configs/pspnet/metafile.yaml b/configs/pspnet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d00b89d5cf780439533ab4189e7b8db1fea5574e --- /dev/null +++ b/configs/pspnet/metafile.yaml @@ -0,0 +1,1303 @@ +Collections: +- Name: PSPNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + - Dark Zurich and Nighttime Driving + - COCO-Stuff 10k + - COCO-Stuff 164k + - LoveDA + - Potsdam + - Vaihingen + - iSAID + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + README: configs/pspnet/README.md + Frameworks: + - PyTorch +Models: +- Name: pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.85 + mIoU(ms+flip): 79.18 + Config: configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.34 + mIoU(ms+flip): 79.74 + Config: configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.26 + mIoU(ms+flip): 79.88 + Config: configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory 
(GB): 6.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.08 + mIoU(ms+flip): 80.28 + Config: configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.87 + mIoU(ms+flip): 76.04 + Config: configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes-20201225_021458.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.55 + mIoU(ms+flip): 79.79 + Config: configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024 + In Collection: 
PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.47 + mIoU(ms+flip): 79.45 + Config: configs/pspnet/pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238-588c30be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.76 + mIoU(ms+flip): 81.01 + Config: configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.46 + Config: configs/pspnet/pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + - (FP16) + Training Resources: 4x V100 GPUS + Memory (GB): 5.34 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919-a0875e5c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.9 + mIoU(ms+flip): 77.86 + Config: configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes-20201225_021458.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.59 + mIoU(ms+flip): 80.69 + Config: configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.77 + mIoU(ms+flip): 81.06 + Config: configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.23 + mIoU(ms+flip): 75.79 + Config: configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes-20201226_063116.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.22 + mIoU(ms+flip): 79.46 + Config: configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data:
Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes-20201225_094315.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.69 + mIoU(ms+flip): 80.79 + Config: configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18b-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.92 + mIoU(ms+flip): 76.9 + Config: configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes-20201226_080942.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50b-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.5 + mIoU(ms+flip): 79.96 + Config: configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes-20201225_094316.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101b-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.87 + mIoU(ms+flip): 80.04 + Config: configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes-20201226_171823.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d32_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.88 + mIoU(ms+flip): 76.85 + Config: configs/pspnet/pspnet_r50-d32_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D32 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840-9092b254.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.09 + mIoU(ms+flip): 77.18 + Config: configs/pspnet/pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D32 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229-dd9c9610.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.61 + mIoU(ms+flip): 75.51 + Config: configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D32 + - PSPNet + Training Resources: 4x V100 
GPUS + Memory (GB): 2.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152-23bcaf8c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.13 + mIoU(ms+flip): 41.94 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.57 + mIoU(ms+flip): 44.35 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.48 + mIoU(ms+flip): 43.44 + Config: configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 
44.39 + mIoU(ms+flip): 45.35 + Config: configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.78 + mIoU(ms+flip): 77.61 + Config: configs/pspnet/pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.47 + mIoU(ms+flip): 79.25 + Config: configs/pspnet/pspnet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.29 + mIoU(ms+flip): 78.48 + Config: configs/pspnet/pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: 
https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.52 + mIoU(ms+flip): 79.57 + Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-40k_pascal-context-480x480 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.6 + mIoU(ms+flip): 47.78 + Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_pascal-context-480x480 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.03 + mIoU(ms+flip): 47.15 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.02 + mIoU(ms+flip): 53.54 + Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59_20210416_114524-86d44cd4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59-20210416_114524.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.47 + mIoU(ms+flip): 53.99 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59_20210416_114418-fa6caaa2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59-20210416_114418.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 35.69 + mIoU(ms+flip): 36.62 + Config: configs/pspnet/pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258-b88df27f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.26 + mIoU(ms+flip): 38.52 + Config: configs/pspnet/pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135-76aae482.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 36.33 + mIoU(ms+flip): 37.24 + Config: configs/pspnet/pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857-92e2902b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.76 + mIoU(ms+flip): 38.86 + Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022-831aec95.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 38.8 + mIoU(ms+flip): 39.19 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-0e41b2db.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 40.34 + mIoU(ms+flip): 40.79 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-7eb41789.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 39.64 + mIoU(ms+flip): 39.97 + Config: configs/pspnet/pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-51276a57.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.28 + mIoU(ms+flip): 41.66 + Config: configs/pspnet/pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-4af9621b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 40.53 + mIoU(ms+flip): 40.75 + Config: configs/pspnet/pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-be9610cc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.95 + mIoU(ms+flip): 42.42 + Config: configs/pspnet/pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-72220c60.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb4-80k_loveda-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 48.62 + mIoU(ms+flip): 47.57 + Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.45 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100-b97697f1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_loveda-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.46 + mIoU(ms+flip): 50.19 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.14 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_loveda-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 51.86 + mIoU(ms+flip): 51.34 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.61 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212-1c06c6a8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb4-80k_potsdam-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 77.09 + mIoU(ms+flip): 78.3 + Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612-7cd046e1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_potsdam-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.12 + mIoU(ms+flip): 78.98 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.14 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_potsdam-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.62 + mIoU(ms+flip): 79.47 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.61 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb4-80k_vaihingen-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: 
Vaihingen + Metrics: + mIoU: 71.46 + mIoU(ms+flip): 73.36 + Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.45 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_vaihingen-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.36 + mIoU(ms+flip): 73.75 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.14 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_vaihingen-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.61 + mIoU(ms+flip): 74.18 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.61 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb4-80k_isaid-896x896 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 60.22 + mIoU(ms+flip): 61.25 + Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 4.52 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526-e84c0b6a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json + Paper: + 
Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_isaid-896x896 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 65.36 + mIoU(ms+flip): 66.48 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 16.58 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629-1f21dc32.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch diff --git a/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..f33d653b7621703a9e7948a09d56605dcdeb55a7 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py b/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py new file mode 100644 index 0000000000000000000000000000000000000000..5babaa885169930d924e787222a1eb5d2b23c020 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py' # noqa +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py b/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py new file mode 100644 index 0000000000000000000000000000000000000000..a9480c52f8edbc39775834e9e73124ea03a312a0 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py' # noqa +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-769x769.py b/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..e05cff6d8e03059714f1a2c940308d229af2407a --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6704cdd5d24d6631660059eac6fb04777c19e46f --- 
/dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-769x769.py b/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..3733e691982b17ac9d0d8d99b3efd30e97dc2956 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..52f86b5e752b279fdf765ae15bfa796c5daff187 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py @@ -0,0 +1,6 @@ +_base_ = './pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py' +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005), + loss_scale=512.) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..2231049b8a21cc48e819706f204b14db25ad1e47 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..f5390f8c7631db41acf20ae17e3799d8fbec14ce --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..84a986cd9de1878027b76d4d78ab97034a60bbd5 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-20k_voc12aug-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..71897ddc2d2131955fdb0128c3efcaa3c31bcfd2 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ebaea36da837ff8ec7c7fd72b98c1f2123bb9543 --- /dev/null +++ 
b/configs/pspnet/pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..2a55f53ee94d218605572e603fa5afc7e618ac28 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-480x480.py b/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..205d00bac9e24d9fc818a99ef3159652b185a68c --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-40k_pascal-context-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480.py b/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..0d7c176073202920255bb5c5c40c99ab1464a51e --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-40k_pascal-context-59-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-40k_voc12aug-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..0599f31f9631fde33a6a275b6df01fb0e459271e --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-80k_ade20k-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..f95560347a5d3926ba00d1afcdd115dfea6cb1c2 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..4a34f9748583426297e08256e8754d60d1aab370 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-80k_loveda-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-80k_loveda-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..7076877980a9bd295caca603a7575f7f5311d1e4 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-80k_loveda-512x512.py @@ -0,0 +1,2 @@ +_base_ = 
'./pspnet_r50-d8_4xb4-80k_loveda-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-480x480.py b/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..0ac40dc861ba10e7b33637d00402cb7a3296beac --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_pascal-context-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480.py b/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..307188c783783ee89c88ec3564167e880df81a09 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_pascal-context-59-480x480.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-80k_potsdam-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-80k_potsdam-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..31ed2f2938adc59cac52ed7f0ab77e27959c8823 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-80k_potsdam-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_potsdam-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_4xb4-80k_vaihingen-512x512.py b/configs/pspnet/pspnet_r101-d8_4xb4-80k_vaihingen-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ac33ed7cdadaf6649c518176306a9dce8330f745 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_4xb4-80k_vaihingen-512x512.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..d2c0f696380dd12840c88320050b6f511ca195a1 --- /dev/null +++ b/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py b/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py new file mode 100644 index 0000000000000000000000000000000000000000..b1817441495b19eea61c690333416657ba834e13 --- /dev/null +++ b/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py' # noqa +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py b/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py new file mode 100644 index 0000000000000000000000000000000000000000..6a8994b4c820e3e31a4f8db41094bbbd582f7da8 --- /dev/null +++ 
b/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py' # noqa +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-769x769.py b/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..891bfd51ed3973abff5d8609ae2bf984b40c8953 --- /dev/null +++ b/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..a4b342ef23522f7778e4572631a7cd1bd027c8f9 --- /dev/null +++ b/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-769x769.py b/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..0e7f3e90ac49cd304407241da2ee5be42a98ef6f --- /dev/null +++ b/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18-d8_4xb4-80k_isaid-896x896.py b/configs/pspnet/pspnet_r18-d8_4xb4-80k_isaid-896x896.py new file mode 100644 index 0000000000000000000000000000000000000000..efce7a0e7de4e22d96ff4b2d4a480f6494ac1381 --- /dev/null +++ b/configs/pspnet/pspnet_r18-d8_4xb4-80k_isaid-896x896.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_isaid-896x896.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18-d8_4xb4-80k_loveda-512x512.py b/configs/pspnet/pspnet_r18-d8_4xb4-80k_loveda-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..80e2d20cbecbadcd01fbbfeefbe033e4302175da --- /dev/null +++ b/configs/pspnet/pspnet_r18-d8_4xb4-80k_loveda-512x512.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_loveda-512x512.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py b/configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1ef0585e799fdd4e9f9093b89433fbf6d176da5f --- /dev/null +++ b/configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_potsdam-512x512.py' +model = 
dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18-d8_4xb4-80k_vaihingen-512x512.py b/configs/pspnet/pspnet_r18-d8_4xb4-80k_vaihingen-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..51e66d2e5131063a41165f8ea0a91c9512e3cfbb --- /dev/null +++ b/configs/pspnet/pspnet_r18-d8_4xb4-80k_vaihingen-512x512.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..2e356c5c5fab1a043d5c2b77f91c9619e1310f41 --- /dev/null +++ b/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-769x769.py b/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..831354d4ce2df80019de431afee30f37c4037dd5 --- /dev/null +++ b/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/configs/pspnet/pspnet_r50-d32_4xb2-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r50-d32_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..5700b5b3b4553d6ab665804a467dfa76b0f941c4 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d32_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(dilations=(1, 1, 2, 4), strides=(1, 2, 2, 2))) diff --git a/configs/pspnet/pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..13903293426107292394c14b8ea03a28a8c996ad --- /dev/null +++ b/configs/pspnet/pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa +model = dict( + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='ResNet', + 
init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint), + dilations=(1, 1, 2, 4), + strides=(1, 2, 2, 2))) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=0.0005, weight_decay=0.05), + clip_grad=dict(max_norm=1, norm_type=2)) +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=1000, + end=80000, + by_epoch=False, + milestones=[60000, 72000], + ) +] diff --git a/configs/pspnet/pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..b83a0b447cdea6f9b636eaab9e0d5314602e6f67 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024.py @@ -0,0 +1,33 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa +model = dict( + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='ResNet', + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint))) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=0.0005, weight_decay=0.05), + clip_grad=dict(max_norm=1, norm_type=2)) +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=1000, + end=80000, + by_epoch=False, + milestones=[60000, 72000], + ) +] diff --git a/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..a9dcb52b668cf7d3b67e53cc4796657afcfe82b1 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py b/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py new file mode 100644 index 0000000000000000000000000000000000000000..1bf4a135c7fc3edea4d79ae55492d802fb009b0b --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py @@ -0,0 +1,24 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(1920, 1080), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +test_dataloader = dict( + dataset=dict( + type='DarkZurichDataset', + 
data_root='data/dark_zurich/', + data_prefix=dict( + img_path='rgb_anon/val/night/GOPR0356', + seg_map_path='gt/val/night/GOPR0356'), + pipeline=test_pipeline)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py b/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py new file mode 100644 index 0000000000000000000000000000000000000000..b91258913104d67e319d2e3307cb1e259df339e4 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py @@ -0,0 +1,25 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(1920, 1080), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +test_dataloader = dict( + dataset=dict( + type='NightDrivingDataset', + data_root='data/NighttimeDrivingTest/', + data_prefix=dict( + img_path='leftImg8bit/test/night', + seg_map_path='gtCoarse_daytime_trainvaltest/test/night'), + pipeline=test_pipeline)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py b/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..6baa31baede6b7dba1ed51fb5cf22436141ffa19 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..6ea27de906e4f35e94f67f19406db8fc02606aa5 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py b/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py new file mode 100644 index 0000000000000000000000000000000000000000..200679ffdf8f6f491c6a130126857e4dd5cc9c4f --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py @@ -0,0 +1,25 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) + +test_pipeline = [ + 
dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(1920, 1080), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +test_dataloader = dict( + dataset=dict( + type='DarkZurichDataset', + data_root='data/dark_zurich/', + data_prefix=dict( + img_path='rgb_anon/val/night/GOPR0356', + seg_map_path='gt/val/night/GOPR0356'), + pipeline=test_pipeline)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py b/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py new file mode 100644 index 0000000000000000000000000000000000000000..517381375f35bbc32573557863150efb4fe6a411 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py @@ -0,0 +1,25 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(1920, 1080), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +test_dataloader = dict( + dataset=dict( + type='NightDrivingDataset', + data_root='data/NighttimeDrivingTest/', + data_prefix=dict( + img_path='leftImg8bit/test/night', + seg_map_path='gtCoarse_daytime_trainvaltest/test/night'), + pipeline=test_pipeline)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py b/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..d43d30a0b6149de8f9057df1b8dff5260ab687a1 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..3d9164f2e1ad002de36d7235ba053e02e4acb5d1 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..6185c2efee4a3488dc2836a3e02cd107a9f8b77a --- /dev/null +++ 
b/configs/pspnet/pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..8c1ba2ddf06a719fe54017cbd87b49eaff39e1c9 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/coco-stuff10k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..0f60819313c059b175e210559b512937552b3eee --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..2a9ce4c4f134e14b8e85b23cd3c60675e8481ea1 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_320k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..fae57b0dbcddeeaad7efde5e7a169bf3acf51ebd --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/coco-stuff10k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-40k_pascal-context-480x480.py b/configs/pspnet/pspnet_r50-d8_4xb4-40k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..08a214448cc48c25480e489d1cf6318f3c1b9a0e 
--- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-40k_pascal-context-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-40k_pascal-context-59-480x480.py b/configs/pspnet/pspnet_r50-d8_4xb4-40k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..b6544957325d362d042a1aa5ac93547f590c9641 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-40k_pascal-context-59-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c4a46115303ca35a13fa608da17d65a0ffd3cfe5 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-80k_ade20k-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..bb12aed85c0e6383ae0f2ede9899f882779d59dd --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..954a653456ae39cfc8c55143572625a185fb37ca --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) 
+data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=171), + auxiliary_head=dict(num_classes=171)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-80k_isaid-896x896.py b/configs/pspnet/pspnet_r50-d8_4xb4-80k_isaid-896x896.py new file mode 100644 index 0000000000000000000000000000000000000000..63165b608ef1a8ce763160f3c893db3d438e8279 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-80k_isaid-896x896.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/isaid.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (896, 896) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=16), + auxiliary_head=dict(num_classes=16)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-80k_loveda-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-80k_loveda-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..920729d3be0a8648a68c17f29c66b49452ed0ca5 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-80k_loveda-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/loveda.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=7), + auxiliary_head=dict(num_classes=7)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-80k_pascal-context-480x480.py b/configs/pspnet/pspnet_r50-d8_4xb4-80k_pascal-context-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..a7d82478ce8ea1adf518899b86af32bdc347f423 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-80k_pascal-context-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-80k_pascal-context-59-480x480.py b/configs/pspnet/pspnet_r50-d8_4xb4-80k_pascal-context-59-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..b7abc1bdd39edb47fd1c353b6964fb4ca60f0e97 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-80k_pascal-context-59-480x480.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (480, 480) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-80k_potsdam-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-80k_potsdam-512x512.py new file mode 100644 
index 0000000000000000000000000000000000000000..afb3977ad2a7119ca38f49a366f4aac46e40c935 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-80k_potsdam-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/potsdam.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6)) diff --git a/configs/pspnet/pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py b/configs/pspnet/pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..35322d2df08fce1fe959f451524d52e1291a5dfe --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/vaihingen.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6)) diff --git a/configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..64e55090acf4db747ac20543c82b6156b8b8b6a3 --- /dev/null +++ b/configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='torchvision://resnet50', + backbone=dict(type='ResNet', dilations=(1, 1, 2, 4), strides=(1, 2, 2, 2))) diff --git a/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py b/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..7dd64b332f10a1fd4a2f0da0178a3e2006665f45 --- /dev/null +++ b/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-769x769.py b/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..3875c092fed129e4bce29c363b1685d9b430d3c5 --- /dev/null +++ b/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/configs/resnest/README.md b/configs/resnest/README.md new file mode 100644 index 0000000000000000000000000000000000000000..304791abe9666099a9b25e8aa8cf5b4c7cdeeda7 --- /dev/null +++ b/configs/resnest/README.md @@ -0,0 +1,54 @@ +# ResNeSt + +> [ResNeSt: Split-Attention Networks](https://arxiv.org/abs/2004.08955) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +It is well known that featuremap attention and multi-path representation are important for visual recognition. 
In this paper, we present a modularized architecture which applies channel-wise attention to different network branches, leveraging their success in capturing cross-feature interactions and learning diverse representations. Our design results in a simple and unified computation block that can be parameterized with only a few variables. Our model, named ResNeSt, outperforms EfficientNet in the accuracy and latency trade-off on image classification. In addition, ResNeSt has achieved superior transfer learning results on several public benchmarks when serving as the backbone, and it has been adopted by the winning entries of the COCO-LVIS challenge. The source code for the complete system and the pretrained models is publicly available. + + +
+ +
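Every config in this folder starts from an existing ResNet-based config and only swaps the backbone for ResNeSt, so the released checkpoints work with the standard MMSegmentation inference API. Below is a minimal inference sketch, assuming an MMSegmentation 1.x environment with this repository as the working directory; `demo.png` is a hypothetical input image, and the config path and checkpoint URL are taken from the FCN row of the Cityscapes table below.

```python
# Minimal sketch: run one of the ResNeSt checkpoints listed below.
# Assumptions: mmseg 1.x installed, CUDA available, and 'demo.png' is a
# placeholder for any local RGB image.
from mmseg.apis import inference_model, init_model

config = 'configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py'
checkpoint = ('https://download.openmmlab.com/mmsegmentation/v0.5/resnest/'
              'fcn_s101-d8_512x1024_80k_cityscapes/'
              'fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth')

model = init_model(config, checkpoint, device='cuda:0')  # fetches the weights
result = inference_model(model, 'demo.png')              # returns a SegDataSample
seg = result.pred_sem_seg.data                           # per-pixel class indices
```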
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | S-101-D8 | 512x1024 | 80000 | 11.4 | 2.39 | V100 | 77.56 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | +| PSPNet | S-101-D8 | 512x1024 | 80000 | 11.8 | 2.52 | V100 | 78.57 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | +| DeepLabV3 | S-101-D8 | 512x1024 | 80000 | 11.9 | 1.88 | V100 | 79.67 | 80.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | +| DeepLabV3+ | S-101-D8 | 512x1024 | 80000 | 13.2 | 2.36 | V100 | 79.62 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | S-101-D8 | 512x512 | 160000 | 14.2 | 12.86 | V100 | 45.62 | 46.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | +| PSPNet | S-101-D8 | 512x512 | 160000 | 14.2 | 13.02 | V100 | 45.44 | 46.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | +| DeepLabV3 | S-101-D8 | 512x512 | 160000 | 14.6 | 9.28 | V100 | 45.71 | 46.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | +| DeepLabV3+ | S-101-D8 | 512x512 | 160000 | 16.2 | 11.96 | V100 | 46.47 | 47.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | + +## Citation + +```bibtex +@article{zhang2020resnest, +title={ResNeSt: Split-Attention Networks}, +author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. 
and Li, Mu and Smola, Alexander}, +journal={arXiv preprint arXiv:2004.08955}, +year={2020} +} +``` diff --git a/configs/resnest/metafile.yaml b/configs/resnest/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b8d41ebfd992542582d6f2c973373a2972bc977 --- /dev/null +++ b/configs/resnest/metafile.yaml @@ -0,0 +1,193 @@ +Models: +- Name: resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.56 + mIoU(ms+flip): 78.98 + Config: configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - S-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 11.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.57 + mIoU(ms+flip): 79.19 + Config: configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - S-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 11.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.67 + mIoU(ms+flip): 80.51 + Config: configs/resnest/resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - S-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 11.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.62 + 
mIoU(ms+flip): 80.27 + Config: configs/resnest/resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - S-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.62 + mIoU(ms+flip): 46.16 + Config: configs/resnest/resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - S-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 14.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k-20200807_145416.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.44 + mIoU(ms+flip): 46.28 + Config: configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - S-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 14.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k-20200807_145416.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.71 + mIoU(ms+flip): 46.59 + Config: configs/resnest/resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - S-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 14.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k-20200807_144503.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.47 + mIoU(ms+flip): 47.27 + Config: configs/resnest/resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - S-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 16.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k-20200807_144503.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch diff --git a/configs/resnest/resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py b/configs/resnest/resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..7ece894b56703b0f11f472079f48997285aa7d72 --- /dev/null +++ b/configs/resnest/resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512.py b/configs/resnest/resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c2852301fcb01cd91231107a7ab03a326c8980a1 --- /dev/null +++ b/configs/resnest/resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py b/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..5c43a9547deaa554cedc741e6042b459b015cb92 --- /dev/null +++ b/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py' # noqa +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py b/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py new file mode 100644 index 
0000000000000000000000000000000000000000..ce39d3709f4baf2b64f46e4bbeffa78498b759ba --- /dev/null +++ b/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py b/configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..fc333e4ff07c00935f29d8ee5844370828857dd0 --- /dev/null +++ b/configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,9 @@ +_base_ = '../fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512.py b/configs/resnest/resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..af127334442a027bb29dce543978d6bc96dc601d --- /dev/null +++ b/configs/resnest/resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = '../fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py b/configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..3aab5244497819c08fd56b0d65a40c20f5696cba --- /dev/null +++ b/configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py @@ -0,0 +1,9 @@ +_base_ = '../pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py b/configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..66e6639c183b09763cd1a1ba47a3af7c71b42f3a --- /dev/null +++ b/configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = '../pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/configs/san/README.md b/configs/san/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23e72aa65fe51e6b1d8070cb21ad31e3e543c169 --- /dev/null +++ b/configs/san/README.md @@ -0,0 +1,47 @@ +# SAN + +> [Side Adapter Network for Open-Vocabulary Semantic Segmentation](https://arxiv.org/abs/2302.12242) + +## Introduction + + + +Official Repo + +## Abstract + + + +This paper presents a new framework for open-vocabulary semantic segmentation with the pre-trained vision-language model, named Side Adapter Network (SAN). Our approach models the semantic segmentation task as a region recognition problem. 
A side network is attached to a frozen CLIP model with two branches: one for predicting mask proposals, and the other for predicting an attention bias that is applied in the CLIP model to recognize the class of each mask. This decoupled design has the benefit of aiding CLIP in recognizing the class of the mask proposals. Since the attached side network can reuse CLIP features, it can be very lightweight. In addition, the entire network can be trained end-to-end, allowing the side network to adapt to the frozen CLIP model, which makes the predicted mask proposals CLIP-aware. Our approach is fast and accurate, and it only adds a few additional trainable parameters. We evaluate our approach on multiple semantic segmentation benchmarks. Our method significantly outperforms other counterparts, with up to 18 times fewer trainable parameters and 19 times faster inference speed. We hope our approach will serve as a solid baseline and ease future research in open-vocabulary semantic segmentation. + + +
+ +
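The released SAN checkpoints load through the same high-level API as any other MMSegmentation model; the vocabulary is fixed by the `text_encoder=dict(dataset_name=...)` field of each config. A minimal sketch, assuming an MMSegmentation 1.x environment with its multimodal dependencies installed and a hypothetical input image `demo.png` (the config path and checkpoint URL come from the COCO-Stuff164k table below):

```python
# Minimal sketch: open-vocabulary segmentation with the ViT-B/16 SAN weights.
# Assumptions: mmseg 1.x with its multimodal extras installed; 'demo.png' is
# a placeholder for any local RGB image.
from mmseg.apis import inference_model, init_model, show_result_pyplot

config = 'configs/san/san-vit-b16_coco-stuff164k-640x640.py'
checkpoint = ('https://download.openmmlab.com/mmsegmentation/v0.5/san/'
              'san-vit-b16_20230906-fd0a7684.pth')

model = init_model(config, checkpoint, device='cuda:0')
result = inference_model(model, 'demo.png')
show_result_pyplot(model, 'demo.png', result)  # overlay of the predicted classes
```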
+ +## Results and models + +### COCO-Stuff164k + +| Method | Backbone | Pretrained | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | ------------ | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| SAN | ViT-B_16 | CLIP_ViT-B16 | 640x640 | 60000 | 12.61 | - | V100 | 41.93 | 41.77 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/san/san-vit-b16_20230906-fd0a7684.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/san/san-vit-b16_20230906.log) | +| SAN | ViT-L_14 | CLIP_ViT-L14 | 640x640 | 60000 | 22.84 | - | V100 | 45.78 | 43.99 | - | [model](https://download.openmmlab.com/mmsegmentation/v0.5/san/san-vit-l14_20230907-a11e098f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/san/san-vit-l14_20230907.log) | + +## Notes + +The pretrained weights in the config files are converted from open_clip models using `tools/model_converters/clip2mmseg.py`. + +## Citation + +```bibtex +@inproceedings{xu2023side, + title={Side adapter network for open-vocabulary semantic segmentation}, + author={Xu, Mengde and Zhang, Zheng and Wei, Fangyun and Hu, Han and Bai, Xiang}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={2945--2954}, + year={2023} +} +``` diff --git a/configs/san/metafile.yaml b/configs/san/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..117d088af036f4b640d19b9a55c73027596414e5 --- /dev/null +++ b/configs/san/metafile.yaml @@ -0,0 +1,61 @@ +Collections: +- Name: SAN + License: Apache License 2.0 + Metadata: + Training Data: + - COCO-Stuff 164k + Paper: + Title: 'Side Adapter Network for Open-Vocabulary Semantic Segmentation' + URL: https://arxiv.org/abs/2302.12242 + README: configs/san/README.md + Frameworks: + - PyTorch +Models: +- Name: san-vit-b16_coco-stuff164k-640x640 + In Collection: SAN + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.93 + mIoU(ms+flip): 41.77 + Config: configs/san/san-vit-b16_coco-stuff164k-640x640.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - SAN + - ViT + Training Resources: 8x V100 GPUS + Memory (GB): 12.61 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/san/san-vit-b16_20230906-fd0a7684.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/san/san-vit-b16_20230906.log + Paper: + Title: 'Side Adapter Network for Open-Vocabulary Semantic Segmentation' + URL: https://arxiv.org/abs/2302.12242 + Code: https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/san_head.py#L470 + Framework: PyTorch +- Name: san-vit-l14_coco-stuff164k-640x640 + In Collection: SAN + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 45.78 + mIoU(ms+flip): 43.99 + Config: configs/san/san-vit-l14_coco-stuff164k-640x640.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - SAN + - ViT + Training Resources: 8x V100 GPUS + Memory (GB): 22.84 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/san/san-vit-l14_20230907-a11e098f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/san/san-vit-l14_20230907.log + Paper: + Title:
'Side Adapter Network for Open-Vocabulary Semantic Segmentation' + URL: https://arxiv.org/abs/2302.12242 + Code: https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/san_head.py#L470 + Framework: PyTorch diff --git a/configs/san/san-vit-b16_coco-stuff164k-640x640.py b/configs/san/san-vit-b16_coco-stuff164k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..40592486d1e10c295ffc2860412ef78b602c50fe --- /dev/null +++ b/configs/san/san-vit-b16_coco-stuff164k-640x640.py @@ -0,0 +1,82 @@ +_base_ = [ + '../_base_/models/san_vit-b16.py', '../_base_/datasets/coco-stuff164k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomChoiceResize', + scales=[int(640 * x * 0.1) for x in range(5, 16)], + resize_type='ResizeShortestEdge', + max_size=2560), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=1.0), + dict(type='PhotoMetricDistortion'), + dict(type='RandomFlip', prob=0.5), + dict(type='PackSegInputs') +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='ResizeShortestEdge', scale=crop_size, max_size=2560), + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] + +# By default, models are trained on 4 GPUs with 8 images per GPU +train_dataloader = dict(batch_size=8, dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/san/clip_vit-base-patch16-224_3rdparty-d08f8887.pth' # noqa +data_preprocessor = dict( + mean=[122.7709, 116.7460, 104.0937], + std=[68.5005, 66.6322, 70.3232], + size_divisor=640, + test_cfg=dict(size_divisor=32)) +model = dict( + pretrained=pretrained, + text_encoder=dict(dataset_name='coco-stuff164k'), + decode_head=dict(num_classes=171)) + +# training schedule for 60k +train_cfg = dict( + type='IterBasedTrainLoop', + max_iters=60000, + val_interval=500, + val_begin=55000) +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + by_epoch=False, + interval=10000, + save_best='mIoU')) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optim_wrapper = dict( + _delete_=True, + type='AmpOptimWrapper', + optimizer=dict( + type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.0001), + paramwise_cfg=dict( + custom_keys={ + 'img_encoder': dict(lr_mult=0.1, decay_mult=1.0), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ }), + loss_scale='dynamic', + clip_grad=dict(max_norm=0.01, norm_type=2)) + +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=0, + end=60000, + by_epoch=False, + ) +] diff --git a/configs/san/san-vit-b16_pascal_context-640x640.py b/configs/san/san-vit-b16_pascal_context-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..b164fe41fd97c9bf035c6628ed92283e7f851b9b --- /dev/null +++ b/configs/san/san-vit-b16_pascal_context-640x640.py @@ -0,0 +1,56 @@ +_base_ = [ + '../_base_/models/san_vit-b16.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='ResizeShortestEdge', scale=crop_size, max_size=2560), + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +data_preprocessor = dict( + mean=[122.7709, 116.7460, 104.0937], + std=[68.5005, 66.6322, 70.3232], + size_divisor=640, + test_cfg=dict(size_divisor=32)) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='pretrain/vit_base_patch16_224.pth', + text_encoder=dict(dataset_name='pascal_context'), + decode_head=dict(num_classes=59)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] diff --git a/configs/san/san-vit-b16_voc12aug-640x640.py b/configs/san/san-vit-b16_voc12aug-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..62e9b26f0af3d8679680adcebac1f19aa005f776 --- /dev/null +++ b/configs/san/san-vit-b16_voc12aug-640x640.py @@ -0,0 +1,65 @@ +_base_ = [ + '../_base_/models/san_vit-b16.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) + +metainfo = dict( + classes=('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', + 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', + 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'), + palette=[[128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]]) +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='ResizeShortestEdge', scale=crop_size, max_size=2560), + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict( + batch_size=1, dataset=dict(metainfo=metainfo, pipeline=test_pipeline)) +test_dataloader = val_dataloader + +data_preprocessor = dict( + mean=[122.7709, 116.7460, 104.0937], + std=[68.5005, 66.6322, 70.3232], + size_divisor=640, + test_cfg=dict(size_divisor=32)) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='pretrain/vit_base_patch16_224.pth', + text_encoder=dict(dataset_name='voc'), + decode_head=dict(num_classes=20)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] diff --git a/configs/san/san-vit-l14_coco-stuff164k-640x640.py b/configs/san/san-vit-l14_coco-stuff164k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..c34328db3feef2d8704caaef59426b072c9b8171 --- /dev/null +++ b/configs/san/san-vit-l14_coco-stuff164k-640x640.py @@ -0,0 +1,36 @@ +_base_ = ['./san-vit-b16_coco-stuff164k-640x640.py'] + +pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/san/clip_vit-large-patch14-336_3rdparty-0b5df9cb.pth' # noqa +model = dict( + type='MultimodalEncoderDecoder', + pretrained=pretrained, + encoder_resolution=0.7, + image_encoder=dict( + type='VisionTransformer', + img_size=(336, 336), + patch_size=14, + patch_pad=0, + embed_dims=1024, + num_layers=18, + num_heads=16, + out_indices=(5, 11, 17), + ), + text_encoder=dict( + type='CLIPTextEncoder', + embed_dims=768, + num_layers=12, + num_heads=12, + output_dims=768, + ), + decode_head=dict( + type='SideAdapterCLIPHead', + san_cfg=dict(clip_channels=1024, cfg_decoder=dict(num_heads=16)), + maskgen_cfg=dict( + num_layers=6, + embed_dims=1024, + num_heads=16, + out_dims=768, + ))) + +# By default, models are trained on 8 GPUs with 4 images per GPU +train_dataloader = dict(batch_size=4) diff --git a/configs/san/san-vit-l14_pascal_context-640x640.py b/configs/san/san-vit-l14_pascal_context-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..a9545fac8e04e3138f0f151d093ac22fa0590efb --- /dev/null +++ b/configs/san/san-vit-l14_pascal_context-640x640.py @@ -0,0 +1,32 @@ +_base_ = ['./san-vit-b16_pascal_context-640x640.py'] + +model = dict( + type='MultimodalEncoderDecoder', + pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth', + encoder_resolution=0.7, + image_encoder=dict( + type='VisionTransformer', + img_size=(336, 336), + patch_size=14, + patch_pad=0, + embed_dims=1024, + num_layers=18, + num_heads=16, + out_indices=(5, 11, 17), + ), + text_encoder=dict( + type='CLIPTextEncoder', + embed_dims=768, + num_layers=12, + num_heads=12, + output_dims=768, + ), + decode_head=dict( + type='SideAdapterCLIPHead', + san_cfg=dict(clip_channels=1024, cfg_decoder=dict(num_heads=16)), + maskgen_cfg=dict( + num_layers=6, + embed_dims=1024, + num_heads=16, + out_dims=768, + ))) diff --git a/configs/san/san-vit-l14_voc12aug-640x640.py b/configs/san/san-vit-l14_voc12aug-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..2f377150394df02b0af0fd8e5bcbfb127d5e403d --- /dev/null +++ b/configs/san/san-vit-l14_voc12aug-640x640.py @@ -0,0 +1,32 @@ +_base_ = ['./san-vit-b16_voc12aug-640x640.py'] + +model = dict( + type='MultimodalEncoderDecoder', + pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth', + encoder_resolution=0.7, + image_encoder=dict( + type='VisionTransformer', + img_size=(336, 336), + patch_size=14, + patch_pad=0, + embed_dims=1024, + num_layers=18, + num_heads=16, + out_indices=(5, 11, 17), + ), + text_encoder=dict( + type='CLIPTextEncoder', + embed_dims=768, + num_layers=12, + num_heads=12, + output_dims=768, + ), + decode_head=dict( + type='SideAdapterCLIPHead', + san_cfg=dict(clip_channels=1024, cfg_decoder=dict(num_heads=16)), + maskgen_cfg=dict( + num_layers=6, + embed_dims=1024, + num_heads=16, + out_dims=768, + ))) diff --git a/configs/segformer/README.md b/configs/segformer/README.md new file mode 
100644 index 0000000000000000000000000000000000000000..f8999b0efa5034a115b64efba127cdf8e5c2a113 --- /dev/null +++ b/configs/segformer/README.md @@ -0,0 +1,101 @@ +# SegFormer + +> [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) + +## Introduction + + + +[Official Repo](https://github.com/NVlabs/SegFormer) + +[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246) + +## Abstract + + + +We present SegFormer, a simple, efficient yet powerful semantic segmentation framework which unifies Transformers with lightweight multilayer perceptron (MLP) decoders. SegFormer has two appealing features: 1) SegFormer comprises a novel hierarchically structured Transformer encoder which outputs multiscale features. It does not need positional encoding, thereby avoiding the interpolation of positional codes, which leads to decreased performance when the testing resolution differs from training. 2) SegFormer avoids complex decoders. The proposed MLP decoder aggregates information from different layers, and thus combines both local attention and global attention to render powerful representations. We show that this simple and lightweight design is the key to efficient segmentation on Transformers. We scale our approach up to obtain a series of models from SegFormer-B0 to SegFormer-B5, reaching significantly better performance and efficiency than previous counterparts. For example, SegFormer-B4 achieves 50.3% mIoU on ADE20K with 64M parameters, being 5x smaller and 2.2% better than the previous best method. Our best model, SegFormer-B5, achieves 84.0% mIoU on the Cityscapes validation set and shows excellent zero-shot robustness on Cityscapes-C. Code will be released at: [this http URL](https://github.com/NVlabs/SegFormer). + + + +
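+To make the decoder described above concrete, the following is a minimal PyTorch sketch of the all-MLP idea: project every backbone stage to a common width, upsample everything to the 1/4-resolution grid, concatenate, and fuse. It is an illustration only, not the exact mmseg `SegformerHead`; the channel list matches the `in_channels=[64, 128, 320, 512]` used by the MIT-B1 to MIT-B5 configs below, and the fusion width and class count are illustrative defaults.
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class AllMLPDecoder(nn.Module):
+    """Sketch of an all-MLP decode head over four multiscale feature maps."""
+
+    def __init__(self, in_channels=(64, 128, 320, 512), embed_dim=256,
+                 num_classes=150):
+        super().__init__()
+        # a point-wise (1x1) projection per feature scale
+        self.proj = nn.ModuleList(
+            nn.Conv2d(c, embed_dim, kernel_size=1) for c in in_channels)
+        self.fuse = nn.Conv2d(len(in_channels) * embed_dim, embed_dim, 1)
+        self.cls = nn.Conv2d(embed_dim, num_classes, 1)
+
+    def forward(self, feats):
+        # feats[i]: (N, C_i, H / 2**(i + 2), W / 2**(i + 2))
+        target = feats[0].shape[2:]  # the 1/4-resolution grid
+        ups = [F.interpolate(p(f), size=target, mode='bilinear',
+                             align_corners=False)
+               for p, f in zip(self.proj, feats)]
+        return self.cls(self.fuse(torch.cat(ups, dim=1)))
+```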
+ +
+ +## Usage + +To use other repositories' pre-trained models, it is necessary to convert keys. + +We provide a script [`mit2mmseg.py`](../../tools/model_converters/mit2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/NVlabs/SegFormer) to MMSegmentation style. + +```shell +python tools/model_converters/mit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +This script converts a model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | -------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Segformer | MIT-B0 | 512x512 | 160000 | 2.1 | 51.32 | 1080 Ti | 37.41 | 38.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b0_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530-8ffa8fda.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530.log.json) | +| Segformer | MIT-B1 | 512x512 | 160000 | 2.6 | 47.66 | TITAN Xp | 40.97 | 42.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b1_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106-d70e859d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106.log.json) | +| Segformer | MIT-B2 | 512x512 | 160000 | 3.6 | 30.88 | TITAN Xp | 45.58 | 47.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b2_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103-cbd414ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103.log.json) | +| Segformer | MIT-B3 | 512x512 | 160000 | 4.8 | 22.11 | V100 | 47.82 | 48.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b3_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410-962b98d2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410.log.json) | +| Segformer | MIT-B4 | 512x512 | 160000 | 6.1 | 15.45 | V100 |
48.46 | 49.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b4_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055-7f509d7d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055.log.json) | +| Segformer | MIT-B5 | 512x512 | 160000 | 7.2 | 11.89 | V100 | 49.13 | 50.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235.log.json) | +| Segformer | MIT-B5 | 640x640 | 160000 | 11.5 | 11.30 | V100 | 49.62 | 50.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243.log.json) | + +Evaluation with AlignedResize: + +| Method | Backbone | Crop Size | Lr schd | mIoU | mIoU(ms+flip) | +| --------- | -------- | --------- | ------: | ----: | ------------- | +| Segformer | MIT-B0 | 512x512 | 160000 | 38.1 | 38.57 | +| Segformer | MIT-B1 | 512x512 | 160000 | 41.64 | 42.76 | +| Segformer | MIT-B2 | 512x512 | 160000 | 46.53 | 47.49 | +| Segformer | MIT-B3 | 512x512 | 160000 | 48.46 | 49.14 | +| Segformer | MIT-B4 | 512x512 | 160000 | 49.34 | 50.29 | +| Segformer | MIT-B5 | 512x512 | 160000 | 50.08 | 50.72 | +| Segformer | MIT-B5 | 640x640 | 160000 | 50.58 | 50.8 | + +We replace `AlignedResize` in the original implementation with `Resize + ResizeToMultiple`. If you want to test by +using `AlignedResize`, you can change the dataset pipeline like this: + +```python +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + # resize image to a multiple of 32, which improves SegFormer by 0.5-1.0 mIoU. + dict(type='ResizeToMultiple', size_divisor=32), + # add loading annotation after ``Resize`` because ground truth + # does not need the resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +``` + +### Cityscapes + +The lower fps results are caused by the sliding-window inference scheme (window size: 1024x1024).
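+
+If inference speed matters more than the last bit of accuracy, the sliding-window scheme can be swapped for whole-image inference by overriding `test_cfg`. A short sketch: the `slide` settings mirror the Cityscapes configs in this diff, while `mode='whole'` is standard MMSegmentation behavior.
+
+```python
+# Sliding-window inference, as configured for Cityscapes below:
+model = dict(
+    test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768)))
+
+# Whole-image inference: faster, but may lose accuracy at high resolution.
+model = dict(test_cfg=dict(mode='whole'))
+```
+
+The Cityscapes numbers below were measured with the sliding-window setting.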
+ +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Segformer | MIT-B0 | 1024x1024 | 160000 | 3.64 | 4.74 | V100 | 76.54 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857.log.json) | +| Segformer | MIT-B1 | 1024x1024 | 160000 | 4.49 | 4.3 | V100 | 78.56 | 79.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b1_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213.log.json) | +| Segformer | MIT-B2 | 1024x1024 | 160000 | 7.42 | 3.36 | V100 | 81.08 | 82.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b2_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205.log.json) | +| Segformer | MIT-B3 | 1024x1024 | 160000 | 10.86 | 2.53 | V100 | 81.94 | 83.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b3_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823.log.json) | +| Segformer | MIT-B4 | 1024x1024 | 160000 | 15.07 | 1.88 | V100 | 81.89 | 83.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b4_8xb1-160k_cityscapes-1024x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709.log.json) | +| Segformer | MIT-B5 | 1024x1024 | 160000 | 18.00 | 1.39 | V100 | 82.25 | 83.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b5_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934.log.json) | + +## Citation + +```bibtex +@article{xie2021segformer, + title={SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers}, + author={Xie, Enze and Wang, Wenhai and Yu, Zhiding and Anandkumar, Anima and Alvarez, Jose M and Luo, Ping}, + journal={arXiv preprint arXiv:2105.15203}, + year={2021} +} +``` diff --git a/configs/segformer/metafile.yaml b/configs/segformer/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fb38d745baed41ef272451f690a4929c6548f94 --- /dev/null +++ b/configs/segformer/metafile.yaml @@ -0,0 +1,340 @@ +Collections: +- Name: Segformer + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + - Cityscapes + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + README: configs/segformer/README.md + Frameworks: + - PyTorch +Models: +- Name: segformer_mit-b0_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.41 + mIoU(ms+flip): 38.34 + Config: configs/segformer/segformer_mit-b0_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B0 + - Segformer + Training Resources: 8x 1080 Ti GPUS + Memory (GB): 2.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530-8ffa8fda.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b1_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.97 + mIoU(ms+flip): 42.54 + Config: configs/segformer/segformer_mit-b1_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B1 + - Segformer + Training Resources: 8x TITAN Xp GPUS + Memory (GB): 2.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106-d70e859d.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b2_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.58 + mIoU(ms+flip): 47.03 + Config: configs/segformer/segformer_mit-b2_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B2 + - Segformer + Training Resources: 8x TITAN Xp GPUS + Memory (GB): 3.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103-cbd414ac.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b3_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.82 + mIoU(ms+flip): 48.81 + Config: configs/segformer/segformer_mit-b3_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B3 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 4.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410-962b98d2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b4_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.46 + mIoU(ms+flip): 49.76 + Config: configs/segformer/segformer_mit-b4_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B4 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055-7f509d7d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b5_8xb2-160k_ade20k-512x512 
+ In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.13 + mIoU(ms+flip): 50.22 + Config: configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B5 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 7.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b5_8xb2-160k_ade20k-640x640 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.62 + mIoU(ms+flip): 50.36 + Config: configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B5 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 11.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b0_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.54 + mIoU(ms+flip): 78.22 + Config: configs/segformer/segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B0 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 3.64 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b1_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.56 + mIoU(ms+flip): 79.73 + Config: configs/segformer/segformer_mit-b1_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B1 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 4.49 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b2_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.08 + mIoU(ms+flip): 82.18 + Config: configs/segformer/segformer_mit-b2_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B2 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 7.42 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b3_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.94 + mIoU(ms+flip): 83.14 + Config: configs/segformer/segformer_mit-b3_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B3 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 10.86 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b4_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.89 + mIoU(ms+flip): 83.38 + Config: configs/segformer/segformer_mit-b4_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B4 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 15.07 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b5_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 82.25 + mIoU(ms+flip): 83.48 + Config: configs/segformer/segformer_mit-b5_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B5 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 18.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch diff --git a/configs/segformer/segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py b/configs/segformer/segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..1280047c9420181e5c4bcb2a18443804d282affc --- /dev/null +++ b/configs/segformer/segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py @@ -0,0 +1,41 @@ +_base_ = [ + '../_base_/models/segformer_mit-b0.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (1024, 1024) +data_preprocessor = dict(size=crop_size) +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth' # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=checkpoint)), + test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) 
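+            # decay_mult=0. exempts the positional blocks and normalization
+            # layers from weight decay; lr_mult=10. trains the randomly
+            # initialized decode head with a 10x learning rate relative to
+            # the pretrained backbone.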
+ })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] + +train_dataloader = dict(batch_size=1, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/segformer/segformer_mit-b0_8xb2-160k_ade20k-512x512.py b/configs/segformer/segformer_mit-b0_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..4a9476df684593ab5d52746c7e796eae9ff5e43b --- /dev/null +++ b/configs/segformer/segformer_mit-b0_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth' # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=checkpoint)), + decode_head=dict(num_classes=150)) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/segformer/segformer_mit-b1_8xb1-160k_cityscapes-1024x1024.py b/configs/segformer/segformer_mit-b1_8xb1-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..85c126ead42d4d54195d6730e079ecdd2479ddd2 --- /dev/null +++ b/configs/segformer/segformer_mit-b1_8xb1-160k_cityscapes-1024x1024.py @@ -0,0 +1,9 @@ +_base_ = ['./segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b1_8xb2-160k_ade20k-512x512.py b/configs/segformer/segformer_mit-b1_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1ff21b8becf7453f4920f099e515ea2c510de8a0 --- /dev/null +++ b/configs/segformer/segformer_mit-b1_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,12 @@ +_base_ = ['./segformer_mit-b0_8xb2-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth' # noqa + +# model settings +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_heads=[1, 2, 5, 8], + num_layers=[2, 2, 2, 2]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b2_8xb1-160k_cityscapes-1024x1024.py b/configs/segformer/segformer_mit-b2_8xb1-160k_cityscapes-1024x1024.py new file mode 100644 index 
0000000000000000000000000000000000000000..c802f275b5776abfb49dd7773d0c14b97ac3e4d2 --- /dev/null +++ b/configs/segformer/segformer_mit-b2_8xb1-160k_cityscapes-1024x1024.py @@ -0,0 +1,10 @@ +_base_ = ['./segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_layers=[3, 4, 6, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b2_8xb2-160k_ade20k-512x512.py b/configs/segformer/segformer_mit-b2_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..0f4c1af061f55f7e2f8bc6845856dbc53327684d --- /dev/null +++ b/configs/segformer/segformer_mit-b2_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,12 @@ +_base_ = ['./segformer_mit-b0_8xb2-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth' # noqa + +# model settings +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_heads=[1, 2, 5, 8], + num_layers=[3, 4, 6, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b3_8xb1-160k_cityscapes-1024x1024.py b/configs/segformer/segformer_mit-b3_8xb1-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..9b41ad0b3913e341bee9d2dd44585206677ec301 --- /dev/null +++ b/configs/segformer/segformer_mit-b3_8xb1-160k_cityscapes-1024x1024.py @@ -0,0 +1,10 @@ +_base_ = ['./segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_layers=[3, 4, 18, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b3_8xb2-160k_ade20k-512x512.py b/configs/segformer/segformer_mit-b3_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a2cc13d847d0a159377c8568be40caa4d16ad793 --- /dev/null +++ b/configs/segformer/segformer_mit-b3_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,12 @@ +_base_ = ['./segformer_mit-b0_8xb2-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth' # noqa + +# model settings +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_heads=[1, 2, 5, 8], + num_layers=[3, 4, 18, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b4_8xb1-160k_cityscapes-1024x1024.py b/configs/segformer/segformer_mit-b4_8xb1-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..5fb16080dd57095dbd8d7263403e3254c51ec55a --- /dev/null +++ b/configs/segformer/segformer_mit-b4_8xb1-160k_cityscapes-1024x1024.py @@ -0,0 +1,10 @@ +_base_ = ['./segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_layers=[3, 
8, 27, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b4_8xb2-160k_ade20k-512x512.py b/configs/segformer/segformer_mit-b4_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5f39c3010809148a9634ffb321f581139016de8f --- /dev/null +++ b/configs/segformer/segformer_mit-b4_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,12 @@ +_base_ = ['./segformer_mit-b0_8xb2-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth' # noqa + +# model settings +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_heads=[1, 2, 5, 8], + num_layers=[3, 8, 27, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b5_8xb1-160k_cityscapes-1024x1024.py b/configs/segformer/segformer_mit-b5_8xb1-160k_cityscapes-1024x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..18c3c162588b2bd041aa07d84e0b6fda78a58096 --- /dev/null +++ b/configs/segformer/segformer_mit-b5_8xb1-160k_cityscapes-1024x1024.py @@ -0,0 +1,10 @@ +_base_ = ['./segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_layers=[3, 6, 40, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-512x512.py b/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1e9a209ebea8378371d36236c194c6417a4013ae --- /dev/null +++ b/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,12 @@ +_base_ = ['./segformer_mit-b0_8xb2-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth' # noqa + +# model settings +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_heads=[1, 2, 5, 8], + num_layers=[3, 6, 40, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-640x640.py b/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..a32eb7c1e1adedf7d1dcb41d5fd4413b71e25932 --- /dev/null +++ b/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-640x640.py @@ -0,0 +1,41 @@ +_base_ = ['./segformer_mit-b0_8xb2-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth' # noqa + +# dataset settings +crop_size = (640, 640) +data_preprocessor = dict(size=crop_size) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict( + type='RandomResize', + scale=(2048, 640), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 640), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does 
not need the resize data transform + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='PackSegInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict(batch_size=1, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +# model settings +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_heads=[1, 2, 5, 8], + num_layers=[3, 6, 40, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segmenter/README.md b/configs/segmenter/README.md new file mode 100644 index 0000000000000000000000000000000000000000..103b1254729e596494998f674e100f1c356ad4be --- /dev/null +++ b/configs/segmenter/README.md @@ -0,0 +1,76 @@ +# Segmenter + +> [Segmenter: Transformer for Semantic Segmentation](https://arxiv.org/abs/2105.05633) + +## Introduction + + + +[Official Repo](https://github.com/rstrudel/segmenter) + +[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15) + +## Abstract + + + +Image segmentation is often ambiguous at the level of individual image patches and requires contextual information to reach label consensus. In this paper we introduce Segmenter, a transformer model for semantic segmentation. In contrast to convolution-based methods, our approach allows modeling global context already at the first layer and throughout the network. We build on the recent Vision Transformer (ViT) and extend it to semantic segmentation. To do so, we rely on the output embeddings corresponding to image patches and obtain class labels from these embeddings with a point-wise linear decoder or a mask transformer decoder. We leverage models pre-trained for image classification and show that we can fine-tune them on moderate-sized datasets available for semantic segmentation. The linear decoder already obtains excellent results, but the performance can be further improved by a mask transformer generating class masks. We conduct an extensive ablation study to show the impact of the different parameters; in particular, the performance is better for large models and small patch sizes. Segmenter attains excellent results for semantic segmentation. It outperforms the state of the art on both the ADE20K and Pascal Context datasets and is competitive on Cityscapes. + + + +
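+As a concrete illustration of the point-wise linear decoder mentioned above, here is a minimal sketch (illustrative names only; in this diff, its mmseg counterpart is the `FCNHead` with `num_convs=0` used by the `segmenter_vit-s_fcn` config below):
+
+```python
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class LinearDecoder(nn.Module):
+    """Point-wise linear decoder: a 1x1 conv over the patch-embedding grid,
+    followed by bilinear upsampling back to pixel resolution."""
+
+    def __init__(self, embed_dim=384, num_classes=150, patch_size=16):
+        super().__init__()
+        self.patch_size = patch_size
+        self.cls = nn.Conv2d(embed_dim, num_classes, kernel_size=1)
+
+    def forward(self, x):
+        # x: (N, embed_dim, H / 16, W / 16) patch-embedding grid
+        logits = self.cls(x)
+        return F.interpolate(logits, scale_factor=self.patch_size,
+                             mode='bilinear', align_corners=False)
+```
+
+`embed_dim=384` matches the ViT-S configs below (`in_channels=384`); the mask transformer decoder replaces this single projection with learnable class embeddings processed jointly with the patch tokens.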
+ +
+ +## Usage + +We have provided pretrained models converted from [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106). + +If you want to convert keys on your own to use the pre-trained ViT model from [Segmenter](https://github.com/rstrudel/segmenter), we also provide a script [`vitjax2mmseg.py`](../../tools/model_converters/vitjax2mmseg.py) in the tools directory to convert the keys of models from [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106) to MMSegmentation style. + +```shell +python tools/model_converters/vitjax2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/vitjax2mmseg.py \ +Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz \ +pretrain/vit_tiny_p16_384.pth +``` + +This script converts a model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +In our default setting, the pretrained models and their corresponding [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106) models are listed below: + +| pretrained models | original models | +| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| vit_tiny_p16_384.pth | [vit_tiny_patch16_384](https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz) | +| vit_small_p16_384.pth | [vit_small_patch16_384](https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz) | +| vit_base_p16_384.pth | [vit_base_patch16_384](https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz) | +| vit_large_p16_384.pth | [vit_large_patch16_384](https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz) | + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | -------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Segmenter Mask | ViT-T_16 | 512x512 | 160000 | 1.21 | 27.98 | V100 | 39.99 | 40.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706-ffcf7509.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | +| Segmenter Linear | ViT-S_16 | 512x512 | 160000 | 1.78 | 28.07 | V100 | 45.75 | 46.82 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713-39658c46.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713.log.json) | +| Segmenter Mask | ViT-S_16 | 512x512 | 160000 | 2.03 | 24.80 | V100 | 46.19 | 47.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706-511bb103.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | +| Segmenter Mask | ViT-B_16 | 512x512 | 160000 | 4.20 | 13.20 | V100 | 49.60 | 51.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-b_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706-bc533b08.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | +| Segmenter Mask | ViT-L_16 | 640x640 | 160000 | 16.56 | 2.62 | V100 | 52.16 | 53.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-l_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750-7ef345be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750.log.json) | + +## Citation + +```bibtex +@inproceedings{strudel2021segmenter, + title={Segmenter: Transformer for semantic segmentation}, + author={Strudel, Robin and Garcia, Ricardo and Laptev, Ivan and Schmid, Cordelia}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={7262--7272}, + year={2021} +} +``` diff --git a/configs/segmenter/metafile.yaml b/configs/segmenter/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff2aa448bb28e43ee61209650241e730d12d3160 --- /dev/null +++ b/configs/segmenter/metafile.yaml @@ -0,0 +1,138 @@ +Collections: +- Name: Segmenter + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' 
+ URL: https://arxiv.org/abs/2105.05633 + README: configs/segmenter/README.md + Frameworks: + - PyTorch +Models: +- Name: segmenter_vit-t_mask_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.99 + mIoU(ms+flip): 40.83 + Config: configs/segmenter/segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-T_16 + - Segmenter + - Mask + Training Resources: 8x V100 GPUS + Memory (GB): 1.21 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706-ffcf7509.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch +- Name: segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.75 + mIoU(ms+flip): 46.82 + Config: configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-S_16 + - Segmenter + - Linear + Training Resources: 8x V100 GPUS + Memory (GB): 1.78 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713-39658c46.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch +- Name: segmenter_vit-s_mask_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.19 + mIoU(ms+flip): 47.85 + Config: configs/segmenter/segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-S_16 + - Segmenter + - Mask + Training Resources: 8x V100 GPUS + Memory (GB): 2.03 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706-511bb103.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch +- Name: segmenter_vit-b_mask_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.6 + mIoU(ms+flip): 51.07 + Config: 
configs/segmenter/segmenter_vit-b_mask_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-B_16 + - Segmenter + - Mask + Training Resources: 8x V100 GPUS + Memory (GB): 4.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706-bc533b08.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch +- Name: segmenter_vit-l_mask_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.16 + mIoU(ms+flip): 53.65 + Config: configs/segmenter/segmenter_vit-l_mask_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-L_16 + - Segmenter + - Mask + Training Resources: 8x V100 GPUS + Memory (GB): 16.56 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750-7ef345be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch diff --git a/configs/segmenter/segmenter_vit-b_mask_8xb1-160k_ade20k-512x512.py b/configs/segmenter/segmenter_vit-b_mask_8xb1-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a4bae50648df3fbd8bc1b20778fae50da36a2156 --- /dev/null +++ b/configs/segmenter/segmenter_vit-b_mask_8xb1-160k_ade20k-512x512.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/segmenter_vit-b16_mask.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +optimizer = dict(lr=0.001, weight_decay=0.0) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict( + # num_gpus: 8 -> batch_size: 8 + batch_size=1) +val_dataloader = dict(batch_size=1) diff --git a/configs/segmenter/segmenter_vit-l_mask_8xb1-160k_ade20k-512x512.py b/configs/segmenter/segmenter_vit-l_mask_8xb1-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..302acdecb6ff37c241d5e3abcec5e6f3beb96dfc --- /dev/null +++ b/configs/segmenter/segmenter_vit-l_mask_8xb1-160k_ade20k-512x512.py @@ -0,0 +1,32 @@ +_base_ = [ + '../_base_/models/segmenter_vit-b16_mask.py', + '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) +data_preprocessor = dict(size=crop_size) +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_large_p16_384_20220308-d4efb41d.pth' # noqa + +model = dict( + 
data_preprocessor=data_preprocessor, + pretrained=checkpoint, + backbone=dict( + type='VisionTransformer', + img_size=(640, 640), + embed_dims=1024, + num_layers=24, + num_heads=16), + decode_head=dict( + type='SegmenterMaskTransformerHead', + in_channels=1024, + channels=1024, + num_heads=16, + embed_dims=1024), + test_cfg=dict(mode='slide', crop_size=(640, 640), stride=(608, 608))) + +optimizer = dict(lr=0.001, weight_decay=0.0) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict( + # num_gpus: 8 -> batch_size: 8 + batch_size=1) +val_dataloader = dict(batch_size=1) diff --git a/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py b/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..dc1e4c898539eaeffaa8a94cd8d9a80f06762a03 --- /dev/null +++ b/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py @@ -0,0 +1,14 @@ +_base_ = './segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py' + +model = dict( + decode_head=dict( + _delete_=True, + type='FCNHead', + in_channels=384, + channels=384, + num_convs=0, + dropout_ratio=0.0, + concat_input=False, + num_classes=150, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) diff --git a/configs/segmenter/segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py b/configs/segmenter/segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b19fd41409849374b3148e5edb2e01d299b925a5 --- /dev/null +++ b/configs/segmenter/segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/segmenter_vit-b16_mask.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_small_p16_384_20220308-410f6037.pth' # noqa + +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + pretrained=checkpoint, + backbone=dict( + img_size=(512, 512), + embed_dims=384, + num_heads=6, + ), + decode_head=dict( + type='SegmenterMaskTransformerHead', + in_channels=384, + channels=384, + num_classes=150, + num_layers=2, + num_heads=6, + embed_dims=384, + dropout_ratio=0.0, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) + +optimizer = dict(lr=0.001, weight_decay=0.0) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict( + # num_gpus: 8 -> batch_size: 8 + batch_size=1) +val_dataloader = dict(batch_size=1) diff --git a/configs/segmenter/segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py b/configs/segmenter/segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..221a9f9a6df0c95b3de1fa19d46da23076e2185f --- /dev/null +++ b/configs/segmenter/segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py @@ -0,0 +1,26 @@ +_base_ = [ + '../_base_/models/segmenter_vit-b16_mask.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_tiny_p16_384_20220308-cce8c795.pth' # noqa + +model = dict( + data_preprocessor=data_preprocessor, + 
pretrained=checkpoint, + backbone=dict(embed_dims=192, num_heads=3), + decode_head=dict( + type='SegmenterMaskTransformerHead', + in_channels=192, + channels=192, + num_heads=3, + embed_dims=192)) + +optimizer = dict(lr=0.001, weight_decay=0.0) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer) +train_dataloader = dict( + # num_gpus: 8 -> batch_size: 8 + batch_size=1) +val_dataloader = dict(batch_size=1) diff --git a/configs/segnext/README.md b/configs/segnext/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d7434a062134814dc4834b7d6445108881a07353 --- /dev/null +++ b/configs/segnext/README.md @@ -0,0 +1,63 @@ +# SegNeXt + +> [SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation](https://arxiv.org/abs/2209.08575) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We present SegNeXt, a simple convolutional network architecture for semantic segmentation. Recent transformer-based models have dominated the field of semantic segmentation due to the efficiency of self-attention in encoding spatial information. In this paper, we show that convolutional attention is a more efficient and effective way to encode contextual information than the self-attention mechanism in transformers. By re-examining the characteristics owned by successful segmentation models, we discover several key components leading to the performance improvement of segmentation models. This motivates us to design a novel convolutional attention network that uses cheap convolutional operations. Without bells and whistles, our SegNeXt significantly improves the performance of previous state-of-the-art methods on popular benchmarks, including ADE20K, Cityscapes, COCO-Stuff, Pascal VOC, Pascal Context, and iSAID. Notably, SegNeXt outperforms EfficientNet-L2 w/ NAS-FPN and achieves 90.6% mIoU on the Pascal VOC 2012 test leaderboard using only 1/10 parameters of it. On average, SegNeXt achieves about 2.0% mIoU improvements compared to the state-of-the-art methods on the ADE20K datasets with the same or fewer computations. Code is available at [this https URL](https://github.com/uyzhang/JSeg) (Jittor) and [this https URL](https://github.com/Visual-Attention-Network/SegNeXt) (Pytorch). + + + +
+ +
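+For intuition, the "convolutional attention" described above can be sketched in a few lines. The module below is an illustrative simplification, not the repository's actual `MSCAN` implementation; it borrows the depth-wise strip-convolution kernel sizes (7, 11, 21) that appear in the configs in this folder, and all other choices are assumptions:
+
+```python
+import torch
+import torch.nn as nn
+
+class MSCASketch(nn.Module):
+    """Illustrative multi-scale convolutional attention (not the official MSCAN)."""
+
+    def __init__(self, channels: int):
+        super().__init__()
+        # local context via a 5x5 depth-wise convolution
+        self.conv5 = nn.Conv2d(channels, channels, 5, padding=2, groups=channels)
+        # multi-scale context via depth-wise strip convolutions (1xk then kx1)
+        self.strips = nn.ModuleList([
+            nn.Sequential(
+                nn.Conv2d(channels, channels, (1, k), padding=(0, k // 2), groups=channels),
+                nn.Conv2d(channels, channels, (k, 1), padding=(k // 2, 0), groups=channels))
+            for k in (7, 11, 21)])
+        self.mix = nn.Conv2d(channels, channels, 1)  # 1x1 channel mixing
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        attn = self.conv5(x)
+        attn = attn + sum(strip(attn) for strip in self.strips)
+        return self.mix(attn) * x  # reweight the input with the attention map
+
+x = torch.randn(2, 64, 32, 32)
+print(MSCASketch(64)(x).shape)  # torch.Size([2, 64, 32, 32])
+```
+
+The attention map here is produced entirely by cheap depth-wise convolutions and then used to reweight the input, which is the convolutional alternative to self-attention that the paper argues for.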
+ +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| SegNeXt | MSCAN-T | 512x512 | 160000 | 17.88 | 52.38 | A100 | 41.50 | 42.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k_20230210_140244-05bd8466.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k_20230210_140244.log.json) | +| SegNeXt | MSCAN-S | 512x512 | 160000 | 21.47 | 42.27 | A100 | 44.16 | 45.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k_20230214_113014-43013668.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k_20230214_113014.log.json) | +| SegNeXt | MSCAN-B | 512x512 | 160000 | 31.03 | 35.15 | A100 | 48.03 | 49.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k_20230209_172053-b6f6c70c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k_20230209_172053.log.json) | +| SegNeXt | MSCAN-L | 512x512 | 160000 | 43.32 | 22.91 | A100 | 50.99 | 52.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k_20230209_172055-19b14b63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k_20230209_172055.log.json) | + +Note: + +- When we integrated SegNeXt into MMSegmentation, we modified some layers' names to make them more precise and concise without changing the model architecture. Therefore, the keys of pre-trained weights are different from the [original weights](https://cloud.tsinghua.edu.cn/d/c15b25a6745946618462/), but don't worry about these changes. 
We have converted them and uploaded the checkpoints; you can find the URLs of the pre-trained checkpoints in the config files and use them directly for training.
+
+- The total batch size is 16. We trained SegNeXt with a single GPU because performance degrades significantly when using `SyncBN` (mainly in the `OverlapPatchEmbed` modules of `MSCAN`) with PyTorch 1.9.
+
+- There will be subtle differences at test time because the Non-negative Matrix Factorization (NMF) in `LightHamHead` is initialized randomly. To control this randomness, please set the random seed when testing. You can modify [`./tools/test.py`](https://github.com/open-mmlab/mmsegmentation/blob/main/tools/test.py) like:
+
+```python
+def main():
+    from mmengine.runner import set_random_seed
+    random_seed = xxx  # set the random seed recorded in the training log
+    set_random_seed(random_seed, deterministic=False)
+    ...
+```
+
+- The model performance is sensitive to the random seed; please refer to the log file for the specific seed settings. If you choose a different seed, the results might differ from those in the table. Taking SegNeXt Large as an example, its results range from 49.60 to 51.0.
+
+## Citation
+
+```bibtex
+@article{guo2022segnext,
+  title={SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation},
+  author={Guo, Meng-Hao and Lu, Cheng-Ze and Hou, Qibin and Liu, Zhengning and Cheng, Ming-Ming and Hu, Shi-Min},
+  journal={arXiv preprint arXiv:2209.08575},
+  year={2022}
+}
+```
diff --git a/configs/segnext/metafile.yaml b/configs/segnext/metafile.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3c8ff5bb92cbfb784790a0f08ec12bba83610f27
--- /dev/null
+++ b/configs/segnext/metafile.yaml
@@ -0,0 +1,109 @@
+Collections:
+- Name: SegNeXt
+  License: Apache License 2.0
+  Metadata:
+    Training Data:
+    - ADE20K
+  Paper:
+    Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation'
+    URL: https://arxiv.org/abs/2209.08575
+  README: configs/segnext/README.md
+  Frameworks:
+  - PyTorch
+Models:
+- Name: segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512
+  In Collection: SegNeXt
+  Results:
+    Task: Semantic Segmentation
+    Dataset: ADE20K
+    Metrics:
+      mIoU: 41.5
+      mIoU(ms+flip): 42.59
+  Config: configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py
+  Metadata:
+    Training Data: ADE20K
+    Batch Size: 16
+    Architecture:
+    - MSCAN-T
+    - SegNeXt
+    Training Resources: 1x A100 GPUS
+    Memory (GB): 17.88
+    Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k_20230210_140244-05bd8466.pth
+    Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k_20230210_140244.log.json
+  Paper:
+    Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation'
+    URL: https://arxiv.org/abs/2209.08575
+  Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/mscan.py#L328
+  Framework: PyTorch
+- Name: segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512
+  In Collection: SegNeXt
+  Results:
+    Task: Semantic Segmentation
+    Dataset: ADE20K
+    Metrics:
+      mIoU: 44.16
+      mIoU(ms+flip): 45.81
+  Config: configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py
+  Metadata:
+    Training Data: ADE20K
+    Batch Size: 16
+    Architecture:
+    - MSCAN-S
+    - SegNeXt
+    Training Resources: 1x A100 GPUS
+    Memory (GB): 21.47
+    Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k_20230214_113014-43013668.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k_20230214_113014.log.json + Paper: + Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation' + URL: https://arxiv.org/abs/2209.08575 + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/mscan.py#L328 + Framework: PyTorch +- Name: segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512 + In Collection: SegNeXt + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.03 + mIoU(ms+flip): 49.68 + Config: configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MSCAN-B + - SegNeXt + Training Resources: 1x A100 GPUS + Memory (GB): 31.03 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k_20230209_172053-b6f6c70c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k_20230209_172053.log.json + Paper: + Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation' + URL: https://arxiv.org/abs/2209.08575 + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/mscan.py#L328 + Framework: PyTorch +- Name: segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512 + In Collection: SegNeXt + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 50.99 + mIoU(ms+flip): 52.1 + Config: configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MSCAN-L + - SegNeXt + Training Resources: 1x A100 GPUS + Memory (GB): 43.32 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k_20230209_172055-19b14b63.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k_20230209_172055.log.json + Paper: + Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation' + URL: https://arxiv.org/abs/2209.08575 + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/mscan.py#L328 + Framework: PyTorch diff --git a/configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py b/configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..000f4484838f0d1a4491d867c5f01f1777a8ad62 --- /dev/null +++ b/configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py @@ -0,0 +1,28 @@ +_base_ = './segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py' + +# model settings +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_b_20230227-3ab7d230.pth' # noqa +ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + embed_dims=[64, 128, 320, 512], + depths=[3, 3, 12, 3], + init_cfg=dict(type='Pretrained', 
checkpoint=checkpoint_file), + drop_path_rate=0.1, + norm_cfg=dict(type='BN', requires_grad=True)), + decode_head=dict( + type='LightHamHead', + in_channels=[128, 320, 512], + in_index=[1, 2, 3], + channels=512, + ham_channels=512, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=ham_norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py b/configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..212d0a8557c5806a4010b62578fe78d1bbad8e9d --- /dev/null +++ b/configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py @@ -0,0 +1,27 @@ +_base_ = './segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py' +# model settings +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_l_20230227-cef260d4.pth' # noqa +ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + embed_dims=[64, 128, 320, 512], + depths=[3, 5, 27, 3], + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + drop_path_rate=0.3, + norm_cfg=dict(type='BN', requires_grad=True)), + decode_head=dict( + type='LightHamHead', + in_channels=[128, 320, 512], + in_index=[1, 2, 3], + channels=1024, + ham_channels=1024, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=ham_norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py b/configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9a90779a605fb67b73df996349f01b5c445890af --- /dev/null +++ b/configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py @@ -0,0 +1,27 @@ +_base_ = './segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py' +# model settings +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_s_20230227-f33ccdf2.pth' # noqa +ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + embed_dims=[64, 128, 320, 512], + depths=[2, 2, 4, 2], + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + norm_cfg=dict(type='BN', requires_grad=True)), + decode_head=dict( + type='LightHamHead', + in_channels=[128, 320, 512], + in_index=[1, 2, 3], + channels=256, + ham_channels=256, + ham_kwargs=dict(MD_R=16), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=ham_norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py b/configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c8d6da85ff488fae24b25d0fb0c92d87e87e395d --- /dev/null +++ b/configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py @@ -0,0 +1,84 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py', + '../_base_/datasets/ade20k.py' 
+] +# model settings +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_t_20230227-119e8c9f.pth' # noqa +ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +crop_size = (512, 512) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=(512, 512), + test_cfg=dict(size_divisor=32)) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='MSCAN', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + embed_dims=[32, 64, 160, 256], + mlp_ratios=[8, 8, 4, 4], + drop_rate=0.0, + drop_path_rate=0.1, + depths=[3, 3, 5, 2], + attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]], + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='BN', requires_grad=True)), + decode_head=dict( + type='LightHamHead', + in_channels=[64, 160, 256], + in_index=[1, 2, 3], + channels=256, + ham_channels=256, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=ham_norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ham_kwargs=dict( + MD_S=1, + MD_R=16, + train_steps=6, + eval_steps=7, + inv_t=100, + rand_init=True)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# dataset settings +train_dataloader = dict(batch_size=16) + +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + power=1.0, + begin=1500, + end=160000, + eta_min=0.0, + by_epoch=False, + ) +] diff --git a/configs/sem_fpn/README.md b/configs/sem_fpn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..697cf506e2054ec8d2ae7524ae4a9072abd22424 --- /dev/null +++ b/configs/sem_fpn/README.md @@ -0,0 +1,51 @@ +# Semantic FPN + +> [Panoptic Feature Pyramid Networks](https://arxiv.org/abs/1901.02446) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The recently introduced panoptic segmentation task has renewed our community's interest in unifying the tasks of instance segmentation (for thing classes) and semantic segmentation (for stuff classes). However, current state-of-the-art methods for this joint task use separate and dissimilar networks for instance and semantic segmentation, without performing any shared computation. In this work, we aim to unify these methods at the architectural level, designing a single network for both tasks. Our approach is to endow Mask R-CNN, a popular instance segmentation method, with a semantic segmentation branch using a shared Feature Pyramid Network (FPN) backbone. Surprisingly, this simple baseline not only remains effective for instance segmentation, but also yields a lightweight, top-performing method for semantic segmentation. In this work, we perform a detailed study of this minimally extended version of Mask R-CNN with FPN, which we refer to as Panoptic FPN, and show it is a robust and accurate baseline for both tasks. 
Given its effectiveness and conceptual simplicity, we hope our method can serve as a strong baseline and aid future research in panoptic segmentation. + + + +
+ +
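+As a rough sketch of the semantic branch described above, each FPN level can be processed by a small convolutional head, upsampled to a common 1/4 resolution, summed, and classified. This is an illustration under assumed shapes and channel widths, not mmseg's actual `FPNHead`:
+
+```python
+import torch
+import torch.nn as nn
+
+class SemanticFPNSketch(nn.Module):
+    """Illustrative semantic branch over FPN features (not mmseg's FPNHead)."""
+
+    def __init__(self, in_channels: int = 256, channels: int = 128,
+                 num_classes: int = 19):
+        super().__init__()
+        self.scale_heads = nn.ModuleList()
+        for num_ups in range(4):  # P2..P5 need 0..3 upsampling stages
+            layers = [nn.Conv2d(in_channels, channels, 3, padding=1),
+                      nn.GroupNorm(32, channels),
+                      nn.ReLU(inplace=True)]
+            for _ in range(num_ups):  # double spatial size until 1/4 scale
+                layers += [nn.Upsample(scale_factor=2, mode='bilinear',
+                                       align_corners=False),
+                           nn.Conv2d(channels, channels, 3, padding=1),
+                           nn.GroupNorm(32, channels),
+                           nn.ReLU(inplace=True)]
+            self.scale_heads.append(nn.Sequential(*layers))
+        self.classifier = nn.Conv2d(channels, num_classes, 1)
+
+    def forward(self, feats):
+        # feats: [P2, P3, P4, P5] at strides 4, 8, 16, 32
+        fused = sum(head(f) for head, f in zip(self.scale_heads, feats))
+        return self.classifier(fused)  # logits at 1/4 input resolution
+
+feats = [torch.randn(1, 256, 128 // 2**i, 256 // 2**i) for i in range(4)]
+print(SemanticFPNSketch()(feats).shape)  # torch.Size([1, 19, 128, 256])
+```
+
+Element-wise summation of the per-level heads is what makes the branch lightweight: all levels share one classifier, and only the upsampling depth differs per level.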
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FPN | R-50 | 512x1024 | 80000 | 2.8 | 13.54 | V100 | 74.52 | 76.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn/fpn_r50_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes-20200717_021437.log.json) | +| FPN | R-101 | 512x1024 | 80000 | 3.9 | 10.29 | V100 | 75.80 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn/fpn_r101_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes-20200717_012416.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FPN | R-50 | 512x512 | 160000 | 4.9 | 55.77 | V100 | 37.49 | 39.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn/fpn_r50_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k-20200718_131734.log.json) | +| FPN | R-101 | 512x512 | 160000 | 5.9 | 40.58 | V100 | 39.35 | 40.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn/fpn_r101_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k-20200718_131734.log.json) | + +## Citation + +```bibtex +@inproceedings{kirillov2019panoptic, + title={Panoptic feature pyramid networks}, + 
author={Kirillov, Alexander and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={6399--6408}, + year={2019} +} +``` diff --git a/configs/sem_fpn/fpn_r101_4xb2-80k_cityscapes-512x1024.py b/configs/sem_fpn/fpn_r101_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..1e9bcfbb5978fc808e93dcd8235044290f26925d --- /dev/null +++ b/configs/sem_fpn/fpn_r101_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './fpn_r50_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/sem_fpn/fpn_r101_4xb4-160k_ade20k-512x512.py b/configs/sem_fpn/fpn_r101_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..adad1a4f38508a8eec50d436c52e190a3e4ce931 --- /dev/null +++ b/configs/sem_fpn/fpn_r101_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,5 @@ +_base_ = './fpn_r50_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/sem_fpn/fpn_r50_4xb2-80k_cityscapes-512x1024.py b/configs/sem_fpn/fpn_r50_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..bf71d388e9cf6c0dd93a3800195634a442a0485b --- /dev/null +++ b/configs/sem_fpn/fpn_r50_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fpn_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/sem_fpn/fpn_r50_4xb4-160k_ade20k-512x512.py b/configs/sem_fpn/fpn_r50_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..4e4bc57788a8ea0584607596b4d789c1bc0f8edb --- /dev/null +++ b/configs/sem_fpn/fpn_r50_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fpn_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, decode_head=dict(num_classes=150)) diff --git a/configs/sem_fpn/metafile.yaml b/configs/sem_fpn/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e734897245775c35fdcac713b8a4173e6f89d0d4 --- /dev/null +++ b/configs/sem_fpn/metafile.yaml @@ -0,0 +1,110 @@ +Collections: +- Name: FPN + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + README: configs/sem_fpn/README.md + Frameworks: + - PyTorch +Models: +- Name: fpn_r50_4xb2-80k_cityscapes-512x1024 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.52 + mIoU(ms+flip): 76.08 + Config: configs/sem_fpn/fpn_r50_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - FPN + Training Resources: 4x V100 GPUS + Memory (GB): 2.8 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes-20200717_021437.log.json + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 + Framework: PyTorch +- Name: fpn_r101_4xb2-80k_cityscapes-512x1024 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.8 + mIoU(ms+flip): 77.4 + Config: configs/sem_fpn/fpn_r101_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - FPN + Training Resources: 4x V100 GPUS + Memory (GB): 3.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes-20200717_012416.log.json + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 + Framework: PyTorch +- Name: fpn_r50_4xb4-160k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.49 + mIoU(ms+flip): 39.09 + Config: configs/sem_fpn/fpn_r50_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50 + - FPN + Training Resources: 4x V100 GPUS + Memory (GB): 4.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k-20200718_131734.log.json + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 + Framework: PyTorch +- Name: fpn_r101_4xb4-160k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.35 + mIoU(ms+flip): 40.72 + Config: configs/sem_fpn/fpn_r101_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101 + - FPN + Training Resources: 4x V100 GPUS + Memory (GB): 5.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k-20200718_131734.log.json + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 + Framework: PyTorch diff --git a/configs/setr/README.md b/configs/setr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..15be6ec099a329adea635d741fa8b2db69b175cf --- /dev/null +++ b/configs/setr/README.md @@ -0,0 +1,74 @@ +# SETR + +> [Rethinking 
Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers](https://arxiv.org/abs/2012.15840) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Most recent semantic segmentation methods adopt a fully-convolutional network (FCN) with an encoder-decoder architecture. The encoder progressively reduces the spatial resolution and learns more abstract/semantic visual concepts with larger receptive fields. Since context modeling is critical for segmentation, the latest efforts have been focused on increasing the receptive field, through either dilated/atrous convolutions or inserting attention modules. However, the encoder-decoder based FCN architecture remains unchanged. In this paper, we aim to provide an alternative perspective by treating semantic segmentation as a sequence-to-sequence prediction task. Specifically, we deploy a pure transformer (ie, without convolution and resolution reduction) to encode an image as a sequence of patches. With the global context modeled in every layer of the transformer, this encoder can be combined with a simple decoder to provide a powerful segmentation model, termed SEgmentation TRansformer (SETR). Extensive experiments show that SETR achieves new state of the art on ADE20K (50.28% mIoU), Pascal Context (55.83% mIoU) and competitive results on Cityscapes. Particularly, we achieve the first position in the highly competitive ADE20K test server leaderboard on the day of submission. + + + +
+ +
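+To make the "simple decoder" idea concrete, the toy sketch below decodes ViT patch tokens the naive SETR way: reshape the token sequence back into a 2D feature map, project to class logits, and bilinearly upsample. All shapes here are assumptions for illustration (512x512 input, 16x16 patches, ViT-L width); this is a stand-in, not mmseg's `SETRUPHead`:
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class NaiveSETRDecodeSketch(nn.Module):
+    """Toy SETR-naive decoder: tokens -> 2D map -> class logits -> upsample."""
+
+    def __init__(self, embed_dims: int = 1024, num_classes: int = 150,
+                 patch_size: int = 16):
+        super().__init__()
+        self.patch_size = patch_size
+        self.classify = nn.Conv2d(embed_dims, num_classes, kernel_size=1)
+
+    def forward(self, tokens: torch.Tensor, grid_hw: tuple) -> torch.Tensor:
+        b, n, c = tokens.shape      # (batch, num_patches, embed_dims)
+        h, w = grid_hw              # patch grid, e.g. (32, 32) for 512x512
+        feat = tokens.transpose(1, 2).reshape(b, c, h, w)
+        logits = self.classify(feat)
+        # recover the input resolution (x patch_size)
+        return F.interpolate(logits, scale_factor=self.patch_size,
+                             mode='bilinear', align_corners=False)
+
+tokens = torch.randn(1, 32 * 32, 1024)  # ViT-L tokens for a 512x512 image
+print(NaiveSETRDecodeSketch()(tokens, (32, 32)).shape)  # (1, 150, 512, 512)
+```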
+
+```none
+Note: the SETR decode head comes in two versions; see the Naive/PUP and MLA configs below.
+```
+
+## Usage
+
+You can download the pretrained weights from [here](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_384-b3be5167.pth), then convert their keys with the script `vit2mmseg.py` in the tools directory.
+
+```shell
+python tools/model_converters/vit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH}
+```
+
+For example:
+
+```shell
+python tools/model_converters/vit2mmseg.py \
+jx_vit_large_p16_384-b3be5167.pth pretrain/vit_large_p16.pth
+```
+
+This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`.
+
+## Results and models
+
+### ADE20K
+
+| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ---------- | -------- | --------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | ------ | -------- |
+| SETR Naive | ViT-L | 512x512 | 16 | 160000 | 18.40 | 4.72 | V100 | 48.28 | 49.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258.log.json) |
+| SETR PUP | ViT-L | 512x512 | 16 | 160000 | 19.54 | 4.50 | V100 | 48.24 | 49.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343.log.json) |
+| SETR MLA | ViT-L | 512x512 | 8 | 160000 | 10.96 | - | V100 | 47.34 | 49.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118.log.json) |
+| SETR MLA | ViT-L | 512x512 | 16 | 160000 | 17.30 | 5.25 | V100 | 47.39 | 49.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json) |
+
+### Cityscapes
+
+| Method | Backbone | Crop Size | Batch 
Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| SETR Naive | ViT-L | 768x768 | 8 | 80000 | 24.06 | 0.39 | V100 | 78.10 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_naive_8xb1-80k_cityscapes-768x768.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505.log.json) | +| SETR PUP | ViT-L | 768x768 | 8 | 80000 | 27.96 | 0.37 | V100 | 79.21 | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_pup_8xb1-80k_cityscapes-768x768.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115.log.json) | +| SETR MLA | ViT-L | 768x768 | 8 | 80000 | 24.10 | 0.41 | V100 | 77.00 | 79.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_mla_8xb1-80k_cityscapes-768x768.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003.log.json) | + +## Citation + +```bibtex +@article{zheng2020rethinking, + title={Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers}, + author={Zheng, Sixiao and Lu, Jiachen and Zhao, Hengshuang and Zhu, Xiatian and Luo, Zekun and Wang, Yabiao and Fu, Yanwei and Feng, Jianfeng and Xiang, Tao and Torr, Philip HS and others}, + journal={arXiv preprint arXiv:2012.15840}, + year={2020} +} +``` diff --git a/configs/setr/metafile.yaml b/configs/setr/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e6bc087dd326b91cd1c9289a3dbf6535d850e1f --- /dev/null +++ b/configs/setr/metafile.yaml @@ -0,0 +1,197 @@ +Collections: +- Name: SETR + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + - Cityscapes + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + README: configs/setr/README.md + Frameworks: + - PyTorch +Models: +- Name: setr_vit-l_naive_8xb2-160k_ade20k-512x512 + In Collection: SETR + 
Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.28 + mIoU(ms+flip): 49.56 + Config: configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-L + - SETR + - Naive + Training Resources: 8x V100 GPUS + Memory (GB): 18.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_pup_8xb2-160k_ade20k-512x512 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.24 + mIoU(ms+flip): 49.99 + Config: configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-L + - SETR + - PUP + Training Resources: 8x V100 GPUS + Memory (GB): 19.54 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l-mla_8xb1-160k_ade20k-512x512 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.34 + mIoU(ms+flip): 49.05 + Config: configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-L + - SETR + - MLA + Training Resources: 8x V100 GPUS + Memory (GB): 10.96 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_mla_8xb2-160k_ade20k-512x512 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.39 + mIoU(ms+flip): 49.37 + Config: configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-L + - SETR + - MLA + Training Resources: 8x V100 GPUS + Memory (GB): 17.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_naive_8xb1-80k_cityscapes-768x768 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.1 + mIoU(ms+flip): 80.22 + Config: configs/setr/setr_vit-l_naive_8xb1-80k_cityscapes-768x768.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - ViT-L + - SETR + - Naive + Training Resources: 8x V100 GPUS + Memory (GB): 24.06 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_pup_8xb1-80k_cityscapes-768x768 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.21 + mIoU(ms+flip): 81.02 + Config: configs/setr/setr_vit-l_pup_8xb1-80k_cityscapes-768x768.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - ViT-L + - SETR + - PUP + Training Resources: 8x V100 GPUS + Memory (GB): 27.96 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_mla_8xb1-80k_cityscapes-768x768 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.0 + mIoU(ms+flip): 79.59 + Config: configs/setr/setr_vit-l_mla_8xb1-80k_cityscapes-768x768.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - ViT-L + - SETR + - MLA + Training Resources: 8x V100 GPUS + Memory (GB): 24.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch diff --git a/configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py b/configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1c6e2845f9e3cf9b270bccf0cda7c2adcdccc600 --- /dev/null +++ b/configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py @@ -0,0 +1,90 @@ +_base_ = [ + '../_base_/models/setr_mla.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + img_size=(512, 512), + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + decode_head=dict(num_classes=150), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=0, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=1, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=2, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=3, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict(lr=0.001, weight_decay=0.0) +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) +# num_gpus: 8 -> batch_size: 8 +train_dataloader = dict(batch_size=1) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/setr/setr_vit-l_mla_8xb1-80k_cityscapes-768x768.py b/configs/setr/setr_vit-l_mla_8xb1-80k_cityscapes-768x768.py new file mode 100644 index 0000000000000000000000000000000000000000..026557f505f118f5c5cab0421b77095981f6c1de --- /dev/null +++ b/configs/setr/setr_vit-l_mla_8xb1-80k_cityscapes-768x768.py @@ -0,0 +1,23 @@ +_base_ = [ + '../_base_/models/setr_mla.py', '../_base_/datasets/cityscapes_768x768.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (768, 768) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + drop_rate=0, + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + test_cfg=dict(mode='slide', crop_size=(768, 768), stride=(512, 512))) + +optimizer = dict(lr=0.002, 
weight_decay=0.0) +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) +train_dataloader = dict(batch_size=1) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py b/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..4d3fb7d4e1d51137f2ae63eb1290f496ce5af5e6 --- /dev/null +++ b/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,6 @@ +_base_ = ['./setr_vit-l-mla_8xb1-160k_ade20k-512x512.py'] + +# num_gpus: 8 -> batch_size: 16 +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/setr/setr_vit-l_naive_8xb1-80k_cityscapes-768x768.py b/configs/setr/setr_vit-l_naive_8xb1-80k_cityscapes-768x768.py new file mode 100644 index 0000000000000000000000000000000000000000..db49317301300ad28fd34c6969335b0067801858 --- /dev/null +++ b/configs/setr/setr_vit-l_naive_8xb1-80k_cityscapes-768x768.py @@ -0,0 +1,24 @@ +_base_ = [ + '../_base_/models/setr_naive.py', + '../_base_/datasets/cityscapes_768x768.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (768, 768) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + test_cfg=dict(mode='slide', crop_size=(768, 768), stride=(512, 512))) + +optimizer = dict(weight_decay=0.0) +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) +train_dataloader = dict(batch_size=1) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py b/configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..109996c1b6fc42ce429ebf2a215d46bafca975e4 --- /dev/null +++ b/configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,72 @@ +_base_ = [ + '../_base_/models/setr_naive.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + img_size=(512, 512), + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + decode_head=dict(num_classes=150), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + 
act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict(lr=0.01, weight_decay=0.0) +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) +# num_gpus: 8 -> batch_size: 16 +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/setr/setr_vit-l_pup_8xb1-80k_cityscapes-768x768.py b/configs/setr/setr_vit-l_pup_8xb1-80k_cityscapes-768x768.py new file mode 100644 index 0000000000000000000000000000000000000000..999ab180382914d6631ca42784a09fca74dfacd4 --- /dev/null +++ b/configs/setr/setr_vit-l_pup_8xb1-80k_cityscapes-768x768.py @@ -0,0 +1,70 @@ +_base_ = [ + '../_base_/models/setr_pup.py', '../_base_/datasets/cityscapes_768x768.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (768, 768) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +crop_size = (768, 768) +model = dict( + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=2, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=2, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=2, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(512, 512))) + +optimizer = dict(weight_decay=0.0) +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +train_dataloader = dict(batch_size=1) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py b/configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..e9bfb2201d790ac6490faed4676a43f2ecccaaca --- /dev/null +++ b/configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,72 @@ +_base_ = [ + '../_base_/models/setr_pup.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + img_size=(512, 512), + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + decode_head=dict(num_classes=150), + auxiliary_head=[ + dict( + type='SETRUPHead', + 
in_channels=1024, + channels=256, + in_index=0, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict(lr=0.001, weight_decay=0.0) +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) +# num_gpus: 8 -> batch_size: 16 +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_b_l_224_snnetv2.py b/configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_b_l_224_snnetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..2de326da180279d3f04574a226cd4597d09061c5 --- /dev/null +++ b/configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_b_l_224_snnetv2.py @@ -0,0 +1,135 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='SNNetPlus', + include_ls=True, + include_sl=True, + include_lsl=True, + include_sls=True, + lora_r=4, + anchors=[ + dict( + # type='MAE', + pretrained='pretrained/deit_3_base_224_21k_mmseg.pth', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + init_values=1.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + is_deit_3=True, # new + is_anchor=True, # new + adapt_tune=False, # new + out_indices=(4, 7, 9, 11) + ), + dict( + # type='MAE', + pretrained='pretrained/deit_3_large_224_21k_mmseg.pth', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + mlp_ratio=4, + init_values=1.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + is_deit_3=True, # new + is_anchor=True, # new + adapt_tune=False, # new + out_indices=(9, 14, 19, 23) + ) + ] + ), + decode_head=dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=3, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict( + lr=0.01, + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +# num_gpus: 8 -> batch_size: 16 +train_dataloader = dict(batch_size=2, num_workers=2) + +find_unused_parameters = True + +custom_hooks = [dict(type='SNNetHook', priority='ABOVE_NORMAL')] \ No newline at end of file diff --git a/configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.py b/configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..a7358908b9913c9cdcefafa6a18c533b4b4f0044 --- /dev/null +++ b/configs/snnet/setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.py @@ -0,0 +1,143 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + + +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=(512, 512)) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='SNNetv2', + include_ls=True, + include_sl=True, + include_lsl=True, + include_sls=True, + lora_r=16, + anchors=[ + dict( + # type='MAE', + pretrained='pretrained/deit_3_small_224_21k_mmseg.pth', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=384, + num_layers=12, + num_heads=6, + mlp_ratio=4, + init_values=1.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + is_deit_3=True, # new + is_anchor=True, # new + out_indices=(4, 7, 9, 11) + ), + dict( + # type='MAE', + pretrained='pretrained/deit_3_large_224_21k_mmseg.pth', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + mlp_ratio=4, + init_values=1.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + is_deit_3=True, # new + is_anchor=True, # new + out_indices=(9, 14, 19, 23) + ) + ] + ), + decode_head=dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=3, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + 
loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict( + lr=0.01, + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +# num_gpus: 8 -> batch_size: 16 +train_dataloader = dict(batch_size=2, num_workers=2, persistent_workers=True) + +find_unused_parameters = True + +custom_hooks = [dict(type='SNNetHook', priority='ABOVE_NORMAL')] \ No newline at end of file diff --git a/configs/snnet/setr_naive_512x512_80k_b16_coco_stuff10k_deit_3_b_l_224_snnetv2.py b/configs/snnet/setr_naive_512x512_80k_b16_coco_stuff10k_deit_3_b_l_224_snnetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..e23e40ec8374981b73541d400c95287d8091716f --- /dev/null +++ b/configs/snnet/setr_naive_512x512_80k_b16_coco_stuff10k_deit_3_b_l_224_snnetv2.py @@ -0,0 +1,133 @@ +_base_ = [ + '../_base_/datasets/coco-stuff10k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='SNNetv2', + include_ls=True, + include_sl=True, + include_lsl=True, + include_sls=True, + lora_r=4, + anchors=[ + dict( + # type='MAE', + pretrained='pretrained/deit_3_base_224_21k_mmseg.pth', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + init_values=1.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + is_deit_3=True, # new + is_anchor=True, # new + out_indices=(4, 7, 9, 11) + ), + dict( + # type='MAE', + pretrained='pretrained/deit_3_large_224_21k_mmseg.pth', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + mlp_ratio=4, + init_values=1.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + is_deit_3=True, # new + is_anchor=True, # new + out_indices=(9, 14, 19, 23) + ) + ] + ), + decode_head=dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=3, + num_classes=171, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=171, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=171, + 
dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=171, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict( + lr=0.01, + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +# num_gpus: 8 -> batch_size: 16 +train_dataloader = dict(batch_size=2, num_workers=2, persistent_workers=True) + +find_unused_parameters = True + +custom_hooks = [dict(type='SNNetHook', priority='ABOVE_NORMAL')] \ No newline at end of file diff --git a/configs/snnet/setr_naive_512x512_80k_b16_coco_stuff10k_deit_3_s_l_224_snnetv2.py b/configs/snnet/setr_naive_512x512_80k_b16_coco_stuff10k_deit_3_s_l_224_snnetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..0b9d0ea7acd8765f7c2a9cd9efea6466e0a28bbf --- /dev/null +++ b/configs/snnet/setr_naive_512x512_80k_b16_coco_stuff10k_deit_3_s_l_224_snnetv2.py @@ -0,0 +1,133 @@ +_base_ = [ + '../_base_/datasets/coco-stuff10k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='SNNetv2', + include_ls=True, + include_sl=True, + include_lsl=True, + include_sls=True, + lora_r=16, + anchors=[ + dict( + # type='MAE', + pretrained='pretrained/deit_3_small_224_21k_mmseg.pth', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=384, + num_layers=12, + num_heads=6, + mlp_ratio=4, + init_values=1.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + is_deit_3=True, # new + is_anchor=True, # new + out_indices=(4, 7, 9, 11) + ), + dict( + # type='MAE', + pretrained='pretrained/deit_3_large_224_21k_mmseg.pth', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + mlp_ratio=4, + init_values=1.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + is_deit_3=True, # new + is_anchor=True, # new + out_indices=(9, 14, 19, 23) + ) + ] + ), + decode_head=dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=3, + num_classes=171, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=171, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=171, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=171, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict( + lr=0.01, + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +# num_gpus: 8 -> batch_size: 16 +train_dataloader = dict(batch_size=2, num_workers=2, persistent_workers=True) + +find_unused_parameters = True + +custom_hooks = [dict(type='SNNetHook', priority='ABOVE_NORMAL')] \ No newline at end of file diff --git a/configs/stdc/README.md b/configs/stdc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3e8bf606880287ccddf1c23f87cb52a7cade7021 --- /dev/null +++ b/configs/stdc/README.md @@ -0,0 +1,73 @@ +# STDC + +> [Rethinking BiSeNet For Real-time Semantic Segmentation](https://arxiv.org/abs/2104.13188) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +BiSeNet has been proved to be a popular two-stream network for real-time segmentation. However, its principle of adding an extra path to encode spatial information is time-consuming, and the backbones borrowed from pretrained tasks, e.g., image classification, may be inefficient for image segmentation due to the deficiency of task-specific design. To handle these problems, we propose a novel and efficient structure named Short-Term Dense Concatenate network (STDC network) by removing structure redundancy. Specifically, we gradually reduce the dimension of feature maps and use the aggregation of them for image representation, which forms the basic module of STDC network. In the decoder, we propose a Detail Aggregation module by integrating the learning of spatial information into low-level layers in single-stream manner. Finally, the low-level features and deep features are fused to predict the final segmentation results. Extensive experiments on Cityscapes and CamVid dataset demonstrate the effectiveness of our method by achieving promising trade-off between segmentation accuracy and inference speed. On Cityscapes, we achieve 71.9% mIoU on the test set with a speed of 250.4 FPS on NVIDIA GTX 1080Ti, which is 45.2% faster than the latest methods, and achieve 76.8% mIoU with 97.0 FPS while inferring on higher resolution images. + + + +
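+As a quick orientation before the usage notes below, the Short-Term Dense Concatenate idea from the abstract can be sketched in a few lines of PyTorch. This is an illustration only, not the implementation used by this repo; the 1/2, 1/4, 1/8, 1/8 channel split follows the paper's description:
+
+```python
+import torch
+import torch.nn as nn
+
+
+class STDCBlockSketch(nn.Module):
+    """Toy STDC block: stage widths shrink to 1/2, 1/4, 1/8, 1/8 of
+    out_ch, and the block output concatenates all stage outputs."""
+
+    def __init__(self, in_ch, out_ch):
+        super().__init__()
+        widths = [out_ch // 2, out_ch // 4, out_ch // 8, out_ch // 8]
+        self.stages = nn.ModuleList()
+        prev = in_ch
+        for i, w in enumerate(widths):
+            k = 1 if i == 0 else 3  # a 1x1 conv first, 3x3 convs after
+            self.stages.append(nn.Sequential(
+                nn.Conv2d(prev, w, k, padding=k // 2, bias=False),
+                nn.BatchNorm2d(w),
+                nn.ReLU(inplace=True)))
+            prev = w
+
+    def forward(self, x):
+        outs = []
+        for stage in self.stages:
+            x = stage(x)
+            outs.append(x)
+        return torch.cat(outs, dim=1)  # aggregation by concatenation
+
+
+feat = STDCBlockSketch(64, 256)(torch.randn(1, 64, 32, 32))
+print(feat.shape)  # torch.Size([1, 256, 32, 32])
+```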
+ +## Usage + +We have provided [ImageNet Pretrained STDCNet Weights](https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1) converted from the [official repo](https://github.com/MichaelFan01/STDC-Seg). + +If you want to convert keys on your own to use official repositories' pre-trained models, we also provide a script [`stdc2mmseg.py`](../../tools/model_converters/stdc2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/MichaelFan01/STDC-Seg) to MMSegmentation style. + +```shell +python tools/model_converters/stdc2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} ${STDC_TYPE} +``` + +E.g. + +```shell +python tools/model_converters/stdc2mmseg.py ./STDCNet813M_73.91.tar ./pretrained/stdc1.pth STDC1 + +python tools/model_converters/stdc2mmseg.py ./STDCNet1446_76.47.tar ./pretrained/stdc2.pth STDC2 +``` + +This script converts a model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| STDC | STDC1 (No Pretrain) | 512x1024 | 80000 | 7.15 | 23.06 | V100 | 71.82 | 73.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/stdc/stdc1_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048-74e6920a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048.log.json) | +| STDC | STDC1 | 512x1024 | 80000 | - | - | V100 | 74.94 | 76.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/stdc/stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648-3d4c2981.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648.log.json) | +| STDC | STDC2 (No Pretrain) | 512x1024 | 80000 | 8.27 | 23.71 | V100 | 73.15 | 76.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/stdc/stdc2_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015-fb1e3a1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015.log.json) | +| STDC | STDC2 | 512x1024 | 80000 | - | - | V100 | 76.67 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/stdc/stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048-1f8f0f6c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048.log.json) | + +Note: + +- For STDC on the Cityscapes dataset, the default setting is 4 GPUs with 12 samples per GPU during training. +- `No Pretrain` means the model is trained from scratch. +- The FPS is for reference only. Our environment also differs from the paper's setting, which uses TensorRT with input sizes of `512x1024` and `768x1536`, i.e., 50% and 75% of our input size, respectively. +- The parameter `fusion_kernel` in `STDCHead` is not learnable. In the official repo, `find_unused_parameters=True` is set [here](https://github.com/MichaelFan01/STDC-Seg/blob/59ff37fbd693b99972c76fcefe97caa14aeb619f/train.py#L220). You may verify this by printing the model parameters of the original repo yourself. + +## Citation + +```bibtex +@inproceedings{fan2021rethinking, + title={Rethinking BiSeNet For Real-time Semantic Segmentation}, + author={Fan, Mingyuan and Lai, Shenqi and Huang, Junshi and Wei, Xiaoming and Chai, Zhenhua and Luo, Junfeng and Wei, Xiaolin}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={9716--9725}, + year={2021} +} +``` diff --git a/configs/stdc/metafile.yaml b/configs/stdc/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93cb14f50b1286a25f31069e28c888a9a1c2d640 --- /dev/null +++ b/configs/stdc/metafile.yaml @@ -0,0 +1,107 @@ +Collections: +- Name: STDC + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + README: configs/stdc/README.md + Frameworks: + - PyTorch +Models: +- Name: stdc1_4xb12-80k_cityscapes-512x1024 + In Collection: STDC + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.82 + mIoU(ms+flip): 73.89 + Config: configs/stdc/stdc1_4xb12-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 48 + Architecture: + - STDC1 + - STDC + Training Resources: 4x V100 GPUS + Memory (GB): 7.15 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048-74e6920a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048.log.json + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 + Framework: PyTorch +- Name: stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024 + In Collection: STDC + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.94 + mIoU(ms+flip): 76.97 + Config: configs/stdc/stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 48 + Architecture: + - STDC1 + - STDC + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648-3d4c2981.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648.log.json + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 + Framework: PyTorch +- Name: stdc2_4xb12-80k_cityscapes-512x1024 + In Collection: STDC + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.15 + mIoU(ms+flip): 76.13 + Config: configs/stdc/stdc2_4xb12-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 48 + Architecture: + - STDC2 + - STDC + Training Resources: 4x V100 GPUS + Memory (GB): 8.27 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015-fb1e3a1a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015.log.json + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 + Framework: PyTorch +- Name: stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024 + In Collection: STDC + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.67 + mIoU(ms+flip): 78.67 + Config: configs/stdc/stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 48 + Architecture: + - STDC2 + - STDC + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048-1f8f0f6c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048.log.json + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 + Framework: PyTorch diff --git a/configs/stdc/stdc1_4xb12-80k_cityscapes-512x1024.py b/configs/stdc/stdc1_4xb12-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..20aec3d5bf62d327443bf00e548534dc5abd5e80 --- /dev/null +++ b/configs/stdc/stdc1_4xb12-80k_cityscapes-512x1024.py @@ -0,0 +1,21 @@ +_base_ = [ + '../_base_/models/stdc.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +param_scheduler = [ + dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000), + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=1000, + end=80000, + by_epoch=False, + ) +] +train_dataloader = dict(batch_size=12, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/stdc/stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024.py b/configs/stdc/stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..15e807f9edabf85eea487068c4950d6e981fde7c --- /dev/null +++ 
b/configs/stdc/stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024.py @@ -0,0 +1,6 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/stdc/stdc1_20220308-5368626c.pth' # noqa +_base_ = './stdc1_4xb12-80k_cityscapes-512x1024.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint)))) diff --git a/configs/stdc/stdc2_4xb12-80k_cityscapes-512x1024.py b/configs/stdc/stdc2_4xb12-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..5657351698dc063a8f6b26f12f53bd4d75f1e662 --- /dev/null +++ b/configs/stdc/stdc2_4xb12-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './stdc1_4xb12-80k_cityscapes-512x1024.py' +model = dict(backbone=dict(backbone_cfg=dict(stdc_type='STDCNet2'))) diff --git a/configs/stdc/stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024.py b/configs/stdc/stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..05a202b74c7e1bb0284e1b8be28452edb3d96fc2 --- /dev/null +++ b/configs/stdc/stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024.py @@ -0,0 +1,6 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/stdc/stdc2_20220308-7dbd9127.pth' # noqa +_base_ = './stdc2_4xb12-80k_cityscapes-512x1024.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint)))) diff --git a/configs/swin/README.md b/configs/swin/README.md new file mode 100644 index 0000000000000000000000000000000000000000..18fcbae8bcfaecc1fedcc24bc205d46c488b4d12 --- /dev/null +++ b/configs/swin/README.md @@ -0,0 +1,76 @@ +# Swin Transformer + +> [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +This paper presents a new vision Transformer, called Swin Transformer, that capably serves as a general-purpose backbone for computer vision. Challenges in adapting Transformer from language to vision arise from differences between the two domains, such as large variations in the scale of visual entities and the high resolution of pixels in images compared to words in text. To address these differences, we propose a hierarchical Transformer whose representation is computed with Shifted windows. The shifted windowing scheme brings greater efficiency by limiting self-attention computation to non-overlapping local windows while also allowing for cross-window connection. This hierarchical architecture has the flexibility to model at various scales and has linear computational complexity with respect to image size. These qualities of Swin Transformer make it compatible with a broad range of vision tasks, including image classification (87.3 top-1 accuracy on ImageNet-1K) and dense prediction tasks such as object detection (58.7 box AP and 51.1 mask AP on COCO test-dev) and semantic segmentation (53.5 mIoU on ADE20K val). Its performance surpasses the previous state-of-the-art by a large margin of +2.7 box AP and +2.6 mask AP on COCO, and +3.2 mIoU on ADE20K, demonstrating the potential of Transformer-based models as vision backbones. The hierarchical design and the shifted window approach also prove beneficial for all-MLP architectures. The code and models are publicly available at [this https URL](https://github.com/microsoft/Swin-Transformer). + + + +
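+As a quick orientation before the usage notes below, the shifted-window scheme from the abstract can be sketched in a few lines of PyTorch. This is an illustration only, not this repo's backbone; the window size matches the `window_size=7` used in the configs below:
+
+```python
+import torch
+
+
+def window_partition(x, ws):
+    """Split (B, H, W, C) into (num_windows * B, ws * ws, C) tokens."""
+    b, h, w, c = x.shape
+    x = x.view(b, h // ws, ws, w // ws, ws, c)
+    return x.permute(0, 1, 3, 2, 4, 5).reshape(-1, ws * ws, c)
+
+
+x = torch.randn(1, 56, 56, 96)  # e.g. a stage-1 Swin-T feature map
+ws = 7                          # window_size=7, as in the configs below
+
+# W-MSA: self-attention runs independently inside each 7x7 window
+windows = window_partition(x, ws)                # (64, 49, 96)
+
+# SW-MSA: cyclically shift by ws // 2 before partitioning so the next
+# attention layer mixes tokens across the previous window borders
+shift = ws // 2
+shifted = torch.roll(x, shifts=(-shift, -shift), dims=(1, 2))
+shifted_windows = window_partition(shifted, ws)  # (64, 49, 96)
+print(windows.shape, shifted_windows.shape)
+```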
+ +## Usage + +We have provided pretrained models converted from the [official repo](https://github.com/microsoft/Swin-Transformer). + +If you want to convert keys on your own to use official repositories' pre-trained models, we also provide a script [`swin2mmseg.py`](../../tools/model_converters/swin2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/SwinTransformer/Swin-Transformer-Semantic-Segmentation) to MMSegmentation style. + +```shell +python tools/model_converters/swin2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/swin2mmseg.py https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth pretrain/swin_base_patch4_window7_224.pth +``` + +This script converts a model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +In our default setting, pretrained models and their corresponding [original models](https://github.com/microsoft/Swin-Transformer) are listed below: + +| pretrained models | original models | +| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| pretrain/swin_tiny_patch4_window7_224.pth | [swin_tiny_patch4_window7_224.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth) | +| pretrain/swin_small_patch4_window7_224.pth | [swin_small_patch4_window7_224.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth) | +| pretrain/swin_base_patch4_window7_224.pth | [swin_base_patch4_window7_224.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth) | +| pretrain/swin_base_patch4_window7_224_22k.pth | [swin_base_patch4_window7_224_22k.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth) | +| pretrain/swin_base_patch4_window12_384.pth | [swin_base_patch4_window12_384.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth) | +| pretrain/swin_base_patch4_window12_384_22k.pth | [swin_base_patch4_window12_384_22k.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth) | + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------------ | ----------------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | Swin-T | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 5.02 | 21.06 | V100 | 
44.41 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542.log.json) | +| UPerNet | Swin-S | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 6.17 | 14.72 | V100 | 47.72 | 49.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 7.61 | 12.65 | V100 | 47.99 | 49.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-22K | 224x224 | 16 | 160000 | - | - | V100 | 50.13 | 51.9 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-1K | 384x384 | 16 | 160000 | 8.52 | 12.10 | V100 | 48.35 | 49.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-22K | 384x384 | 16 | 160000 | - | - | V100 | 50.76 | 52.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459.log.json) | + +## Citation + +```bibtex +@article{liu2021Swin, + title={Swin Transformer: Hierarchical Vision Transformer using Shifted Windows}, + author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining}, + journal={arXiv preprint arXiv:2103.14030}, + year={2021} +} +``` diff --git a/configs/swin/metafile.yaml b/configs/swin/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67a4e075514d8a0cb3f0f78f2cfaf7a7be2ec9d4 --- /dev/null +++ b/configs/swin/metafile.yaml @@ -0,0 +1,143 @@ +Models: +- Name: swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.41 + mIoU(ms+flip): 45.79 + Config: configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-T + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 5.02 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.72 + mIoU(ms+flip): 49.24 + Config: configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 6.17 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.99 + mIoU(ms+flip): 49.57 + Config: configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.61 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 50.13 + mIoU(ms+flip): 51.9 + Config: configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - UPerNet + Training Resources: 8x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.35 + mIoU(ms+flip): 49.65 + Config: configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 8.52 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth 
+ Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 50.76 + mIoU(ms+flip): 52.4 + Config: configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - UPerNet + Training Resources: 8x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch diff --git a/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py b/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..11cea36703c597863220298bc9feee4672579f01 --- /dev/null +++ b/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,14 @@ +_base_ = [ + 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + pretrain_img_size=384, + embed_dims=128, + depths=[2, 2, 18, 2], + num_heads=[4, 8, 16, 32], + window_size=12), + decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), + auxiliary_head=dict(in_channels=512, num_classes=150)) diff --git a/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py b/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5c1171646e4d22116fbafadb57b397f651c8ad64 --- /dev/null +++ b/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,7 @@ +_base_ = [ + './swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py' # noqa +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_22k_20220317-e5c09f74.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) diff --git 
a/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py b/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..73bf6166ef22e8e3a1a08b56a6fc809c72c64f58 --- /dev/null +++ b/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,12 @@ +_base_ = [ + './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_20220317-e9b98025.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + embed_dims=128, + depths=[2, 2, 18, 2], + num_heads=[4, 8, 16, 32]), + decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), + auxiliary_head=dict(in_channels=512, num_classes=150)) diff --git a/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py b/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..96148cd71d9a1a87ca5065788d8e4aae20b44326 --- /dev/null +++ b/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,7 @@ +_base_ = [ + './swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_22k_20220317-4f79f7c0.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) diff --git a/configs/swin/swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py b/configs/swin/swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a0a654e026f7450b33a7d9a118c4797bbc302355 --- /dev/null +++ b/configs/swin/swin-large-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + 'swin-large-patch4-window7-in22k-pre_upernet_' + '8xb2-160k_ade20k-512x512.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + pretrain_img_size=384, + window_size=12)) diff --git a/configs/swin/swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py b/configs/swin/swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c93cdfeaaeb91a3aaefe8019afffd0d380b8f761 --- /dev/null +++ b/configs/swin/swin-large-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,15 @@ +_base_ = [ + 'swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_' + 'ade20k-512x512.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220412-aeecf2aa.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + pretrain_img_size=224, + embed_dims=192, + depths=[2, 2, 18, 2], + num_heads=[6, 12, 24, 48], + window_size=7), + decode_head=dict(in_channels=[192, 384, 768, 1536], num_classes=150), + auxiliary_head=dict(in_channels=768, num_classes=150)) diff --git 
a/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py b/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..19863dfc82b92a9c304819cd27f23a121f9e979d --- /dev/null +++ b/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + './swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + depths=[2, 2, 18, 2]), + decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), + auxiliary_head=dict(in_channels=384, num_classes=150)) diff --git a/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py b/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..f61a27627717b2eebfd26f71dc34d4255fd5b6a1 --- /dev/null +++ b/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,52 @@ +_base_ = [ + '../_base_/models/upernet_swin.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + use_abs_pos_embed=False, + drop_path_rate=0.3, + patch_norm=True), + decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), + auxiliary_head=dict(in_channels=384, num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/swin/swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py b/configs/swin/swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..663f769d731889c8c0bcb8322fa08b6660c4ba65 --- /dev/null +++ b/configs/swin/swin-tiny-patch4-window7_upernet_1xb8-20k_levir-256x256.py @@ -0,0 +1,56 @@ +_base_ = [ + '../_base_/models/upernet_swin.py', '../_base_/datasets/levir_256x256.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +crop_size = (256, 256) +norm_cfg = dict(type='BN', requires_grad=True) +data_preprocessor = dict( + size=crop_size, + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53, 123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375, 58.395, 57.12, 57.375]) + +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + in_channels=6, + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + use_abs_pos_embed=False, + drop_path_rate=0.3, + patch_norm=True), + decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=2), + auxiliary_head=dict(in_channels=384, num_classes=2)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=20000, + by_epoch=False, + ) +] + +train_dataloader = dict(batch_size=4) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/twins/README.md b/configs/twins/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4b3735b002fa65921503863c81063edb12ae37e --- /dev/null +++ b/configs/twins/README.md @@ -0,0 +1,76 @@ +# Twins + +> [Twins: Revisiting the Design of Spatial Attention in Vision Transformers](https://arxiv.org/pdf/2104.13840.pdf) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Very recently, a variety of vision transformer architectures for dense prediction tasks have been proposed and they show that the design of spatial attention is critical to their success in these tasks. In this work, we revisit the design of the spatial attention and demonstrate that a carefully-devised yet simple spatial attention mechanism performs favourably against the state-of-the-art schemes. As a result, we propose two vision transformer architectures, namely, Twins-PCPVT and Twins-SVT. Our proposed architectures are highly-efficient and easy to implement, only involving matrix multiplications that are highly optimized in modern deep learning frameworks. 
More importantly, the proposed architectures achieve excellent performance on a wide range of visual tasks, including image level classification as well as dense detection and segmentation. The simplicity and strong performance suggest that our proposed architectures may serve as stronger backbones for many vision tasks. Our code is released at [this https URL](https://github.com/Meituan-AutoML/Twins). + + + +
+ +
+ +## Usage + +We have provided pretrained models converted from the [official repo](https://github.com/Meituan-AutoML/Twins). + +If you want to convert the keys on your own to use the official repository's pre-trained models, we also provide a script [`twins2mmseg.py`](../../tools/model_converters/twins2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/Meituan-AutoML/Twins) to MMSegmentation style. + +```shell +python tools/model_converters/twins2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} ${MODEL_TYPE} +``` + +This script converts a `pcpvt` or `svt` pretrained model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +For example, + +```shell +python tools/model_converters/twins2mmseg.py ./alt_gvt_base.pth ./pretrained/alt_gvt_base.pth svt +``` + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | ------------------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FPN | Twins-PCPVT-S | 512x512 | 80000 | 6.60 | 27.15 | V100 | 43.26 | 44.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132-41acd132.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132.log.json) | +| UPerNet | Twins-PCPVT-S | 512x512 | 160000 | 9.67 | 14.24 | V100 | 46.04 | 46.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537-8e99c07a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537.log.json) | +| FPN | Twins-PCPVT-B | 512x512 | 80000 | 8.41 | 19.67 | V100 | 45.66 | 46.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019-d396db72.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019.log.json) | +| UPerNet | Twins-PCPVT-B (8x2) | 512x512 | 160000 | 6.46 | 12.04 | V100 | 47.91 | 48.64 |
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020-02094ea5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020.log.json) | +| FPN | Twins-PCPVT-L | 512x512 | 80000 | 10.78 | 14.32 | V100 | 45.94 | 46.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226-bc6d61dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226.log.json) | +| UPerNet | Twins-PCPVT-L (8x2) | 512x512 | 160000 | 7.82 | 10.70 | V100 | 49.35 | 50.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053-c6095c07.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053.log.json) | +| FPN | Twins-SVT-S | 512x512 | 80000 | 5.80 | 29.79 | V100 | 44.47 | 45.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006-0a0d3317.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006.log.json) | +| UPerNet | SVT-S (8x2) | 512x512 | 160000 | 4.93 | 15.09 | V100 | 46.08 | 46.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005-e48a2d94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json) | +| FPN | Twins-SVT-B | 512x512 | 80000 | 8.75 | 21.10 | V100 | 46.77 | 47.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849-88b2907c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849.log.json) | +| UPerNet | Twins-SVT-B (8x2) | 512x512 | 160000 | 6.77 | 12.66 | V100 | 48.04 | 
48.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-b_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826-0943a1f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826.log.json) | +| FPN | Twins-SVT-L | 512x512 | 80000 | 11.20 | 17.80 | V100 | 46.55 | 47.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005-1d59bee2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005.log.json) | +| UPerNet | Twins-SVT-L (8x2) | 512x512 | 160000 | 8.41 | 10.73 | V100 | 49.65 | 50.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-l_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005-3e2cae61.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json) | + +Note: + +- `8x2` means 8 GPUs with 2 samples per GPU in training. The default setting of Twins on ADE20K is 8 GPUs with 4 samples per GPU. +- `UPerNet` and `FPN` are the decode heads used with the corresponding Twins backbones, namely `UPerHead` and `FPNHead`, respectively. Models in the [official repo](https://github.com/Meituan-AutoML/Twins) all use `UPerHead`.
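+
+As a quick sanity check, any config/checkpoint pair from the table above can be loaded through mmseg's high-level inference APIs. Below is a minimal sketch; the checkpoint filename is a placeholder for whichever file you downloaded from the `download` column, and `demo.png` stands in for any test image:
+
+```python
+# Minimal single-image inference with a Twins checkpoint from the table above.
+from mmseg.apis import inference_model, init_model
+
+config_file = 'configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py'
+checkpoint_file = 'twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537-8e99c07a.pth'
+
+model = init_model(config_file, checkpoint_file, device='cuda:0')  # use device='cpu' without a GPU
+result = inference_model(model, 'demo.png')  # per-pixel ADE20K predictions for one image
+```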
+ +## Citation + +```bibtex +@article{chu2021twins, + title={Twins: Revisiting spatial attention design in vision transformers}, + author={Chu, Xiangxiang and Tian, Zhi and Wang, Yuqing and Zhang, Bo and Ren, Haibing and Wei, Xiaolin and Xia, Huaxia and Shen, Chunhua}, + journal={arXiv preprint arXiv:2104.13840}, + year={2021} +} +``` diff --git a/configs/twins/metafile.yaml b/configs/twins/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0de78d9d2e8923fea4963fdacc1096683091f2ba --- /dev/null +++ b/configs/twins/metafile.yaml @@ -0,0 +1,289 @@ +Models: +- Name: twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.26 + mIoU(ms+flip): 44.11 + Config: configs/twins/twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-PCPVT-S + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 6.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132-41acd132.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.04 + mIoU(ms+flip): 46.92 + Config: configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-PCPVT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.67 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537-8e99c07a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.66 + mIoU(ms+flip): 46.48 + Config: configs/twins/twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-PCPVT-B + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 8.41 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019-d396db72.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019.log.json + Paper: + Title:
'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.91 + mIoU(ms+flip): 48.64 + Config: configs/twins/twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Twins-PCPVT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 6.46 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020-02094ea5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.94 + mIoU(ms+flip): 46.7 + Config: configs/twins/twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-PCPVT-L + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 10.78 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226-bc6d61dc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.35 + mIoU(ms+flip): 50.08 + Config: configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Twins-PCPVT-L + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.82 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053-c6095c07.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + 
Dataset: ADE20K + Metrics: + mIoU: 44.47 + mIoU(ms+flip): 45.42 + Config: configs/twins/twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-SVT-S + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 5.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006-0a0d3317.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-s_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.08 + mIoU(ms+flip): 46.96 + Config: configs/twins/twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - SVT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 4.93 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005-e48a2d94.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.77 + mIoU(ms+flip): 47.47 + Config: configs/twins/twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-SVT-B + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 8.75 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849-88b2907c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-b_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.04 + mIoU(ms+flip): 48.87 + Config: configs/twins/twins_svt-b_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Twins-SVT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 6.77 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826-0943a1f1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.55 + mIoU(ms+flip): 47.74 + Config: configs/twins/twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-SVT-L + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 11.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005-1d59bee2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-l_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.65 + mIoU(ms+flip): 50.63 + Config: configs/twins/twins_svt-l_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Twins-SVT-L + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 8.41 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005-3e2cae61.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch diff --git a/configs/twins/twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py b/configs/twins/twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..4739ad4b0a3a4e3352862e907a0d623e3abe1a16 --- /dev/null +++ b/configs/twins/twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py @@ -0,0 +1,8 @@ +_base_ = ['./twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_base_20220308-0621964c.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + depths=[3, 4, 18, 3]), ) diff --git a/configs/twins/twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512.py b/configs/twins/twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512.py new
file mode 100644 index 0000000000000000000000000000000000000000..ba9748547d8877fb567fc753e704a91cde234297 --- /dev/null +++ b/configs/twins/twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,13 @@ +_base_ = ['./twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_base_20220308-0621964c.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + depths=[3, 4, 18, 3], + drop_path_rate=0.3)) + +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/twins/twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py b/configs/twins/twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..bff7c41946032a76790d987b185a1689ba4e4bea --- /dev/null +++ b/configs/twins/twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py @@ -0,0 +1,8 @@ +_base_ = ['./twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_large_20220308-37579dc6.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + depths=[3, 8, 27, 3])) diff --git a/configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py b/configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..666ff5b69c7345b1372c8512510e8a3285b0fa76 --- /dev/null +++ b/configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,13 @@ +_base_ = ['./twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_large_20220308-37579dc6.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + depths=[3, 8, 27, 3], + drop_path_rate=0.3)) + +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/twins/twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py b/configs/twins/twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..3b480b9f99e16d25571e174012feb20045eceb46 --- /dev/null +++ b/configs/twins/twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/twins_pcpvt-s_fpn.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001), + clip_grad=None) diff --git a/configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py b/configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..387cf60228942ccc5b50a27957a59263070c980d --- /dev/null +++ b/configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py @@ -0,0 +1,31 @@ +_base_ = [ + '../_base_/models/twins_pcpvt-s_upernet.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = 
dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict(custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] diff --git a/configs/twins/twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py b/configs/twins/twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5e9fa00f887800028d62d96441445307eb764ba7 --- /dev/null +++ b/configs/twins/twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py @@ -0,0 +1,12 @@ +_base_ = ['./twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_base_20220308-1b7eb711.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[96, 192, 384, 768], + num_heads=[3, 6, 12, 24], + depths=[2, 2, 18, 2]), + neck=dict(in_channels=[96, 192, 384, 768]), +) diff --git a/configs/twins/twins_svt-b_uperhead_8xb2-160k_ade20k-512x512.py b/configs/twins/twins_svt-b_uperhead_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..6ce2361f5feb8f6a93bcb737729760969dc3e618 --- /dev/null +++ b/configs/twins/twins_svt-b_uperhead_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,12 @@ +_base_ = ['./twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_base_20220308-1b7eb711.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[96, 192, 384, 768], + num_heads=[3, 6, 12, 24], + depths=[2, 2, 18, 2]), + decode_head=dict(in_channels=[96, 192, 384, 768]), + auxiliary_head=dict(in_channels=384)) diff --git a/configs/twins/twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py b/configs/twins/twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..b7e5f9cdb834b00edad01cb9dacb25904dcf4d01 --- /dev/null +++ b/configs/twins/twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py @@ -0,0 +1,13 @@ +_base_ = ['./twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_large_20220308-fb5936f3.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[128, 256, 512, 1024], + num_heads=[4, 8, 16, 32], + depths=[2, 2, 18, 2], + drop_path_rate=0.3), + neck=dict(in_channels=[128, 256, 512, 1024]), +) diff --git a/configs/twins/twins_svt-l_uperhead_8xb2-160k_ade20k-512x512.py b/configs/twins/twins_svt-l_uperhead_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..69c69df3b5bb95da472ca4366facc8918f791ad9 --- /dev/null +++ b/configs/twins/twins_svt-l_uperhead_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,13 @@ +_base_ = ['./twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_large_20220308-fb5936f3.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', 
checkpoint=checkpoint), + embed_dims=[128, 256, 512, 1024], + num_heads=[4, 8, 16, 32], + depths=[2, 2, 18, 2], + drop_path_rate=0.3), + decode_head=dict(in_channels=[128, 256, 512, 1024]), + auxiliary_head=dict(in_channels=512)) diff --git a/configs/twins/twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py b/configs/twins/twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c1aad83bc1a6a735b828d26c9adc6977704f4029 --- /dev/null +++ b/configs/twins/twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py @@ -0,0 +1,28 @@ +_base_ = [ + '../_base_/models/twins_pcpvt-s_fpn.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_small_20220308-7e1c3695.pth' # noqa + +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + type='SVT', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[64, 128, 256, 512], + num_heads=[2, 4, 8, 16], + mlp_ratios=[4, 4, 4, 4], + depths=[2, 2, 10, 4], + windiow_sizes=[7, 7, 7, 7], + norm_after_stage=True), + neck=dict(in_channels=[64, 128, 256, 512], out_channels=256, num_outs=4), + decode_head=dict(num_classes=150), +) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001), + clip_grad=None) diff --git a/configs/twins/twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py b/configs/twins/twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..3846795509a1e259a929bbdbb2bab968c1206396 --- /dev/null +++ b/configs/twins/twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,49 @@ +_base_ = [ + '../_base_/models/twins_pcpvt-s_upernet.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_small_20220308-7e1c3695.pth' # noqa + +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + type='SVT', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[64, 128, 256, 512], + num_heads=[2, 4, 8, 16], + mlp_ratios=[4, 4, 4, 4], + depths=[2, 2, 10, 4], + windiow_sizes=[7, 7, 7, 7], + norm_after_stage=True), + decode_head=dict(in_channels=[64, 128, 256, 512]), + auxiliary_head=dict(in_channels=256)) + +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict(custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] + +train_dataloader = dict(batch_size=2, num_workers=2) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/unet/README.md b/configs/unet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7225fbbf68f8a94861418b248ad4bd0263b8e424 --- /dev/null +++ b/configs/unet/README.md @@ -0,0 +1,92 @@ +# UNet + +> [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +There is large consent that successful training of deep networks requires many thousand annotated training samples. In this paper, we present a network and training strategy that relies on the strong use of data augmentation to use the available annotated samples more efficiently. The architecture consists of a contracting path to capture context and a symmetric expanding path that enables precise localization. We show that such a network can be trained end-to-end from very few images and outperforms the prior best method (a sliding-window convolutional network) on the ISBI challenge for segmentation of neuronal structures in electron microscopic stacks. Using the same network trained on transmitted light microscopy images (phase contrast and DIC) we won the ISBI cell tracking challenge 2015 in these categories by a large margin. Moreover, the network is fast. Segmentation of a 512x512 image takes less than a second on a recent GPU. The full implementation (based on Caffe) and the trained networks are available at [this http URL](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/). + + + +
+ +
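+
+The contracting/expanding design described in the abstract is compact enough to sketch directly. The toy PyTorch module below only illustrates the idea (two resolution levels, arbitrary channel widths); it is not the `UNet-S5-D16` backbone used by the configs in this folder:
+
+```python
+# Illustrative two-level U-Net: contracting path, expanding path, skip connection.
+import torch
+import torch.nn as nn
+
+
+def conv_block(cin, cout):
+    return nn.Sequential(
+        nn.Conv2d(cin, cout, 3, padding=1), nn.ReLU(inplace=True),
+        nn.Conv2d(cout, cout, 3, padding=1), nn.ReLU(inplace=True))
+
+
+class TinyUNet(nn.Module):
+
+    def __init__(self, in_ch=3, num_classes=2):
+        super().__init__()
+        self.enc1 = conv_block(in_ch, 64)  # contracting path, level 1
+        self.enc2 = conv_block(64, 128)  # contracting path, level 2 (context)
+        self.pool = nn.MaxPool2d(2)
+        self.up = nn.ConvTranspose2d(128, 64, 2, stride=2)
+        self.dec1 = conv_block(128, 64)  # expanding path, after skip concat
+        self.head = nn.Conv2d(64, num_classes, 1)  # per-pixel class logits
+
+    def forward(self, x):
+        s1 = self.enc1(x)  # high-resolution features, kept for the skip
+        s2 = self.enc2(self.pool(s1))  # coarser features with more context
+        d1 = self.up(s2)  # expand back to the input resolution
+        d1 = self.dec1(torch.cat([d1, s1], dim=1))  # skip enables precise localization
+        return self.head(d1)
+
+
+logits = TinyUNet()(torch.randn(1, 3, 64, 64))  # -> shape (1, 2, 64, 64)
+```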
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Loss | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | ----------- | ------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 512x1024 | 160000 | 17.91 | 3.05 | V100 | 69.10 | 71.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204.log.json) | + +### DRIVE + +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Device | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ------ | ----: | ----: | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.680 | - | V100 | 88.38 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-5daf6d3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_64x64_40k_drive/unet_s5-d16_64x64_40k_drive-20201223_191051.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.582 | - | V100 | 88.71 | 79.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820-785de5c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.599 | - | V100 | 88.35 | 
78.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.585 | - | V100 | 88.76 | 79.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821-22b3e3ba.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.596 | - | V100 | 88.38 | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.582 | - | V100 | 88.84 | 79.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825-6bf0efd7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825.log.json) | + +### STARE + +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Device | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ------ | ----: | ----: | --------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.968 | - | V100 | 89.78 | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-40k_stare-128x128.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-7d77e78b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_stare/unet_s5-d16_128x128_40k_stare-20201223_191051.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 0.986 | - | V100 | 90.65 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821-f75705a9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.982 | - | V100 | 89.89 | 81.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 1.028 | - | V100 | 90.72 | 82.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823-f1063ef7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.999 | - | V100 | 89.73 | 80.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 1.010 | - | V100 | 90.65 | 82.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825-21db614c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825.log.json) | + +### CHASE_DB1 + +| Method | Backbone | Loss | Image Size 
| Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Device | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ------ | ----: | ----: | ------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.968 | - | V100 | 89.46 | 80.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-11543527.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_chase_db1/unet_s5-d16_128x128_40k_chase_db1-20201223_191051.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 0.986 | - | V100 | 89.52 | 80.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821-1c4eb7cf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.982 | - | V100 | 89.52 | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 1.028 | - | V100 | 89.45 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823-c0802c4d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.999 | - | V100 | 89.57 | 80.47 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 1.010 | - | V100 | 89.49 | 80.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825-4ef29df5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825.log.json) | + +### HRF + +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Device | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | ------: | ------- | -------- | -------------: | ------ | ----: | ----: | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.525 | - | V100 | 88.92 | 79.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-d89cf1ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_256x256_40k_hrf/unet_s5-d16_256x256_40k_hrf-20201223_173724.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.623 | - | V100 | 89.64 | 80.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821-c314da8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.588 | - | V100 | 89.24 | 80.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.798 | - | V100 | 89.69 | 80.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823-53d492fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.604 | - | V100 | 89.32 | 80.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.607 | - | V100 | 89.56 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032-59daf7a4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032.log.json) | + +Note: + +- In the `DRIVE`, `STARE`, `CHASE_DB1`, and `HRF` datasets, `mDice` is the mean Dice over the background and vessel classes, while `Dice` is the Dice score of the vessel (foreground) class only, as illustrated in the sketch below.
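+
+A minimal sketch of how those two columns relate, assuming binary background/vessel masks; it mirrors the definitions above rather than mmseg's exact evaluator code:
+
+```python
+# Dice for one class, and mDice as the average over background and vessel.
+import numpy as np
+
+
+def dice(pred, gt):
+    """Dice = 2 * |pred & gt| / (|pred| + |gt|) for boolean masks of one class."""
+    inter = np.logical_and(pred, gt).sum()
+    return 2.0 * inter / (pred.sum() + gt.sum())
+
+
+pred = np.array([[1, 1], [0, 0]], dtype=bool)  # toy predicted vessel mask
+gt = np.array([[1, 0], [0, 0]], dtype=bool)  # toy ground-truth vessel mask
+
+vessel_dice = dice(pred, gt)  # the `Dice` column: vessel (foreground) only
+background_dice = dice(~pred, ~gt)  # Dice of the background class
+m_dice = (vessel_dice + background_dice) / 2  # the `mDice` column
+```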
+ +## Citation + +```bibtex +@inproceedings{ronneberger2015u, + title={U-net: Convolutional networks for biomedical image segmentation}, + author={Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas}, + booktitle={International Conference on Medical image computing and computer-assisted intervention}, + pages={234--241}, + year={2015}, + organization={Springer} +} +``` diff --git a/configs/unet/metafile.yaml b/configs/unet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1eafbc6d0885f64527bade9317be6fc5ad177107 --- /dev/null +++ b/configs/unet/metafile.yaml @@ -0,0 +1,642 @@ +Collections: +- Name: UNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - DRIVE + - STARE + - CHASE_DB1 + - HRF + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + README: configs/unet/README.md + Frameworks: + - PyTorch +Models: +- Name: unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 69.1 + mIoU(ms+flip): 71.05 + Config: configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 17.91 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.38 + Dice: 78.67 + Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.68 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-5daf6d3b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_64x64_40k_drive/unet_s5-d16_64x64_40k_drive-20201223_191051.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.71 + Dice: 79.32 + Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.582 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820-785de5c2.pth + 
Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.35 + Dice: 78.62 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 0.599 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive-20201227_181818.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.76 + Dice: 79.42 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 0.585 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821-22b3e3ba.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.38 + Dice: 78.69 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 0.596 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive-20201226_094047.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64 + In 
Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.84 + Dice: 79.56 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 0.582 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825-6bf0efd7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 89.78 + Dice: 81.02 + Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.968 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-7d77e78b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_stare/unet_s5-d16_128x128_40k_stare-20201223_191051.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 90.65 + Dice: 82.7 + Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.986 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821-f75705a9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 89.89 + Dice: 81.22 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 0.982 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare-20201227_181818.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 90.72 + Dice: 82.84 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.028 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823-f1063ef7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 89.73 + Dice: 80.93 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 0.999 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare-20201226_094047.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 90.65 + Dice: 82.71 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.01 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825-21db614c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825.log.json + Paper: + Title: 'U-Net: 
Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.46 + Dice: 80.24 + Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.968 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-11543527.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_chase_db1/unet_s5-d16_128x128_40k_chase_db1-20201223_191051.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.52 + Dice: 80.4 + Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.986 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821-1c4eb7cf.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.52 + Dice: 80.36 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 0.982 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1-20201227_181818.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.45 + Dice: 80.28 + Config: 
configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.028 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823-c0802c4d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.57 + Dice: 80.47 + Config: configs/unet/unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 0.999 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1-20201226_094047.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.49 + Dice: 80.37 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.01 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825-4ef29df5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 88.92 + Dice: 79.45 + Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.525 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-d89cf1ed.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_256x256_40k_hrf/unet_s5-d16_256x256_40k_hrf-20201223_173724.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.64 + Dice: 80.87 + Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.623 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821-c314da8a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.24 + Dice: 80.07 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 2.588 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf-20201227_181818.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.69 + Dice: 80.96 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 2.798 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823-53d492fa.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.32 + Dice: 80.21 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 2.604 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf-20201226_094047.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.56 + Dice: 80.71 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 2.607 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032-59daf7a4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch diff --git a/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py b/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py new file mode 100644 index 0000000000000000000000000000000000000000..e4af542bfa61ec5aaf9f923fa5298b532e84d128 --- /dev/null +++ b/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (64, 64) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) diff --git a/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py b/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..b45405fe35e769cb0e94730ef52c90528cb0fd16 --- /dev/null +++ b/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (256, 256) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) diff 
--git a/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py b/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..554caca96f53055b2f3a898681da7c65c32c8c80 --- /dev/null +++ b/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (128, 128) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py b/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..4f30bba9a7c64fb3e475a7aecf4c2ad1e151fbf0 --- /dev/null +++ b/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py @@ -0,0 +1,6 @@ +_base_ = './unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py b/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py new file mode 100644 index 0000000000000000000000000000000000000000..823fc6dc51febe4c82712282a756adb8ee5d74c6 --- /dev/null +++ b/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py b/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..174eaf8d93ebbc6e4be190239b4e60f73a75c391 --- /dev/null +++ b/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py b/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..35972bea938086b428317c3174afa719405feb4f --- /dev/null +++ b/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py b/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..c2e995dd21da3e072fa8bf2b8e679ec409adfbe5 --- /dev/null +++ 
b/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py @@ -0,0 +1,16 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=19), + auxiliary_head=dict(num_classes=19), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) +train_dataloader = dict(batch_size=4, num_workers=4) +val_dataloader = dict(batch_size=1, num_workers=4) +test_dataloader = val_dataloader diff --git a/configs/unet/unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py b/configs/unet/unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..bfc2109e224754f96ebfd39ae16771f25e4d7431 --- /dev/null +++ b/configs/unet/unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/chase_db1.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (128, 128) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/unet-s5-d16_fcn_4xb4-40k_drive-64x64.py b/configs/unet/unet-s5-d16_fcn_4xb4-40k_drive-64x64.py new file mode 100644 index 0000000000000000000000000000000000000000..10a45d1f7fe6f040ed4f21b8074106d9020908dd --- /dev/null +++ b/configs/unet/unet-s5-d16_fcn_4xb4-40k_drive-64x64.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (64, 64) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) diff --git a/configs/unet/unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py b/configs/unet/unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..7de57f2c2f76fddafaae36c8273bd6860d2eea81 --- /dev/null +++ b/configs/unet/unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (256, 256) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) diff --git a/configs/unet/unet-s5-d16_fcn_4xb4-40k_stare-128x128.py b/configs/unet/unet-s5-d16_fcn_4xb4-40k_stare-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..8eeef77628ecd4e777a3938366e106f7df6ba71b --- /dev/null +++ b/configs/unet/unet-s5-d16_fcn_4xb4-40k_stare-128x128.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (128, 128) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py b/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py new file mode 100644 index 
0000000000000000000000000000000000000000..5a26ccbf96e0834e4e1967f3a401c9c9893792a5 --- /dev/null +++ b/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py b/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py new file mode 100644 index 0000000000000000000000000000000000000000..c3b1488ad56d1454c4e1fd8d7956d3341a265cbb --- /dev/null +++ b/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_fcn_4xb4-40k_drive-64x64.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py b/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..dd3a6afc028c99fd42fe205dd0d80727e856e07c --- /dev/null +++ b/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py b/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..c8fecf34e9dc51886040ce1e1020596f6d958b72 --- /dev/null +++ b/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_fcn_4xb4-40k_stare-128x128.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py b/configs/unet/unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..ca6e5132fa05626b79745e767744fd9560c31a2d --- /dev/null +++ b/configs/unet/unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', + '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (128, 128) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py b/configs/unet/unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py new file mode 100644 index 0000000000000000000000000000000000000000..503b90136d89a8c96b8839ab11831ef4107fcafc --- /dev/null +++ b/configs/unet/unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (64, 64) +data_preprocessor = dict(size=crop_size) +model = dict( + 
data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) diff --git a/configs/unet/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py b/configs/unet/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..245365ca8dab5d2439a3bb0a079b18a8955d6b20 --- /dev/null +++ b/configs/unet/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (256, 256) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) diff --git a/configs/unet/unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py b/configs/unet/unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..c1eeeb96f8a0a7bd1c09dcf7abe12a6c9178a02b --- /dev/null +++ b/configs/unet/unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (128, 128) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py b/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..69a4bbaf82d131d16f4ef781a8378085bc3c79fc --- /dev/null +++ b/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py b/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py new file mode 100644 index 0000000000000000000000000000000000000000..1abbd53d8c974046afc3cac75bb13ae7dde1d149 --- /dev/null +++ b/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py b/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..b3256d759bb63da4f2a4253da0efc8ca9f896bd4 --- /dev/null +++ b/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py b/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py new file mode 100644 index 
0000000000000000000000000000000000000000..82aa3da616d3c6726f71c8abf828adbea7593c89 --- /dev/null +++ b/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py @@ -0,0 +1,6 @@ +_base_ = './unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/configs/unet/unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py b/configs/unet/unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py new file mode 100644 index 0000000000000000000000000000000000000000..82494f30924f637d2cf0ad6f731c415ce18cf190 --- /dev/null +++ b/configs/unet/unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', + '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (128, 128) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) diff --git a/configs/upernet/README.md b/configs/upernet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c2babbd2a71084d9f641b61337e9fbed373273a4 --- /dev/null +++ b/configs/upernet/README.md @@ -0,0 +1,68 @@ +# UPerNet + +> [Unified Perceptual Parsing for Scene Understanding](https://arxiv.org/pdf/1807.10221.pdf) + +## Introduction + +<!-- [ALGORITHM] --> + +<a href="https://github.com/CSAILVision/unifiedparsing">Official Repo</a> + +<a href="https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13">Code Snippet</a> + +## Abstract + +<!-- [ABSTRACT] --> + +Humans recognize the visual world at multiple levels: we effortlessly categorize scenes and detect objects inside, while also identifying the textures and surfaces of the objects along with their different compositional parts. In this paper, we study a new task called Unified Perceptual Parsing, which requires the machine vision systems to recognize as many visual concepts as possible from a given image. A multi-task framework called UPerNet and a training strategy are developed to learn from heterogeneous image annotations. We benchmark our framework on Unified Perceptual Parsing and show that it is able to effectively segment a wide range of concepts from images. The trained networks are further applied to discover visual knowledge in natural scenes. Models are available at [this https URL](https://github.com/CSAILVision/unifiedparsing). +
+<!-- [IMAGE] -->
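The tables in the next section pair each config file with a downloadable checkpoint. Below is a minimal sketch of loading one such pair for inference, assuming mmsegmentation 1.x is installed, the checkpoint has been downloaded from the corresponding "model" link in the table, and `demo.png` stands in for any local test image (the image path is illustrative):

```python
# Minimal inference sketch for one entry from the UPerNet tables below.
# Assumptions: mmsegmentation 1.x is installed, the checkpoint file has been
# downloaded next to this script, and demo.png is any local test image.
from mmseg.apis import inference_model, init_model, show_result_pyplot

config = 'configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py'
checkpoint = 'upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth'

# Build the model from the config and load the pretrained weights.
model = init_model(config, checkpoint, device='cuda:0')  # 'cpu' also works

# Run single-image inference.
result = inference_model(model, 'demo.png')

# Blend the predicted segmentation map over the input image (returns an array).
vis = show_result_pyplot(model, 'demo.png', result, show=False, opacity=0.5)
```

The same pattern applies to every row in the tables; only the config path and checkpoint filename change.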
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | R-50 | 512x1024 | 40000 | 6.4 | 4.25 | V100 | 77.10 | 78.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json) | +| UPerNet | R-101 | 512x1024 | 40000 | 7.4 | 3.79 | V100 | 78.69 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json) | +| UPerNet | R-50 | 769x769 | 40000 | 7.2 | 1.76 | V100 | 77.98 | 79.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json) | +| UPerNet | R-101 | 769x769 | 40000 | 8.4 | 1.56 | V100 | 79.03 | 80.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json) | +| UPerNet | R-50 | 512x1024 | 80000 | - | - | V100 | 78.19 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json) | +| UPerNet | R-101 | 512x1024 | 80000 | - | - | V100 | 79.40 | 80.46 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json) | +| UPerNet | R-50 | 769x769 | 80000 | - | - | V100 | 79.39 | 80.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json) | +| UPerNet | R-101 | 769x769 | 80000 | - | - | V100 | 80.10 | 81.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | R-50 | 512x512 | 80000 | 8.1 | 23.40 | V100 | 40.70 | 41.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json) | +| UPerNet | R-101 | 512x512 | 80000 | 9.1 | 20.34 | V100 | 42.91 | 43.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json) | +| UPerNet | R-50 | 512x512 | 160000 | - | - | V100 | 42.05 | 42.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json) | +| UPerNet | R-101 | 512x512 | 160000 | - | - | V100 | 43.82 | 44.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | R-50 | 512x512 | 20000 | 6.4 | 23.17 | V100 | 74.82 | 76.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json) | +| UPerNet | R-101 | 512x512 | 20000 | 7.5 | 19.98 | V100 | 77.10 | 78.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json) | +| UPerNet | R-50 | 512x512 | 40000 | - | - | V100 | 75.92 | 77.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json) | +| UPerNet | R-101 | 512x512 | 40000 | - | - | V100 | 77.43 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json) | + +## Citation + +```bibtex +@inproceedings{xiao2018unified, + title={Unified perceptual parsing for scene understanding}, + author={Xiao, Tete and Liu, Yingcheng and Zhou, Bolei and Jiang, Yuning and Sun, Jian}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + pages={418--434}, + year={2018} +} +``` diff --git a/configs/upernet/metafile.yaml b/configs/upernet/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6ad8187f21ebe7c4d172d581e23b43ae02894d2 --- /dev/null +++ b/configs/upernet/metafile.yaml @@ -0,0 +1,391 @@ +Collections: +- Name: UPerNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + README: configs/upernet/README.md + Frameworks: + - PyTorch +Models: +- Name: upernet_r50_4xb2-40k_cityscapes-512x1024 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.1 + mIoU(ms+flip): 78.37 + Config: configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb2-40k_cityscapes-512x1024 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.69 + mIoU(ms+flip): 80.11 + Config: configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb2-40k_cityscapes-769x769 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.98 + mIoU(ms+flip): 79.7 + Config: configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.2 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb2-40k_cityscapes-769x769 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.03 + mIoU(ms+flip): 80.77 + Config: configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb2-80k_cityscapes-512x1024 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.19 + mIoU(ms+flip): 79.19 + Config: configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb2-80k_cityscapes-512x1024 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.4 + mIoU(ms+flip): 80.46 + Config: configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: 
upernet_r50_4xb2-80k_cityscapes-769x769 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.39 + mIoU(ms+flip): 80.92 + Config: configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb2-80k_cityscapes-769x769 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.1 + mIoU(ms+flip): 81.49 + Config: configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb4-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.7 + mIoU(ms+flip): 41.81 + Config: configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb4-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.91 + mIoU(ms+flip): 43.96 + Config: configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb4-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.05 + mIoU(ms+flip): 42.78 + Config: configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb4-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.82 + mIoU(ms+flip): 44.85 + Config: configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb4-20k_voc12aug-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.82 + mIoU(ms+flip): 76.35 + Config: configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb4-20k_voc12aug-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.1 + mIoU(ms+flip): 78.29 + Config: configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py + Metadata: + Training 
Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb4-40k_voc12aug-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.92 + mIoU(ms+flip): 77.44 + Config: configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb4-40k_voc12aug-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.43 + mIoU(ms+flip): 78.56 + Config: configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch diff --git a/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py b/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..8f5f6aecfe99ca9a07e0bef516d1d5b6533f8223 --- /dev/null +++ b/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py b/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..28b5d3e968c7709fbbf980aaac9733bb89f5e1ea --- /dev/null +++ b/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_4xb2-40k_cityscapes-769x769.py' 
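+# R-101 variant: inherit the full R-50 recipe above and override only the
+# backbone (ImageNet-pretrained ResNet-101-v1c, depth 101); the UPerNet heads
+# and training schedule are reused unchanged.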
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py b/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..cafd8a209167fe06a3ab0139a6169738456cab6f --- /dev/null +++ b/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py b/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..e17572054fc47a4323afbc790b0dbfac8a26d8da --- /dev/null +++ b/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_4xb2-80k_cityscapes-769x769.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py b/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..7a6152774cb88a1d9eb2a849585e9dff954090f7 --- /dev/null +++ b/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_4xb4-160k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py b/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..be8f0848dfde7a2403228ae070260a8744b948b4 --- /dev/null +++ b/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_4xb4-20k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py b/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..db1d976498377663e992e074906a435028db8b66 --- /dev/null +++ b/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_4xb4-40k_voc12aug-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py b/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..84549a421da988a11e3fe24d56a44980d6883ffa --- /dev/null +++ b/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_4xb4-80k_ade20k-512x512.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/upernet/upernet_r18_4xb2-40k_cityscapes-512x1024.py b/configs/upernet/upernet_r18_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..dbff0e75a1c3f581d0802ecc5531409974e551f6 --- /dev/null +++ b/configs/upernet/upernet_r18_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,6 @@ +_base_ = './upernet_r50_4xb2-40k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512]), + auxiliary_head=dict(in_channels=256)) diff --git 
a/configs/upernet/upernet_r18_4xb2-80k_cityscapes-512x1024.py b/configs/upernet/upernet_r18_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..dee6349f64f4bcda87d35daeac5d3dd547a61baf --- /dev/null +++ b/configs/upernet/upernet_r18_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,6 @@ +_base_ = './upernet_r50_4xb2-80k_cityscapes-512x1024.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512]), + auxiliary_head=dict(in_channels=256)) diff --git a/configs/upernet/upernet_r18_4xb4-160k_ade20k-512x512.py b/configs/upernet/upernet_r18_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9ac6c35527b588a8cd3abd3a63f913896a9c5b07 --- /dev/null +++ b/configs/upernet/upernet_r18_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), + auxiliary_head=dict(in_channels=256, num_classes=150)) diff --git a/configs/upernet/upernet_r18_4xb4-20k_voc12aug-512x512.py b/configs/upernet/upernet_r18_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5cae4f5435354c36acdebfa10959facf5bb8c8b8 --- /dev/null +++ b/configs/upernet/upernet_r18_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), + auxiliary_head=dict(in_channels=256, num_classes=21)) diff --git a/configs/upernet/upernet_r18_4xb4-40k_voc12aug-512x512.py b/configs/upernet/upernet_r18_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..652ded75164653d900284bde65f678f753f61557 --- /dev/null +++ b/configs/upernet/upernet_r18_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), + auxiliary_head=dict(in_channels=256, num_classes=21)) diff --git a/configs/upernet/upernet_r18_4xb4-80k_ade20k-512x512.py b/configs/upernet/upernet_r18_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1a7956d71fa99f0a3d85f2a997990181f3533902 --- /dev/null +++ b/configs/upernet/upernet_r18_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), + auxiliary_head=dict(in_channels=256, num_classes=150)) diff --git a/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py b/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py new file mode 100644 index 
0000000000000000000000000000000000000000..4751fc11027b15c8b2eb0ae68b9f208b42a0408e --- /dev/null +++ b/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py b/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..6f05b6c364379d16ed9493578b45bce1932e1cf7 --- /dev/null +++ b/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py b/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py new file mode 100644 index 0000000000000000000000000000000000000000..f3488c61085ee18d7b3861bfb51481b44a8cc2ec --- /dev/null +++ b/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict(data_preprocessor=data_preprocessor) diff --git a/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py b/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..6a8f48ec51cbf6f890b884444b06599143ff609c --- /dev/null +++ b/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (769, 769) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py b/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5d15b2abd902277a794aca633831cfa38f0fc6d0 --- /dev/null +++ b/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py b/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py new file mode 100644 index 
0000000000000000000000000000000000000000..9e96b4eac0b14a73fed9c4c5510e144ad7c7415a --- /dev/null +++ b/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py b/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..cada9496204d92d0792818c345d155e1839fe861 --- /dev/null +++ b/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=21), + auxiliary_head=dict(num_classes=21)) diff --git a/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py b/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..322d5d8c841df987ce3ba734c63ca245e9d6e008 --- /dev/null +++ b/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/README.md b/configs/vit/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f75326e8e44929e05758b759f51e9e4e7d087ccb --- /dev/null +++ b/configs/vit/README.md @@ -0,0 +1,70 @@ +# Vision Transformer + +> [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/pdf/2010.11929.pdf) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +While the Transformer architecture has become the de-facto standard for natural language processing tasks, its applications to computer vision remain limited. In vision, attention is either applied in conjunction with convolutional networks, or used to replace certain components of convolutional networks while keeping their overall structure in place. We show that this reliance on CNNs is not necessary and a pure transformer applied directly to sequences of image patches can perform very well on image classification tasks. When pre-trained on large amounts of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet, CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art convolutional networks while requiring substantially fewer computational resources to train. + + + +
+ +
+ +## Usage + +To use other repositories' pre-trained models, it is necessary to convert the checkpoint keys first. + +We provide a script [`vit2mmseg.py`](../../tools/model_converters/vit2mmseg.py) in the tools directory to convert the keys of models from [timm](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py) to MMSegmentation style. + +```shell +python tools/model_converters/vit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/vit2mmseg.py https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth pretrain/jx_vit_base_p16_224-80ecf9dd.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. A minimal sketch of this key remapping is included after the DeiT config files below. + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | ----------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | ViT-B + MLN | 512x512 | 80000 | 9.20 | 6.94 | V100 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k_20210624_130547-0403cee1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/20210624_130547.log.json) | +| UPerNet | ViT-B + MLN | 512x512 | 160000 | 9.20 | 7.58 | V100 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k_20210624_130547-852fa768.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/20210623_192432.log.json) | +| UPerNet | ViT-B + LN + MLN | 512x512 | 160000 | 9.21 | 6.82 | V100 | 47.73 | 49.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k_20210621_172828-f444c077.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json) | +| UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | V100 | 42.96 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k_20210624_095228-afc93ec2.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210624_095228.log.json) | +| UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | V100 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k_20210621_160903-5110d916.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210621_160903.log.json) | +| UPerNet | DeiT-S + MLN | 512x512 | 160000 | 5.69 | 11.18 | V100 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k_20210621_161021-fb9a5dfb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-S + LN + MLN | 512x512 | 160000 | 5.69 | 12.39 | V100 | 43.52 | 45.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k_20210621_161021-c0cd652f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | V100 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k_20210624_130529-1e090789.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210624_130529.log.json) | +| UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | V100 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k_20210621_180100-828705d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210621_180100.log.json) | +| UPerNet | DeiT-B + MLN | 512x512 | 160000 | 9.21 | 7.78 | V100 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k_20210621_191949-4e1450f3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/20210621_191949.log.json) | +| UPerNet | DeiT-B + LN + MLN | 512x512 | 160000 | 9.21 | 7.75 | V100 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k_20210623_153535-8a959c14.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json) | + +## Citation + +```bibtex +@article{dosovitskiy2020, + title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, + author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, + journal={arXiv preprint arXiv:2010.11929}, + year={2020} +} +``` diff --git a/configs/vit/metafile.yaml b/configs/vit/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68e254a5f99d5107500bd6d326ad98ab812f546d --- /dev/null +++ b/configs/vit/metafile.yaml @@ -0,0 +1,265 @@ +Models: +- Name: vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.71 + mIoU(ms+flip): 49.51 + Config: configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k_20210624_130547-0403cee1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/20210624_130547.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.75 + mIoU(ms+flip): 48.46 + Config: configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k_20210624_130547-852fa768.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/20210623_192432.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.73 + mIoU(ms+flip): 49.95 + Config: configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.21 + Weights:
https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k_20210621_172828-f444c077.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-s16_upernet_8xb2-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.96 + mIoU(ms+flip): 43.79 + Config: configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 4.68 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k_20210624_095228-afc93ec2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210624_095228.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-s16_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.87 + mIoU(ms+flip): 43.79 + Config: configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 4.68 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k_20210621_160903-5110d916.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210621_160903.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.82 + mIoU(ms+flip): 45.07 + Config: configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 5.69 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k_20210621_161021-fb9a5dfb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/20210621_161021.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512 + In 
Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.52 + mIoU(ms+flip): 45.01 + Config: configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 5.69 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k_20210621_161021-c0cd652f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-b16_upernet_8xb2-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.24 + mIoU(ms+flip): 46.73 + Config: configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.75 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k_20210624_130529-1e090789.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210624_130529.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-b16_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.36 + mIoU(ms+flip): 47.16 + Config: configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.75 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k_20210621_180100-828705d7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210621_180100.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.46 + mIoU(ms+flip): 47.16 + Config: configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.21 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k_20210621_191949-4e1450f3.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/20210621_191949.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.37 + mIoU(ms+flip): 47.23 + Config: configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.21 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k_20210623_153535-8a959c14.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch diff --git a/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py b/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..39d1c54fafc36077135bdd6a913d929326fe7c8b --- /dev/null +++ b/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,5 @@ +_base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' + +model = dict( + pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', + backbone=dict(drop_path_rate=0.1, final_norm=True)) diff --git a/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py b/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..706673f6b1d48854bb777a137cc9cbe967c25c45 --- /dev/null +++ b/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,6 @@ +_base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' + +model = dict( + pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', + backbone=dict(drop_path_rate=0.1), +) diff --git a/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py b/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..23a23582d7a87a591ea7c0ee2c1bbdd27f29b2d8 --- /dev/null +++ b/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,6 @@ +_base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' + +model = dict( + pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', + backbone=dict(drop_path_rate=0.1), + neck=None) diff --git a/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py b/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..4c8bc939eea06169f838a86cd5eadb236ec00067 --- /dev/null +++ b/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py @@ -0,0 +1,6 @@ +_base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' + +model = dict( + pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', + backbone=dict(drop_path_rate=0.1), + neck=None) 
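The key conversion referenced in the ViT README above boils down to renaming `state_dict` entries. The following is a minimal sketch of that idea, not the real `tools/model_converters/vit2mmseg.py`: the rename fragments and the output filename are illustrative assumptions chosen for demonstration, and the authoritative mapping lives in the script itself.

```python
# Minimal sketch of a timm -> MMSegmentation checkpoint key remap.
# NOTE: the RENAMES table below is an assumed example, not the exact rule set
# used by tools/model_converters/vit2mmseg.py.
from collections import OrderedDict

import torch

# (timm fragment, mmseg fragment) pairs -- illustrative, not verbatim.
RENAMES = [
    ('patch_embed.proj', 'patch_embed.projection'),
    ('blocks.', 'layers.'),
    ('.mlp.', '.ffn.'),
]


def convert_keys(src_state_dict):
    """Return a copy of the state dict with every fragment renamed."""
    dst = OrderedDict()
    for key, value in src_state_dict.items():
        for old, new in RENAMES:
            key = key.replace(old, new)
        dst[key] = value
    return dst


if __name__ == '__main__':
    # Checkpoint path matches the download command in the README's example.
    ckpt = torch.load('pretrain/jx_vit_base_p16_224-80ecf9dd.pth',
                      map_location='cpu')
    state = ckpt.get('state_dict', ckpt)  # timm files may store a flat dict
    torch.save(convert_keys(state), 'pretrain/vit_b16_mmseg_sketch.pth')
```

Treat this purely as a reading aid; for checkpoints used in training or evaluation, run the real converter script.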
diff --git a/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py b/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..8e626fe0dea6bd04fe8c1958487e4c4e32dd9fb1 --- /dev/null +++ b/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,9 @@ +_base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' + +model = dict( + pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', + backbone=dict( + num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), + auxiliary_head=dict(num_classes=150, in_channels=384)) diff --git a/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py b/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9a69a892b335fc8eb16fefd870223e9f8bde6a6f --- /dev/null +++ b/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,8 @@ +_base_ = './vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py' + +model = dict( + pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), + auxiliary_head=dict(num_classes=150, in_channels=384)) diff --git a/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py b/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9ef699d5d56ad06a2c979076e61f0228af21f6e6 --- /dev/null +++ b/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,8 @@ +_base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' + +model = dict( + pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=None, + auxiliary_head=dict(num_classes=150, in_channels=384)) diff --git a/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py b/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..9ef699d5d56ad06a2c979076e61f0228af21f6e6 --- /dev/null +++ b/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py @@ -0,0 +1,8 @@ +_base_ = './vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py' + +model = dict( + pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=None, + auxiliary_head=dict(num_classes=150, in_channels=384)) diff --git a/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py b/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..2dd81b48bbbd6c94674b02f4a0d09de5814567b2 --- /dev/null +++ b/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,45 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16_ln_mln.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + 
data_preprocessor=data_preprocessor, + pretrained='pretrain/vit_base_patch16_224.pth', + backbone=dict(drop_path_rate=0.1, final_norm=True), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py b/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..1a7ec16c92942094429a3f1777cae16ae69d81aa --- /dev/null +++ b/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py @@ -0,0 +1,44 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16_ln_mln.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='pretrain/vit_base_patch16_224.pth', + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
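+ # NOTE: custom_keys entries are matched as substrings of parameter names,
+ # so 'norm' also covers the LayerNorm weights inside every transformer block.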
+ })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=160000, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py b/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..ef7345057c10dc984e651054fb179e82d5c7f1f7 --- /dev/null +++ b/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py @@ -0,0 +1,44 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16_ln_mln.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +crop_size = (512, 512) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='pretrain/vit_base_patch16_224.pth', + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + eta_min=0.0, + power=1.0, + begin=1500, + end=80000, + by_epoch=False, + ) +] + +# By default, models are trained on 8 GPUs with 2 images per GPU +train_dataloader = dict(batch_size=2) +val_dataloader = dict(batch_size=1) +test_dataloader = val_dataloader diff --git a/configs/vpd/README.md b/configs/vpd/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e90085bec9e772bc05aca0335fe95efd176cd51e --- /dev/null +++ b/configs/vpd/README.md @@ -0,0 +1,50 @@ +# VPD + +> [Unleashing Text-to-Image Diffusion Models for Visual Perception](https://arxiv.org/abs/2303.02153) + +## Introduction + + + +Official Repo + +## Abstract + + + +Diffusion models (DMs) have become the new trend of generative models and have demonstrated a powerful ability of conditional synthesis. Among those, text-to-image diffusion models pre-trained on large-scale image-text pairs are highly controllable by customizable prompts. Unlike the unconditional generative models that focus on low-level attributes and details, text-to-image diffusion models contain more high-level knowledge thanks to the vision-language pre-training. In this paper, we propose VPD (Visual Perception with a pre-trained Diffusion model), a new framework that exploits the semantic information of a pre-trained text-to-image diffusion model in visual perception tasks. Instead of using the pre-trained denoising autoencoder in a diffusion-based pipeline, we simply use it as a backbone and aim to study how to take full advantage of the learned knowledge. Specifically, we prompt the denoising decoder with proper textual inputs and refine the text features with an adapter, leading to a better alignment to the pre-trained stage and making the visual contents interact with the text prompts. 
We also propose to utilize the cross-attention maps between the visual features and the text features to provide explicit guidance. Compared with other pre-training methods, we show that vision-language pre-trained diffusion models can be adapted faster to downstream visual perception tasks using the proposed VPD. Extensive experiments on semantic segmentation, referring image segmentation and depth estimation demonstrate the effectiveness of our method. Notably, VPD attains 0.254 RMSE on NYUv2 depth estimation and 73.3% oIoU on RefCOCO-val referring image segmentation, establishing new records on these two benchmarks. + + + +
+ +
+ +## Usage + +To run training or inference with the VPD model, please install the required packages via + +```sh +pip install -r requirements/albu.txt +pip install -r requirements/optional.txt +``` + +## Results and models + +### NYU + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | RMSE | d1 | d2 | d3 | REL | log_10 | config | download | +| ------ | --------------------- | --------- | ------- | -------- | -------------- | ------ | ----- | ----- | ----- | ----- | ----- | ------ | ----------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| VPD | Stable-Diffusion-v1-5 | 480x480 | 25000 | - | - | A100 | 0.253 | 0.964 | 0.995 | 0.999 | 0.069 | 0.030 | [config](https://github.com/open-mmlab/mmsegmentation/tree/main/configs/vpd/vpd_sd_4xb8-25k_nyu-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vpd/vpd_sd_4xb8-25k_nyu-480x480_20230908-66144bc4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vpd/vpd_sd_4xb8-25k_nyu-480x480_20230908.json) | +| VPD | Stable-Diffusion-v1-5 | 512x512 | 25000 | - | - | A100 | 0.258 | 0.963 | 0.995 | 0.999 | 0.072 | 0.031 | [config](https://github.com/open-mmlab/mmsegmentation/tree/main/configs/vpd/vpd_sd_4xb8-25k_nyu-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vpd/vpd_sd_4xb8-25k_nyu-512x512_20230918-60cefcff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vpd/vpd_sd_4xb8-25k_nyu-512x512_20230918.json) | + +## Citation + +```bibtex +@inproceedings{zhao2023unleashing, + title={Unleashing Text-to-Image Diffusion Models for Visual Perception}, + author={Zhao, Wenliang and Rao, Yongming and Liu, Zuyan and Liu, Benlin and Zhou, Jie and Lu, Jiwen}, + booktitle={ICCV}, + year={2023} +} +``` diff --git a/configs/vpd/metafile.yaml b/configs/vpd/metafile.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccdc0e81eb2639b9ec4eb1ee61de722cbabea78c --- /dev/null +++ b/configs/vpd/metafile.yaml @@ -0,0 +1,56 @@ +Collections: +- Name: VPD + License: Apache License 2.0 + Metadata: + Training Data: + - NYU + Paper: + Title: Unleashing Text-to-Image Diffusion Models for Visual Perception + URL: https://arxiv.org/abs/2303.02153 + README: configs/vpd/README.md + Frameworks: + - PyTorch +Models: +- Name: vpd_sd_4xb8-25k_nyu-480x480 + In Collection: VPD + Results: + Task: Depth Estimation + Dataset: NYU + Metrics: + RMSE: 0.253 + Config: configs/vpd/vpd_sd_4xb8-25k_nyu-480x480.py + Metadata: + Training Data: NYU + Batch Size: 32 + Architecture: + - Stable-Diffusion + Training Resources: 8x A100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vpd/vpd_sd_4xb8-25k_nyu-480x480_20230908-66144bc4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vpd/vpd_sd_4xb8-25k_nyu-480x480_20230908.json + Paper: + Title: 'High-Resolution Image Synthesis with Latent Diffusion Models' + URL: https://arxiv.org/abs/2112.10752 + Code: https://github.com/open-mmlab/mmsegmentation/tree/main/mmseg/models/backbones/vpd.py#L333 + Framework: PyTorch +- Name: vpd_sd_4xb8-25k_nyu-512x512 + In Collection: VPD + Alias: vpd_depth + Results: + Task: Depth Estimation + Dataset: NYU + Metrics: + RMSE: 0.258 + Config: configs/vpd/vpd_sd_4xb8-25k_nyu-512x512.py + Metadata: + Training
Data: NYU + Batch Size: 32 + Architecture: + - Stable-Diffusion + Training Resources: 8x A100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vpd/vpd_sd_4xb8-25k_nyu-512x512_20230918-60cefcff.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vpd/vpd_sd_4xb8-25k_nyu-512x512_20230918.json + Paper: + Title: 'High-Resolution Image Synthesis with Latent Diffusion Models' + URL: https://arxiv.org/abs/2112.10752 + Code: https://github.com/open-mmlab/mmsegmentation/tree/main/mmseg/models/backbones/vpd.py#L333 + Framework: PyTorch diff --git a/configs/vpd/vpd_sd_4xb8-25k_nyu-480x480.py b/configs/vpd/vpd_sd_4xb8-25k_nyu-480x480.py new file mode 100644 index 0000000000000000000000000000000000000000..0d14d8dd338e9b6333461c6310b0c29dad7f4a86 --- /dev/null +++ b/configs/vpd/vpd_sd_4xb8-25k_nyu-480x480.py @@ -0,0 +1,38 @@ +_base_ = [ + '../_base_/models/vpd_sd.py', '../_base_/datasets/nyu.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_25k.py' +] + +crop_size = (480, 480) + +model = dict( + type='DepthEstimator', + data_preprocessor=dict(size=crop_size), + backbone=dict( + class_embed_path='https://download.openmmlab.com/mmsegmentation/' + 'v0.5/vpd/nyu_class_embeddings.pth', + class_embed_select=True, + pad_shape=512, + unet_cfg=dict(use_attn=False), + ), + decode_head=dict( + type='VPDDepthHead', + in_channels=[320, 640, 1280, 1280], + max_depth=10, + fmap_border=(1, 1), + ), + test_cfg=dict(mode='slide_flip', crop_size=crop_size, stride=(160, 160))) + +default_hooks = dict( + checkpoint=dict(save_best='rmse', rule='less', max_keep_ckpts=1)) + +# custom optimizer +optim_wrapper = dict( + constructor='ForceDefaultOptimWrapperConstructor', + paramwise_cfg=dict( + bias_decay_mult=0, + force_default_settings=True, + custom_keys={ + 'backbone.encoder_vq': dict(lr_mult=0), + 'backbone.unet': dict(lr_mult=0.01), + })) diff --git a/configs/vpd/vpd_sd_4xb8-25k_nyu-512x512.py b/configs/vpd/vpd_sd_4xb8-25k_nyu-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..e89eb9c422f51ec060d38ef5f30a48ae3c677dff --- /dev/null +++ b/configs/vpd/vpd_sd_4xb8-25k_nyu-512x512.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/models/vpd_sd.py', '../_base_/datasets/nyu_512x512.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_25k.py' +] + +crop_size = (512, 512) + +model = dict( + type='DepthEstimator', + data_preprocessor=dict(size=crop_size), + backbone=dict( + class_embed_path='https://download.openmmlab.com/mmsegmentation/' + 'v0.5/vpd/nyu_class_embeddings.pth', + class_embed_select=True, + pad_shape=512, + unet_cfg=dict(use_attn=False), + ), + decode_head=dict( + type='VPDDepthHead', + in_channels=[320, 640, 1280, 1280], + max_depth=10, + ), + test_cfg=dict(mode='slide_flip', crop_size=crop_size, stride=(128, 128))) + +default_hooks = dict( + checkpoint=dict(save_best='rmse', rule='less', max_keep_ckpts=1)) + +# custom optimizer +optim_wrapper = dict( + constructor='ForceDefaultOptimWrapperConstructor', + paramwise_cfg=dict( + bias_decay_mult=0, + force_default_settings=True, + custom_keys={ + 'backbone.encoder_vq': dict(lr_mult=0), + 'backbone.unet': dict(lr_mult=0.01), + })) diff --git a/demo_1.jpg b/demo_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a8756183f034324e364bc074fa44294c635d3d10 Binary files /dev/null and b/demo_1.jpg differ diff --git a/demo_2.jpg b/demo_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4912df6729896aea16beb71bdc442ff488ae05ee Binary files 
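As a sanity check of the two VPD configs above, here is a minimal inference sketch. It assumes an mmseg 1.x environment and that the released checkpoint (the `Weights` URL in the metafile) has been downloaded locally; `demo_1.jpg` is one of the demo images added in this diff, and the exact `pred_depth_map` access pattern is inferred from the inferencer's postprocess code later in this patch.

```python
# Minimal sketch: depth inference with the VPD 512x512 config above.
# Assumes mmseg 1.x is installed and the checkpoint was downloaded beforehand.
from mmseg.apis import inference_model, init_model

config = 'configs/vpd/vpd_sd_4xb8-25k_nyu-512x512.py'
checkpoint = 'vpd_sd_4xb8-25k_nyu-512x512_20230918-60cefcff.pth'

model = init_model(config, checkpoint, device='cuda:0')
result = inference_model(model, 'demo_1.jpg')
# A DepthEstimator fills `pred_depth_map` rather than `pred_sem_seg`.
depth = result.pred_depth_map.data.squeeze().cpu().numpy()
print(depth.shape, depth.max())  # values bounded by the configured max_depth=10
```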
/dev/null and b/demo_2.jpg differ diff --git a/demo_3.jpg b/demo_3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a53ccdb42801194c51161ac11f6549df6e6efadc Binary files /dev/null and b/demo_3.jpg differ diff --git a/demo_4.jpg b/demo_4.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ed3b1d4c90d5444e24eb716757c128ff4d603433 Binary files /dev/null and b/demo_4.jpg differ diff --git a/gradio_banner.png b/gradio_banner.png new file mode 100644 index 0000000000000000000000000000000000000000..a0c25a4e20b6d1774dac3e8f592303925a2bb713 Binary files /dev/null and b/gradio_banner.png differ diff --git a/mmseg/.mim/configs b/mmseg/.mim/configs new file mode 120000 index 0000000000000000000000000000000000000000..5992d109cbbe3360c4afb3bfbc50a633ae97993b --- /dev/null +++ b/mmseg/.mim/configs @@ -0,0 +1 @@ +../../configs \ No newline at end of file diff --git a/mmseg/.mim/dataset-index.yml b/mmseg/.mim/dataset-index.yml new file mode 120000 index 0000000000000000000000000000000000000000..49d87a813e085bfacc38c3b1dff53ec15916b660 --- /dev/null +++ b/mmseg/.mim/dataset-index.yml @@ -0,0 +1 @@ +../../dataset-index.yml \ No newline at end of file diff --git a/mmseg/.mim/model-index.yml b/mmseg/.mim/model-index.yml new file mode 120000 index 0000000000000000000000000000000000000000..a18c0b389b4ffedeea207c4fdb519ad96dfa0788 --- /dev/null +++ b/mmseg/.mim/model-index.yml @@ -0,0 +1 @@ +../../model-index.yml \ No newline at end of file diff --git a/mmseg/.mim/tools b/mmseg/.mim/tools new file mode 120000 index 0000000000000000000000000000000000000000..31941e941dcc86b0c1236dcb9acb0577018ad91c --- /dev/null +++ b/mmseg/.mim/tools @@ -0,0 +1 @@ +../../tools \ No newline at end of file diff --git a/mmseg/__init__.py b/mmseg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5fcb84e8c4f986121ba9d782b384477129f75ff6 --- /dev/null +++ b/mmseg/__init__.py @@ -0,0 +1,74 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import mmcv +import mmengine +from packaging.version import parse + +from .version import __version__, version_info + +MMCV_MIN = '2.0.0rc4' +MMCV_MAX = '2.2.0' +MMENGINE_MIN = '0.5.0' +MMENGINE_MAX = '1.0.0' + + +def digit_version(version_str: str, length: int = 4): + """Convert a version string into a tuple of integers. + + This method is usually used for comparing two versions. For pre-release + versions: alpha < beta < rc. + + Args: + version_str (str): The version string. + length (int): The maximum number of version levels. Default: 4. + + Returns: + tuple[int]: The version info in digits (integers). 
+ """ + version = parse(version_str) + assert version.release, f'failed to parse version {version_str}' + release = list(version.release) + release = release[:length] + if len(release) < length: + release = release + [0] * (length - len(release)) + if version.is_prerelease: + mapping = {'a': -3, 'b': -2, 'rc': -1} + val = -4 + # version.pre can be None + if version.pre: + if version.pre[0] not in mapping: + warnings.warn(f'unknown prerelease version {version.pre[0]}, ' + 'version checking may go wrong') + else: + val = mapping[version.pre[0]] + release.extend([val, version.pre[-1]]) + else: + release.extend([val, 0]) + + elif version.is_postrelease: + release.extend([1, version.post]) + else: + release.extend([0, 0]) + return tuple(release) + + +mmcv_min_version = digit_version(MMCV_MIN) +mmcv_max_version = digit_version(MMCV_MAX) +mmcv_version = digit_version(mmcv.__version__) + + +assert (mmcv_min_version <= mmcv_version < mmcv_max_version), \ + f'MMCV=={mmcv.__version__} is used but incompatible. ' \ + f'Please install mmcv>=2.0.0rc4.' + +mmengine_min_version = digit_version(MMENGINE_MIN) +mmengine_max_version = digit_version(MMENGINE_MAX) +mmengine_version = digit_version(mmengine.__version__) + +assert (mmengine_min_version <= mmengine_version < mmengine_max_version), \ + f'MMEngine=={mmengine.__version__} is used but incompatible. ' \ + f'Please install mmengine>={mmengine_min_version}, '\ + f'<{mmengine_max_version}.' + +__all__ = ['__version__', 'version_info', 'digit_version'] diff --git a/mmseg/__pycache__/__init__.cpython-39.pyc b/mmseg/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96a8c153af61ffd5dae11d663c43632e2a7ff2d8 Binary files /dev/null and b/mmseg/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/__pycache__/version.cpython-39.pyc b/mmseg/__pycache__/version.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..759ff299525d701f93d38f0bea946c228a0768b6 Binary files /dev/null and b/mmseg/__pycache__/version.cpython-39.pyc differ diff --git a/mmseg/apis/__init__.py b/mmseg/apis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b50a266319c9cf74cb8b13afcff564248c058732 --- /dev/null +++ b/mmseg/apis/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .inference import inference_model, init_model, show_result_pyplot +from .mmseg_inferencer import MMSegInferencer +from .remote_sense_inferencer import RSImage, RSInferencer + +__all__ = [ + 'init_model', 'inference_model', 'show_result_pyplot', 'MMSegInferencer', + 'RSInferencer', 'RSImage' +] diff --git a/mmseg/apis/__pycache__/__init__.cpython-39.pyc b/mmseg/apis/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cdf620cc013146be8fe3d9367e0ac92f6483438f Binary files /dev/null and b/mmseg/apis/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/apis/__pycache__/inference.cpython-39.pyc b/mmseg/apis/__pycache__/inference.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..404d2ab7ceea14a036616ce396b90e97f66776b9 Binary files /dev/null and b/mmseg/apis/__pycache__/inference.cpython-39.pyc differ diff --git a/mmseg/apis/__pycache__/mmseg_inferencer.cpython-39.pyc b/mmseg/apis/__pycache__/mmseg_inferencer.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4418315957c4296ef29eb4c9aceaa194b6e14e9e Binary files /dev/null and b/mmseg/apis/__pycache__/mmseg_inferencer.cpython-39.pyc differ diff --git a/mmseg/apis/__pycache__/remote_sense_inferencer.cpython-39.pyc b/mmseg/apis/__pycache__/remote_sense_inferencer.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89d12f3c51d7cce4a2a2aac2f4392a273e93cedb Binary files /dev/null and b/mmseg/apis/__pycache__/remote_sense_inferencer.cpython-39.pyc differ diff --git a/mmseg/apis/__pycache__/utils.cpython-39.pyc b/mmseg/apis/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffb18c8b5e851bfd59d29879ca8f824cd5209016 Binary files /dev/null and b/mmseg/apis/__pycache__/utils.cpython-39.pyc differ diff --git a/mmseg/apis/inference.py b/mmseg/apis/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..aab11d14f4becc43d4c2ecd3772417e4923bd20e --- /dev/null +++ b/mmseg/apis/inference.py @@ -0,0 +1,189 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from pathlib import Path +from typing import Optional, Union + +import mmcv +import numpy as np +import torch +from mmengine import Config +from mmengine.registry import init_default_scope +from mmengine.runner import load_checkpoint +from mmengine.utils import mkdir_or_exist + +from mmseg.models import BaseSegmentor +from mmseg.registry import MODELS +from mmseg.structures import SegDataSample +from mmseg.utils import SampleList, dataset_aliases, get_classes, get_palette +from mmseg.visualization import SegLocalVisualizer +from .utils import ImageType, _preprare_data + + +def init_model(config: Union[str, Path, Config], + checkpoint: Optional[str] = None, + device: str = 'cuda:0', + cfg_options: Optional[dict] = None): + """Initialize a segmentor from config file. + + Args: + config (str, :obj:`Path`, or :obj:`mmengine.Config`): Config file path, + :obj:`Path`, or the config object. + checkpoint (str, optional): Checkpoint path. If left as None, the model + will not load any weights. + device (str, optional) CPU/CUDA device option. Default 'cuda:0'. + Use 'cpu' for loading model on CPU. + cfg_options (dict, optional): Options to override some settings in + the used config. + Returns: + nn.Module: The constructed segmentor. 
+ """ + if isinstance(config, (str, Path)): + config = Config.fromfile(config) + elif not isinstance(config, Config): + raise TypeError('config must be a filename or Config object, ' + 'but got {}'.format(type(config))) + if cfg_options is not None: + config.merge_from_dict(cfg_options) + if config.model.type == 'EncoderDecoder': + if 'init_cfg' in config.model.backbone: + config.model.backbone.init_cfg = None + elif config.model.type == 'MultimodalEncoderDecoder': + for k, v in config.model.items(): + if isinstance(v, dict) and 'init_cfg' in v: + config.model[k].init_cfg = None + config.model.pretrained = None + config.model.train_cfg = None + init_default_scope(config.get('default_scope', 'mmseg')) + + model = MODELS.build(config.model) + if checkpoint is not None: + checkpoint = load_checkpoint(model, checkpoint, map_location='cpu') + dataset_meta = checkpoint['meta'].get('dataset_meta', None) + # save the dataset_meta in the model for convenience + if 'dataset_meta' in checkpoint.get('meta', {}): + # mmseg 1.x + model.dataset_meta = dataset_meta + elif 'CLASSES' in checkpoint.get('meta', {}): + # < mmseg 1.x + classes = checkpoint['meta']['CLASSES'] + palette = checkpoint['meta']['PALETTE'] + model.dataset_meta = {'classes': classes, 'palette': palette} + else: + warnings.simplefilter('once') + warnings.warn( + 'dataset_meta or class names are not saved in the ' + 'checkpoint\'s meta data, classes and palette will be' + 'set according to num_classes ') + num_classes = model.decode_head.num_classes + dataset_name = None + for name in dataset_aliases.keys(): + if len(get_classes(name)) == num_classes: + dataset_name = name + break + if dataset_name is None: + warnings.warn( + 'No suitable dataset found, use Cityscapes by default') + dataset_name = 'cityscapes' + model.dataset_meta = { + 'classes': get_classes(dataset_name), + 'palette': get_palette(dataset_name) + } + model.cfg = config # save the config in the model for convenience + model.to(device) + model.eval() + return model + + +def inference_model(model: BaseSegmentor, + img: ImageType) -> Union[SegDataSample, SampleList]: + """Inference image(s) with the segmentor. + + Args: + model (nn.Module): The loaded segmentor. + imgs (str/ndarray or list[str/ndarray]): Either image files or loaded + images. + + Returns: + :obj:`SegDataSample` or list[:obj:`SegDataSample`]: + If imgs is a list or tuple, the same length list type results + will be returned, otherwise return the segmentation results directly. + """ + # prepare data + data, is_batch = _preprare_data(img, model) + + # forward the model + with torch.no_grad(): + results = model.test_step(data) + + return results if is_batch else results[0] + + +def show_result_pyplot(model: BaseSegmentor, + img: Union[str, np.ndarray], + result: SegDataSample, + opacity: float = 0.5, + title: str = '', + draw_gt: bool = True, + draw_pred: bool = True, + wait_time: float = 0, + show: bool = True, + with_labels: Optional[bool] = True, + save_dir=None, + out_file=None): + """Visualize the segmentation results on the image. + + Args: + model (nn.Module): The loaded segmentor. + img (str or np.ndarray): Image filename or loaded image. + result (SegDataSample): The prediction SegDataSample result. + opacity(float): Opacity of painted segmentation map. + Default 0.5. Must be in (0, 1] range. + title (str): The title of pyplot figure. + Default is ''. + draw_gt (bool): Whether to draw GT SegDataSample. Default to True. + draw_pred (bool): Whether to draw Prediction SegDataSample. + Defaults to True. 
+ wait_time (float): The interval of show (s). 0 is the special value + that means "forever". Defaults to 0. + show (bool): Whether to display the drawn image. + Default to True. + with_labels(bool, optional): Add semantic labels in visualization + result, Default to True. + save_dir (str, optional): Save file dir for all storage backends. + If it is None, the backend storage will not save any data. + out_file (str, optional): Path to output file. Default to None. + + + + Returns: + np.ndarray: the drawn image which channel is RGB. + """ + if hasattr(model, 'module'): + model = model.module + if isinstance(img, str): + image = mmcv.imread(img, channel_order='rgb') + else: + image = img + if save_dir is not None: + mkdir_or_exist(save_dir) + # init visualizer + visualizer = SegLocalVisualizer( + vis_backends=[dict(type='LocalVisBackend')], + save_dir=save_dir, + alpha=opacity) + visualizer.dataset_meta = dict( + classes=model.dataset_meta['classes'], + palette=model.dataset_meta['palette']) + visualizer.add_datasample( + name=title, + image=image, + data_sample=result, + draw_gt=draw_gt, + draw_pred=draw_pred, + wait_time=wait_time, + out_file=out_file, + show=show, + with_labels=with_labels) + vis_img = visualizer.get_image() + + return vis_img diff --git a/mmseg/apis/mmseg_inferencer.py b/mmseg/apis/mmseg_inferencer.py new file mode 100644 index 0000000000000000000000000000000000000000..02a198b516a71c1f5a0833955607ba4ecc05bf13 --- /dev/null +++ b/mmseg/apis/mmseg_inferencer.py @@ -0,0 +1,382 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import warnings +from typing import List, Optional, Sequence, Union + +import mmcv +import mmengine +import numpy as np +import torch +import torch.nn as nn +from mmcv.transforms import Compose +from mmengine.infer.infer import BaseInferencer, ModelType +from mmengine.model import revert_sync_batchnorm +from mmengine.registry import init_default_scope +from mmengine.runner.checkpoint import _load_checkpoint_to_model +from PIL import Image + +from mmseg.structures import SegDataSample +from mmseg.utils import ConfigType, SampleList, get_classes, get_palette +from mmseg.visualization import SegLocalVisualizer + +InputType = Union[str, np.ndarray] +InputsType = Union[InputType, Sequence[InputType]] +PredType = Union[SegDataSample, SampleList] + + +class MMSegInferencer(BaseInferencer): + """Semantic segmentation inferencer, provides inference and visualization + interfaces. Note: MMEngine >= 0.5.0 is required. + + Args: + model (str, optional): Path to the config file or the model name + defined in metafile. Take the `mmseg metafile `_ + as an example the `model` could be + "fcn_r50-d8_4xb2-40k_cityscapes-512x1024", and the weights of model + will be download automatically. If use config file, like + "configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py", the + `weights` should be defined. + weights (str, optional): Path to the checkpoint. If it is not specified + and model is a model name of metafile, the weights will be loaded + from metafile. Defaults to None. + classes (list, optional): Input classes for result rendering, as the + prediction of segmentation model is a segment map with label + indices, `classes` is a list which includes items responding to the + label indices. If classes is not defined, visualizer will take + `cityscapes` classes by default. Defaults to None. + palette (list, optional): Input palette for result rendering, which is + a list of color palette responding to the classes. 
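Taken together, `init_model`, `inference_model` and `show_result_pyplot` form the low-level API added in `mmseg/apis/inference.py`; a minimal sketch with placeholder config/checkpoint paths:

```python
# Sketch: the init_model -> inference_model -> show_result_pyplot round trip.
# Config and checkpoint paths are placeholders for any mmseg 1.x segmentor.
from mmseg.apis import inference_model, init_model, show_result_pyplot

model = init_model('path/to/config.py', 'path/to/checkpoint.pth', device='cpu')
result = inference_model(model, 'demo_1.jpg')   # single image -> SegDataSample
vis = show_result_pyplot(model, 'demo_1.jpg', result,
                         show=False, opacity=0.5,
                         out_file='demo_1_vis.png')
print(vis.shape)  # the drawn RGB image is also returned as an ndarray
```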
If palette is + not defined, visualizer will take `cityscapes` palette by default. + Defaults to None. + dataset_name (str, optional): `Dataset name or alias `_ + visulizer will use the meta information of the dataset i.e. classes + and palette, but the `classes` and `palette` have higher priority. + Defaults to None. + device (str, optional): Device to run inference. If None, the available + device will be automatically used. Defaults to None. + scope (str, optional): The scope of the model. Defaults to 'mmseg'. + """ # noqa + + preprocess_kwargs: set = set() + forward_kwargs: set = {'mode', 'out_dir'} + visualize_kwargs: set = { + 'show', 'wait_time', 'img_out_dir', 'opacity', 'return_vis', + 'with_labels' + } + postprocess_kwargs: set = {'pred_out_dir', 'return_datasample'} + + def __init__(self, + model: Union[ModelType, str], + weights: Optional[str] = None, + classes: Optional[Union[str, List]] = None, + palette: Optional[Union[str, List]] = None, + dataset_name: Optional[str] = None, + device: Optional[str] = None, + scope: Optional[str] = 'mmseg') -> None: + # A global counter tracking the number of images processes, for + # naming of the output images + self.num_visualized_imgs = 0 + self.num_pred_imgs = 0 + init_default_scope(scope if scope else 'mmseg') + super().__init__( + model=model, weights=weights, device=device, scope=scope) + + if device == 'cpu' or not torch.cuda.is_available(): + self.model = revert_sync_batchnorm(self.model) + + assert isinstance(self.visualizer, SegLocalVisualizer) + self.visualizer.set_dataset_meta(classes, palette, dataset_name) + + def _load_weights_to_model(self, model: nn.Module, + checkpoint: Optional[dict], + cfg: Optional[ConfigType]) -> None: + """Loading model weights and meta information from cfg and checkpoint. + + Subclasses could override this method to load extra meta information + from ``checkpoint`` and ``cfg`` to model. + + Args: + model (nn.Module): Model to load weights and meta information. + checkpoint (dict, optional): The loaded checkpoint. + cfg (Config or ConfigDict, optional): The loaded config. 
+ """ + + if checkpoint is not None: + _load_checkpoint_to_model(model, checkpoint) + checkpoint_meta = checkpoint.get('meta', {}) + # save the dataset_meta in the model for convenience + if 'dataset_meta' in checkpoint_meta: + # mmsegmentation 1.x + model.dataset_meta = { + 'classes': checkpoint_meta['dataset_meta'].get('classes'), + 'palette': checkpoint_meta['dataset_meta'].get('palette') + } + elif 'CLASSES' in checkpoint_meta: + # mmsegmentation 0.x + classes = checkpoint_meta['CLASSES'] + palette = checkpoint_meta.get('PALETTE', None) + model.dataset_meta = {'classes': classes, 'palette': palette} + else: + warnings.warn( + 'dataset_meta or class names are not saved in the ' + 'checkpoint\'s meta data, use classes of Cityscapes by ' + 'default.') + model.dataset_meta = { + 'classes': get_classes('cityscapes'), + 'palette': get_palette('cityscapes') + } + else: + warnings.warn('Checkpoint is not loaded, and the inference ' + 'result is calculated by the randomly initialized ' + 'model!') + warnings.warn( + 'weights is None, use cityscapes classes by default.') + model.dataset_meta = { + 'classes': get_classes('cityscapes'), + 'palette': get_palette('cityscapes') + } + + def __call__(self, + inputs: InputsType, + return_datasamples: bool = False, + batch_size: int = 1, + return_vis: bool = False, + show: bool = False, + wait_time: int = 0, + out_dir: str = '', + img_out_dir: str = 'vis', + pred_out_dir: str = 'pred', + **kwargs) -> dict: + """Call the inferencer. + + Args: + inputs (Union[list, str, np.ndarray]): Inputs for the inferencer. + return_datasamples (bool): Whether to return results as + :obj:`SegDataSample`. Defaults to False. + batch_size (int): Batch size. Defaults to 1. + show (bool): Whether to display the rendering color segmentation + mask in a popup window. Defaults to False. + wait_time (float): The interval of show (s). Defaults to 0. + out_dir (str): Output directory of inference results. Defaults + to ''. + img_out_dir (str): Subdirectory of `out_dir`, used to save + rendering color segmentation mask, so `out_dir` must be defined + if you would like to save predicted mask. Defaults to 'vis'. + pred_out_dir (str): Subdirectory of `out_dir`, used to save + predicted mask file, so `out_dir` must be defined if you would + like to save predicted mask. Defaults to 'pred'. + + **kwargs: Other keyword arguments passed to :meth:`preprocess`, + :meth:`forward`, :meth:`visualize` and :meth:`postprocess`. + Each key in kwargs should be in the corresponding set of + ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs`` + and ``postprocess_kwargs``. + + + Returns: + dict: Inference and visualization results. + """ + + if out_dir != '': + pred_out_dir = osp.join(out_dir, pred_out_dir) + img_out_dir = osp.join(out_dir, img_out_dir) + else: + pred_out_dir = '' + img_out_dir = '' + + return super().__call__( + inputs=inputs, + return_datasamples=return_datasamples, + batch_size=batch_size, + show=show, + wait_time=wait_time, + img_out_dir=img_out_dir, + pred_out_dir=pred_out_dir, + return_vis=return_vis, + **kwargs) + + def visualize(self, + inputs: list, + preds: List[dict], + return_vis: bool = False, + show: bool = False, + wait_time: int = 0, + img_out_dir: str = '', + opacity: float = 0.8, + with_labels: Optional[bool] = True) -> List[np.ndarray]: + """Visualize predictions. + + Args: + inputs (list): Inputs preprocessed by :meth:`_inputs_to_list`. + preds (Any): Predictions of the model. + show (bool): Whether to display the image in a popup window. 
+ Defaults to False. + wait_time (float): The interval of show (s). Defaults to 0. + img_out_dir (str): Output directory of rendering prediction i.e. + color segmentation mask. Defaults: '' + opacity (int, float): The transparency of segmentation mask. + Defaults to 0.8. + + Returns: + List[np.ndarray]: Visualization results. + """ + if not show and img_out_dir == '' and not return_vis: + return None + if self.visualizer is None: + raise ValueError('Visualization needs the "visualizer" term' + 'defined in the config, but got None.') + + self.visualizer.set_dataset_meta(**self.model.dataset_meta) + self.visualizer.alpha = opacity + + results = [] + + for single_input, pred in zip(inputs, preds): + if isinstance(single_input, str): + img_bytes = mmengine.fileio.get(single_input) + img = mmcv.imfrombytes(img_bytes) + img = img[:, :, ::-1] + img_name = osp.basename(single_input) + elif isinstance(single_input, np.ndarray): + img = single_input.copy() + img_num = str(self.num_visualized_imgs).zfill(8) + '_vis' + img_name = f'{img_num}.jpg' + else: + raise ValueError('Unsupported input type:' + f'{type(single_input)}') + + out_file = osp.join(img_out_dir, img_name) if img_out_dir != ''\ + else None + + self.visualizer.add_datasample( + img_name, + img, + pred, + show=show, + wait_time=wait_time, + draw_gt=False, + draw_pred=True, + out_file=out_file, + with_labels=with_labels) + if return_vis: + results.append(self.visualizer.get_image()) + self.num_visualized_imgs += 1 + + return results if return_vis else None + + def postprocess(self, + preds: PredType, + visualization: List[np.ndarray], + return_datasample: bool = False, + pred_out_dir: str = '') -> dict: + """Process the predictions and visualization results from ``forward`` + and ``visualize``. + + This method should be responsible for the following tasks: + + 1. Pack the predictions and visualization results and return them. + 2. Save the predictions, if it needed. + + Args: + preds (List[Dict]): Predictions of the model. + visualization (List[np.ndarray]): The list of rendering color + segmentation mask. + return_datasample (bool): Whether to return results as datasamples. + Defaults to False. + pred_out_dir: File to save the inference results w/o + visualization. If left as empty, no file will be saved. + Defaults to ''. + + Returns: + dict: Inference and visualization results with key ``predictions`` + and ``visualization`` + + - ``visualization (Any)``: Returned by :meth:`visualize` + - ``predictions`` (List[np.ndarray], np.ndarray): Returned by + :meth:`forward` and processed in :meth:`postprocess`. + If ``return_datasample=False``, it will be the segmentation mask + with label indice. 
+ """ + if return_datasample: + if len(preds) == 1: + return preds[0] + else: + return preds + + results_dict = {} + + results_dict['predictions'] = [] + results_dict['visualization'] = [] + + for i, pred in enumerate(preds): + pred_data = dict() + if 'pred_sem_seg' in pred.keys(): + pred_data['sem_seg'] = pred.pred_sem_seg.numpy().data[0] + elif 'pred_depth_map' in pred.keys(): + pred_data['depth_map'] = pred.pred_depth_map.numpy().data[0] + + if visualization is not None: + vis = visualization[i] + results_dict['visualization'].append(vis) + if pred_out_dir != '': + mmengine.mkdir_or_exist(pred_out_dir) + for key, data in pred_data.items(): + post_fix = '_pred.png' if key == 'sem_seg' else '_pred.npy' + img_name = str(self.num_pred_imgs).zfill(8) + post_fix + img_path = osp.join(pred_out_dir, img_name) + if key == 'sem_seg': + output = Image.fromarray(data.astype(np.uint8)) + output.save(img_path) + else: + np.save(img_path, data) + pred_data = next(iter(pred_data.values())) + results_dict['predictions'].append(pred_data) + self.num_pred_imgs += 1 + + if len(results_dict['predictions']) == 1: + results_dict['predictions'] = results_dict['predictions'][0] + if visualization is not None: + results_dict['visualization'] = \ + results_dict['visualization'][0] + return results_dict + + def _init_pipeline(self, cfg: ConfigType) -> Compose: + """Initialize the test pipeline. + + Return a pipeline to handle various input data, such as ``str``, + ``np.ndarray``. It is an abstract method in BaseInferencer, and should + be implemented in subclasses. + + The returned pipeline will be used to process a single data. + It will be used in :meth:`preprocess` like this: + + .. code-block:: python + def preprocess(self, inputs, batch_size, **kwargs): + ... + dataset = map(self.pipeline, dataset) + ... + """ + pipeline_cfg = cfg.test_dataloader.dataset.pipeline + # Loading annotations is also not applicable + for transform in ('LoadAnnotations', 'LoadDepthAnnotation'): + idx = self._get_transform_idx(pipeline_cfg, transform) + if idx != -1: + del pipeline_cfg[idx] + + load_img_idx = self._get_transform_idx(pipeline_cfg, + 'LoadImageFromFile') + if load_img_idx == -1: + raise ValueError( + 'LoadImageFromFile is not found in the test pipeline') + pipeline_cfg[load_img_idx]['type'] = 'InferencerLoader' + return Compose(pipeline_cfg) + + def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int: + """Returns the index of the transform in a pipeline. + + If the transform is not found, returns -1. + """ + for i, transform in enumerate(pipeline_cfg): + if transform['type'] == name: + return i + return -1 diff --git a/mmseg/apis/remote_sense_inferencer.py b/mmseg/apis/remote_sense_inferencer.py new file mode 100644 index 0000000000000000000000000000000000000000..6726c6ae3464b3911f7e69b14a0baf35cffc66d0 --- /dev/null +++ b/mmseg/apis/remote_sense_inferencer.py @@ -0,0 +1,279 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import threading +from queue import Queue +from typing import List, Optional, Tuple + +import numpy as np +import torch +from mmengine import Config +from mmengine.model import BaseModel +from mmengine.registry import init_default_scope +from mmengine.runner import load_checkpoint + +try: + from osgeo import gdal +except ImportError: + gdal = None + +from mmseg.registry import MODELS +from .utils import _preprare_data + + +class RSImage: + """Remote sensing image class. + + Args: + img (str or gdal.Dataset): Image file path or gdal.Dataset. 
+ """ + + def __init__(self, image): + self.dataset = gdal.Open(image, gdal.GA_ReadOnly) if isinstance( + image, str) else image + assert isinstance(self.dataset, gdal.Dataset), \ + f'{image} is not a image' + self.width = self.dataset.RasterXSize + self.height = self.dataset.RasterYSize + self.channel = self.dataset.RasterCount + self.trans = self.dataset.GetGeoTransform() + self.proj = self.dataset.GetProjection() + self.band_list = [] + self.band_list.extend( + self.dataset.GetRasterBand(c + 1) for c in range(self.channel)) + self.grids = [] + + def read(self, grid: Optional[List] = None) -> np.ndarray: + """Read image data. If grid is None, read the whole image. + + Args: + grid (Optional[List], optional): Grid to read. Defaults to None. + Returns: + np.ndarray: Image data. + """ + if grid is None: + return np.einsum('ijk->jki', self.dataset.ReadAsArray()) + assert len( + grid) >= 4, 'grid must be a list containing at least 4 elements' + data = self.dataset.ReadAsArray(*grid[:4]) + if data.ndim == 2: + data = data[np.newaxis, ...] + return np.einsum('ijk->jki', data) + + def write(self, data: Optional[np.ndarray], grid: Optional[List] = None): + """Write image data. + + Args: + grid (Optional[List], optional): Grid to write. Defaults to None. + data (Optional[np.ndarray], optional): Data to write. + Defaults to None. + + Raises: + ValueError: Either grid or data must be provided. + """ + if grid is not None: + assert len(grid) == 8, 'grid must be a list of 8 elements' + for band in self.band_list: + band.WriteArray( + data[grid[5]:grid[5] + grid[7], grid[4]:grid[4] + grid[6]], + grid[0] + grid[4], grid[1] + grid[5]) + elif data is not None: + for i in range(self.channel): + self.band_list[i].WriteArray(data[..., i]) + else: + raise ValueError('Either grid or data must be provided.') + + def create_seg_map(self, output_path: Optional[str] = None): + if output_path is None: + output_path = 'output_label.tif' + driver = gdal.GetDriverByName('GTiff') + seg_map = driver.Create(output_path, self.width, self.height, 1, + gdal.GDT_Byte) + seg_map.SetGeoTransform(self.trans) + seg_map.SetProjection(self.proj) + seg_map_img = RSImage(seg_map) + seg_map_img.path = output_path + return seg_map_img + + def create_grids(self, + window_size: Tuple[int, int], + stride: Tuple[int, int] = (0, 0)): + """Create grids for image inference. + + Args: + window_size (Tuple[int, int]): the size of the sliding window. + stride (Tuple[int, int], optional): the stride of the sliding + window. Defaults to (0, 0). + + Raises: + AssertionError: window_size must be a tuple of 2 elements. + AssertionError: stride must be a tuple of 2 elements. 
+ """ + assert len( + window_size) == 2, 'window_size must be a tuple of 2 elements' + assert len(stride) == 2, 'stride must be a tuple of 2 elements' + win_w, win_h = window_size + stride_x, stride_y = stride + + stride_x = win_w if stride_x == 0 else stride_x + stride_y = win_h if stride_y == 0 else stride_y + + x_half_overlap = (win_w - stride_x + 1) // 2 + y_half_overlap = (win_h - stride_y + 1) // 2 + + for y in range(0, self.height, stride_y): + y_end = y + win_h >= self.height + y_offset = self.height - win_h if y_end else y + y_size = win_h + y_crop_off = 0 if y_offset == 0 else y_half_overlap + y_crop_size = y_size if y_end else win_h - y_crop_off + + for x in range(0, self.width, stride_x): + x_end = x + win_w >= self.width + x_offset = self.width - win_w if x_end else x + x_size = win_w + x_crop_off = 0 if x_offset == 0 else x_half_overlap + x_crop_size = x_size if x_end else win_w - x_crop_off + + self.grids.append([ + x_offset, y_offset, x_size, y_size, x_crop_off, y_crop_off, + x_crop_size, y_crop_size + ]) + + +class RSInferencer: + """Remote sensing inference class. + + Args: + model (BaseModel): The loaded model. + batch_size (int, optional): Batch size. Defaults to 1. + thread (int, optional): Number of threads. Defaults to 1. + """ + + def __init__(self, model: BaseModel, batch_size: int = 1, thread: int = 1): + self.model = model + self.batch_size = batch_size + self.END_FLAG = object() + self.read_buffer = Queue(self.batch_size) + self.write_buffer = Queue(self.batch_size) + self.thread = thread + + @classmethod + def from_config_path(cls, + config_path: str, + checkpoint_path: str, + batch_size: int = 1, + thread: int = 1, + device: Optional[str] = 'cpu'): + """Initialize a segmentor from config file. + + Args: + config_path (str): Config file path. + checkpoint_path (str): Checkpoint path. + batch_size (int, optional): Batch size. Defaults to 1. + """ + init_default_scope('mmseg') + cfg = Config.fromfile(config_path) + model = MODELS.build(cfg.model) + model.cfg = cfg + load_checkpoint(model, checkpoint_path, map_location='cpu') + model.to(device) + model.eval() + return cls(model, batch_size, thread) + + @classmethod + def from_model(cls, + model: BaseModel, + checkpoint_path: Optional[str] = None, + batch_size: int = 1, + thread: int = 1, + device: Optional[str] = 'cpu'): + """Initialize a segmentor from model. + + Args: + model (BaseModel): The loaded model. + checkpoint_path (Optional[str]): Checkpoint path. + batch_size (int, optional): Batch size. Defaults to 1. + """ + if checkpoint_path is not None: + load_checkpoint(model, checkpoint_path, map_location='cpu') + model.to(device) + return cls(model, batch_size, thread) + + def read(self, + image: RSImage, + window_size: Tuple[int, int], + strides: Tuple[int, int] = (0, 0)): + """Load image data to read buffer. + + Args: + image (RSImage): The image to read. + window_size (Tuple[int, int]): The size of the sliding window. + strides (Tuple[int, int], optional): The stride of the sliding + window. Defaults to (0, 0). 
+ """ + image.create_grids(window_size, strides) + for grid in image.grids: + self.read_buffer.put([grid, image.read(grid=grid)]) + self.read_buffer.put(self.END_FLAG) + + def inference(self): + """Inference image data from read buffer and put the result to write + buffer.""" + while True: + item = self.read_buffer.get() + if item == self.END_FLAG: + self.read_buffer.put(self.END_FLAG) + self.write_buffer.put(item) + break + data, _ = _preprare_data(item[1], self.model) + with torch.no_grad(): + result = self.model.test_step(data) + item[1] = result[0].pred_sem_seg.cpu().data.numpy()[0] + self.write_buffer.put(item) + self.read_buffer.task_done() + + def write(self, image: RSImage, output_path: Optional[str] = None): + """Write image data from write buffer. + + Args: + image (RSImage): The image to write. + output_path (Optional[str], optional): The path to save the + segmentation map. Defaults to None. + """ + seg_map = image.create_seg_map(output_path) + while True: + item = self.write_buffer.get() + if item == self.END_FLAG: + break + seg_map.write(data=item[1], grid=item[0]) + self.write_buffer.task_done() + + def run(self, + image: RSImage, + window_size: Tuple[int, int], + strides: Tuple[int, int] = (0, 0), + output_path: Optional[str] = None): + """Run inference with multi-threading. + + Args: + image (RSImage): The image to inference. + window_size (Tuple[int, int]): The size of the sliding window. + strides (Tuple[int, int], optional): The stride of the sliding + window. Defaults to (0, 0). + output_path (Optional[str], optional): The path to save the + segmentation map. Defaults to None. + """ + read_thread = threading.Thread( + target=self.read, args=(image, window_size, strides)) + read_thread.start() + inference_threads = [] + for _ in range(self.thread): + inference_thread = threading.Thread(target=self.inference) + inference_thread.start() + inference_threads.append(inference_thread) + write_thread = threading.Thread( + target=self.write, args=(image, output_path)) + write_thread.start() + read_thread.join() + for inference_thread in inference_threads: + inference_thread.join() + write_thread.join() diff --git a/mmseg/apis/utils.py b/mmseg/apis/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4cf877566028dbb2b966c2888b1ebd1a5f57c330 --- /dev/null +++ b/mmseg/apis/utils.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from collections import defaultdict +from typing import Sequence, Union + +import numpy as np +from mmengine.dataset import Compose +from mmengine.model import BaseModel + +ImageType = Union[str, np.ndarray, Sequence[str], Sequence[np.ndarray]] + + +def _preprare_data(imgs: ImageType, model: BaseModel): + + cfg = model.cfg + for t in cfg.test_pipeline: + if t.get('type') == 'LoadAnnotations': + cfg.test_pipeline.remove(t) + + is_batch = True + if not isinstance(imgs, (list, tuple)): + imgs = [imgs] + is_batch = False + + if isinstance(imgs[0], np.ndarray): + cfg.test_pipeline[0]['type'] = 'LoadImageFromNDArray' + + # TODO: Consider using the singleton pattern to avoid building + # a pipeline for each inference + pipeline = Compose(cfg.test_pipeline) + + data = defaultdict(list) + for img in imgs: + if isinstance(img, np.ndarray): + data_ = dict(img=img) + else: + data_ = dict(img_path=img) + data_ = pipeline(data_) + data['inputs'].append(data_['inputs']) + data['data_samples'].append(data_['data_samples']) + + return data, is_batch diff --git a/mmseg/datasets/__init__.py b/mmseg/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a2bdb63d016664bf76c93e2c3ee6f5386905064c --- /dev/null +++ b/mmseg/datasets/__init__.py @@ -0,0 +1,64 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# yapf: disable +from .ade import ADE20KDataset +from .basesegdataset import BaseCDDataset, BaseSegDataset +from .bdd100k import BDD100KDataset +from .chase_db1 import ChaseDB1Dataset +from .cityscapes import CityscapesDataset +from .coco_stuff import COCOStuffDataset +from .dark_zurich import DarkZurichDataset +from .dataset_wrappers import MultiImageMixDataset +from .decathlon import DecathlonDataset +from .drive import DRIVEDataset +from .dsdl import DSDLSegDataset +from .hrf import HRFDataset +from .isaid import iSAIDDataset +from .isprs import ISPRSDataset +from .levir import LEVIRCDDataset +from .lip import LIPDataset +from .loveda import LoveDADataset +from .mapillary import MapillaryDataset_v1, MapillaryDataset_v2 +from .night_driving import NightDrivingDataset +from .nyu import NYUDataset +from .pascal_context import PascalContextDataset, PascalContextDataset59 +from .potsdam import PotsdamDataset +from .refuge import REFUGEDataset +from .stare import STAREDataset +from .synapse import SynapseDataset +# yapf: disable +from .transforms import (CLAHE, AdjustGamma, Albu, BioMedical3DPad, + BioMedical3DRandomCrop, BioMedical3DRandomFlip, + BioMedicalGaussianBlur, BioMedicalGaussianNoise, + BioMedicalRandomGamma, ConcatCDInput, GenerateEdge, + LoadAnnotations, LoadBiomedicalAnnotation, + LoadBiomedicalData, LoadBiomedicalImageFromFile, + LoadImageFromNDArray, LoadMultipleRSImageFromFile, + LoadSingleRSImageFromFile, PackSegInputs, + PhotoMetricDistortion, RandomCrop, RandomCutOut, + RandomMosaic, RandomRotate, RandomRotFlip, Rerange, + ResizeShortestEdge, ResizeToMultiple, RGB2Gray, + SegRescale) +from .voc import PascalVOCDataset + +# yapf: enable +__all__ = [ + 'BaseSegDataset', 'BioMedical3DRandomCrop', 'BioMedical3DRandomFlip', + 'CityscapesDataset', 'PascalVOCDataset', 'ADE20KDataset', + 'PascalContextDataset', 'PascalContextDataset59', 'ChaseDB1Dataset', + 'DRIVEDataset', 'HRFDataset', 'STAREDataset', 'DarkZurichDataset', + 'NightDrivingDataset', 'COCOStuffDataset', 'LoveDADataset', + 'MultiImageMixDataset', 'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset', + 'LoadAnnotations', 'RandomCrop', 'SegRescale', 'PhotoMetricDistortion', + 'RandomRotate', 'AdjustGamma', 
'CLAHE', 'Rerange', 'RGB2Gray', + 'RandomCutOut', 'RandomMosaic', 'PackSegInputs', 'ResizeToMultiple', + 'LoadImageFromNDArray', 'LoadBiomedicalImageFromFile', + 'LoadBiomedicalAnnotation', 'LoadBiomedicalData', 'GenerateEdge', + 'DecathlonDataset', 'LIPDataset', 'ResizeShortestEdge', + 'BioMedicalGaussianNoise', 'BioMedicalGaussianBlur', + 'BioMedicalRandomGamma', 'BioMedical3DPad', 'RandomRotFlip', + 'SynapseDataset', 'REFUGEDataset', 'MapillaryDataset_v1', + 'MapillaryDataset_v2', 'Albu', 'LEVIRCDDataset', + 'LoadMultipleRSImageFromFile', 'LoadSingleRSImageFromFile', + 'ConcatCDInput', 'BaseCDDataset', 'DSDLSegDataset', 'BDD100KDataset', + 'NYUDataset' +] diff --git a/mmseg/datasets/__pycache__/__init__.cpython-39.pyc b/mmseg/datasets/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1069ba8c876995301c246afbedb7ee9536e6890e Binary files /dev/null and b/mmseg/datasets/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/ade.cpython-39.pyc b/mmseg/datasets/__pycache__/ade.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..850d78c3bd00752bd9769a330540350ef0222b66 Binary files /dev/null and b/mmseg/datasets/__pycache__/ade.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/basesegdataset.cpython-39.pyc b/mmseg/datasets/__pycache__/basesegdataset.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fb124f1b88dbd5cde7dbf7b7c0690e8b1f1d403 Binary files /dev/null and b/mmseg/datasets/__pycache__/basesegdataset.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/bdd100k.cpython-39.pyc b/mmseg/datasets/__pycache__/bdd100k.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d926ce41f49ba2bf69bc81d6756e9aeeb279e6f2 Binary files /dev/null and b/mmseg/datasets/__pycache__/bdd100k.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/chase_db1.cpython-39.pyc b/mmseg/datasets/__pycache__/chase_db1.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5820a4c0d15211f5e61fe86d6cea1ef87047cd1b Binary files /dev/null and b/mmseg/datasets/__pycache__/chase_db1.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/cityscapes.cpython-39.pyc b/mmseg/datasets/__pycache__/cityscapes.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..696ce4048986d8546ba1045d997170be42f6c19a Binary files /dev/null and b/mmseg/datasets/__pycache__/cityscapes.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/coco_stuff.cpython-39.pyc b/mmseg/datasets/__pycache__/coco_stuff.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7afae4cc5e3225c9d6da3ba30fdb011850b85976 Binary files /dev/null and b/mmseg/datasets/__pycache__/coco_stuff.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/dark_zurich.cpython-39.pyc b/mmseg/datasets/__pycache__/dark_zurich.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60d20047232b5b43e1d789ddd06178c6e82d537e Binary files /dev/null and b/mmseg/datasets/__pycache__/dark_zurich.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/dataset_wrappers.cpython-39.pyc b/mmseg/datasets/__pycache__/dataset_wrappers.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00c6b6255d95c81401508cd1b82b9f0d233ec0ae Binary files /dev/null and b/mmseg/datasets/__pycache__/dataset_wrappers.cpython-39.pyc differ diff --git 
a/mmseg/datasets/__pycache__/decathlon.cpython-39.pyc b/mmseg/datasets/__pycache__/decathlon.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..474ce530c24a8cd9bac6e001a1060c5faba78233 Binary files /dev/null and b/mmseg/datasets/__pycache__/decathlon.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/drive.cpython-39.pyc b/mmseg/datasets/__pycache__/drive.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..38c7dbe32b9797a58a817a3e81d41eaca7d7e9df Binary files /dev/null and b/mmseg/datasets/__pycache__/drive.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/dsdl.cpython-39.pyc b/mmseg/datasets/__pycache__/dsdl.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e89784c5c9920e56dd8007a9643559d923f1e590 Binary files /dev/null and b/mmseg/datasets/__pycache__/dsdl.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/hrf.cpython-39.pyc b/mmseg/datasets/__pycache__/hrf.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fad5656a2c2142c84e72ab7b70abd7e753d21116 Binary files /dev/null and b/mmseg/datasets/__pycache__/hrf.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/isaid.cpython-39.pyc b/mmseg/datasets/__pycache__/isaid.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4e209a996640c920224818bf1ca2c860be0cb79 Binary files /dev/null and b/mmseg/datasets/__pycache__/isaid.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/isprs.cpython-39.pyc b/mmseg/datasets/__pycache__/isprs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ba59b1f8a7fbde5961483a531707a75e3f29e55 Binary files /dev/null and b/mmseg/datasets/__pycache__/isprs.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/levir.cpython-39.pyc b/mmseg/datasets/__pycache__/levir.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a9bc786e75b0bdc2f1633e5cef310c5684ff4b1 Binary files /dev/null and b/mmseg/datasets/__pycache__/levir.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/lip.cpython-39.pyc b/mmseg/datasets/__pycache__/lip.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa846d7313ba2f486289bc5ea4a73e29803d579d Binary files /dev/null and b/mmseg/datasets/__pycache__/lip.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/loveda.cpython-39.pyc b/mmseg/datasets/__pycache__/loveda.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa5c1373360d13b382c5cb6e8ec70750f01eccd5 Binary files /dev/null and b/mmseg/datasets/__pycache__/loveda.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/mapillary.cpython-39.pyc b/mmseg/datasets/__pycache__/mapillary.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2aeecb479b9f4abca5fcf28824bfb0c93664e17b Binary files /dev/null and b/mmseg/datasets/__pycache__/mapillary.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/night_driving.cpython-39.pyc b/mmseg/datasets/__pycache__/night_driving.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6620861b8b47103de07bc10827eff9681a9c1b4 Binary files /dev/null and b/mmseg/datasets/__pycache__/night_driving.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/nyu.cpython-39.pyc b/mmseg/datasets/__pycache__/nyu.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..91f37c9788672812c7229d8f59fdf313d7e8ea8b Binary files /dev/null and b/mmseg/datasets/__pycache__/nyu.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/pascal_context.cpython-39.pyc b/mmseg/datasets/__pycache__/pascal_context.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cdbb14eea3618be6bdcb975fa4fb059352c0ad1 Binary files /dev/null and b/mmseg/datasets/__pycache__/pascal_context.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/potsdam.cpython-39.pyc b/mmseg/datasets/__pycache__/potsdam.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1dd4d0dc90bd1c7a5274b756677e474d97cd1139 Binary files /dev/null and b/mmseg/datasets/__pycache__/potsdam.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/refuge.cpython-39.pyc b/mmseg/datasets/__pycache__/refuge.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f9c0d7e5a9d533e081151ae62bf81200681ac8d Binary files /dev/null and b/mmseg/datasets/__pycache__/refuge.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/stare.cpython-39.pyc b/mmseg/datasets/__pycache__/stare.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..565804d4db168d46d5892f96506eea2c3ca8c69b Binary files /dev/null and b/mmseg/datasets/__pycache__/stare.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/synapse.cpython-39.pyc b/mmseg/datasets/__pycache__/synapse.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e50db3b7c06613605b8988e5701dd226d90e8ba6 Binary files /dev/null and b/mmseg/datasets/__pycache__/synapse.cpython-39.pyc differ diff --git a/mmseg/datasets/__pycache__/voc.cpython-39.pyc b/mmseg/datasets/__pycache__/voc.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54ff1ca3d2b12c049e6912387347e2845a678ae0 Binary files /dev/null and b/mmseg/datasets/__pycache__/voc.cpython-39.pyc differ diff --git a/mmseg/datasets/ade.py b/mmseg/datasets/ade.py new file mode 100644 index 0000000000000000000000000000000000000000..e9bdae7421205f25d39441381d6492e9208a4714 --- /dev/null +++ b/mmseg/datasets/ade.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class ADE20KDataset(BaseSegDataset): + """ADE20K dataset. + + In segmentation map annotation for ADE20K, 0 stands for background, which + is not included in 150 categories. ``reduce_zero_label`` is fixed to True. + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to + '.png'. 
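Given the fixed suffixes and `reduce_zero_label=True` described above, instantiating the dataset is a matter of pointing at the standard ADE20K layout; a sketch with a hypothetical `data_root` (`lazy_init=True` skips scanning the directories, so it runs even without the data present):

```python
# Sketch: ADE20K dataset instantiation; data_root is hypothetical.
from mmseg.datasets import ADE20KDataset

dataset = ADE20KDataset(
    data_root='data/ade/ADEChallengeData2016',
    data_prefix=dict(img_path='images/training',
                     seg_map_path='annotations/training'),
    pipeline=[],
    lazy_init=True)
# reduce_zero_label=True: pixel value 0 (background) becomes 255 (ignored)
# and the 150 real categories shift down by one.
print(len(dataset.metainfo['classes']))  # 150
```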
+ """ + METAINFO = dict( + classes=('wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', + 'bed ', 'windowpane', 'grass', 'cabinet', 'sidewalk', + 'person', 'earth', 'door', 'table', 'mountain', 'plant', + 'curtain', 'chair', 'car', 'water', 'painting', 'sofa', + 'shelf', 'house', 'sea', 'mirror', 'rug', 'field', 'armchair', + 'seat', 'fence', 'desk', 'rock', 'wardrobe', 'lamp', + 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', + 'path', 'stairs', 'runway', 'case', 'pool table', 'pillow', + 'screen door', 'stairway', 'river', 'bridge', 'bookcase', + 'blind', 'coffee table', 'toilet', 'flower', 'book', 'hill', + 'bench', 'countertop', 'stove', 'palm', 'kitchen island', + 'computer', 'swivel chair', 'boat', 'bar', 'arcade machine', + 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', + 'television receiver', 'airplane', 'dirt track', 'apparel', + 'pole', 'land', 'bannister', 'escalator', 'ottoman', 'bottle', + 'buffet', 'poster', 'stage', 'van', 'ship', 'fountain', + 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', + 'tent', 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', + 'step', 'tank', 'trade name', 'microwave', 'pot', 'animal', + 'bicycle', 'lake', 'dishwasher', 'screen', 'blanket', + 'sculpture', 'hood', 'sconce', 'vase', 'traffic light', + 'tray', 'ashcan', 'fan', 'pier', 'crt screen', 'plate', + 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag'), + palette=[[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], + [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], + [255, 204, 0], [255, 0, 143], [0, 255, 
235], [133, 255, 0], + [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], + [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], + [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], + [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], + [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], + [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], + [102, 255, 0], [92, 0, 255]]) + + def __init__(self, + img_suffix='.jpg', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + reduce_zero_label=reduce_zero_label, + **kwargs) diff --git a/mmseg/datasets/basesegdataset.py b/mmseg/datasets/basesegdataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9c4668c1f561961fb27642fb7c1ac702f626cbb7 --- /dev/null +++ b/mmseg/datasets/basesegdataset.py @@ -0,0 +1,552 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import os.path as osp +from typing import Callable, Dict, List, Optional, Sequence, Union + +import mmengine +import mmengine.fileio as fileio +import numpy as np +from mmengine.dataset import BaseDataset, Compose + +from mmseg.registry import DATASETS + + +@DATASETS.register_module() +class BaseSegDataset(BaseDataset): + """Custom dataset for semantic segmentation. An example of file structure + is as followed. + + .. code-block:: none + + ├── data + │ ├── my_dataset + │ │ ├── img_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{img_suffix} + │ │ │ │ ├── yyy{img_suffix} + │ │ │ │ ├── zzz{img_suffix} + │ │ │ ├── val + │ │ ├── ann_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{seg_map_suffix} + │ │ │ │ ├── yyy{seg_map_suffix} + │ │ │ │ ├── zzz{seg_map_suffix} + │ │ │ ├── val + + The img/gt_semantic_seg pair of BaseSegDataset should be of the same + except suffix. A valid img/gt_semantic_seg filename pair should be like + ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included + in the suffix). If split is given, then ``xxx`` is specified in txt file. + Otherwise, all files in ``img_dir/``and ``ann_dir`` will be loaded. + Please refer to ``docs/en/tutorials/new_dataset.md`` for more details. + + + Args: + ann_file (str): Annotation file path. Defaults to ''. + metainfo (dict, optional): Meta information for dataset, such as + specify classes to load. Defaults to None. + data_root (str, optional): The root directory for ``data_prefix`` and + ``ann_file``. Defaults to None. + data_prefix (dict, optional): Prefix for training data. Defaults to + dict(img_path=None, seg_map_path=None). + img_suffix (str): Suffix of images. Default: '.jpg' + seg_map_suffix (str): Suffix of segmentation maps. Default: '.png' + filter_cfg (dict, optional): Config for filter data. Defaults to None. + indices (int or Sequence[int], optional): Support using first few + data in annotation file to facilitate training/testing on a smaller + dataset. Defaults to None which means using all ``data_infos``. + serialize_data (bool, optional): Whether to hold memory using + serialized objects, when enabled, data loader workers can use + shared RAM from master process instead of making a copy. Defaults + to True. + pipeline (list, optional): Processing pipeline. Defaults to []. + test_mode (bool, optional): ``test_mode=True`` means in test phase. + Defaults to False. + lazy_init (bool, optional): Whether to load annotation during + instantiation. 
In some cases, such as visualization, only the meta
+            information of the dataset is needed, so it is unnecessary to load
+            the annotation file. ``BaseDataset`` can skip loading annotations
+            to save time by setting ``lazy_init=True``. Defaults to False.
+        max_refetch (int, optional): The maximum number of extra cycles to
+            fetch a valid image when ``BaseDataset.prepare_data`` gets a
+            ``None`` image. Defaults to 1000.
+        ignore_index (int): The label index to be ignored. Default: 255
+        reduce_zero_label (bool): Whether to mark label zero as ignored.
+            Defaults to False.
+        backend_args (dict, Optional): Arguments to instantiate a file backend.
+            See https://mmengine.readthedocs.io/en/latest/api/fileio.htm
+            for details. Defaults to None.
+        Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required.
+    """
+    METAINFO: dict = dict()
+
+    def __init__(self,
+                 ann_file: str = '',
+                 img_suffix='.jpg',
+                 seg_map_suffix='.png',
+                 metainfo: Optional[dict] = None,
+                 data_root: Optional[str] = None,
+                 data_prefix: dict = dict(img_path='', seg_map_path=''),
+                 filter_cfg: Optional[dict] = None,
+                 indices: Optional[Union[int, Sequence[int]]] = None,
+                 serialize_data: bool = True,
+                 pipeline: List[Union[dict, Callable]] = [],
+                 test_mode: bool = False,
+                 lazy_init: bool = False,
+                 max_refetch: int = 1000,
+                 ignore_index: int = 255,
+                 reduce_zero_label: bool = False,
+                 backend_args: Optional[dict] = None) -> None:
+
+        self.img_suffix = img_suffix
+        self.seg_map_suffix = seg_map_suffix
+        self.ignore_index = ignore_index
+        self.reduce_zero_label = reduce_zero_label
+        self.backend_args = backend_args.copy() if backend_args else None
+
+        self.data_root = data_root
+        self.data_prefix = copy.copy(data_prefix)
+        self.ann_file = ann_file
+        self.filter_cfg = copy.deepcopy(filter_cfg)
+        self._indices = indices
+        self.serialize_data = serialize_data
+        self.test_mode = test_mode
+        self.max_refetch = max_refetch
+        self.data_list: List[dict] = []
+        self.data_bytes: np.ndarray
+
+        # Set meta information.
+        self._metainfo = self._load_metainfo(copy.deepcopy(metainfo))
+
+        # Get label map for custom classes
+        new_classes = self._metainfo.get('classes', None)
+        self.label_map = self.get_label_map(new_classes)
+        self._metainfo.update(
+            dict(
+                label_map=self.label_map,
+                reduce_zero_label=self.reduce_zero_label))
+
+        # Update palette based on label map or generate palette
+        # if it is not defined
+        updated_palette = self._update_palette()
+        self._metainfo.update(dict(palette=updated_palette))
+
+        # Join paths.
+        if self.data_root is not None:
+            self._join_prefix()
+
+        # Build pipeline.
+        self.pipeline = Compose(pipeline)
+        # Full initialize the dataset.
+        if not lazy_init:
+            self.full_init()
+
+        if test_mode:
+            assert self._metainfo.get('classes') is not None, \
+                'dataset metainfo `classes` should be specified when testing'
+
+    @classmethod
+    def get_label_map(cls,
+                      new_classes: Optional[Sequence] = None
+                      ) -> Union[Dict, None]:
+        """Require label mapping.
+
+        The ``label_map`` is a dictionary whose keys are the old label ids and
+        whose values are the new label ids; it is used for changing pixel
+        labels in load_annotations. ``label_map`` is not None if and only if
+        the old classes in cls.METAINFO differ from the new classes in
+        self._metainfo and neither of them is None.
+
+        Args:
+            new_classes (list, tuple, optional): The new class names from
+                metainfo. Defaults to None.
+ + + Returns: + dict, optional: The mapping from old classes in cls.METAINFO to + new classes in self._metainfo + """ + old_classes = cls.METAINFO.get('classes', None) + if (new_classes is not None and old_classes is not None + and list(new_classes) != list(old_classes)): + + label_map = {} + if not set(new_classes).issubset(cls.METAINFO['classes']): + raise ValueError( + f'new classes {new_classes} is not a ' + f'subset of classes {old_classes} in METAINFO.') + for i, c in enumerate(old_classes): + if c not in new_classes: + label_map[i] = 255 + else: + label_map[i] = new_classes.index(c) + return label_map + else: + return None + + def _update_palette(self) -> list: + """Update palette after loading metainfo. + + If length of palette is equal to classes, just return the palette. + If palette is not defined, it will randomly generate a palette. + If classes is updated by customer, it will return the subset of + palette. + + Returns: + Sequence: Palette for current dataset. + """ + palette = self._metainfo.get('palette', []) + classes = self._metainfo.get('classes', []) + # palette does match classes + if len(palette) == len(classes): + return palette + + if len(palette) == 0: + # Get random state before set seed, and restore + # random state later. + # It will prevent loss of randomness, as the palette + # may be different in each iteration if not specified. + # See: https://github.com/open-mmlab/mmdetection/issues/5844 + state = np.random.get_state() + np.random.seed(42) + # random palette + new_palette = np.random.randint( + 0, 255, size=(len(classes), 3)).tolist() + np.random.set_state(state) + elif len(palette) >= len(classes) and self.label_map is not None: + new_palette = [] + # return subset of palette + for old_id, new_id in sorted( + self.label_map.items(), key=lambda x: x[1]): + if new_id != 255: + new_palette.append(palette[old_id]) + new_palette = type(palette)(new_palette) + else: + raise ValueError('palette does not match classes ' + f'as metainfo is {self._metainfo}.') + return new_palette + + def load_data_list(self) -> List[dict]: + """Load annotation from directory or annotation file. + + Returns: + list[dict]: All data info of dataset. 
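The label-mapping rule that ``get_label_map`` and ``_update_palette`` implement above is easiest to see with concrete values. Below is a minimal, self-contained sketch that mirrors (rather than imports) that logic; the class names are illustrative, not from any shipped ``METAINFO``.

```python
# Standalone sketch of the mapping rule in `get_label_map` above: classes
# dropped from `new_classes` are sent to 255 (the ignore index); kept
# classes are renumbered by their position in `new_classes`.
from typing import Dict, Optional, Sequence


def build_label_map(old_classes: Sequence[str],
                    new_classes: Optional[Sequence[str]] = None
                    ) -> Optional[Dict[int, int]]:
    if new_classes is None or list(new_classes) == list(old_classes):
        return None  # no remapping needed
    if not set(new_classes).issubset(old_classes):
        raise ValueError(f'{new_classes} is not a subset of {old_classes}')
    return {
        i: new_classes.index(c) if c in new_classes else 255
        for i, c in enumerate(old_classes)
    }


# Illustrative three-class dataset reduced to two classes:
print(build_label_map(('background', 'vessel', 'lesion'),
                      ('vessel', 'lesion')))
# {0: 255, 1: 0, 2: 1} -> old 'background' pixels become ignore (255)
```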
+ """ + data_list = [] + img_dir = self.data_prefix.get('img_path', None) + ann_dir = self.data_prefix.get('seg_map_path', None) + if not osp.isdir(self.ann_file) and self.ann_file: + assert osp.isfile(self.ann_file), \ + f'Failed to load `ann_file` {self.ann_file}' + lines = mmengine.list_from_file( + self.ann_file, backend_args=self.backend_args) + for line in lines: + img_name = line.strip() + data_info = dict( + img_path=osp.join(img_dir, img_name + self.img_suffix)) + if ann_dir is not None: + seg_map = img_name + self.seg_map_suffix + data_info['seg_map_path'] = osp.join(ann_dir, seg_map) + data_info['label_map'] = self.label_map + data_info['reduce_zero_label'] = self.reduce_zero_label + data_info['seg_fields'] = [] + data_list.append(data_info) + else: + _suffix_len = len(self.img_suffix) + for img in fileio.list_dir_or_file( + dir_path=img_dir, + list_dir=False, + suffix=self.img_suffix, + recursive=True, + backend_args=self.backend_args): + data_info = dict(img_path=osp.join(img_dir, img)) + if ann_dir is not None: + seg_map = img[:-_suffix_len] + self.seg_map_suffix + data_info['seg_map_path'] = osp.join(ann_dir, seg_map) + data_info['label_map'] = self.label_map + data_info['reduce_zero_label'] = self.reduce_zero_label + data_info['seg_fields'] = [] + data_list.append(data_info) + data_list = sorted(data_list, key=lambda x: x['img_path']) + return data_list + + +@DATASETS.register_module() +class BaseCDDataset(BaseDataset): + """Custom dataset for change detection. An example of file structure is as + followed. + + .. code-block:: none + + ├── data + │ ├── my_dataset + │ │ ├── img_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{img_suffix} + │ │ │ │ ├── yyy{img_suffix} + │ │ │ │ ├── zzz{img_suffix} + │ │ │ ├── val + │ │ ├── img_dir2 + │ │ │ ├── train + │ │ │ │ ├── xxx{img_suffix} + │ │ │ │ ├── yyy{img_suffix} + │ │ │ │ ├── zzz{img_suffix} + │ │ │ ├── val + │ │ ├── ann_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{seg_map_suffix} + │ │ │ │ ├── yyy{seg_map_suffix} + │ │ │ │ ├── zzz{seg_map_suffix} + │ │ │ ├── val + + The image names in img_dir and img_dir2 should be consistent. + The img/gt_semantic_seg pair of BaseSegDataset should be of the same + except suffix. A valid img/gt_semantic_seg filename pair should be like + ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included + in the suffix). If split is given, then ``xxx`` is specified in txt file. + Otherwise, all files in ``img_dir/``and ``ann_dir`` will be loaded. + Please refer to ``docs/en/tutorials/new_dataset.md`` for more details. + + + Args: + ann_file (str): Annotation file path. Defaults to ''. + metainfo (dict, optional): Meta information for dataset, such as + specify classes to load. Defaults to None. + data_root (str, optional): The root directory for ``data_prefix`` and + ``ann_file``. Defaults to None. + data_prefix (dict, optional): Prefix for training data. Defaults to + dict(img_path=None, img_path2=None, seg_map_path=None). + img_suffix (str): Suffix of images. Default: '.jpg' + img_suffix2 (str): Suffix of images. Default: '.jpg' + seg_map_suffix (str): Suffix of segmentation maps. Default: '.png' + filter_cfg (dict, optional): Config for filter data. Defaults to None. + indices (int or Sequence[int], optional): Support using first few + data in annotation file to facilitate training/testing on a smaller + dataset. Defaults to None which means using all ``data_infos``. 
+ serialize_data (bool, optional): Whether to hold memory using + serialized objects, when enabled, data loader workers can use + shared RAM from master process instead of making a copy. Defaults + to True. + pipeline (list, optional): Processing pipeline. Defaults to []. + test_mode (bool, optional): ``test_mode=True`` means in test phase. + Defaults to False. + lazy_init (bool, optional): Whether to load annotation during + instantiation. In some cases, such as visualization, only the meta + information of the dataset is needed, which is not necessary to + load annotation file. ``Basedataset`` can skip load annotations to + save time by set ``lazy_init=True``. Defaults to False. + max_refetch (int, optional): If ``Basedataset.prepare_data`` get a + None img. The maximum extra number of cycles to get a valid + image. Defaults to 1000. + ignore_index (int): The label index to be ignored. Default: 255 + reduce_zero_label (bool): Whether to mark label zero as ignored. + Default to False. + backend_args (dict, Optional): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to None. + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. + """ + METAINFO: dict = dict() + + def __init__(self, + ann_file: str = '', + img_suffix='.jpg', + img_suffix2='.jpg', + seg_map_suffix='.png', + metainfo: Optional[dict] = None, + data_root: Optional[str] = None, + data_prefix: dict = dict( + img_path='', img_path2='', seg_map_path=''), + filter_cfg: Optional[dict] = None, + indices: Optional[Union[int, Sequence[int]]] = None, + serialize_data: bool = True, + pipeline: List[Union[dict, Callable]] = [], + test_mode: bool = False, + lazy_init: bool = False, + max_refetch: int = 1000, + ignore_index: int = 255, + reduce_zero_label: bool = False, + backend_args: Optional[dict] = None) -> None: + + self.img_suffix = img_suffix + self.img_suffix2 = img_suffix2 + self.seg_map_suffix = seg_map_suffix + self.ignore_index = ignore_index + self.reduce_zero_label = reduce_zero_label + self.backend_args = backend_args.copy() if backend_args else None + + self.data_root = data_root + self.data_prefix = copy.copy(data_prefix) + self.ann_file = ann_file + self.filter_cfg = copy.deepcopy(filter_cfg) + self._indices = indices + self.serialize_data = serialize_data + self.test_mode = test_mode + self.max_refetch = max_refetch + self.data_list: List[dict] = [] + self.data_bytes: np.ndarray + + # Set meta information. + self._metainfo = self._load_metainfo(copy.deepcopy(metainfo)) + + # Get label map for custom classes + new_classes = self._metainfo.get('classes', None) + self.label_map = self.get_label_map(new_classes) + self._metainfo.update( + dict( + label_map=self.label_map, + reduce_zero_label=self.reduce_zero_label)) + + # Update palette based on label map or generate palette + # if it is not defined + updated_palette = self._update_palette() + self._metainfo.update(dict(palette=updated_palette)) + + # Join paths. + if self.data_root is not None: + self._join_prefix() + + # Build pipeline. + self.pipeline = Compose(pipeline) + # Full initialize the dataset. + if not lazy_init: + self.full_init() + + if test_mode: + assert self._metainfo.get('classes') is not None, \ + 'dataset metainfo `classes` should be specified when testing' + + @classmethod + def get_label_map(cls, + new_classes: Optional[Sequence] = None + ) -> Union[Dict, None]: + """Require label mapping. 
+ + The ``label_map`` is a dictionary, its keys are the old label ids and + its values are the new label ids, and is used for changing pixel + labels in load_annotations. If and only if old classes in cls.METAINFO + is not equal to new classes in self._metainfo and nether of them is not + None, `label_map` is not None. + + Args: + new_classes (list, tuple, optional): The new classes name from + metainfo. Default to None. + + + Returns: + dict, optional: The mapping from old classes in cls.METAINFO to + new classes in self._metainfo + """ + old_classes = cls.METAINFO.get('classes', None) + if (new_classes is not None and old_classes is not None + and list(new_classes) != list(old_classes)): + + label_map = {} + if not set(new_classes).issubset(cls.METAINFO['classes']): + raise ValueError( + f'new classes {new_classes} is not a ' + f'subset of classes {old_classes} in METAINFO.') + for i, c in enumerate(old_classes): + if c not in new_classes: + label_map[i] = 255 + else: + label_map[i] = new_classes.index(c) + return label_map + else: + return None + + def _update_palette(self) -> list: + """Update palette after loading metainfo. + + If length of palette is equal to classes, just return the palette. + If palette is not defined, it will randomly generate a palette. + If classes is updated by customer, it will return the subset of + palette. + + Returns: + Sequence: Palette for current dataset. + """ + palette = self._metainfo.get('palette', []) + classes = self._metainfo.get('classes', []) + # palette does match classes + if len(palette) == len(classes): + return palette + + if len(palette) == 0: + # Get random state before set seed, and restore + # random state later. + # It will prevent loss of randomness, as the palette + # may be different in each iteration if not specified. + # See: https://github.com/open-mmlab/mmdetection/issues/5844 + state = np.random.get_state() + np.random.seed(42) + # random palette + new_palette = np.random.randint( + 0, 255, size=(len(classes), 3)).tolist() + np.random.set_state(state) + elif len(palette) >= len(classes) and self.label_map is not None: + new_palette = [] + # return subset of palette + for old_id, new_id in sorted( + self.label_map.items(), key=lambda x: x[1]): + if new_id != 255: + new_palette.append(palette[old_id]) + new_palette = type(palette)(new_palette) + else: + raise ValueError('palette does not match classes ' + f'as metainfo is {self._metainfo}.') + return new_palette + + def load_data_list(self) -> List[dict]: + """Load annotation from directory or annotation file. + + Returns: + list[dict]: All data info of dataset. + """ + data_list = [] + img_dir = self.data_prefix.get('img_path', None) + img_dir2 = self.data_prefix.get('img_path2', None) + ann_dir = self.data_prefix.get('seg_map_path', None) + if osp.isfile(self.ann_file): + lines = mmengine.list_from_file( + self.ann_file, backend_args=self.backend_args) + for line in lines: + img_name = line.strip() + if '.' 
in osp.basename(img_name): + img_name, img_ext = osp.splitext(img_name) + self.img_suffix = img_ext + self.img_suffix2 = img_ext + data_info = dict( + img_path=osp.join(img_dir, img_name + self.img_suffix), + img_path2=osp.join(img_dir2, img_name + self.img_suffix2)) + + if ann_dir is not None: + seg_map = img_name + self.seg_map_suffix + data_info['seg_map_path'] = osp.join(ann_dir, seg_map) + data_info['label_map'] = self.label_map + data_info['reduce_zero_label'] = self.reduce_zero_label + data_info['seg_fields'] = [] + data_list.append(data_info) + else: + for img in fileio.list_dir_or_file( + dir_path=img_dir, + list_dir=False, + suffix=self.img_suffix, + recursive=True, + backend_args=self.backend_args): + if '.' in osp.basename(img): + img, img_ext = osp.splitext(img) + self.img_suffix = img_ext + self.img_suffix2 = img_ext + data_info = dict( + img_path=osp.join(img_dir, img + self.img_suffix), + img_path2=osp.join(img_dir2, img + self.img_suffix2)) + if ann_dir is not None: + seg_map = img + self.seg_map_suffix + data_info['seg_map_path'] = osp.join(ann_dir, seg_map) + data_info['label_map'] = self.label_map + data_info['reduce_zero_label'] = self.reduce_zero_label + data_info['seg_fields'] = [] + data_list.append(data_info) + data_list = sorted(data_list, key=lambda x: x['img_path']) + return data_list diff --git a/mmseg/datasets/bdd100k.py b/mmseg/datasets/bdd100k.py new file mode 100644 index 0000000000000000000000000000000000000000..8ae70b5cb29f2b34c5804129c85622bfcca6767d --- /dev/null +++ b/mmseg/datasets/bdd100k.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from mmseg.datasets.basesegdataset import BaseSegDataset +from mmseg.registry import DATASETS + + +@DATASETS.register_module() +class BDD100KDataset(BaseSegDataset): + METAINFO = dict( + classes=('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', + 'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train', + 'motorcycle', 'bicycle'), + palette=[[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, + 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], + [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], + [0, 60, 100], [0, 80, 100], [0, 0, 230], [119, 11, 32]]) + + def __init__(self, + img_suffix='.jpg', + seg_map_suffix='.png', + reduce_zero_label=False, + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + reduce_zero_label=reduce_zero_label, + **kwargs) diff --git a/mmseg/datasets/chase_db1.py b/mmseg/datasets/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..626ddf75e9a2a10a09ca1f298f12f4290268d504 --- /dev/null +++ b/mmseg/datasets/chase_db1.py @@ -0,0 +1,32 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import mmengine.fileio as fileio + +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class ChaseDB1Dataset(BaseSegDataset): + """Chase_db1 dataset. + + In segmentation map annotation for Chase_db1, 0 stands for background, + which is included in 2 categories. ``reduce_zero_label`` is fixed to False. + The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_1stHO.png'. 
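Throughout these dataset classes, annotation paths are derived from image paths purely by swapping suffixes, exactly as in the directory-scanning branch of ``load_data_list`` above. A tiny sketch of that derivation, using an illustrative CHASE DB1-style filename:

```python
# Mirror of the suffix swap in `load_data_list`; the filename is made up.
img_suffix = '.png'
seg_map_suffix = '_1stHO.png'

img = 'Image_01L.png'  # hypothetical image file found by the scan
seg_map = img[:-len(img_suffix)] + seg_map_suffix
print(seg_map)  # Image_01L_1stHO.png
```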
+ """ + METAINFO = dict( + classes=('background', 'vessel'), + palette=[[120, 120, 120], [6, 230, 230]]) + + def __init__(self, + img_suffix='.png', + seg_map_suffix='_1stHO.png', + reduce_zero_label=False, + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + reduce_zero_label=reduce_zero_label, + **kwargs) + assert fileio.exists( + self.data_prefix['img_path'], backend_args=self.backend_args) diff --git a/mmseg/datasets/cityscapes.py b/mmseg/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f494d62424a39581961ab705b3308e7e07bee110 --- /dev/null +++ b/mmseg/datasets/cityscapes.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class CityscapesDataset(BaseSegDataset): + """Cityscapes dataset. + + The ``img_suffix`` is fixed to '_leftImg8bit.png' and ``seg_map_suffix`` is + fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset. + """ + METAINFO = dict( + classes=('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', + 'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train', + 'motorcycle', 'bicycle'), + palette=[[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, + 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], + [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], + [0, 60, 100], [0, 80, 100], [0, 0, 230], [119, 11, 32]]) + + def __init__(self, + img_suffix='_leftImg8bit.png', + seg_map_suffix='_gtFine_labelTrainIds.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) diff --git a/mmseg/datasets/coco_stuff.py b/mmseg/datasets/coco_stuff.py new file mode 100644 index 0000000000000000000000000000000000000000..1e1574d9702330cc5b10bab084841df61e7121ff --- /dev/null +++ b/mmseg/datasets/coco_stuff.py @@ -0,0 +1,99 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class COCOStuffDataset(BaseSegDataset): + """COCO-Stuff dataset. + + In segmentation map annotation for COCO-Stuff, Train-IDs of the 10k version + are from 1 to 171, where 0 is the ignore index, and Train-ID of COCO Stuff + 164k is from 0 to 170, where 255 is the ignore index. So, they are all 171 + semantic categories. ``reduce_zero_label`` is set to True and False for the + 10k and 164k versions, respectively. The ``img_suffix`` is fixed to '.jpg', + and ``seg_map_suffix`` is fixed to '.png'. 
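``reduce_zero_label`` recurs in nearly every dataset in this diff, so a concrete illustration may help. The remapping itself happens in the annotation-loading transform rather than in these classes; this is a hedged numpy sketch of its effect, which turns COCO-Stuff 10k's train IDs 1..171 into 0..170:

```python
import numpy as np

# Toy ground-truth patch with raw labels (0 = to-be-ignored class).
gt = np.array([[0, 1, 2],
               [171, 5, 0]], dtype=np.uint8)

reduced = gt.copy()
reduced[gt == 0] = 255   # label 0 becomes the ignore index
reduced[gt != 0] -= 1    # every remaining id shifts down by one
print(reduced)
# [[255   0   1]
#  [170   4 255]]
```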
+ """ + METAINFO = dict( + classes=( + 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', + 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', + 'scissors', 'teddy bear', 'hair drier', 'toothbrush', 'banner', + 'blanket', 'branch', 'bridge', 'building-other', 'bush', 'cabinet', + 'cage', 'cardboard', 'carpet', 'ceiling-other', 'ceiling-tile', + 'cloth', 'clothes', 'clouds', 'counter', 'cupboard', 'curtain', + 'desk-stuff', 'dirt', 'door-stuff', 'fence', 'floor-marble', + 'floor-other', 'floor-stone', 'floor-tile', 'floor-wood', 'flower', + 'fog', 'food-other', 'fruit', 'furniture-other', 'grass', 'gravel', + 'ground-other', 'hill', 'house', 'leaves', 'light', 'mat', 'metal', + 'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net', + 'paper', 'pavement', 'pillow', 'plant-other', 'plastic', + 'platform', 'playingfield', 'railing', 'railroad', 'river', 'road', + 'rock', 'roof', 'rug', 'salad', 'sand', 'sea', 'shelf', + 'sky-other', 'skyscraper', 'snow', 'solid-other', 'stairs', + 'stone', 'straw', 'structural-other', 'table', 'tent', + 'textile-other', 'towel', 'tree', 'vegetable', 'wall-brick', + 'wall-concrete', 'wall-other', 'wall-panel', 'wall-stone', + 'wall-tile', 'wall-wood', 'water-other', 'waterdrops', + 'window-blind', 'window-other', 'wood'), + palette=[[0, 192, 64], [0, 192, 64], [0, 64, 96], [128, 192, 192], + [0, 64, 64], [0, 192, 224], [0, 192, 192], [128, 192, 64], + [0, 192, 96], [128, 192, 64], [128, 32, 192], [0, 0, 224], + [0, 0, 64], [0, 160, 192], [128, 0, 96], [128, 0, 192], + [0, 32, 192], [128, 128, 224], [0, 0, 192], [128, 160, 192], + [128, 128, 0], [128, 0, 32], [128, 32, 0], [128, 0, 128], + [64, 128, 32], [0, 160, 0], [0, 0, 0], [192, 128, 160], + [0, 32, 0], [0, 128, 128], [64, 128, 160], [128, 160, 0], + [0, 128, 0], [192, 128, 32], [128, 96, 128], [0, 0, 128], + [64, 0, 32], [0, 224, 128], [128, 0, 0], [192, 0, 160], + [0, 96, 128], [128, 128, 128], [64, 0, 160], [128, 224, 128], + [128, 128, 64], [192, 0, 32], [128, 96, 0], [128, 0, 192], + [0, 128, 32], [64, 224, 0], [0, 0, 64], [128, 128, 160], + [64, 96, 0], [0, 128, 192], [0, 128, 160], [192, 224, 0], + [0, 128, 64], [128, 128, 32], [192, 32, 128], [0, 64, 192], + [0, 0, 32], [64, 160, 128], [128, 64, 64], [128, 0, 160], + [64, 32, 128], [128, 192, 192], [0, 0, 160], [192, 160, 128], + [128, 192, 0], [128, 0, 96], [192, 32, 0], [128, 64, 128], + [64, 128, 96], [64, 160, 0], [0, 64, 0], [192, 128, 224], + [64, 32, 0], [0, 192, 128], [64, 128, 224], [192, 160, 0], + [0, 192, 0], [192, 128, 96], [192, 96, 128], [0, 64, 128], + [64, 0, 96], [64, 224, 128], [128, 64, 0], [192, 0, 224], + [64, 96, 128], [128, 192, 128], [64, 0, 224], [192, 224, 128], + [128, 192, 64], [192, 0, 96], [192, 96, 0], [128, 64, 192], + [0, 128, 96], [0, 224, 0], [64, 64, 64], [128, 128, 224], + [0, 96, 0], [64, 192, 
192], [0, 128, 224], [128, 224, 0], + [64, 192, 64], [128, 128, 96], [128, 32, 128], [64, 0, 192], + [0, 64, 96], [0, 160, 128], [192, 0, 64], [128, 64, 224], + [0, 32, 128], [192, 128, 192], [0, 64, 224], [128, 160, 128], + [192, 128, 0], [128, 64, 32], [128, 32, 64], [192, 0, 128], + [64, 192, 32], [0, 160, 64], [64, 0, 0], [192, 192, 160], + [0, 32, 64], [64, 128, 128], [64, 192, 160], [128, 160, 64], + [64, 128, 0], [192, 192, 32], [128, 96, 192], [64, 0, 128], + [64, 64, 32], [0, 224, 192], [192, 0, 0], [192, 64, 160], + [0, 96, 192], [192, 128, 128], [64, 64, 160], [128, 224, 192], + [192, 128, 64], [192, 64, 32], [128, 96, 64], [192, 0, 192], + [0, 192, 32], [64, 224, 64], [64, 0, 64], [128, 192, 160], + [64, 96, 64], [64, 128, 192], [0, 192, 160], [192, 224, 64], + [64, 128, 64], [128, 192, 32], [192, 32, 192], [64, 64, 192], + [0, 64, 32], [64, 160, 192], [192, 64, 64], [128, 64, 160], + [64, 32, 192], [192, 192, 192], [0, 64, 160], [192, 160, 192], + [192, 192, 0], [128, 64, 96], [192, 32, 64], [192, 64, 128], + [64, 192, 96], [64, 160, 64], [64, 64, 0]]) + + def __init__(self, + img_suffix='.jpg', + seg_map_suffix='_labelTrainIds.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) diff --git a/mmseg/datasets/dark_zurich.py b/mmseg/datasets/dark_zurich.py new file mode 100644 index 0000000000000000000000000000000000000000..9b5393fa9e5047e81790f91829cfe4b7f33cc707 --- /dev/null +++ b/mmseg/datasets/dark_zurich.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmseg.registry import DATASETS +from .cityscapes import CityscapesDataset + + +@DATASETS.register_module() +class DarkZurichDataset(CityscapesDataset): + """DarkZurichDataset dataset.""" + + def __init__(self, + img_suffix='_rgb_anon.png', + seg_map_suffix='_gt_labelTrainIds.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) diff --git a/mmseg/datasets/dataset_wrappers.py b/mmseg/datasets/dataset_wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..082c116ff4582ecc7064dba1aba3c164dd556af5 --- /dev/null +++ b/mmseg/datasets/dataset_wrappers.py @@ -0,0 +1,136 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import collections +import copy +from typing import List, Optional, Sequence, Union + +from mmengine.dataset import ConcatDataset, force_full_init + +from mmseg.registry import DATASETS, TRANSFORMS + + +@DATASETS.register_module() +class MultiImageMixDataset: + """A wrapper of multiple images mixed dataset. + + Suitable for training on multiple images mixed data augmentation like + mosaic and mixup. + + Args: + dataset (ConcatDataset or dict): The dataset to be mixed. + pipeline (Sequence[dict]): Sequence of transform object or + config dict to be composed. + skip_type_keys (list[str], optional): Sequence of type string to + be skip pipeline. Default to None. 
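The wrapper documented above is driven entirely by config. Below is a hedged sketch of wrapping a dataset for mosaic-style training; the transform names, scales, and paths are assumptions about a typical mmseg setup, not values taken from this diff:

```python
# Hypothetical config snippet: wrap a CityscapesDataset so that a
# mix-image transform (here an assumed 'RandomMosaic') can pull extra
# samples via `get_indices`/`mix_results`.
train_dataset = dict(
    type='MultiImageMixDataset',
    dataset=dict(
        type='CityscapesDataset',
        data_root='data/cityscapes',  # hypothetical local path
        data_prefix=dict(
            img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
        ]),
    pipeline=[
        dict(type='RandomMosaic', prob=1.0, img_scale=(512, 1024)),
        dict(type='Resize', scale=(1024, 512), keep_ratio=True),
        dict(type='PackSegInputs'),
    ])
```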
+ """ + + def __init__(self, + dataset: Union[ConcatDataset, dict], + pipeline: Sequence[dict], + skip_type_keys: Optional[List[str]] = None, + lazy_init: bool = False) -> None: + assert isinstance(pipeline, collections.abc.Sequence) + + if isinstance(dataset, dict): + self.dataset = DATASETS.build(dataset) + elif isinstance(dataset, ConcatDataset): + self.dataset = dataset + else: + raise TypeError( + 'elements in datasets sequence should be config or ' + f'`ConcatDataset` instance, but got {type(dataset)}') + + if skip_type_keys is not None: + assert all([ + isinstance(skip_type_key, str) + for skip_type_key in skip_type_keys + ]) + self._skip_type_keys = skip_type_keys + + self.pipeline = [] + self.pipeline_types = [] + for transform in pipeline: + if isinstance(transform, dict): + self.pipeline_types.append(transform['type']) + transform = TRANSFORMS.build(transform) + self.pipeline.append(transform) + else: + raise TypeError('pipeline must be a dict') + + self._metainfo = self.dataset.metainfo + self.num_samples = len(self.dataset) + + self._fully_initialized = False + if not lazy_init: + self.full_init() + + @property + def metainfo(self) -> dict: + """Get the meta information of the multi-image-mixed dataset. + + Returns: + dict: The meta information of multi-image-mixed dataset. + """ + return copy.deepcopy(self._metainfo) + + def full_init(self): + """Loop to ``full_init`` each dataset.""" + if self._fully_initialized: + return + + self.dataset.full_init() + self._ori_len = len(self.dataset) + self._fully_initialized = True + + @force_full_init + def get_data_info(self, idx: int) -> dict: + """Get annotation by index. + + Args: + idx (int): Global index of ``ConcatDataset``. + + Returns: + dict: The idx-th annotation of the datasets. + """ + return self.dataset.get_data_info(idx) + + @force_full_init + def __len__(self): + return self.num_samples + + def __getitem__(self, idx): + results = copy.deepcopy(self.dataset[idx]) + for (transform, transform_type) in zip(self.pipeline, + self.pipeline_types): + if self._skip_type_keys is not None and \ + transform_type in self._skip_type_keys: + continue + + if hasattr(transform, 'get_indices'): + indices = transform.get_indices(self.dataset) + if not isinstance(indices, collections.abc.Sequence): + indices = [indices] + mix_results = [ + copy.deepcopy(self.dataset[index]) for index in indices + ] + results['mix_results'] = mix_results + + results = transform(results) + + if 'mix_results' in results: + results.pop('mix_results') + + return results + + def update_skip_type_keys(self, skip_type_keys): + """Update skip_type_keys. + + It is called by an external hook. + + Args: + skip_type_keys (list[str], optional): Sequence of type + string to be skip pipeline. + """ + assert all([ + isinstance(skip_type_key, str) for skip_type_key in skip_type_keys + ]) + self._skip_type_keys = skip_type_keys diff --git a/mmseg/datasets/decathlon.py b/mmseg/datasets/decathlon.py new file mode 100644 index 0000000000000000000000000000000000000000..26aa4ef0d7f44e55d4400ed6151ea1f6cb3930ec --- /dev/null +++ b/mmseg/datasets/decathlon.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import os.path as osp +from typing import List + +from mmengine.fileio import load + +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class DecathlonDataset(BaseSegDataset): + """Dataset for Dacathlon dataset. + + The dataset.json format is shown as follows + + .. 
code-block:: none + + { + "name": "BRATS", + "tensorImageSize": "4D", + "modality": + { + "0": "FLAIR", + "1": "T1w", + "2": "t1gd", + "3": "T2w" + }, + "labels": { + "0": "background", + "1": "edema", + "2": "non-enhancing tumor", + "3": "enhancing tumour" + }, + "numTraining": 484, + "numTest": 266, + "training": + [ + { + "image": "./imagesTr/BRATS_306.nii.gz" + "label": "./labelsTr/BRATS_306.nii.gz" + ... + } + ] + "test": + [ + "./imagesTs/BRATS_557.nii.gz" + ... + ] + } + """ + + def load_data_list(self) -> List[dict]: + """Load annotation from directory or annotation file. + + Returns: + list[dict]: All data info of dataset. + """ + # `self.ann_file` denotes the absolute annotation file path if + # `self.root=None` or relative path if `self.root=/path/to/data/`. + annotations = load(self.ann_file) + if not isinstance(annotations, dict): + raise TypeError(f'The annotations loaded from annotation file ' + f'should be a dict, but got {type(annotations)}!') + raw_data_list = annotations[ + 'training'] if not self.test_mode else annotations['test'] + data_list = [] + for raw_data_info in raw_data_list: + # `2:` works for removing './' in file path, which will break + # loading from cloud storage. + if isinstance(raw_data_info, dict): + data_info = dict( + img_path=osp.join(self.data_root, raw_data_info['image'] + [2:])) + data_info['seg_map_path'] = osp.join( + self.data_root, raw_data_info['label'][2:]) + else: + data_info = dict( + img_path=osp.join(self.data_root, raw_data_info)[2:]) + data_info['label_map'] = self.label_map + data_info['reduce_zero_label'] = self.reduce_zero_label + data_info['seg_fields'] = [] + data_list.append(data_info) + annotations.pop('training') + annotations.pop('test') + + metainfo = copy.deepcopy(annotations) + metainfo['classes'] = [*metainfo['labels'].values()] + # Meta information load from annotation file will not influence the + # existed meta information load from `BaseDataset.METAINFO` and + # `metainfo` arguments defined in constructor. + for k, v in metainfo.items(): + self._metainfo.setdefault(k, v) + + return data_list diff --git a/mmseg/datasets/drive.py b/mmseg/datasets/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..76c0160a6b6bf4a56ff135620ff0b08dc086d1d9 --- /dev/null +++ b/mmseg/datasets/drive.py @@ -0,0 +1,32 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import mmengine.fileio as fileio + +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class DRIVEDataset(BaseSegDataset): + """DRIVE dataset. + + In segmentation map annotation for DRIVE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_manual1.png'. + """ + METAINFO = dict( + classes=('background', 'vessel'), + palette=[[120, 120, 120], [6, 230, 230]]) + + def __init__(self, + img_suffix='.png', + seg_map_suffix='_manual1.png', + reduce_zero_label=False, + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + reduce_zero_label=reduce_zero_label, + **kwargs) + assert fileio.exists( + self.data_prefix['img_path'], backend_args=self.backend_args) diff --git a/mmseg/datasets/dsdl.py b/mmseg/datasets/dsdl.py new file mode 100644 index 0000000000000000000000000000000000000000..bf7e4e61b5fdd4bcb34617c8e53b93829def443a --- /dev/null +++ b/mmseg/datasets/dsdl.py @@ -0,0 +1,116 @@ +# Copyright (c) OpenMMLab. 
All rights reserved. +import os +from typing import Dict, List, Optional, Sequence, Union + +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + +try: + from dsdl.dataset import DSDLDataset +except ImportError: + DSDLDataset = None + + +@DATASETS.register_module() +class DSDLSegDataset(BaseSegDataset): + """Dataset for dsdl segmentation. + + Args: + specific_key_path(dict): Path of specific key which can not + be loaded by it's field name. + pre_transform(dict): pre-transform functions before loading. + used_labels(sequence): list of actual used classes in train steps, + this must be subset of class domain. + """ + + METAINFO = {} + + def __init__(self, + specific_key_path: Dict = {}, + pre_transform: Dict = {}, + used_labels: Optional[Sequence] = None, + **kwargs) -> None: + + if DSDLDataset is None: + raise RuntimeError( + 'Package dsdl is not installed. Please run "pip install dsdl".' + ) + self.used_labels = used_labels + + loc_config = dict(type='LocalFileReader', working_dir='') + if kwargs.get('data_root'): + kwargs['ann_file'] = os.path.join(kwargs['data_root'], + kwargs['ann_file']) + required_fields = ['Image', 'LabelMap'] + + self.dsdldataset = DSDLDataset( + dsdl_yaml=kwargs['ann_file'], + location_config=loc_config, + required_fields=required_fields, + specific_key_path=specific_key_path, + transform=pre_transform, + ) + BaseSegDataset.__init__(self, **kwargs) + + def load_data_list(self) -> List[Dict]: + """Load data info from a dsdl yaml file named as ``self.ann_file`` + + Returns: + List[dict]: A list of data list. + """ + + if self.used_labels: + self._metainfo['classes'] = tuple(self.used_labels) + self.label_map = self.get_label_map(self.used_labels) + else: + self._metainfo['classes'] = tuple(['background'] + + self.dsdldataset.class_names) + data_list = [] + + for i, data in enumerate(self.dsdldataset): + datainfo = dict( + img_path=os.path.join(self.data_prefix['img_path'], + data['Image'][0].location), + seg_map_path=os.path.join(self.data_prefix['seg_map_path'], + data['LabelMap'][0].location), + label_map=self.label_map, + reduce_zero_label=self.reduce_zero_label, + seg_fields=[], + ) + data_list.append(datainfo) + + return data_list + + def get_label_map(self, + new_classes: Optional[Sequence] = None + ) -> Union[Dict, None]: + """Require label mapping. + + The ``label_map`` is a dictionary, its keys are the old label ids and + its values are the new label ids, and is used for changing pixel + labels in load_annotations. If and only if old classes in class_dom + is not equal to new classes in args and nether of them is not + None, `label_map` is not None. + Args: + new_classes (list, tuple, optional): The new classes name from + metainfo. Default to None. + Returns: + dict, optional: The mapping from old classes to new classes. 
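Since ``DSDLSegDataset`` resolves images, masks, and the class domain from a DSDL yaml description, its config stays short. A hedged sketch; the paths and label names below are hypothetical, and the optional ``dsdl`` package must be installed:

```python
# Hypothetical config for a DSDL-described segmentation dataset.
dsdl_dataset = dict(
    type='DSDLSegDataset',
    data_root='data/my_dsdl_seg',            # hypothetical root
    ann_file='dsdl/set-train/train.yaml',    # hypothetical DSDL yaml
    data_prefix=dict(img_path='.', seg_map_path='.'),
    used_labels=['road', 'building'],        # optional subset of class_dom
    pipeline=[])
```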
+        """
+        old_classes = ['background'] + self.dsdldataset.class_names
+        if (new_classes is not None and old_classes is not None
+                and list(new_classes) != list(old_classes)):
+
+            label_map = {}
+            if not set(new_classes).issubset(old_classes):
+                raise ValueError(
+                    f'new classes {new_classes} is not a '
+                    f'subset of classes {old_classes} in class_dom.')
+            for i, c in enumerate(old_classes):
+                if c not in new_classes:
+                    label_map[i] = 255
+                else:
+                    label_map[i] = new_classes.index(c)
+            return label_map
+        else:
+            return None
diff --git a/mmseg/datasets/hrf.py b/mmseg/datasets/hrf.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd669cce26420b7e2c810ecace247a9e09350a5d
--- /dev/null
+++ b/mmseg/datasets/hrf.py
@@ -0,0 +1,32 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmengine.fileio as fileio
+
+from mmseg.registry import DATASETS
+from .basesegdataset import BaseSegDataset
+
+
+@DATASETS.register_module()
+class HRFDataset(BaseSegDataset):
+    """HRF dataset.
+
+    In segmentation map annotation for HRF, 0 stands for background, which is
+    included in 2 categories. ``reduce_zero_label`` is fixed to False. The
+    ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
+    '.png'.
+    """
+    METAINFO = dict(
+        classes=('background', 'vessel'),
+        palette=[[120, 120, 120], [6, 230, 230]])
+
+    def __init__(self,
+                 img_suffix='.png',
+                 seg_map_suffix='.png',
+                 reduce_zero_label=False,
+                 **kwargs) -> None:
+        super().__init__(
+            img_suffix=img_suffix,
+            seg_map_suffix=seg_map_suffix,
+            reduce_zero_label=reduce_zero_label,
+            **kwargs)
+        assert fileio.exists(
+            self.data_prefix['img_path'], backend_args=self.backend_args)
diff --git a/mmseg/datasets/isaid.py b/mmseg/datasets/isaid.py
new file mode 100644
index 0000000000000000000000000000000000000000..61942ec1ea33e76c65c22d8e7fc71fb8194841dd
--- /dev/null
+++ b/mmseg/datasets/isaid.py
@@ -0,0 +1,39 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmengine.fileio as fileio
+
+from mmseg.registry import DATASETS
+from .basesegdataset import BaseSegDataset
+
+
+@DATASETS.register_module()
+class iSAIDDataset(BaseSegDataset):
+    """iSAID: A Large-scale Dataset for Instance Segmentation in Aerial Images.
+
+    The segmentation map annotations for iSAID cover 16 categories.
+    ``reduce_zero_label`` is fixed to False. The ``img_suffix`` is fixed to
+    '.png' and ``seg_map_suffix`` is fixed to '_instance_color_RGB.png'.
+    """
+
+    METAINFO = dict(
+        classes=('background', 'ship', 'store_tank', 'baseball_diamond',
+                 'tennis_court', 'basketball_court', 'Ground_Track_Field',
+                 'Bridge', 'Large_Vehicle', 'Small_Vehicle', 'Helicopter',
+                 'Swimming_pool', 'Roundabout', 'Soccer_ball_field', 'plane',
+                 'Harbor'),
+        palette=[[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127],
+                 [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, 127],
+                 [0, 0, 127], [0, 0, 191], [0, 0, 255], [0, 191, 127],
+                 [0, 127, 191], [0, 127, 255], [0, 100, 155]])
+
+    def __init__(self,
+                 img_suffix='.png',
+                 seg_map_suffix='_instance_color_RGB.png',
+                 ignore_index=255,
+                 **kwargs) -> None:
+        super().__init__(
+            img_suffix=img_suffix,
+            seg_map_suffix=seg_map_suffix,
+            ignore_index=ignore_index,
+            **kwargs)
+        assert fileio.exists(
+            self.data_prefix['img_path'], backend_args=self.backend_args)
diff --git a/mmseg/datasets/isprs.py b/mmseg/datasets/isprs.py
new file mode 100644
index 0000000000000000000000000000000000000000..30af53c569b05c9be1218e9a58655c36c8aa9931
--- /dev/null
+++ b/mmseg/datasets/isprs.py
@@ -0,0 +1,29 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmseg.registry import DATASETS
+from .basesegdataset import BaseSegDataset
+
+
+@DATASETS.register_module()
+class ISPRSDataset(BaseSegDataset):
+    """ISPRS dataset.
+
+    In segmentation map annotation for ISPRS, 0 is the ignore index.
+    ``reduce_zero_label`` should be set to True. The ``img_suffix`` and
+    ``seg_map_suffix`` are both fixed to '.png'.
+    """
+    METAINFO = dict(
+        classes=('impervious_surface', 'building', 'low_vegetation', 'tree',
+                 'car', 'clutter'),
+        palette=[[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0],
+                 [255, 255, 0], [255, 0, 0]])
+
+    def __init__(self,
+                 img_suffix='.png',
+                 seg_map_suffix='.png',
+                 reduce_zero_label=True,
+                 **kwargs) -> None:
+        super().__init__(
+            img_suffix=img_suffix,
+            seg_map_suffix=seg_map_suffix,
+            reduce_zero_label=reduce_zero_label,
+            **kwargs)
diff --git a/mmseg/datasets/levir.py b/mmseg/datasets/levir.py
new file mode 100644
index 0000000000000000000000000000000000000000..f467481bad70a426381842dba61d85576c196eaf
--- /dev/null
+++ b/mmseg/datasets/levir.py
@@ -0,0 +1,31 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+from mmseg.registry import DATASETS
+from .basesegdataset import BaseCDDataset
+
+
+@DATASETS.register_module()
+class LEVIRCDDataset(BaseCDDataset):
+    """LEVIR-CD change detection dataset.
+
+    In segmentation map annotation for LEVIR-CD, 0 stands for the unchanged
+    background, so ``reduce_zero_label`` is fixed to False. The
+    ``img_suffix``, ``img_suffix2`` and ``seg_map_suffix`` are fixed to '.png'.
+    """
+
+    METAINFO = dict(
+        classes=('background', 'changed'),
+        palette=[[0, 0, 0], [255, 255, 255]])
+
+    def __init__(self,
+                 img_suffix='.png',
+                 img_suffix2='.png',
+                 seg_map_suffix='.png',
+                 reduce_zero_label=False,
+                 **kwargs) -> None:
+        super().__init__(
+            img_suffix=img_suffix,
+            img_suffix2=img_suffix2,
+            seg_map_suffix=seg_map_suffix,
+            reduce_zero_label=reduce_zero_label,
+            **kwargs)
diff --git a/mmseg/datasets/lip.py b/mmseg/datasets/lip.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a32a193aff990ae9f819d4a0a1be82df1d049cb
--- /dev/null
+++ b/mmseg/datasets/lip.py
@@ -0,0 +1,47 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmseg.registry import DATASETS
+from .basesegdataset import BaseSegDataset
+
+
+@DATASETS.register_module()
+class LIPDataset(BaseSegDataset):
+    """LIP dataset.
+
+    The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to
+    '.png'.
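``LEVIRCDDataset`` above is the first concrete user of ``BaseCDDataset``, which pairs two image prefixes per sample. A hedged config sketch follows; the directory layout is an assumption about a local LEVIR-CD checkout, not something shipped here:

```python
# Hypothetical change-detection dataset config built on BaseCDDataset.
levir_train = dict(
    type='LEVIRCDDataset',
    data_root='data/LEVIR-CD',      # hypothetical local path
    data_prefix=dict(
        img_path='train/A',         # pre-change images
        img_path2='train/B',        # post-change images
        seg_map_path='train/label'),
    pipeline=[])
# Each sample pairs train/A/xxx.png with train/B/xxx.png by filename,
# plus the binary change mask train/label/xxx.png.
```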
+ """ + METAINFO = dict( + classes=('Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', + 'UpperClothes', 'Dress', 'Coat', 'Socks', 'Pants', + 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', + 'Right-arm', 'Left-leg', 'Right-leg', 'Left-shoe', + 'Right-shoe'), + palette=( + [0, 0, 0], + [128, 0, 0], + [255, 0, 0], + [0, 85, 0], + [170, 0, 51], + [255, 85, 0], + [0, 0, 85], + [0, 119, 221], + [85, 85, 0], + [0, 85, 85], + [85, 51, 0], + [52, 86, 128], + [0, 128, 0], + [0, 0, 255], + [51, 170, 221], + [0, 255, 255], + [85, 255, 170], + [170, 255, 85], + [255, 255, 0], + [255, 170, 0], + )) + + def __init__(self, + img_suffix='.jpg', + seg_map_suffix='.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) diff --git a/mmseg/datasets/loveda.py b/mmseg/datasets/loveda.py new file mode 100644 index 0000000000000000000000000000000000000000..5c16db503adee6f1a1cac67e1dc72ff873ccd5ea --- /dev/null +++ b/mmseg/datasets/loveda.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class LoveDADataset(BaseSegDataset): + """LoveDA dataset. + + In segmentation map annotation for LoveDA, 0 is the ignore index. + ``reduce_zero_label`` should be set to True. The ``img_suffix`` and + ``seg_map_suffix`` are both fixed to '.png'. + """ + METAINFO = dict( + classes=('background', 'building', 'road', 'water', 'barren', 'forest', + 'agricultural'), + palette=[[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255], + [159, 129, 183], [0, 255, 0], [255, 195, 128]]) + + def __init__(self, + img_suffix='.png', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + reduce_zero_label=reduce_zero_label, + **kwargs) diff --git a/mmseg/datasets/mapillary.py b/mmseg/datasets/mapillary.py new file mode 100644 index 0000000000000000000000000000000000000000..6c2947338ec79b3d8558cee0387a2a84e41f0421 --- /dev/null +++ b/mmseg/datasets/mapillary.py @@ -0,0 +1,176 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class MapillaryDataset_v1(BaseSegDataset): + """Mapillary Vistas Dataset. + + Dataset paper link: + http://ieeexplore.ieee.org/document/8237796/ + + v1.2 contain 66 object classes. + (37 instance-specific) + + v2.0 contain 124 object classes. + (70 instance-specific, 46 stuff, 8 void or crowd). + + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png' for Mapillary Vistas Dataset. 
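Every dataset in this diff follows the same registration recipe, so adding a new one is mostly boilerplate. A hedged sketch with invented names; in practice only ``classes``/``palette``, the suffixes, and ``reduce_zero_label`` change from dataset to dataset:

```python
# Hypothetical custom dataset: subclass, declare METAINFO, register.
from mmseg.datasets.basesegdataset import BaseSegDataset
from mmseg.registry import DATASETS


@DATASETS.register_module()
class MyRoadDataset(BaseSegDataset):
    """Illustrative two-class road dataset (not part of this diff)."""
    METAINFO = dict(
        classes=('background', 'road'),
        palette=[[0, 0, 0], [128, 64, 128]])

    def __init__(self,
                 img_suffix='.jpg',
                 seg_map_suffix='.png',
                 reduce_zero_label=False,
                 **kwargs) -> None:
        super().__init__(
            img_suffix=img_suffix,
            seg_map_suffix=seg_map_suffix,
            reduce_zero_label=reduce_zero_label,
            **kwargs)
```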
+ """ + METAINFO = dict( + classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', + 'Barrier', 'Wall', 'Bike Lane', 'Crosswalk - Plain', + 'Curb Cut', 'Parking', 'Pedestrian Area', 'Rail Track', + 'Road', 'Service Lane', 'Sidewalk', 'Bridge', 'Building', + 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist', + 'Other Rider', 'Lane Marking - Crosswalk', + 'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow', + 'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', + 'Bike Rack', 'Billboard', 'Catch Basin', 'CCTV Camera', + 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', + 'Phone Booth', 'Pothole', 'Street Light', 'Pole', + 'Traffic Sign Frame', 'Utility Pole', 'Traffic Light', + 'Traffic Sign (Back)', 'Traffic Sign (Front)', 'Trash Can', + 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', 'Motorcycle', + 'On Rails', 'Other Vehicle', 'Trailer', 'Truck', + 'Wheeled Slow', 'Car Mount', 'Ego Vehicle', 'Unlabeled'), + palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], + [180, 165, 180], [90, 120, 150], [102, 102, 156], + [128, 64, 255], [140, 140, 200], [170, 170, 170], + [250, 170, 160], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], + [244, 35, 232], [150, 100, 100], [70, 70, 70], [150, 120, 90], + [220, 20, 60], [255, 0, 0], [255, 0, 100], [255, 0, 200], + [200, 128, 128], [255, 255, 255], [64, 170, + 64], [230, 160, 50], + [70, 130, 180], [190, 255, 255], [152, 251, 152], + [107, 142, 35], [0, 170, 30], [255, 255, 128], [250, 0, 30], + [100, 140, 180], [220, 220, 220], [220, 128, 128], + [222, 40, 40], [100, 170, 30], [40, 40, 40], [33, 33, 33], + [100, 128, 160], [142, 0, 0], [70, 100, 150], [210, 170, 100], + [153, 153, 153], [128, 128, 128], [0, 0, 80], [250, 170, 30], + [192, 192, 192], [220, 220, 0], [140, 140, 20], [119, 11, 32], + [150, 0, 255], [0, 60, 100], [0, 0, 142], [0, 0, 90], + [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], + [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, + 10], [0, 0, 0]]) + + def __init__(self, + img_suffix='.jpg', + seg_map_suffix='.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) + + +@DATASETS.register_module() +class MapillaryDataset_v2(BaseSegDataset): + """Mapillary Vistas Dataset. + + Dataset paper link: + http://ieeexplore.ieee.org/document/8237796/ + + v1.2 contain 66 object classes. + (37 instance-specific) + + v2.0 contain 124 object classes. + (70 instance-specific, 46 stuff, 8 void or crowd). + + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png' for Mapillary Vistas Dataset. 
+ """ + METAINFO = dict( + classes=( + 'Bird', 'Ground Animal', 'Ambiguous Barrier', 'Concrete Block', + 'Curb', 'Fence', 'Guard Rail', 'Barrier', 'Road Median', + 'Road Side', 'Lane Separator', 'Temporary Barrier', 'Wall', + 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Driveway', + 'Parking', 'Parking Aisle', 'Pedestrian Area', 'Rail Track', + 'Road', 'Road Shoulder', 'Service Lane', 'Sidewalk', + 'Traffic Island', 'Bridge', 'Building', 'Garage', 'Tunnel', + 'Person', 'Person Group', 'Bicyclist', 'Motorcyclist', + 'Other Rider', 'Lane Marking - Dashed Line', + 'Lane Marking - Straight Line', 'Lane Marking - Zigzag Line', + 'Lane Marking - Ambiguous', 'Lane Marking - Arrow (Left)', + 'Lane Marking - Arrow (Other)', 'Lane Marking - Arrow (Right)', + 'Lane Marking - Arrow (Split Left or Straight)', + 'Lane Marking - Arrow (Split Right or Straight)', + 'Lane Marking - Arrow (Straight)', 'Lane Marking - Crosswalk', + 'Lane Marking - Give Way (Row)', + 'Lane Marking - Give Way (Single)', + 'Lane Marking - Hatched (Chevron)', + 'Lane Marking - Hatched (Diagonal)', 'Lane Marking - Other', + 'Lane Marking - Stop Line', 'Lane Marking - Symbol (Bicycle)', + 'Lane Marking - Symbol (Other)', 'Lane Marking - Text', + 'Lane Marking (only) - Dashed Line', + 'Lane Marking (only) - Crosswalk', 'Lane Marking (only) - Other', + 'Lane Marking (only) - Test', 'Mountain', 'Sand', 'Sky', 'Snow', + 'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', 'Bike Rack', + 'Catch Basin', 'CCTV Camera', 'Fire Hydrant', 'Junction Box', + 'Mailbox', 'Manhole', 'Parking Meter', 'Phone Booth', 'Pothole', + 'Signage - Advertisement', 'Signage - Ambiguous', 'Signage - Back', + 'Signage - Information', 'Signage - Other', 'Signage - Store', + 'Street Light', 'Pole', 'Pole Group', 'Traffic Sign Frame', + 'Utility Pole', 'Traffic Cone', 'Traffic Light - General (Single)', + 'Traffic Light - Pedestrians', 'Traffic Light - General (Upright)', + 'Traffic Light - General (Horizontal)', 'Traffic Light - Cyclists', + 'Traffic Light - Other', 'Traffic Sign - Ambiguous', + 'Traffic Sign (Back)', 'Traffic Sign - Direction (Back)', + 'Traffic Sign - Direction (Front)', 'Traffic Sign (Front)', + 'Traffic Sign - Parking', 'Traffic Sign - Temporary (Back)', + 'Traffic Sign - Temporary (Front)', 'Trash Can', 'Bicycle', 'Boat', + 'Bus', 'Car', 'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', + 'Trailer', 'Truck', 'Vehicle Group', 'Wheeled Slow', 'Water Valve', + 'Car Mount', 'Dynamic', 'Ego Vehicle', 'Ground', 'Static', + 'Unlabeled'), + palette=[[165, 42, 42], [0, 192, 0], [250, 170, 31], [250, 170, 32], + [196, 196, 196], [190, 153, 153], [180, 165, 180], + [90, 120, 150], [250, 170, 33], [250, 170, 34], + [128, 128, 128], [250, 170, 35], [102, 102, 156], + [128, 64, 255], [140, 140, 200], [170, 170, 170], + [250, 170, 36], [250, 170, 160], [250, 170, 37], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], + [110, 110, 110], [244, 35, 232], [128, 196, + 128], [150, 100, 100], + [70, 70, 70], [150, 150, 150], [150, 120, 90], [220, 20, 60], + [220, 20, 60], [255, 0, 0], [255, 0, 100], [255, 0, 200], + [255, 255, 255], [255, 255, 255], [250, 170, 29], + [250, 170, 28], [250, 170, 26], [250, 170, + 25], [250, 170, 24], + [250, 170, 22], [250, 170, 21], [250, 170, + 20], [255, 255, 255], + [250, 170, 19], [250, 170, 18], [250, 170, + 12], [250, 170, 11], + [255, 255, 255], [255, 255, 255], [250, 170, 16], + [250, 170, 15], [250, 170, 15], [255, 255, 255], + [255, 255, 255], [255, 255, 255], [255, 255, 255], + [64, 170, 64], 
[230, 160, 50], + [70, 130, 180], [190, 255, 255], [152, 251, 152], + [107, 142, 35], [0, 170, 30], [255, 255, 128], [250, 0, 30], + [100, 140, 180], [220, 128, 128], [222, 40, + 40], [100, 170, 30], + [40, 40, 40], [33, 33, 33], [100, 128, 160], [20, 20, 255], + [142, 0, 0], [70, 100, 150], [250, 171, 30], [250, 172, 30], + [250, 173, 30], [250, 174, 30], [250, 175, + 30], [250, 176, 30], + [210, 170, 100], [153, 153, 153], [153, 153, 153], + [128, 128, 128], [0, 0, 80], [210, 60, 60], [250, 170, 30], + [250, 170, 30], [250, 170, 30], [250, 170, + 30], [250, 170, 30], + [250, 170, 30], [192, 192, 192], [192, 192, 192], + [192, 192, 192], [220, 220, 0], [220, 220, 0], [0, 0, 196], + [192, 192, 192], [220, 220, 0], [140, 140, 20], [119, 11, 32], + [150, 0, 255], [0, 60, 100], [0, 0, 142], [0, 0, 90], + [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], + [0, 0, 70], [0, 0, 142], [0, 0, 192], [170, 170, 170], + [32, 32, 32], [111, 74, 0], [120, 10, 10], [81, 0, 81], + [111, 111, 0], [0, 0, 0]]) + + def __init__(self, + img_suffix='.jpg', + seg_map_suffix='.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) diff --git a/mmseg/datasets/night_driving.py b/mmseg/datasets/night_driving.py new file mode 100644 index 0000000000000000000000000000000000000000..3ead91ec77cbd8e3f0a870dee3462549183e9c9b --- /dev/null +++ b/mmseg/datasets/night_driving.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmseg.registry import DATASETS +from .cityscapes import CityscapesDataset + + +@DATASETS.register_module() +class NightDrivingDataset(CityscapesDataset): + """NightDrivingDataset dataset.""" + + def __init__(self, + img_suffix='_leftImg8bit.png', + seg_map_suffix='_gtCoarse_labelTrainIds.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) diff --git a/mmseg/datasets/nyu.py b/mmseg/datasets/nyu.py new file mode 100644 index 0000000000000000000000000000000000000000..fcfda46647d25b5d16425af97a06ffb8c1f81bca --- /dev/null +++ b/mmseg/datasets/nyu.py @@ -0,0 +1,123 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from typing import List + +import mmengine.fileio as fileio + +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class NYUDataset(BaseSegDataset): + """NYU depth estimation dataset. The file structure should be. + + .. code-block:: none + + ├── data + │ ├── nyu + │ │ ├── images + │ │ │ ├── train + │ │ │ │ ├── scene_xxx.jpg + │ │ │ │ ├── ... + │ │ │ ├── test + │ │ ├── annotations + │ │ │ ├── train + │ │ │ │ ├── scene_xxx.png + │ │ │ │ ├── ... + │ │ │ ├── test + + Args: + ann_file (str): Annotation file path. Defaults to ''. + metainfo (dict, optional): Meta information for dataset, such as + specify classes to load. Defaults to None. + data_root (str, optional): The root directory for ``data_prefix`` and + ``ann_file``. Defaults to None. + data_prefix (dict, optional): Prefix for training data. Defaults to + dict(img_path='images', depth_map_path='annotations'). + img_suffix (str): Suffix of images. Default: '.jpg' + seg_map_suffix (str): Suffix of segmentation maps. Default: '.png' + filter_cfg (dict, optional): Config for filter data. Defaults to None. + indices (int or Sequence[int], optional): Support using first few + data in annotation file to facilitate training/testing on a smaller + dataset. Defaults to None which means using all ``data_infos``. 
+ serialize_data (bool, optional): Whether to hold memory using + serialized objects, when enabled, data loader workers can use + shared RAM from master process instead of making a copy. Defaults + to True. + pipeline (list, optional): Processing pipeline. Defaults to []. + test_mode (bool, optional): ``test_mode=True`` means in test phase. + Defaults to False. + lazy_init (bool, optional): Whether to load annotation during + instantiation. In some cases, such as visualization, only the meta + information of the dataset is needed, which is not necessary to + load annotation file. ``Basedataset`` can skip load annotations to + save time by set ``lazy_init=True``. Defaults to False. + max_refetch (int, optional): If ``Basedataset.prepare_data`` get a + None img. The maximum extra number of cycles to get a valid + image. Defaults to 1000. + ignore_index (int): The label index to be ignored. Default: 255 + reduce_zero_label (bool): Whether to mark label zero as ignored. + Default to False. + backend_args (dict, Optional): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to None. + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. + """ + METAINFO = dict( + classes=('printer_room', 'bathroom', 'living_room', 'study', + 'conference_room', 'study_room', 'kitchen', 'home_office', + 'bedroom', 'dinette', 'playroom', 'indoor_balcony', + 'laundry_room', 'basement', 'excercise_room', 'foyer', + 'home_storage', 'cafe', 'furniture_store', 'office_kitchen', + 'student_lounge', 'dining_room', 'reception_room', + 'computer_lab', 'classroom', 'office', 'bookstore')) + + def __init__(self, + data_prefix=dict( + img_path='images', depth_map_path='annotations'), + img_suffix='.jpg', + depth_map_suffix='.png', + **kwargs) -> None: + super().__init__( + data_prefix=data_prefix, + img_suffix=img_suffix, + seg_map_suffix=depth_map_suffix, + **kwargs) + + def _get_category_id_from_filename(self, image_fname: str) -> int: + """Retrieve the category ID from the given image filename.""" + image_fname = osp.basename(image_fname) + position = image_fname.find(next(filter(str.isdigit, image_fname)), 0) + categoty_name = image_fname[:position - 1] + if categoty_name not in self._metainfo['classes']: + return -1 + else: + return self._metainfo['classes'].index(categoty_name) + + def load_data_list(self) -> List[dict]: + """Load annotation from directory or annotation file. + + Returns: + list[dict]: All data info of dataset. + """ + data_list = [] + img_dir = self.data_prefix.get('img_path', None) + ann_dir = self.data_prefix.get('depth_map_path', None) + + _suffix_len = len(self.img_suffix) + for img in fileio.list_dir_or_file( + dir_path=img_dir, + list_dir=False, + suffix=self.img_suffix, + recursive=True, + backend_args=self.backend_args): + data_info = dict(img_path=osp.join(img_dir, img)) + if ann_dir is not None: + depth_map = img[:-_suffix_len] + self.seg_map_suffix + data_info['depth_map_path'] = osp.join(ann_dir, depth_map) + data_info['seg_fields'] = [] + data_info['category_id'] = self._get_category_id_from_filename(img) + data_list.append(data_info) + data_list = sorted(data_list, key=lambda x: x['img_path']) + return data_list diff --git a/mmseg/datasets/pascal_context.py b/mmseg/datasets/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..82d00a9b3086a0db81457ab9b2f79c79de4ffaa8 --- /dev/null +++ b/mmseg/datasets/pascal_context.py @@ -0,0 +1,116 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
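The NYU loader above recovers a scene category from each filename in ``_get_category_id_from_filename``: everything before the first digit, minus the separator, is the category. A standalone sketch of that rule, with an abbreviated class list and a made-up filename:

```python
import os.path as osp

classes = ('bathroom', 'bedroom', 'kitchen')  # abbreviated for the sketch


def category_id(image_fname: str) -> int:
    # Assumes the basename contains at least one digit, as NYU files do.
    name = osp.basename(image_fname)
    first_digit = next(i for i, ch in enumerate(name) if ch.isdigit())
    category = name[:first_digit - 1]  # drop the '_' before the number
    return classes.index(category) if category in classes else -1


print(category_id('images/train/bedroom_0042.jpg'))  # 1
print(category_id('images/train/atrium_0001.jpg'))   # -1 (unknown class)
```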
+import mmengine.fileio as fileio + +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class PascalContextDataset(BaseSegDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + ann_file (str): Annotation file path. + """ + + METAINFO = dict( + classes=('background', 'aeroplane', 'bag', 'bed', 'bedclothes', + 'bench', 'bicycle', 'bird', 'boat', 'book', 'bottle', + 'building', 'bus', 'cabinet', 'car', 'cat', 'ceiling', + 'chair', 'cloth', 'computer', 'cow', 'cup', 'curtain', 'dog', + 'door', 'fence', 'floor', 'flower', 'food', 'grass', 'ground', + 'horse', 'keyboard', 'light', 'motorbike', 'mountain', + 'mouse', 'person', 'plate', 'platform', 'pottedplant', 'road', + 'rock', 'sheep', 'shelves', 'sidewalk', 'sign', 'sky', 'snow', + 'sofa', 'table', 'track', 'train', 'tree', 'truck', + 'tvmonitor', 'wall', 'water', 'window', 'wood'), + palette=[[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]]) + + def __init__(self, + ann_file='', + img_suffix='.jpg', + seg_map_suffix='.png', + reduce_zero_label=False, + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + ann_file=ann_file, + reduce_zero_label=reduce_zero_label, + **kwargs) + assert fileio.exists(self.data_prefix['img_path'], self.backend_args) + + +@DATASETS.register_module() +class PascalContextDataset59(BaseSegDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + True. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + Noted: If the background is 255 and the ids of categories are from 0 to 58, + ``reduce_zero_label`` needs to be set to False. + + Args: + ann_file (str): Annotation file path. 
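+
+    Example (a config sketch; the data paths follow the layout suggested in
+    the mmseg docs and are placeholders here):
+
+    .. code-block:: python
+
+        dataset = dict(
+            type='PascalContextDataset59',
+            data_root='data/VOCdevkit/VOC2010',
+            data_prefix=dict(
+                img_path='JPEGImages',
+                seg_map_path='SegmentationClassContext'),
+            ann_file='ImageSets/SegmentationContext/train.txt')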
+ """ + METAINFO = dict( + classes=('aeroplane', 'bag', 'bed', 'bedclothes', 'bench', 'bicycle', + 'bird', 'boat', 'book', 'bottle', 'building', 'bus', + 'cabinet', 'car', 'cat', 'ceiling', 'chair', 'cloth', + 'computer', 'cow', 'cup', 'curtain', 'dog', 'door', 'fence', + 'floor', 'flower', 'food', 'grass', 'ground', 'horse', + 'keyboard', 'light', 'motorbike', 'mountain', 'mouse', + 'person', 'plate', 'platform', 'pottedplant', 'road', 'rock', + 'sheep', 'shelves', 'sidewalk', 'sign', 'sky', 'snow', 'sofa', + 'table', 'track', 'train', 'tree', 'truck', 'tvmonitor', + 'wall', 'water', 'window', 'wood'), + palette=[[180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], + [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]]) + + def __init__(self, + ann_file='', + img_suffix='.jpg', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs): + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + ann_file=ann_file, + reduce_zero_label=reduce_zero_label, + **kwargs) + assert fileio.exists(self.data_prefix['img_path'], self.backend_args) diff --git a/mmseg/datasets/potsdam.py b/mmseg/datasets/potsdam.py new file mode 100644 index 0000000000000000000000000000000000000000..6892de3dd29fda569527342377c6e83ce0d972bf --- /dev/null +++ b/mmseg/datasets/potsdam.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class PotsdamDataset(BaseSegDataset): + """ISPRS Potsdam dataset. + + In segmentation map annotation for Potsdam dataset, 0 is the ignore index. + ``reduce_zero_label`` should be set to True. The ``img_suffix`` and + ``seg_map_suffix`` are both fixed to '.png'. + """ + METAINFO = dict( + classes=('impervious_surface', 'building', 'low_vegetation', 'tree', + 'car', 'clutter'), + palette=[[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], + [255, 255, 0], [255, 0, 0]]) + + def __init__(self, + img_suffix='.png', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + reduce_zero_label=reduce_zero_label, + **kwargs) diff --git a/mmseg/datasets/refuge.py b/mmseg/datasets/refuge.py new file mode 100644 index 0000000000000000000000000000000000000000..4016a825a37cdd0162f9c3e72df2fcabc6984991 --- /dev/null +++ b/mmseg/datasets/refuge.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import mmengine.fileio as fileio + +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class REFUGEDataset(BaseSegDataset): + """REFUGE dataset. 
+
+    In segmentation map annotation for REFUGE, 0 stands for background, which
+    is kept as its own class alongside the 2 foreground categories, so
+    ``reduce_zero_label`` is fixed to False. The ``img_suffix`` is fixed to
+    '.png' and ``seg_map_suffix`` is fixed to '.png'.
+    """
+    METAINFO = dict(
+        classes=('background', 'Optic Cup', 'Optic Disc'),
+        palette=[[120, 120, 120], [6, 230, 230], [56, 59, 120]])
+
+    def __init__(self, **kwargs) -> None:
+        super().__init__(
+            img_suffix='.png',
+            seg_map_suffix='.png',
+            reduce_zero_label=False,
+            **kwargs)
+        assert fileio.exists(
+            self.data_prefix['img_path'], backend_args=self.backend_args)
diff --git a/mmseg/datasets/stare.py b/mmseg/datasets/stare.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b997bb785f20a9225c8b7e3f9b0522bc5e5ed99
--- /dev/null
+++ b/mmseg/datasets/stare.py
@@ -0,0 +1,32 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmengine.fileio as fileio
+
+from mmseg.registry import DATASETS
+from .basesegdataset import BaseSegDataset
+
+
+@DATASETS.register_module()
+class STAREDataset(BaseSegDataset):
+    """STARE dataset.
+
+    In segmentation map annotation for STARE, 0 stands for background, which
+    is included in the 2 categories. ``reduce_zero_label`` is fixed to False.
+    The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
+    '.ah.png'.
+    """
+    METAINFO = dict(
+        classes=('background', 'vessel'),
+        palette=[[120, 120, 120], [6, 230, 230]])
+
+    def __init__(self,
+                 img_suffix='.png',
+                 seg_map_suffix='.ah.png',
+                 reduce_zero_label=False,
+                 **kwargs) -> None:
+        super().__init__(
+            img_suffix=img_suffix,
+            seg_map_suffix=seg_map_suffix,
+            reduce_zero_label=reduce_zero_label,
+            **kwargs)
+        assert fileio.exists(
+            self.data_prefix['img_path'], backend_args=self.backend_args)
diff --git a/mmseg/datasets/synapse.py b/mmseg/datasets/synapse.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f83b6415046667fb24086083c43083040f4487c
--- /dev/null
+++ b/mmseg/datasets/synapse.py
@@ -0,0 +1,28 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmseg.registry import DATASETS
+from .basesegdataset import BaseSegDataset
+
+
+@DATASETS.register_module()
+class SynapseDataset(BaseSegDataset):
+    """Synapse dataset.
+
+    Before preprocessing, the Synapse dataset has 13 foreground categories
+    in total, not counting background. After preprocessing, 8 foreground
+    categories are kept while the other 5 are treated as background. The
+    ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to
+    '.png'.
+    """
+    METAINFO = dict(
+        classes=('background', 'aorta', 'gallbladder', 'left_kidney',
+                 'right_kidney', 'liver', 'pancreas', 'spleen', 'stomach'),
+        palette=[[0, 0, 0], [0, 0, 255], [0, 255, 0], [255, 0, 0],
+                 [0, 255, 255], [255, 0, 255], [255, 255, 0], [60, 255, 255],
+                 [240, 240, 240]])
+
+    def __init__(self,
+                 img_suffix='.jpg',
+                 seg_map_suffix='.png',
+                 **kwargs) -> None:
+        super().__init__(
+            img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs)
diff --git a/mmseg/datasets/transforms/__init__.py b/mmseg/datasets/transforms/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..125f07081810c980ebc6ded077bcf5dfd955cfcf
--- /dev/null
+++ b/mmseg/datasets/transforms/__init__.py
@@ -0,0 +1,30 @@
+# Copyright (c) OpenMMLab. All rights reserved.
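+# Editor's note (illustrative, not part of the original module): every
+# transform exported below is registered in ``TRANSFORMS`` and can be built
+# from a plain config dict, e.g.
+#
+#     from mmseg.registry import TRANSFORMS
+#     flip = TRANSFORMS.build(dict(type='RandomFlip', prob=0.5))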
+from .formatting import PackSegInputs
+from .loading import (LoadAnnotations, LoadBiomedicalAnnotation,
+                      LoadBiomedicalData, LoadBiomedicalImageFromFile,
+                      LoadDepthAnnotation, LoadImageFromNDArray,
+                      LoadMultipleRSImageFromFile, LoadSingleRSImageFromFile)
+# yapf: disable
+from .transforms import (CLAHE, AdjustGamma, Albu, BioMedical3DPad,
+                         BioMedical3DRandomCrop, BioMedical3DRandomFlip,
+                         BioMedicalGaussianBlur, BioMedicalGaussianNoise,
+                         BioMedicalRandomGamma, ConcatCDInput, GenerateEdge,
+                         PhotoMetricDistortion, RandomCrop, RandomCutOut,
+                         RandomDepthMix, RandomFlip, RandomMosaic,
+                         RandomRotate, RandomRotFlip, Rerange, Resize,
+                         ResizeShortestEdge, ResizeToMultiple, RGB2Gray,
+                         SegRescale)
+
+# yapf: enable
+__all__ = [
+    'LoadAnnotations', 'RandomCrop', 'BioMedical3DRandomCrop', 'SegRescale',
+    'PhotoMetricDistortion', 'RandomRotate', 'AdjustGamma', 'CLAHE', 'Rerange',
+    'RGB2Gray', 'RandomCutOut', 'RandomMosaic', 'PackSegInputs',
+    'ResizeToMultiple', 'LoadImageFromNDArray', 'LoadBiomedicalImageFromFile',
+    'LoadBiomedicalAnnotation', 'LoadBiomedicalData', 'GenerateEdge',
+    'ResizeShortestEdge', 'BioMedicalGaussianNoise', 'BioMedicalGaussianBlur',
+    'BioMedical3DRandomFlip', 'BioMedicalRandomGamma', 'BioMedical3DPad',
+    'RandomRotFlip', 'Albu', 'LoadSingleRSImageFromFile', 'ConcatCDInput',
+    'LoadMultipleRSImageFromFile', 'LoadDepthAnnotation', 'RandomDepthMix',
+    'RandomFlip', 'Resize'
+]
diff --git a/mmseg/datasets/transforms/formatting.py b/mmseg/datasets/transforms/formatting.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd250551e98ffc9decaa2e168943821501844c1f
--- /dev/null
+++ b/mmseg/datasets/transforms/formatting.py
@@ -0,0 +1,112 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+from mmcv.transforms import to_tensor
+from mmcv.transforms.base import BaseTransform
+from mmengine.structures import PixelData
+
+from mmseg.registry import TRANSFORMS
+from mmseg.structures import SegDataSample
+
+
+@TRANSFORMS.register_module()
+class PackSegInputs(BaseTransform):
+    """Pack the inputs data for the semantic segmentation.
+ + The ``img_meta`` item is always populated. The contents of the + ``img_meta`` dictionary depends on ``meta_keys``. By default this includes: + + - ``img_path``: filename of the image + + - ``ori_shape``: original shape of the image as a tuple (h, w, c) + + - ``img_shape``: shape of the image input to the network as a tuple \ + (h, w, c). Note that images may be zero padded on the \ + bottom/right if the batch tensor is larger than this shape. + + - ``pad_shape``: shape of padded images + + - ``scale_factor``: a float indicating the preprocessing scale + + - ``flip``: a boolean indicating if image flip transform was used + + - ``flip_direction``: the flipping direction + + Args: + meta_keys (Sequence[str], optional): Meta keys to be packed from + ``SegDataSample`` and collected in ``data[img_metas]``. + Default: ``('img_path', 'ori_shape', + 'img_shape', 'pad_shape', 'scale_factor', 'flip', + 'flip_direction')`` + """ + + def __init__(self, + meta_keys=('img_path', 'seg_map_path', 'ori_shape', + 'img_shape', 'pad_shape', 'scale_factor', 'flip', + 'flip_direction', 'reduce_zero_label')): + self.meta_keys = meta_keys + + def transform(self, results: dict) -> dict: + """Method to pack the input data. + + Args: + results (dict): Result dict from the data pipeline. + + Returns: + dict: + + - 'inputs' (obj:`torch.Tensor`): The forward data of models. + - 'data_sample' (obj:`SegDataSample`): The annotation info of the + sample. + """ + packed_results = dict() + if 'img' in results: + img = results['img'] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + if not img.flags.c_contiguous: + img = to_tensor(np.ascontiguousarray(img.transpose(2, 0, 1))) + else: + img = img.transpose(2, 0, 1) + img = to_tensor(img).contiguous() + packed_results['inputs'] = img + + data_sample = SegDataSample() + if 'gt_seg_map' in results: + if len(results['gt_seg_map'].shape) == 2: + data = to_tensor(results['gt_seg_map'][None, + ...].astype(np.int64)) + else: + warnings.warn('Please pay attention your ground truth ' + 'segmentation map, usually the segmentation ' + 'map is 2D, but got ' + f'{results["gt_seg_map"].shape}') + data = to_tensor(results['gt_seg_map'].astype(np.int64)) + gt_sem_seg_data = dict(data=data) + data_sample.gt_sem_seg = PixelData(**gt_sem_seg_data) + + if 'gt_edge_map' in results: + gt_edge_data = dict( + data=to_tensor(results['gt_edge_map'][None, + ...].astype(np.int64))) + data_sample.set_data(dict(gt_edge_map=PixelData(**gt_edge_data))) + + if 'gt_depth_map' in results: + gt_depth_data = dict( + data=to_tensor(results['gt_depth_map'][None, ...])) + data_sample.set_data(dict(gt_depth_map=PixelData(**gt_depth_data))) + + img_meta = {} + for key in self.meta_keys: + if key in results: + img_meta[key] = results[key] + data_sample.set_metainfo(img_meta) + packed_results['data_samples'] = data_sample + + return packed_results + + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(meta_keys={self.meta_keys})' + return repr_str diff --git a/mmseg/datasets/transforms/loading.py b/mmseg/datasets/transforms/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..438b5527f08d4aa7b66a7ba972af05f34dd192ff --- /dev/null +++ b/mmseg/datasets/transforms/loading.py @@ -0,0 +1,704 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
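+# Editor's note (illustrative, not part of the original module): the loaders
+# below fetch raw bytes through mmengine's fileio abstraction and then decode
+# them, roughly:
+#
+#     import mmengine.fileio as fileio
+#     img_bytes = fileio.get('path/to/ann.png')  # hypothetical path
+#     # decoded via mmcv.imfrombytes / mmseg.utils.datafrombytes below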
+import warnings +from typing import Dict, Optional, Union + +import mmcv +import mmengine.fileio as fileio +import numpy as np +from mmcv.transforms import BaseTransform +from mmcv.transforms import LoadAnnotations as MMCV_LoadAnnotations +from mmcv.transforms import LoadImageFromFile + +from mmseg.registry import TRANSFORMS +from mmseg.utils import datafrombytes + +try: + from osgeo import gdal +except ImportError: + gdal = None + + +@TRANSFORMS.register_module() +class LoadAnnotations(MMCV_LoadAnnotations): + """Load annotations for semantic segmentation provided by dataset. + + The annotation format is as the following: + + .. code-block:: python + + { + # Filename of semantic segmentation ground truth file. + 'seg_map_path': 'a/b/c' + } + + After this module, the annotation has been changed to the format below: + + .. code-block:: python + + { + # in str + 'seg_fields': List + # In uint8 type. + 'gt_seg_map': np.ndarray (H, W) + } + + Required Keys: + + - seg_map_path (str): Path of semantic segmentation ground truth file. + + Added Keys: + + - seg_fields (List) + - gt_seg_map (np.uint8) + + Args: + reduce_zero_label (bool, optional): Whether reduce all label value + by 1. Usually used for datasets where 0 is background label. + Defaults to None. + imdecode_backend (str): The image decoding backend type. The backend + argument for :func:``mmcv.imfrombytes``. + See :fun:``mmcv.imfrombytes`` for details. + Defaults to 'pillow'. + backend_args (dict): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to None. + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. + """ + + def __init__( + self, + reduce_zero_label=None, + backend_args=None, + imdecode_backend='pillow', + ) -> None: + super().__init__( + with_bbox=False, + with_label=False, + with_seg=True, + with_keypoints=False, + imdecode_backend=imdecode_backend, + backend_args=backend_args) + self.reduce_zero_label = reduce_zero_label + if self.reduce_zero_label is not None: + warnings.warn('`reduce_zero_label` will be deprecated, ' + 'if you would like to ignore the zero label, please ' + 'set `reduce_zero_label=True` when dataset ' + 'initialized') + self.imdecode_backend = imdecode_backend + + def _load_seg_map(self, results: dict) -> None: + """Private function to load semantic segmentation annotations. + + Args: + results (dict): Result dict from :obj:``mmcv.BaseDataset``. + + Returns: + dict: The dict contains loaded semantic segmentation annotations. 
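+
+        A worked sketch of the ``reduce_zero_label`` remapping applied
+        below (values illustrative): label 0 becomes 255 (ignored) and
+        every other label shifts down by one.
+
+        .. code-block:: python
+
+            import numpy as np
+
+            seg = np.array([0, 1, 2, 255], dtype=np.uint8)
+            seg[seg == 0] = 255
+            seg = seg - 1
+            seg[seg == 254] = 255
+            # seg is now [255, 0, 1, 255]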
+ """ + + img_bytes = fileio.get( + results['seg_map_path'], backend_args=self.backend_args) + gt_semantic_seg = mmcv.imfrombytes( + img_bytes, flag='unchanged', + backend=self.imdecode_backend).squeeze().astype(np.uint8) + + # reduce zero_label + if self.reduce_zero_label is None: + self.reduce_zero_label = results['reduce_zero_label'] + assert self.reduce_zero_label == results['reduce_zero_label'], \ + 'Initialize dataset with `reduce_zero_label` as ' \ + f'{results["reduce_zero_label"]} but when load annotation ' \ + f'the `reduce_zero_label` is {self.reduce_zero_label}' + if self.reduce_zero_label: + # avoid using underflow conversion + gt_semantic_seg[gt_semantic_seg == 0] = 255 + gt_semantic_seg = gt_semantic_seg - 1 + gt_semantic_seg[gt_semantic_seg == 254] = 255 + # modify if custom classes + if results.get('label_map', None) is not None: + # Add deep copy to solve bug of repeatedly + # replace `gt_semantic_seg`, which is reported in + # https://github.com/open-mmlab/mmsegmentation/pull/1445/ + gt_semantic_seg_copy = gt_semantic_seg.copy() + for old_id, new_id in results['label_map'].items(): + gt_semantic_seg[gt_semantic_seg_copy == old_id] = new_id + results['gt_seg_map'] = gt_semantic_seg + results['seg_fields'].append('gt_seg_map') + + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(reduce_zero_label={self.reduce_zero_label}, ' + repr_str += f"imdecode_backend='{self.imdecode_backend}', " + repr_str += f'backend_args={self.backend_args})' + return repr_str + + +@TRANSFORMS.register_module() +class LoadImageFromNDArray(LoadImageFromFile): + """Load an image from ``results['img']``. + + Similar with :obj:`LoadImageFromFile`, but the image has been loaded as + :obj:`np.ndarray` in ``results['img']``. Can be used when loading image + from webcam. + + Required Keys: + + - img + + Modified Keys: + + - img + - img_path + - img_shape + - ori_shape + + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is an uint8 array. + Defaults to False. + """ + + def transform(self, results: dict) -> dict: + """Transform function to add image meta information. + + Args: + results (dict): Result dict with Webcam read image in + ``results['img']``. + + Returns: + dict: The dict contains loaded image and meta information. + """ + + img = results['img'] + if self.to_float32: + img = img.astype(np.float32) + + results['img_path'] = None + results['img'] = img + results['img_shape'] = img.shape[:2] + results['ori_shape'] = img.shape[:2] + return results + + +@TRANSFORMS.register_module() +class LoadBiomedicalImageFromFile(BaseTransform): + """Load an biomedical mage from file. + + Required Keys: + + - img_path + + Added Keys: + + - img (np.ndarray): Biomedical image with shape (N, Z, Y, X) by default, + N is the number of modalities, and data type is float32 + if set to_float32 = True, or float64 if decode_backend is 'nifti' and + to_float32 is False. + - img_shape + - ori_shape + + Args: + decode_backend (str): The data decoding backend type. Options are + 'numpy'and 'nifti', and there is a convention that when backend is + 'nifti' the axis of data loaded is XYZ, and when backend is + 'numpy', the the axis is ZYX. The data will be transposed if the + backend is 'nifti'. Defaults to 'nifti'. + to_xyz (bool): Whether transpose data from Z, Y, X to X, Y, Z. + Defaults to False. + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. 
If set to False, the loaded image is an float64 array. + Defaults to True. + backend_args (dict, Optional): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to None. + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. + """ + + def __init__(self, + decode_backend: str = 'nifti', + to_xyz: bool = False, + to_float32: bool = True, + backend_args: Optional[dict] = None) -> None: + self.decode_backend = decode_backend + self.to_xyz = to_xyz + self.to_float32 = to_float32 + self.backend_args = backend_args.copy() if backend_args else None + + def transform(self, results: Dict) -> Dict: + """Functions to load image. + + Args: + results (dict): Result dict from :obj:``mmcv.BaseDataset``. + + Returns: + dict: The dict contains loaded image and meta information. + """ + + filename = results['img_path'] + + data_bytes = fileio.get(filename, self.backend_args) + img = datafrombytes(data_bytes, backend=self.decode_backend) + + if self.to_float32: + img = img.astype(np.float32) + + if len(img.shape) == 3: + img = img[None, ...] + + if self.decode_backend == 'nifti': + img = img.transpose(0, 3, 2, 1) + + if self.to_xyz: + img = img.transpose(0, 3, 2, 1) + + results['img'] = img + results['img_shape'] = img.shape[1:] + results['ori_shape'] = img.shape[1:] + return results + + def __repr__(self): + repr_str = (f'{self.__class__.__name__}(' + f"decode_backend='{self.decode_backend}', " + f'to_xyz={self.to_xyz}, ' + f'to_float32={self.to_float32}, ' + f'backend_args={self.backend_args})') + return repr_str + + +@TRANSFORMS.register_module() +class LoadBiomedicalAnnotation(BaseTransform): + """Load ``seg_map`` annotation provided by biomedical dataset. + + The annotation format is as the following: + + .. code-block:: python + + { + 'gt_seg_map': np.ndarray (X, Y, Z) or (Z, Y, X) + } + + Required Keys: + + - seg_map_path + + Added Keys: + + - gt_seg_map (np.ndarray): Biomedical seg map with shape (Z, Y, X) by + default, and data type is float32 if set to_float32 = True, or + float64 if decode_backend is 'nifti' and to_float32 is False. + + Args: + decode_backend (str): The data decoding backend type. Options are + 'numpy'and 'nifti', and there is a convention that when backend is + 'nifti' the axis of data loaded is XYZ, and when backend is + 'numpy', the the axis is ZYX. The data will be transposed if the + backend is 'nifti'. Defaults to 'nifti'. + to_xyz (bool): Whether transpose data from Z, Y, X to X, Y, Z. + Defaults to False. + to_float32 (bool): Whether to convert the loaded seg map to a float32 + numpy array. If set to False, the loaded image is an float64 array. + Defaults to True. + backend_args (dict, Optional): Arguments to instantiate a file backend. + See :class:`mmengine.fileio` for details. + Defaults to None. + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. + """ + + def __init__(self, + decode_backend: str = 'nifti', + to_xyz: bool = False, + to_float32: bool = True, + backend_args: Optional[dict] = None) -> None: + super().__init__() + self.decode_backend = decode_backend + self.to_xyz = to_xyz + self.to_float32 = to_float32 + self.backend_args = backend_args.copy() if backend_args else None + + def transform(self, results: Dict) -> Dict: + """Functions to load image. + + Args: + results (dict): Result dict from :obj:``mmcv.BaseDataset``. + + Returns: + dict: The dict contains loaded image and meta information. 
+ """ + data_bytes = fileio.get(results['seg_map_path'], self.backend_args) + gt_seg_map = datafrombytes(data_bytes, backend=self.decode_backend) + + if self.to_float32: + gt_seg_map = gt_seg_map.astype(np.float32) + + if self.decode_backend == 'nifti': + gt_seg_map = gt_seg_map.transpose(2, 1, 0) + + if self.to_xyz: + gt_seg_map = gt_seg_map.transpose(2, 1, 0) + + results['gt_seg_map'] = gt_seg_map + return results + + def __repr__(self): + repr_str = (f'{self.__class__.__name__}(' + f"decode_backend='{self.decode_backend}', " + f'to_xyz={self.to_xyz}, ' + f'to_float32={self.to_float32}, ' + f'backend_args={self.backend_args})') + return repr_str + + +@TRANSFORMS.register_module() +class LoadBiomedicalData(BaseTransform): + """Load an biomedical image and annotation from file. + + The loading data format is as the following: + + .. code-block:: python + + { + 'img': np.ndarray data[:-1, X, Y, Z] + 'seg_map': np.ndarray data[-1, X, Y, Z] + } + + + Required Keys: + + - img_path + + Added Keys: + + - img (np.ndarray): Biomedical image with shape (N, Z, Y, X) by default, + N is the number of modalities. + - gt_seg_map (np.ndarray, optional): Biomedical seg map with shape + (Z, Y, X) by default. + - img_shape + - ori_shape + + Args: + with_seg (bool): Whether to parse and load the semantic segmentation + annotation. Defaults to False. + decode_backend (str): The data decoding backend type. Options are + 'numpy'and 'nifti', and there is a convention that when backend is + 'nifti' the axis of data loaded is XYZ, and when backend is + 'numpy', the the axis is ZYX. The data will be transposed if the + backend is 'nifti'. Defaults to 'nifti'. + to_xyz (bool): Whether transpose data from Z, Y, X to X, Y, Z. + Defaults to False. + backend_args (dict, Optional): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to None. + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. + """ + + def __init__(self, + with_seg=False, + decode_backend: str = 'numpy', + to_xyz: bool = False, + backend_args: Optional[dict] = None) -> None: # noqa + self.with_seg = with_seg + self.decode_backend = decode_backend + self.to_xyz = to_xyz + self.backend_args = backend_args.copy() if backend_args else None + + def transform(self, results: Dict) -> Dict: + """Functions to load image. + + Args: + results (dict): Result dict from :obj:``mmcv.BaseDataset``. + + Returns: + dict: The dict contains loaded image and meta information. 
+ """ + data_bytes = fileio.get(results['img_path'], self.backend_args) + data = datafrombytes(data_bytes, backend=self.decode_backend) + # img is 4D data (N, X, Y, Z), N is the number of protocol + img = data[:-1, :] + + if self.decode_backend == 'nifti': + img = img.transpose(0, 3, 2, 1) + + if self.to_xyz: + img = img.transpose(0, 3, 2, 1) + + results['img'] = img + results['img_shape'] = img.shape[1:] + results['ori_shape'] = img.shape[1:] + + if self.with_seg: + gt_seg_map = data[-1, :] + if self.decode_backend == 'nifti': + gt_seg_map = gt_seg_map.transpose(2, 1, 0) + + if self.to_xyz: + gt_seg_map = gt_seg_map.transpose(2, 1, 0) + results['gt_seg_map'] = gt_seg_map + return results + + def __repr__(self) -> str: + repr_str = (f'{self.__class__.__name__}(' + f'with_seg={self.with_seg}, ' + f"decode_backend='{self.decode_backend}', " + f'to_xyz={self.to_xyz}, ' + f'backend_args={self.backend_args})') + return repr_str + + +@TRANSFORMS.register_module() +class InferencerLoader(BaseTransform): + """Load an image from ``results['img']``. + + Similar with :obj:`LoadImageFromFile`, but the image has been loaded as + :obj:`np.ndarray` in ``results['img']``. Can be used when loading image + from webcam. + + Required Keys: + + - img + + Modified Keys: + + - img + - img_path + - img_shape + - ori_shape + + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is an uint8 array. + Defaults to False. + """ + + def __init__(self, **kwargs) -> None: + super().__init__() + self.from_file = TRANSFORMS.build( + dict(type='LoadImageFromFile', **kwargs)) + self.from_ndarray = TRANSFORMS.build( + dict(type='LoadImageFromNDArray', **kwargs)) + + def transform(self, single_input: Union[str, np.ndarray, dict]) -> dict: + """Transform function to add image meta information. + + Args: + results (dict): Result dict with Webcam read image in + ``results['img']``. + + Returns: + dict: The dict contains loaded image and meta information. + """ + if isinstance(single_input, str): + inputs = dict(img_path=single_input) + elif isinstance(single_input, np.ndarray): + inputs = dict(img=single_input) + elif isinstance(single_input, dict): + inputs = single_input + else: + raise NotImplementedError + + if 'img' in inputs: + return self.from_ndarray(inputs) + return self.from_file(inputs) + + +@TRANSFORMS.register_module() +class LoadSingleRSImageFromFile(BaseTransform): + """Load a Remote Sensing mage from file. + + Required Keys: + + - img_path + + Modified Keys: + + - img + - img_shape + - ori_shape + + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is a float64 array. + Defaults to True. + """ + + def __init__(self, to_float32: bool = True): + self.to_float32 = to_float32 + + if gdal is None: + raise RuntimeError('gdal is not installed') + + def transform(self, results: Dict) -> Dict: + """Functions to load image. + + Args: + results (dict): Result dict from :obj:``mmcv.BaseDataset``. + + Returns: + dict: The dict contains loaded image and meta information. 
+ """ + + filename = results['img_path'] + ds = gdal.Open(filename) + if ds is None: + raise Exception(f'Unable to open file: {filename}') + img = np.einsum('ijk->jki', ds.ReadAsArray()) + + if self.to_float32: + img = img.astype(np.float32) + + results['img'] = img + results['img_shape'] = img.shape[:2] + results['ori_shape'] = img.shape[:2] + return results + + def __repr__(self): + repr_str = (f'{self.__class__.__name__}(' + f'to_float32={self.to_float32})') + return repr_str + + +@TRANSFORMS.register_module() +class LoadMultipleRSImageFromFile(BaseTransform): + """Load two Remote Sensing mage from file. + + Required Keys: + + - img_path + - img_path2 + + Modified Keys: + + - img + - img2 + - img_shape + - ori_shape + + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is a float64 array. + Defaults to True. + """ + + def __init__(self, to_float32: bool = True): + if gdal is None: + raise RuntimeError('gdal is not installed') + self.to_float32 = to_float32 + + def transform(self, results: Dict) -> Dict: + """Functions to load image. + + Args: + results (dict): Result dict from :obj:``mmcv.BaseDataset``. + + Returns: + dict: The dict contains loaded image and meta information. + """ + + filename = results['img_path'] + filename2 = results['img_path2'] + + ds = gdal.Open(filename) + ds2 = gdal.Open(filename2) + + if ds is None: + raise Exception(f'Unable to open file: {filename}') + if ds2 is None: + raise Exception(f'Unable to open file: {filename2}') + + img = np.einsum('ijk->jki', ds.ReadAsArray()) + img2 = np.einsum('ijk->jki', ds2.ReadAsArray()) + + if self.to_float32: + img = img.astype(np.float32) + img2 = img2.astype(np.float32) + + if img.shape != img2.shape: + raise Exception(f'Image shapes do not match:' + f' {img.shape} vs {img2.shape}') + + results['img'] = img + results['img2'] = img2 + results['img_shape'] = img.shape[:2] + results['ori_shape'] = img.shape[:2] + return results + + def __repr__(self): + repr_str = (f'{self.__class__.__name__}(' + f'to_float32={self.to_float32})') + return repr_str + + +@TRANSFORMS.register_module() +class LoadDepthAnnotation(BaseTransform): + """Load ``depth_map`` annotation provided by depth estimation dataset. + + The annotation format is as the following: + + .. code-block:: python + + { + 'gt_depth_map': np.ndarray [Y, X] + } + + Required Keys: + + - seg_depth_path + + Added Keys: + + - gt_depth_map (np.ndarray): Depth map with shape (Y, X) by + default, and data type is float32 if set to_float32 = True. + - depth_rescale_factor (float): The rescale factor of depth map, which + can be used to recover the original value of depth map. + + Args: + decode_backend (str): The data decoding backend type. Options are + 'numpy', 'nifti', and 'cv2'. Defaults to 'cv2'. + to_float32 (bool): Whether to convert the loaded depth map to a float32 + numpy array. If set to False, the loaded image is an uint16 array. + Defaults to True. + depth_rescale_factor (float): Factor to rescale the depth value to + limit the range. Defaults to 1.0. + backend_args (dict, Optional): Arguments to instantiate a file backend. + See :class:`mmengine.fileio` for details. + Defaults to None. + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. 
+ """ + + def __init__(self, + decode_backend: str = 'cv2', + to_float32: bool = True, + depth_rescale_factor: float = 1.0, + backend_args: Optional[dict] = None) -> None: + super().__init__() + self.decode_backend = decode_backend + self.to_float32 = to_float32 + self.depth_rescale_factor = depth_rescale_factor + self.backend_args = backend_args.copy() if backend_args else None + + def transform(self, results: Dict) -> Dict: + """Functions to load depth map. + + Args: + results (dict): Result dict from :obj:``mmcv.BaseDataset``. + + Returns: + dict: The dict contains loaded depth map. + """ + data_bytes = fileio.get(results['depth_map_path'], self.backend_args) + gt_depth_map = datafrombytes(data_bytes, backend=self.decode_backend) + + if self.to_float32: + gt_depth_map = gt_depth_map.astype(np.float32) + + gt_depth_map *= self.depth_rescale_factor + results['gt_depth_map'] = gt_depth_map + results['seg_fields'].append('gt_depth_map') + results['depth_rescale_factor'] = self.depth_rescale_factor + return results + + def __repr__(self): + repr_str = (f'{self.__class__.__name__}(' + f"decode_backend='{self.decode_backend}', " + f'to_float32={self.to_float32}, ' + f'backend_args={self.backend_args})') + return repr_str diff --git a/mmseg/datasets/transforms/transforms.py b/mmseg/datasets/transforms/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..082ae5b4401dce3b90bab888bd754ee164094b88 --- /dev/null +++ b/mmseg/datasets/transforms/transforms.py @@ -0,0 +1,2514 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import inspect +import warnings +from typing import Dict, List, Optional, Sequence, Tuple, Union + +import cv2 +import mmcv +import mmengine +import numpy as np +from mmcv.transforms import RandomFlip as MMCV_RandomFlip +from mmcv.transforms import Resize as MMCV_Resize +from mmcv.transforms.base import BaseTransform +from mmcv.transforms.utils import cache_randomness +from mmengine.utils import is_tuple_of +from numpy import random +from scipy.ndimage import gaussian_filter + +from mmseg.datasets.dataset_wrappers import MultiImageMixDataset +from mmseg.registry import TRANSFORMS + +try: + import albumentations + from albumentations import Compose + ALBU_INSTALLED = True +except ImportError: + albumentations = None + Compose = None + ALBU_INSTALLED = False + + +@TRANSFORMS.register_module() +class ResizeToMultiple(BaseTransform): + """Resize images & seg to multiple of divisor. + + Required Keys: + + - img + - gt_seg_map + + Modified Keys: + + - img + - img_shape + - pad_shape + + Args: + size_divisor (int): images and gt seg maps need to resize to multiple + of size_divisor. Default: 32. + interpolation (str, optional): The interpolation mode of image resize. + Default: None + """ + + def __init__(self, size_divisor=32, interpolation=None): + self.size_divisor = size_divisor + self.interpolation = interpolation + + def transform(self, results: dict) -> dict: + """Call function to resize images, semantic segmentation map to + multiple of size divisor. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Resized results, 'img_shape', 'pad_shape' keys are updated. + """ + # Align image to multiple of size divisor. 
+ img = results['img'] + img = mmcv.imresize_to_multiple( + img, + self.size_divisor, + scale_factor=1, + interpolation=self.interpolation + if self.interpolation else 'bilinear') + + results['img'] = img + results['img_shape'] = img.shape[:2] + results['pad_shape'] = img.shape[:2] + + # Align segmentation map to multiple of size divisor. + for key in results.get('seg_fields', []): + gt_seg = results[key] + gt_seg = mmcv.imresize_to_multiple( + gt_seg, + self.size_divisor, + scale_factor=1, + interpolation='nearest') + results[key] = gt_seg + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(size_divisor={self.size_divisor}, ' + f'interpolation={self.interpolation})') + return repr_str + + +@TRANSFORMS.register_module() +class Rerange(BaseTransform): + """Rerange the image pixel value. + + Required Keys: + + - img + + Modified Keys: + + - img + + Args: + min_value (float or int): Minimum value of the reranged image. + Default: 0. + max_value (float or int): Maximum value of the reranged image. + Default: 255. + """ + + def __init__(self, min_value=0, max_value=255): + assert isinstance(min_value, float) or isinstance(min_value, int) + assert isinstance(max_value, float) or isinstance(max_value, int) + assert min_value < max_value + self.min_value = min_value + self.max_value = max_value + + def transform(self, results: dict) -> dict: + """Call function to rerange images. + + Args: + results (dict): Result dict from loading pipeline. + Returns: + dict: Reranged results. + """ + + img = results['img'] + img_min_value = np.min(img) + img_max_value = np.max(img) + + assert img_min_value < img_max_value + # rerange to [0, 1] + img = (img - img_min_value) / (img_max_value - img_min_value) + # rerange to [min_value, max_value] + img = img * (self.max_value - self.min_value) + self.min_value + results['img'] = img + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(min_value={self.min_value}, max_value={self.max_value})' + return repr_str + + +@TRANSFORMS.register_module() +class CLAHE(BaseTransform): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Required Keys: + + - img + + Modified Keys: + + - img + + Args: + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + """ + + def __init__(self, clip_limit=40.0, tile_grid_size=(8, 8)): + assert isinstance(clip_limit, (float, int)) + self.clip_limit = clip_limit + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + self.tile_grid_size = tile_grid_size + + def transform(self, results: dict) -> dict: + """Call function to Use CLAHE method process images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. 
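+
+        A pipeline-config sketch (placement and values illustrative):
+
+        .. code-block:: python
+
+            pipeline = [
+                dict(type='LoadImageFromFile'),
+                dict(type='CLAHE', clip_limit=40.0, tile_grid_size=(8, 8)),
+            ]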
+ """ + + for i in range(results['img'].shape[2]): + results['img'][:, :, i] = mmcv.clahe( + np.array(results['img'][:, :, i], dtype=np.uint8), + self.clip_limit, self.tile_grid_size) + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(clip_limit={self.clip_limit}, ' \ + f'tile_grid_size={self.tile_grid_size})' + return repr_str + + +@TRANSFORMS.register_module() +class RandomCrop(BaseTransform): + """Random crop the image & seg. + + Required Keys: + + - img + - gt_seg_map + + Modified Keys: + + - img + - img_shape + - gt_seg_map + + + Args: + crop_size (Union[int, Tuple[int, int]]): Expected size after cropping + with the format of (h, w). If set to an integer, then cropping + width and height are equal to this integer. + cat_max_ratio (float): The maximum ratio that single category could + occupy. + ignore_index (int): The label index to be ignored. Default: 255 + """ + + def __init__(self, + crop_size: Union[int, Tuple[int, int]], + cat_max_ratio: float = 1., + ignore_index: int = 255): + super().__init__() + assert isinstance(crop_size, int) or ( + isinstance(crop_size, tuple) and len(crop_size) == 2 + ), 'The expected crop_size is an integer, or a tuple containing two ' + 'intergers' + + if isinstance(crop_size, int): + crop_size = (crop_size, crop_size) + assert crop_size[0] > 0 and crop_size[1] > 0 + self.crop_size = crop_size + self.cat_max_ratio = cat_max_ratio + self.ignore_index = ignore_index + + @cache_randomness + def crop_bbox(self, results: dict) -> tuple: + """get a crop bounding box. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + tuple: Coordinates of the cropped image. + """ + + def generate_crop_bbox(img: np.ndarray) -> tuple: + """Randomly get a crop bounding box. + + Args: + img (np.ndarray): Original input image. + + Returns: + tuple: Coordinates of the cropped image. + """ + + margin_h = max(img.shape[0] - self.crop_size[0], 0) + margin_w = max(img.shape[1] - self.crop_size[1], 0) + offset_h = np.random.randint(0, margin_h + 1) + offset_w = np.random.randint(0, margin_w + 1) + crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0] + crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1] + + return crop_y1, crop_y2, crop_x1, crop_x2 + + img = results['img'] + crop_bbox = generate_crop_bbox(img) + if self.cat_max_ratio < 1.: + # Repeat 10 times + for _ in range(10): + seg_temp = self.crop(results['gt_seg_map'], crop_bbox) + labels, cnt = np.unique(seg_temp, return_counts=True) + cnt = cnt[labels != self.ignore_index] + if len(cnt) > 1 and np.max(cnt) / np.sum( + cnt) < self.cat_max_ratio: + break + crop_bbox = generate_crop_bbox(img) + + return crop_bbox + + def crop(self, img: np.ndarray, crop_bbox: tuple) -> np.ndarray: + """Crop from ``img`` + + Args: + img (np.ndarray): Original input image. + crop_bbox (tuple): Coordinates of the cropped image. + + Returns: + np.ndarray: The cropped image. + """ + + crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox + img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] + return img + + def transform(self, results: dict) -> dict: + """Transform function to randomly crop images, semantic segmentation + maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Randomly cropped results, 'img_shape' key in result dict is + updated according to crop size. 
+ """ + + img = results['img'] + crop_bbox = self.crop_bbox(results) + + # crop the image + img = self.crop(img, crop_bbox) + + # crop semantic seg + for key in results.get('seg_fields', []): + results[key] = self.crop(results[key], crop_bbox) + + results['img'] = img + results['img_shape'] = img.shape[:2] + return results + + def __repr__(self): + return self.__class__.__name__ + f'(crop_size={self.crop_size})' + + +@TRANSFORMS.register_module() +class RandomRotate(BaseTransform): + """Rotate the image & seg. + + Required Keys: + + - img + - gt_seg_map + + Modified Keys: + + - img + - gt_seg_map + + Args: + prob (float): The rotation probability. + degree (float, tuple[float]): Range of degrees to select from. If + degree is a number instead of tuple like (min, max), + the range of degree will be (``-degree``, ``+degree``) + pad_val (float, optional): Padding value of image. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. Default: None. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. Default: False + """ + + def __init__(self, + prob, + degree, + pad_val=0, + seg_pad_val=255, + center=None, + auto_bound=False): + self.prob = prob + assert prob >= 0 and prob <= 1 + if isinstance(degree, (float, int)): + assert degree > 0, f'degree {degree} should be positive' + self.degree = (-degree, degree) + else: + self.degree = degree + assert len(self.degree) == 2, f'degree {self.degree} should be a ' \ + f'tuple of (min, max)' + self.pal_val = pad_val + self.seg_pad_val = seg_pad_val + self.center = center + self.auto_bound = auto_bound + + @cache_randomness + def generate_degree(self): + return np.random.rand() < self.prob, np.random.uniform( + min(*self.degree), max(*self.degree)) + + def transform(self, results: dict) -> dict: + """Call function to rotate image, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Rotated results. + """ + + rotate, degree = self.generate_degree() + if rotate: + # rotate image + results['img'] = mmcv.imrotate( + results['img'], + angle=degree, + border_value=self.pal_val, + center=self.center, + auto_bound=self.auto_bound) + + # rotate segs + for key in results.get('seg_fields', []): + results[key] = mmcv.imrotate( + results[key], + angle=degree, + border_value=self.seg_pad_val, + center=self.center, + auto_bound=self.auto_bound, + interpolation='nearest') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' \ + f'degree={self.degree}, ' \ + f'pad_val={self.pal_val}, ' \ + f'seg_pad_val={self.seg_pad_val}, ' \ + f'center={self.center}, ' \ + f'auto_bound={self.auto_bound})' + return repr_str + + +@TRANSFORMS.register_module() +class RGB2Gray(BaseTransform): + """Convert RGB image to grayscale image. + + Required Keys: + + - img + + Modified Keys: + + - img + - img_shape + + This transform calculate the weighted mean of input image channels with + ``weights`` and then expand the channels to ``out_channels``. When + ``out_channels`` is None, the number of output channels is the same as + input channels. + + Args: + out_channels (int): Expected number of output channels after + transforming. Default: None. + weights (tuple[float]): The weights to calculate the weighted mean. + Default: (0.299, 0.587, 0.114). 
+ """ + + def __init__(self, out_channels=None, weights=(0.299, 0.587, 0.114)): + assert out_channels is None or out_channels > 0 + self.out_channels = out_channels + assert isinstance(weights, tuple) + for item in weights: + assert isinstance(item, (float, int)) + self.weights = weights + + def transform(self, results: dict) -> dict: + """Call function to convert RGB image to grayscale image. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with grayscale image. + """ + img = results['img'] + assert len(img.shape) == 3 + assert img.shape[2] == len(self.weights) + weights = np.array(self.weights).reshape((1, 1, -1)) + img = (img * weights).sum(2, keepdims=True) + if self.out_channels is None: + img = img.repeat(weights.shape[2], axis=2) + else: + img = img.repeat(self.out_channels, axis=2) + + results['img'] = img + results['img_shape'] = img.shape + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(out_channels={self.out_channels}, ' \ + f'weights={self.weights})' + return repr_str + + +@TRANSFORMS.register_module() +class AdjustGamma(BaseTransform): + """Using gamma correction to process the image. + + Required Keys: + + - img + + Modified Keys: + + - img + + Args: + gamma (float or int): Gamma value used in gamma correction. + Default: 1.0. + """ + + def __init__(self, gamma=1.0): + assert isinstance(gamma, float) or isinstance(gamma, int) + assert gamma > 0 + self.gamma = gamma + inv_gamma = 1.0 / gamma + self.table = np.array([(i / 255.0)**inv_gamma * 255 + for i in np.arange(256)]).astype('uint8') + + def transform(self, results: dict) -> dict: + """Call function to process the image with gamma correction. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + results['img'] = mmcv.lut_transform( + np.array(results['img'], dtype=np.uint8), self.table) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(gamma={self.gamma})' + + +@TRANSFORMS.register_module() +class SegRescale(BaseTransform): + """Rescale semantic segmentation maps. + + Required Keys: + + - gt_seg_map + + Modified Keys: + + - gt_seg_map + + Args: + scale_factor (float): The scale factor of the final output. + """ + + def __init__(self, scale_factor=1): + self.scale_factor = scale_factor + + def transform(self, results: dict) -> dict: + """Call function to scale the semantic segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with semantic segmentation map scaled. + """ + for key in results.get('seg_fields', []): + if self.scale_factor != 1: + results[key] = mmcv.imrescale( + results[key], self.scale_factor, interpolation='nearest') + return results + + def __repr__(self): + return self.__class__.__name__ + f'(scale_factor={self.scale_factor})' + + +@TRANSFORMS.register_module() +class PhotoMetricDistortion(BaseTransform): + """Apply photometric distortion to image sequentially, every transformation + is applied with a probability of 0.5. The position of random contrast is in + second or second to last. + + 1. random brightness + 2. random contrast (mode 0) + 3. convert color from BGR to HSV + 4. random saturation + 5. random hue + 6. convert color from HSV to BGR + 7. random contrast (mode 1) + + Required Keys: + + - img + + Modified Keys: + + - img + + Args: + brightness_delta (int): delta of brightness. + contrast_range (tuple): range of contrast. 
+        saturation_range (tuple): range of saturation.
+        hue_delta (int): delta of hue.
+    """
+
+    def __init__(self,
+                 brightness_delta: int = 32,
+                 contrast_range: Sequence[float] = (0.5, 1.5),
+                 saturation_range: Sequence[float] = (0.5, 1.5),
+                 hue_delta: int = 18):
+        self.brightness_delta = brightness_delta
+        self.contrast_lower, self.contrast_upper = contrast_range
+        self.saturation_lower, self.saturation_upper = saturation_range
+        self.hue_delta = hue_delta
+
+    def convert(self,
+                img: np.ndarray,
+                alpha: float = 1,
+                beta: float = 0) -> np.ndarray:
+        """Multiply by ``alpha`` and add ``beta``, then clip the result.
+
+        Args:
+            img (np.ndarray): The input image.
+            alpha (float): Image weight; changes the contrast/saturation
+                of the image. Default: 1
+            beta (float): Image bias; changes the brightness of the
+                image. Default: 0
+
+        Returns:
+            np.ndarray: The transformed image.
+        """
+
+        img = img.astype(np.float32) * alpha + beta
+        img = np.clip(img, 0, 255)
+        return img.astype(np.uint8)
+
+    def brightness(self, img: np.ndarray) -> np.ndarray:
+        """Brightness distortion.
+
+        Args:
+            img (np.ndarray): The input image.
+        Returns:
+            np.ndarray: Image after brightness change.
+        """
+
+        if random.randint(2):
+            return self.convert(
+                img,
+                beta=random.uniform(-self.brightness_delta,
+                                    self.brightness_delta))
+        return img
+
+    def contrast(self, img: np.ndarray) -> np.ndarray:
+        """Contrast distortion.
+
+        Args:
+            img (np.ndarray): The input image.
+        Returns:
+            np.ndarray: Image after contrast change.
+        """
+
+        if random.randint(2):
+            return self.convert(
+                img,
+                alpha=random.uniform(self.contrast_lower, self.contrast_upper))
+        return img
+
+    def saturation(self, img: np.ndarray) -> np.ndarray:
+        """Saturation distortion.
+
+        Args:
+            img (np.ndarray): The input image.
+        Returns:
+            np.ndarray: Image after saturation change.
+        """
+
+        if random.randint(2):
+            img = mmcv.bgr2hsv(img)
+            img[:, :, 1] = self.convert(
+                img[:, :, 1],
+                alpha=random.uniform(self.saturation_lower,
+                                     self.saturation_upper))
+            img = mmcv.hsv2bgr(img)
+        return img
+
+    def hue(self, img: np.ndarray) -> np.ndarray:
+        """Hue distortion.
+
+        Args:
+            img (np.ndarray): The input image.
+        Returns:
+            np.ndarray: Image after hue change.
+        """
+
+        if random.randint(2):
+            img = mmcv.bgr2hsv(img)
+            img[:, :,
+                0] = (img[:, :, 0].astype(int) +
+                      random.randint(-self.hue_delta, self.hue_delta)) % 180
+            img = mmcv.hsv2bgr(img)
+        return img
+
+    def transform(self, results: dict) -> dict:
+        """Transform function to perform photometric distortion on images.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Result dict with images distorted.
+        """
+
+        img = results['img']
+        # random brightness
+        img = self.brightness(img)
+
+        # mode == 0 --> do random contrast first
+        # mode == 1 --> do random contrast last
+        mode = random.randint(2)
+        if mode == 1:
+            img = self.contrast(img)
+
+        # random saturation
+        img = self.saturation(img)
+
+        # random hue
+        img = self.hue(img)
+
+        # random contrast
+        if mode == 0:
+            img = self.contrast(img)
+
+        results['img'] = img
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += (f'(brightness_delta={self.brightness_delta}, '
+                     f'contrast_range=({self.contrast_lower}, '
+                     f'{self.contrast_upper}), '
+                     f'saturation_range=({self.saturation_lower}, '
+                     f'{self.saturation_upper}), '
+                     f'hue_delta={self.hue_delta})')
+        return repr_str
+
+
+@TRANSFORMS.register_module()
+class RandomCutOut(BaseTransform):
+    """CutOut operation.
+ + Randomly drop some regions of image used in + `Cutout `_. + + Required Keys: + + - img + - gt_seg_map + + Modified Keys: + + - img + - gt_seg_map + + Args: + prob (float): cutout probability. + n_holes (int | tuple[int, int]): Number of regions to be dropped. + If it is given as a list, number of holes will be randomly + selected from the closed interval [`n_holes[0]`, `n_holes[1]`]. + cutout_shape (tuple[int, int] | list[tuple[int, int]]): The candidate + shape of dropped regions. It can be `tuple[int, int]` to use a + fixed cutout shape, or `list[tuple[int, int]]` to randomly choose + shape from the list. + cutout_ratio (tuple[float, float] | list[tuple[float, float]]): The + candidate ratio of dropped regions. It can be `tuple[float, float]` + to use a fixed ratio or `list[tuple[float, float]]` to randomly + choose ratio from the list. Please note that `cutout_shape` + and `cutout_ratio` cannot be both given at the same time. + fill_in (tuple[float, float, float] | tuple[int, int, int]): The value + of pixel to fill in the dropped regions. Default: (0, 0, 0). + seg_fill_in (int): The labels of pixel to fill in the dropped regions. + If seg_fill_in is None, skip. Default: None. + """ + + def __init__(self, + prob, + n_holes, + cutout_shape=None, + cutout_ratio=None, + fill_in=(0, 0, 0), + seg_fill_in=None): + + assert 0 <= prob and prob <= 1 + assert (cutout_shape is None) ^ (cutout_ratio is None), \ + 'Either cutout_shape or cutout_ratio should be specified.' + assert (isinstance(cutout_shape, (list, tuple)) + or isinstance(cutout_ratio, (list, tuple))) + if isinstance(n_holes, tuple): + assert len(n_holes) == 2 and 0 <= n_holes[0] < n_holes[1] + else: + n_holes = (n_holes, n_holes) + if seg_fill_in is not None: + assert (isinstance(seg_fill_in, int) and 0 <= seg_fill_in + and seg_fill_in <= 255) + self.prob = prob + self.n_holes = n_holes + self.fill_in = fill_in + self.seg_fill_in = seg_fill_in + self.with_ratio = cutout_ratio is not None + self.candidates = cutout_ratio if self.with_ratio else cutout_shape + if not isinstance(self.candidates, list): + self.candidates = [self.candidates] + + @cache_randomness + def do_cutout(self): + return np.random.rand() < self.prob + + @cache_randomness + def generate_patches(self, results): + cutout = self.do_cutout() + + h, w, _ = results['img'].shape + if cutout: + n_holes = np.random.randint(self.n_holes[0], self.n_holes[1] + 1) + else: + n_holes = 0 + x1_lst = [] + y1_lst = [] + index_lst = [] + for _ in range(n_holes): + x1_lst.append(np.random.randint(0, w)) + y1_lst.append(np.random.randint(0, h)) + index_lst.append(np.random.randint(0, len(self.candidates))) + return cutout, n_holes, x1_lst, y1_lst, index_lst + + def transform(self, results: dict) -> dict: + """Call function to drop some regions of image.""" + cutout, n_holes, x1_lst, y1_lst, index_lst = self.generate_patches( + results) + if cutout: + h, w, c = results['img'].shape + for i in range(n_holes): + x1 = x1_lst[i] + y1 = y1_lst[i] + index = index_lst[i] + if not self.with_ratio: + cutout_w, cutout_h = self.candidates[index] + else: + cutout_w = int(self.candidates[index][0] * w) + cutout_h = int(self.candidates[index][1] * h) + + x2 = np.clip(x1 + cutout_w, 0, w) + y2 = np.clip(y1 + cutout_h, 0, h) + results['img'][y1:y2, x1:x2, :] = self.fill_in + + if self.seg_fill_in is not None: + for key in results.get('seg_fields', []): + results[key][y1:y2, x1:x2] = self.seg_fill_in + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += 
f'(prob={self.prob}, ' + repr_str += f'n_holes={self.n_holes}, ' + repr_str += (f'cutout_ratio={self.candidates}, ' if self.with_ratio + else f'cutout_shape={self.candidates}, ') + repr_str += f'fill_in={self.fill_in}, ' + repr_str += f'seg_fill_in={self.seg_fill_in})' + return repr_str + + +@TRANSFORMS.register_module() +class RandomRotFlip(BaseTransform): + """Rotate and flip the image & seg or just rotate the image & seg. + + Required Keys: + + - img + - gt_seg_map + + Modified Keys: + + - img + - gt_seg_map + + Args: + rotate_prob (float): The probability of rotate image. + flip_prob (float): The probability of rotate&flip image. + degree (float, tuple[float]): Range of degrees to select from. If + degree is a number instead of tuple like (min, max), + the range of degree will be (``-degree``, ``+degree``) + """ + + def __init__(self, rotate_prob=0.5, flip_prob=0.5, degree=(-20, 20)): + self.rotate_prob = rotate_prob + self.flip_prob = flip_prob + assert 0 <= rotate_prob <= 1 and 0 <= flip_prob <= 1 + if isinstance(degree, (float, int)): + assert degree > 0, f'degree {degree} should be positive' + self.degree = (-degree, degree) + else: + self.degree = degree + assert len(self.degree) == 2, f'degree {self.degree} should be a ' \ + f'tuple of (min, max)' + + def random_rot_flip(self, results: dict) -> dict: + k = np.random.randint(0, 4) + results['img'] = np.rot90(results['img'], k) + for key in results.get('seg_fields', []): + results[key] = np.rot90(results[key], k) + axis = np.random.randint(0, 2) + results['img'] = np.flip(results['img'], axis=axis).copy() + for key in results.get('seg_fields', []): + results[key] = np.flip(results[key], axis=axis).copy() + return results + + def random_rotate(self, results: dict) -> dict: + angle = np.random.uniform(min(*self.degree), max(*self.degree)) + results['img'] = mmcv.imrotate(results['img'], angle=angle) + for key in results.get('seg_fields', []): + results[key] = mmcv.imrotate(results[key], angle=angle) + return results + + def transform(self, results: dict) -> dict: + """Call function to rotate or rotate & flip image, semantic + segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Rotated or rotated & flipped results. + """ + rotate_flag = 0 + if random.random() < self.rotate_prob: + results = self.random_rotate(results) + rotate_flag = 1 + if random.random() < self.flip_prob and rotate_flag == 0: + results = self.random_rot_flip(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(rotate_prob={self.rotate_prob}, ' \ + f'flip_prob={self.flip_prob}, ' \ + f'degree={self.degree})' + return repr_str + + +@TRANSFORMS.register_module() +class RandomFlip(MMCV_RandomFlip): + """Flip the image & bbox & segmentation map. Added or Updated + keys: flip, flip_direction, img, gt_bboxes, gt_seg_map, and gt_depth_map. + There are 3 flip modes: + + - ``prob`` is float, ``direction`` is string: the image will be + ``direction``ly flipped with probability of ``prob`` . + E.g., ``prob=0.5``, ``direction='horizontal'``, + then image will be horizontally flipped with probability of 0.5. + + - ``prob`` is float, ``direction`` is list of string: the image will + be ``direction[i]``ly flipped with probability of + ``prob/len(direction)``. + E.g., ``prob=0.5``, ``direction=['horizontal', 'vertical']``, + then image will be horizontally flipped with probability of 0.25, + vertically with probability of 0.25. 
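`RandomRotFlip` above samples one rotation count and one flip axis and applies them to the image and every seg field alike; that shared sampling is what keeps image and mask aligned. A minimal NumPy sketch of the idea (dummy arrays, not the mmseg API):

```python
import numpy as np

rng = np.random.default_rng(0)
img = rng.integers(0, 256, size=(6, 8, 3), dtype=np.uint8)
seg = rng.integers(0, 4, size=(6, 8), dtype=np.uint8)

# Sample the rotation/flip parameters once, then apply them to both
# the image and the segmentation map so they stay aligned.
k = rng.integers(0, 4)      # number of 90-degree rotations
axis = rng.integers(0, 2)   # 0: vertical flip, 1: horizontal flip

img = np.flip(np.rot90(img, k), axis=axis).copy()
seg = np.flip(np.rot90(seg, k), axis=axis).copy()
assert img.shape[:2] == seg.shape  # spatial alignment preserved
```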
+ + - ``prob`` is list of float, ``direction`` is list of string: + given ``len(prob) == len(direction)``, the image will + be ``direction[i]``ly flipped with probability of ``prob[i]``. + E.g., ``prob=[0.3, 0.5]``, ``direction=['horizontal', + 'vertical']``, then image will be horizontally flipped with + probability of 0.3, vertically with probability of 0.5. + + Required Keys: + + - img + - gt_bboxes (optional) + - gt_seg_map (optional) + - gt_depth_map (optional) + + Modified Keys: + + - img + - gt_bboxes (optional) + - gt_seg_map (optional) + - gt_depth_map (optional) + + Added Keys: + + - flip + - flip_direction + - swap_seg_labels (optional) + + Args: + prob (float | list[float], optional): The flipping probability. + Defaults to None. + direction(str | list[str]): The flipping direction. Options + If input is a list, the length must equal ``prob``. Each + element in ``prob`` indicates the flip probability of + corresponding direction. Defaults to 'horizontal'. + swap_seg_labels (list, optional): The label pair need to be swapped + for ground truth, like 'left arm' and 'right arm' need to be + swapped after horizontal flipping. For example, ``[(1, 5)]``, + where 1/5 is the label of the left/right arm. Defaults to None. + """ + + def _flip(self, results: dict) -> None: + """Flip images, bounding boxes and semantic segmentation map.""" + # flip image + results['img'] = mmcv.imflip( + results['img'], direction=results['flip_direction']) + + img_shape = results['img'].shape[:2] + + # flip bboxes + if results.get('gt_bboxes', None) is not None: + results['gt_bboxes'] = self._flip_bbox(results['gt_bboxes'], + img_shape, + results['flip_direction']) + + # flip seg map + for key in results.get('seg_fields', []): + if results.get(key, None) is not None: + results[key] = self._flip_seg_map( + results[key], direction=results['flip_direction']).copy() + results['swap_seg_labels'] = self.swap_seg_labels + + +@TRANSFORMS.register_module() +class Resize(MMCV_Resize): + """Resize images & seg & depth map. + + This transform resizes the input image according to ``scale`` or + ``scale_factor``. Seg map, depth map and other relative annotations are + then resized with the same scale factor. + if ``scale`` and ``scale_factor`` are both set, it will use ``scale`` to + resize. + + Required Keys: + + - img + - gt_seg_map (optional) + - gt_depth_map (optional) + + Modified Keys: + + - img + - gt_seg_map + - gt_depth_map + + Added Keys: + + - scale + - scale_factor + - keep_ratio + + Args: + scale (int or tuple): Images scales for resizing. Defaults to None + scale_factor (float or tuple[float]): Scale factors for resizing. + Defaults to None. + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. Defaults to False. + clip_object_border (bool): Whether to clip the objects + outside the border of the image. In some dataset like MOT17, the gt + bboxes are allowed to cross the border of images. Therefore, we + don't need to clip the gt bboxes in these cases. Defaults to True. + backend (str): Image resize backend, choices are 'cv2' and 'pillow'. + These two backends generates slightly different results. Defaults + to 'cv2'. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. Defaults + to 'bilinear'. 
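The `swap_seg_labels` option above exists because a horizontal flip turns left/right semantics around. A small sketch with a hypothetical label pair (1 = left arm, 5 = right arm) shows the relabeling step:

```python
import numpy as np

def swap_labels(seg: np.ndarray, pairs) -> np.ndarray:
    """Swap each (a, b) label pair in a segmentation map."""
    out = seg.copy()
    for a, b in pairs:
        out[seg == a] = b
        out[seg == b] = a
    return out

# A 1x4 map: left arm (1) on the left, right arm (5) on the right.
seg = np.array([[1, 0, 0, 5]], dtype=np.uint8)
flipped = seg[:, ::-1]                  # horizontal flip: [[5 0 0 1]]
fixed = swap_labels(flipped, [(1, 5)])  # restore left/right semantics
print(fixed)                            # [[1 0 0 5]]
```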
+ """ + + def _resize_seg(self, results: dict) -> None: + """Resize semantic segmentation map with ``results['scale']``.""" + for seg_key in results.get('seg_fields', []): + if results.get(seg_key, None) is not None: + if self.keep_ratio: + gt_seg = mmcv.imrescale( + results[seg_key], + results['scale'], + interpolation='nearest', + backend=self.backend) + else: + gt_seg = mmcv.imresize( + results[seg_key], + results['scale'], + interpolation='nearest', + backend=self.backend) + results[seg_key] = gt_seg + + +@TRANSFORMS.register_module() +class RandomMosaic(BaseTransform): + """Mosaic augmentation. Given 4 images, mosaic transform combines them into + one output image. The output image is composed of the parts from each sub- + image. + + .. code:: text + + mosaic transform + center_x + +------------------------------+ + | pad | pad | + | +-----------+ | + | | | | + | | image1 |--------+ | + | | | | | + | | | image2 | | + center_y |----+-------------+-----------| + | | cropped | | + |pad | image3 | image4 | + | | | | + +----|-------------+-----------+ + | | + +-------------+ + + The mosaic transform steps are as follows: + 1. Choose the mosaic center as the intersections of 4 images + 2. Get the left top image according to the index, and randomly + sample another 3 images from the custom dataset. + 3. Sub image will be cropped if image is larger than mosaic patch + + Required Keys: + + - img + - gt_seg_map + - mix_results + + Modified Keys: + + - img + - img_shape + - ori_shape + - gt_seg_map + + Args: + prob (float): mosaic probability. + img_scale (Sequence[int]): Image size after mosaic pipeline of + a single image. The size of the output image is four times + that of a single image. The output image comprises 4 single images. + Default: (640, 640). + center_ratio_range (Sequence[float]): Center ratio range of mosaic + output. Default: (0.5, 1.5). + pad_val (int): Pad value. Default: 0. + seg_pad_val (int): Pad value of segmentation map. Default: 255. + """ + + def __init__(self, + prob, + img_scale=(640, 640), + center_ratio_range=(0.5, 1.5), + pad_val=0, + seg_pad_val=255): + assert 0 <= prob and prob <= 1 + assert isinstance(img_scale, tuple) + self.prob = prob + self.img_scale = img_scale + self.center_ratio_range = center_ratio_range + self.pad_val = pad_val + self.seg_pad_val = seg_pad_val + + @cache_randomness + def do_mosaic(self): + return np.random.rand() < self.prob + + def transform(self, results: dict) -> dict: + """Call function to make a mosaic of image. + + Args: + results (dict): Result dict. + + Returns: + dict: Result dict with mosaic transformed. + """ + mosaic = self.do_mosaic() + if mosaic: + results = self._mosaic_transform_img(results) + results = self._mosaic_transform_seg(results) + return results + + def get_indices(self, dataset: MultiImageMixDataset) -> list: + """Call function to collect indices. + + Args: + dataset (:obj:`MultiImageMixDataset`): The dataset. + + Returns: + list: indices. + """ + + indices = [random.randint(0, len(dataset)) for _ in range(3)] + return indices + + @cache_randomness + def generate_mosaic_center(self): + # mosaic center x, y + center_x = int( + random.uniform(*self.center_ratio_range) * self.img_scale[1]) + center_y = int( + random.uniform(*self.center_ratio_range) * self.img_scale[0]) + return center_x, center_y + + def _mosaic_transform_img(self, results: dict) -> dict: + """Mosaic transform function. + + Args: + results (dict): Result dict. + + Returns: + dict: Updated result dict. 
+ """ + + assert 'mix_results' in results + if len(results['img'].shape) == 3: + c = results['img'].shape[2] + mosaic_img = np.full( + (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2), c), + self.pad_val, + dtype=results['img'].dtype) + else: + mosaic_img = np.full( + (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2)), + self.pad_val, + dtype=results['img'].dtype) + + # mosaic center x, y + self.center_x, self.center_y = self.generate_mosaic_center() + center_position = (self.center_x, self.center_y) + + loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right') + for i, loc in enumerate(loc_strs): + if loc == 'top_left': + result_patch = copy.deepcopy(results) + else: + result_patch = copy.deepcopy(results['mix_results'][i - 1]) + + img_i = result_patch['img'] + h_i, w_i = img_i.shape[:2] + # keep_ratio resize + scale_ratio_i = min(self.img_scale[0] / h_i, + self.img_scale[1] / w_i) + img_i = mmcv.imresize( + img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i))) + + # compute the combine parameters + paste_coord, crop_coord = self._mosaic_combine( + loc, center_position, img_i.shape[:2][::-1]) + x1_p, y1_p, x2_p, y2_p = paste_coord + x1_c, y1_c, x2_c, y2_c = crop_coord + + # crop and paste image + mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c] + + results['img'] = mosaic_img + results['img_shape'] = mosaic_img.shape + results['ori_shape'] = mosaic_img.shape + + return results + + def _mosaic_transform_seg(self, results: dict) -> dict: + """Mosaic transform function for label annotations. + + Args: + results (dict): Result dict. + + Returns: + dict: Updated result dict. + """ + + assert 'mix_results' in results + for key in results.get('seg_fields', []): + mosaic_seg = np.full( + (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2)), + self.seg_pad_val, + dtype=results[key].dtype) + + # mosaic center x, y + center_position = (self.center_x, self.center_y) + + loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right') + for i, loc in enumerate(loc_strs): + if loc == 'top_left': + result_patch = copy.deepcopy(results) + else: + result_patch = copy.deepcopy(results['mix_results'][i - 1]) + + gt_seg_i = result_patch[key] + h_i, w_i = gt_seg_i.shape[:2] + # keep_ratio resize + scale_ratio_i = min(self.img_scale[0] / h_i, + self.img_scale[1] / w_i) + gt_seg_i = mmcv.imresize( + gt_seg_i, + (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)), + interpolation='nearest') + + # compute the combine parameters + paste_coord, crop_coord = self._mosaic_combine( + loc, center_position, gt_seg_i.shape[:2][::-1]) + x1_p, y1_p, x2_p, y2_p = paste_coord + x1_c, y1_c, x2_c, y2_c = crop_coord + + # crop and paste image + mosaic_seg[y1_p:y2_p, x1_p:x2_p] = \ + gt_seg_i[y1_c:y2_c, x1_c:x2_c] + + results[key] = mosaic_seg + + return results + + def _mosaic_combine(self, loc: str, center_position_xy: Sequence[float], + img_shape_wh: Sequence[int]) -> tuple: + """Calculate global coordinate of mosaic image and local coordinate of + cropped sub-image. + + Args: + loc (str): Index for the sub-image, loc in ('top_left', + 'top_right', 'bottom_left', 'bottom_right'). + center_position_xy (Sequence[float]): Mixing center for 4 images, + (x, y). + img_shape_wh (Sequence[int]): Width and height of sub-image + + Returns: + tuple[tuple[float]]: Corresponding coordinate of pasting and + cropping + - paste_coord (tuple): paste corner coordinate in mosaic image. + - crop_coord (tuple): crop corner coordinate in mosaic image. 
+ """ + + assert loc in ('top_left', 'top_right', 'bottom_left', 'bottom_right') + if loc == 'top_left': + # index0 to top left part of image + x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \ + max(center_position_xy[1] - img_shape_wh[1], 0), \ + center_position_xy[0], \ + center_position_xy[1] + crop_coord = img_shape_wh[0] - (x2 - x1), img_shape_wh[1] - ( + y2 - y1), img_shape_wh[0], img_shape_wh[1] + + elif loc == 'top_right': + # index1 to top right part of image + x1, y1, x2, y2 = center_position_xy[0], \ + max(center_position_xy[1] - img_shape_wh[1], 0), \ + min(center_position_xy[0] + img_shape_wh[0], + self.img_scale[1] * 2), \ + center_position_xy[1] + crop_coord = 0, img_shape_wh[1] - (y2 - y1), min( + img_shape_wh[0], x2 - x1), img_shape_wh[1] + + elif loc == 'bottom_left': + # index2 to bottom left part of image + x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \ + center_position_xy[1], \ + center_position_xy[0], \ + min(self.img_scale[0] * 2, center_position_xy[1] + + img_shape_wh[1]) + crop_coord = img_shape_wh[0] - (x2 - x1), 0, img_shape_wh[0], min( + y2 - y1, img_shape_wh[1]) + + else: + # index3 to bottom right part of image + x1, y1, x2, y2 = center_position_xy[0], \ + center_position_xy[1], \ + min(center_position_xy[0] + img_shape_wh[0], + self.img_scale[1] * 2), \ + min(self.img_scale[0] * 2, center_position_xy[1] + + img_shape_wh[1]) + crop_coord = 0, 0, min(img_shape_wh[0], + x2 - x1), min(y2 - y1, img_shape_wh[1]) + + paste_coord = x1, y1, x2, y2 + return paste_coord, crop_coord + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' + repr_str += f'img_scale={self.img_scale}, ' + repr_str += f'center_ratio_range={self.center_ratio_range}, ' + repr_str += f'pad_val={self.pad_val}, ' + repr_str += f'seg_pad_val={self.pad_val})' + return repr_str + + +@TRANSFORMS.register_module() +class GenerateEdge(BaseTransform): + """Generate Edge for CE2P approach. + + Edge will be used to calculate loss of + `CE2P `_. + + Modified from https://github.com/liutinglt/CE2P/blob/master/dataset/target_generation.py # noqa:E501 + + Required Keys: + + - img_shape + - gt_seg_map + + Added Keys: + - gt_edge_map (np.ndarray, uint8): The edge annotation generated from the + seg map by extracting border between different semantics. + + Args: + edge_width (int): The width of edge. Default to 3. + ignore_index (int): Index that will be ignored. Default to 255. + """ + + def __init__(self, edge_width: int = 3, ignore_index: int = 255) -> None: + super().__init__() + self.edge_width = edge_width + self.ignore_index = ignore_index + + def transform(self, results: Dict) -> Dict: + """Call function to generate edge from segmentation map. + + Args: + results (dict): Result dict. + + Returns: + dict: Result dict with edge mask. 
+ """ + h, w = results['img_shape'] + edge = np.zeros((h, w), dtype=np.uint8) + seg_map = results['gt_seg_map'] + + # down + edge_down = edge[1:h, :] + edge_down[(seg_map[1:h, :] != seg_map[:h - 1, :]) + & (seg_map[1:h, :] != self.ignore_index) & + (seg_map[:h - 1, :] != self.ignore_index)] = 1 + # left + edge_left = edge[:, :w - 1] + edge_left[(seg_map[:, :w - 1] != seg_map[:, 1:w]) + & (seg_map[:, :w - 1] != self.ignore_index) & + (seg_map[:, 1:w] != self.ignore_index)] = 1 + # up_left + edge_upleft = edge[:h - 1, :w - 1] + edge_upleft[(seg_map[:h - 1, :w - 1] != seg_map[1:h, 1:w]) + & (seg_map[:h - 1, :w - 1] != self.ignore_index) & + (seg_map[1:h, 1:w] != self.ignore_index)] = 1 + # up_right + edge_upright = edge[:h - 1, 1:w] + edge_upright[(seg_map[:h - 1, 1:w] != seg_map[1:h, :w - 1]) + & (seg_map[:h - 1, 1:w] != self.ignore_index) & + (seg_map[1:h, :w - 1] != self.ignore_index)] = 1 + + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, + (self.edge_width, self.edge_width)) + edge = cv2.dilate(edge, kernel) + + results['gt_edge_map'] = edge + results['edge_width'] = self.edge_width + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'edge_width={self.edge_width}, ' + repr_str += f'ignore_index={self.ignore_index})' + return repr_str + + +@TRANSFORMS.register_module() +class ResizeShortestEdge(BaseTransform): + """Resize the image and mask while keeping the aspect ratio unchanged. + + Modified from https://github.com/facebookresearch/detectron2/blob/main/detectron2/data/transforms/augmentation_impl.py#L130 # noqa:E501 + Copyright (c) Facebook, Inc. and its affiliates. + Licensed under the Apache-2.0 License + + This transform attempts to scale the shorter edge to the given + `scale`, as long as the longer edge does not exceed `max_size`. + If `max_size` is reached, then downscale so that the longer + edge does not exceed `max_size`. + + Required Keys: + + - img + - gt_seg_map (optional) + + Modified Keys: + + - img + - img_shape + - gt_seg_map (optional)) + + Added Keys: + + - scale + - scale_factor + - keep_ratio + + + Args: + scale (Union[int, Tuple[int, int]]): The target short edge length. + If it's tuple, will select the min value as the short edge length. + max_size (int): The maximum allowed longest edge length. + """ + + def __init__(self, scale: Union[int, Tuple[int, int]], + max_size: int) -> None: + super().__init__() + self.scale = scale + self.max_size = max_size + + # Create a empty Resize object + self.resize = TRANSFORMS.build({ + 'type': 'Resize', + 'scale': 0, + 'keep_ratio': True + }) + + def _get_output_shape(self, img, short_edge_length) -> Tuple[int, int]: + """Compute the target image shape with the given `short_edge_length`. + + Args: + img (np.ndarray): The input image. + short_edge_length (Union[int, Tuple[int, int]]): The target short + edge length. If it's tuple, will select the min value as the + short edge length. 
+ """ + h, w = img.shape[:2] + if isinstance(short_edge_length, int): + size = short_edge_length * 1.0 + elif isinstance(short_edge_length, tuple): + size = min(short_edge_length) * 1.0 + scale = size / min(h, w) + if h < w: + new_h, new_w = size, scale * w + else: + new_h, new_w = scale * h, size + + if max(new_h, new_w) > self.max_size: + scale = self.max_size * 1.0 / max(new_h, new_w) + new_h *= scale + new_w *= scale + + new_h = int(new_h + 0.5) + new_w = int(new_w + 0.5) + return (new_w, new_h) + + def transform(self, results: Dict) -> Dict: + self.resize.scale = self._get_output_shape(results['img'], self.scale) + return self.resize(results) + + +@TRANSFORMS.register_module() +class BioMedical3DRandomCrop(BaseTransform): + """Crop the input patch for medical image & segmentation mask. + + Required Keys: + + - img (np.ndarray): Biomedical image with shape (N, Z, Y, X), + N is the number of modalities, and data type is float32. + - gt_seg_map (np.ndarray, optional): Biomedical semantic segmentation mask + with shape (Z, Y, X). + + Modified Keys: + + - img + - img_shape + - gt_seg_map (optional) + + Args: + crop_shape (Union[int, Tuple[int, int, int]]): Expected size after + cropping with the format of (z, y, x). If set to an integer, + then cropping width and height are equal to this integer. + keep_foreground (bool): If keep_foreground is True, it will sample a + voxel of foreground classes randomly, and will take it as the + center of the crop bounding-box. Default to True. + """ + + def __init__(self, + crop_shape: Union[int, Tuple[int, int, int]], + keep_foreground: bool = True): + super().__init__() + assert isinstance(crop_shape, int) or ( + isinstance(crop_shape, tuple) and len(crop_shape) == 3 + ), 'The expected crop_shape is an integer, or a tuple containing ' + 'three integers' + + if isinstance(crop_shape, int): + crop_shape = (crop_shape, crop_shape, crop_shape) + assert crop_shape[0] > 0 and crop_shape[1] > 0 and crop_shape[2] > 0 + self.crop_shape = crop_shape + self.keep_foreground = keep_foreground + + def random_sample_location(self, seg_map: np.ndarray) -> dict: + """sample foreground voxel when keep_foreground is True. + + Args: + seg_map (np.ndarray): gt seg map. + + Returns: + dict: Coordinates of selected foreground voxel. + """ + num_samples = 10000 + # at least 1% of the class voxels need to be selected, + # otherwise it may be too sparse + min_percent_coverage = 0.01 + class_locs = {} + foreground_classes = [] + all_classes = np.unique(seg_map) + for c in all_classes: + if c == 0: + # to avoid the segmentation mask full of background 0 + # and the class_locs is just void dictionary {} when it return + # there add a void list for background 0. + class_locs[c] = [] + else: + all_locs = np.argwhere(seg_map == c) + target_num_samples = min(num_samples, len(all_locs)) + target_num_samples = max( + target_num_samples, + int(np.ceil(len(all_locs) * min_percent_coverage))) + + selected = all_locs[np.random.choice( + len(all_locs), target_num_samples, replace=False)] + class_locs[c] = selected + foreground_classes.append(c) + + selected_voxel = None + if len(foreground_classes) > 0: + selected_class = np.random.choice(foreground_classes) + voxels_of_that_class = class_locs[selected_class] + selected_voxel = voxels_of_that_class[np.random.choice( + len(voxels_of_that_class))] + + return selected_voxel + + def random_generate_crop_bbox(self, margin_z: int, margin_y: int, + margin_x: int) -> tuple: + """Randomly get a crop bounding box. 
+ + Args: + seg_map (np.ndarray): Ground truth segmentation map. + + Returns: + tuple: Coordinates of the cropped image. + """ + offset_z = np.random.randint(0, margin_z + 1) + offset_y = np.random.randint(0, margin_y + 1) + offset_x = np.random.randint(0, margin_x + 1) + crop_z1, crop_z2 = offset_z, offset_z + self.crop_shape[0] + crop_y1, crop_y2 = offset_y, offset_y + self.crop_shape[1] + crop_x1, crop_x2 = offset_x, offset_x + self.crop_shape[2] + + return crop_z1, crop_z2, crop_y1, crop_y2, crop_x1, crop_x2 + + def generate_margin(self, results: dict) -> tuple: + """Generate margin of crop bounding-box. + + If keep_foreground is True, it will sample a voxel of foreground + classes randomly, and will take it as the center of the bounding-box, + and return the margin between of the bounding-box and image. + If keep_foreground is False, it will return the difference from crop + shape and image shape. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + tuple: The margin for 3 dimensions of crop bounding-box and image. + """ + + seg_map = results['gt_seg_map'] + if self.keep_foreground: + selected_voxel = self.random_sample_location(seg_map) + if selected_voxel is None: + # this only happens if some image does not contain + # foreground voxels at all + warnings.warn(f'case does not contain any foreground classes' + f': {results["img_path"]}') + margin_z = max(seg_map.shape[0] - self.crop_shape[0], 0) + margin_y = max(seg_map.shape[1] - self.crop_shape[1], 0) + margin_x = max(seg_map.shape[2] - self.crop_shape[2], 0) + else: + margin_z = max(0, selected_voxel[0] - self.crop_shape[0] // 2) + margin_y = max(0, selected_voxel[1] - self.crop_shape[1] // 2) + margin_x = max(0, selected_voxel[2] - self.crop_shape[2] // 2) + margin_z = max( + 0, min(seg_map.shape[0] - self.crop_shape[0], margin_z)) + margin_y = max( + 0, min(seg_map.shape[1] - self.crop_shape[1], margin_y)) + margin_x = max( + 0, min(seg_map.shape[2] - self.crop_shape[2], margin_x)) + else: + margin_z = max(seg_map.shape[0] - self.crop_shape[0], 0) + margin_y = max(seg_map.shape[1] - self.crop_shape[1], 0) + margin_x = max(seg_map.shape[2] - self.crop_shape[2], 0) + + return margin_z, margin_y, margin_x + + def crop(self, img: np.ndarray, crop_bbox: tuple) -> np.ndarray: + """Crop from ``img`` + + Args: + img (np.ndarray): Original input image. + crop_bbox (tuple): Coordinates of the cropped image. + + Returns: + np.ndarray: The cropped image. + """ + crop_z1, crop_z2, crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox + if len(img.shape) == 3: + # crop seg map + img = img[crop_z1:crop_z2, crop_y1:crop_y2, crop_x1:crop_x2] + else: + # crop image + assert len(img.shape) == 4 + img = img[:, crop_z1:crop_z2, crop_y1:crop_y2, crop_x1:crop_x2] + return img + + def transform(self, results: dict) -> dict: + """Transform function to randomly crop images, semantic segmentation + maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Randomly cropped results, 'img_shape' key in result dict is + updated according to crop size. 
+ """ + margin = self.generate_margin(results) + crop_bbox = self.random_generate_crop_bbox(*margin) + + # crop the image + img = results['img'] + results['img'] = self.crop(img, crop_bbox) + results['img_shape'] = results['img'].shape[1:] + + # crop semantic seg + seg_map = results['gt_seg_map'] + results['gt_seg_map'] = self.crop(seg_map, crop_bbox) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(crop_shape={self.crop_shape})' + + +@TRANSFORMS.register_module() +class BioMedicalGaussianNoise(BaseTransform): + """Add random Gaussian noise to image. + + Modified from https://github.com/MIC-DKFZ/batchgenerators/blob/7651ece69faf55263dd582a9f5cbd149ed9c3ad0/batchgenerators/transforms/noise_transforms.py#L53 # noqa:E501 + + Copyright (c) German Cancer Research Center (DKFZ) + Licensed under the Apache License, Version 2.0 + + Required Keys: + + - img (np.ndarray): Biomedical image with shape (N, Z, Y, X), + N is the number of modalities, and data type is float32. + + Modified Keys: + + - img + + Args: + prob (float): Probability to add Gaussian noise for + each sample. Default to 0.1. + mean (float): Mean or “centre” of the distribution. Default to 0.0. + std (float): Standard deviation of distribution. Default to 0.1. + """ + + def __init__(self, + prob: float = 0.1, + mean: float = 0.0, + std: float = 0.1) -> None: + super().__init__() + assert 0.0 <= prob <= 1.0 and std >= 0.0 + self.prob = prob + self.mean = mean + self.std = std + + def transform(self, results: Dict) -> Dict: + """Call function to add random Gaussian noise to image. + + Args: + results (dict): Result dict. + + Returns: + dict: Result dict with random Gaussian noise. + """ + if np.random.rand() < self.prob: + rand_std = np.random.uniform(0, self.std) + noise = np.random.normal( + self.mean, rand_std, size=results['img'].shape) + # noise is float64 array, convert to the results['img'].dtype + noise = noise.astype(results['img'].dtype) + results['img'] = results['img'] + noise + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' + repr_str += f'mean={self.mean}, ' + repr_str += f'std={self.std})' + return repr_str + + +@TRANSFORMS.register_module() +class BioMedicalGaussianBlur(BaseTransform): + """Add Gaussian blur with random sigma to image. + + Modified from https://github.com/MIC-DKFZ/batchgenerators/blob/7651ece69faf55263dd582a9f5cbd149ed9c3ad0/batchgenerators/transforms/noise_transforms.py#L81 # noqa:E501 + + Copyright (c) German Cancer Research Center (DKFZ) + Licensed under the Apache License, Version 2.0 + + Required Keys: + + - img (np.ndarray): Biomedical image with shape (N, Z, Y, X), + N is the number of modalities, and data type is float32. + + Modified Keys: + + - img + + Args: + sigma_range (Tuple[float, float]|float): range to randomly + select sigma value. Default to (0.5, 1.0). + prob (float): Probability to apply Gaussian blur + for each sample. Default to 0.2. + prob_per_channel (float): Probability to apply Gaussian blur + for each channel (axis N of the image). Default to 0.5. + different_sigma_per_channel (bool): whether to use different + sigma for each channel (axis N of the image). Default to True. + different_sigma_per_axis (bool): whether to use different + sigma for axis Z, X and Y of the image. Default to True. 
+ """ + + def __init__(self, + sigma_range: Tuple[float, float] = (0.5, 1.0), + prob: float = 0.2, + prob_per_channel: float = 0.5, + different_sigma_per_channel: bool = True, + different_sigma_per_axis: bool = True) -> None: + super().__init__() + assert 0.0 <= prob <= 1.0 + assert 0.0 <= prob_per_channel <= 1.0 + assert isinstance(sigma_range, Sequence) and len(sigma_range) == 2 + self.sigma_range = sigma_range + self.prob = prob + self.prob_per_channel = prob_per_channel + self.different_sigma_per_channel = different_sigma_per_channel + self.different_sigma_per_axis = different_sigma_per_axis + + def _get_valid_sigma(self, value_range) -> Tuple[float, ...]: + """Ensure the `value_range` to be either a single value or a sequence + of two values. If the `value_range` is a sequence, generate a random + value with `[value_range[0], value_range[1]]` based on uniform + sampling. + + Modified from https://github.com/MIC-DKFZ/batchgenerators/blob/7651ece69faf55263dd582a9f5cbd149ed9c3ad0/batchgenerators/augmentations/utils.py#L625 # noqa:E501 + + Args: + value_range (tuple|list|float|int): the input value range + """ + if (isinstance(value_range, (list, tuple))): + if (value_range[0] == value_range[1]): + value = value_range[0] + else: + orig_type = type(value_range[0]) + value = np.random.uniform(value_range[0], value_range[1]) + value = orig_type(value) + return value + + def _gaussian_blur(self, data_sample: np.ndarray) -> np.ndarray: + """Random generate sigma and apply Gaussian Blur to the data + Args: + data_sample (np.ndarray): data sample with multiple modalities, + the data shape is (N, Z, Y, X) + """ + sigma = None + for c in range(data_sample.shape[0]): + if np.random.rand() < self.prob_per_channel: + # if no `sigma` is generated, generate one + # if `self.different_sigma_per_channel` is True, + # re-generate random sigma for each channel + if (sigma is None or self.different_sigma_per_channel): + if (not self.different_sigma_per_axis): + sigma = self._get_valid_sigma(self.sigma_range) + else: + sigma = [ + self._get_valid_sigma(self.sigma_range) + for _ in data_sample.shape[1:] + ] + # apply gaussian filter with `sigma` + data_sample[c] = gaussian_filter( + data_sample[c], sigma, order=0) + return data_sample + + def transform(self, results: Dict) -> Dict: + """Call function to add random Gaussian blur to image. + + Args: + results (dict): Result dict. + + Returns: + dict: Result dict with random Gaussian noise. + """ + if np.random.rand() < self.prob: + results['img'] = self._gaussian_blur(results['img']) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' + repr_str += f'prob_per_channel={self.prob_per_channel}, ' + repr_str += f'sigma_range={self.sigma_range}, ' + repr_str += 'different_sigma_per_channel=' \ + f'{self.different_sigma_per_channel}, ' + repr_str += 'different_sigma_per_axis=' \ + f'{self.different_sigma_per_axis})' + return repr_str + + +@TRANSFORMS.register_module() +class BioMedicalRandomGamma(BaseTransform): + """Using random gamma correction to process the biomedical image. + + Modified from + https://github.com/MIC-DKFZ/batchgenerators/blob/master/batchgenerators/transforms/color_transforms.py#L132 # noqa:E501 + With licence: Apache 2.0 + + Required Keys: + + - img (np.ndarray): Biomedical image with shape (N, Z, Y, X), + N is the number of modalities, and data type is float32. + + Modified Keys: + - img + + Args: + prob (float): The probability to perform this transform. Default: 0.5. 
+ gamma_range (Tuple[float]): Range of gamma values. Default: (0.5, 2). + invert_image (bool): Whether invert the image before applying gamma + augmentation. Default: False. + per_channel (bool): Whether perform the transform each channel + individually. Default: False + retain_stats (bool): Gamma transformation will alter the mean and std + of the data in the patch. If retain_stats=True, the data will be + transformed to match the mean and standard deviation before gamma + augmentation. Default: False. + """ + + def __init__(self, + prob: float = 0.5, + gamma_range: Tuple[float] = (0.5, 2), + invert_image: bool = False, + per_channel: bool = False, + retain_stats: bool = False): + assert 0 <= prob and prob <= 1 + assert isinstance(gamma_range, tuple) and len(gamma_range) == 2 + assert isinstance(invert_image, bool) + assert isinstance(per_channel, bool) + assert isinstance(retain_stats, bool) + self.prob = prob + self.gamma_range = gamma_range + self.invert_image = invert_image + self.per_channel = per_channel + self.retain_stats = retain_stats + + @cache_randomness + def _do_gamma(self): + """Whether do adjust gamma for image.""" + return np.random.rand() < self.prob + + def _adjust_gamma(self, img: np.array): + """Gamma adjustment for image. + + Args: + img (np.array): Input image before gamma adjust. + + Returns: + np.arrays: Image after gamma adjust. + """ + + if self.invert_image: + img = -img + + def _do_adjust(img): + if retain_stats_here: + img_mean = img.mean() + img_std = img.std() + if np.random.random() < 0.5 and self.gamma_range[0] < 1: + gamma = np.random.uniform(self.gamma_range[0], 1) + else: + gamma = np.random.uniform( + max(self.gamma_range[0], 1), self.gamma_range[1]) + img_min = img.min() + img_range = img.max() - img_min # range + img = np.power(((img - img_min) / float(img_range + 1e-7)), + gamma) * img_range + img_min + if retain_stats_here: + img = img - img.mean() + img = img / (img.std() + 1e-8) * img_std + img = img + img_mean + return img + + if not self.per_channel: + retain_stats_here = self.retain_stats + img = _do_adjust(img) + else: + for c in range(img.shape[0]): + img[c] = _do_adjust(img[c]) + if self.invert_image: + img = -img + return img + + def transform(self, results: dict) -> dict: + """Call function to perform random gamma correction + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with random gamma correction performed. + """ + do_gamma = self._do_gamma() + + if do_gamma: + results['img'] = self._adjust_gamma(results['img']) + else: + pass + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' + repr_str += f'gamma_range={self.gamma_range},' + repr_str += f'invert_image={self.invert_image},' + repr_str += f'per_channel={self.per_channel},' + repr_str += f'retain_stats={self.retain_stats}' + return repr_str + + +@TRANSFORMS.register_module() +class BioMedical3DPad(BaseTransform): + """Pad the biomedical 3d image & biomedical 3d semantic segmentation maps. + + Required Keys: + + - img (np.ndarry): Biomedical image with shape (N, Z, Y, X) by default, + N is the number of modalities. + - gt_seg_map (np.ndarray, optional): Biomedical seg map with shape + (Z, Y, X) by default. + + Modified Keys: + + - img (np.ndarry): Biomedical image with shape (N, Z, Y, X) by default, + N is the number of modalities. + - gt_seg_map (np.ndarray, optional): Biomedical seg map with shape + (Z, Y, X) by default. 
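The core of `_adjust_gamma` above is: normalize the patch to `[0, 1]`, apply the power, then restore the original range (with a small epsilon for stability). A minimal sketch of just that math, without the invert and retain-stats options:

```python
import numpy as np

rng = np.random.default_rng(0)
img = rng.standard_normal((8, 16, 16)).astype(np.float32)
gamma = rng.uniform(0.5, 2.0)

# Normalize to [0, 1], apply the power, then restore the original range.
img_min = img.min()
img_range = img.max() - img_min
out = np.power((img - img_min) / (img_range + 1e-7), gamma) * img_range + img_min
print(gamma, float(out.min()), float(out.max()))  # range is (almost) preserved
```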
+
+    Added Keys:
+
+    - pad_shape (Tuple[int, int, int]): The padded shape.
+
+    Args:
+        pad_shape (Tuple[int, int, int]): Fixed padding size.
+            Expected padding shape (Z, Y, X).
+        pad_val (float): Padding value for biomedical image.
+            The padding mode is set to "constant". The value
+            to be filled in padding area. Default: 0.
+        seg_pad_val (int): Padding value for biomedical 3d semantic
+            segmentation maps. The padding mode is set to "constant".
+            The value to be filled in padding area. Default: 0.
+    """
+
+    def __init__(self,
+                 pad_shape: Tuple[int, int, int],
+                 pad_val: float = 0.,
+                 seg_pad_val: int = 0) -> None:
+
+        # check pad_shape
+        assert pad_shape is not None and len(pad_shape) == 3, \
+            'pad_shape should be a sequence of three integers (Z, Y, X)'
+
+        self.pad_shape = pad_shape
+        self.pad_val = pad_val
+        self.seg_pad_val = seg_pad_val
+
+    def _pad_img(self, results: dict) -> None:
+        """Pad images according to ``self.pad_shape``
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: The dict contains the padded image and shape
+                information.
+        """
+        padded_img = self._to_pad(
+            results['img'], pad_shape=self.pad_shape, pad_val=self.pad_val)
+
+        results['img'] = padded_img
+        results['pad_shape'] = padded_img.shape[1:]
+
+    def _pad_seg(self, results: dict) -> None:
+        """Pad semantic segmentation map according to ``self.pad_shape`` if
+        ``gt_seg_map`` is not None in results dict.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Update the padded gt seg map in dict.
+        """
+        if results.get('gt_seg_map', None) is not None:
+            pad_gt_seg = self._to_pad(
+                results['gt_seg_map'][None, ...],
+                pad_shape=results['pad_shape'],
+                pad_val=self.seg_pad_val)
+            # drop the dummy leading axis added above
+            results['gt_seg_map'] = pad_gt_seg[0]
+
+    @staticmethod
+    def _to_pad(img: np.ndarray,
+                pad_shape: Tuple[int, int, int],
+                pad_val: Union[int, float] = 0) -> np.ndarray:
+        """Pad the given 3d image to a certain shape with specified padding
+        value.
+
+        Args:
+            img (ndarray): Biomedical image with shape (N, Z, Y, X)
+                to be padded. N is the number of modalities.
+            pad_shape (Tuple[int,int,int]): Expected padding shape (Z, Y, X).
+            pad_val (float, int): Values to be filled in padding areas
+                and the padding_mode is set to 'constant'. Default: 0.
+
+        Returns:
+            ndarray: The padded image.
+        """
+        # compute pad width
+        d = max(pad_shape[0] - img.shape[1], 0)
+        pad_d = (d // 2, d - d // 2)
+        h = max(pad_shape[1] - img.shape[2], 0)
+        pad_h = (h // 2, h - h // 2)
+        w = max(pad_shape[2] - img.shape[3], 0)
+        pad_w = (w // 2, w - w // 2)
+
+        pad_list = [(0, 0), pad_d, pad_h, pad_w]
+
+        img = np.pad(img, pad_list, mode='constant', constant_values=pad_val)
+        return img
+
+    def transform(self, results: dict) -> dict:
+        """Call function to pad images, semantic segmentation maps.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Updated result dict.
+        """
+        self._pad_img(results)
+        self._pad_seg(results)
+
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += f'(pad_shape={self.pad_shape}, '
+        repr_str += f'pad_val={self.pad_val}, '
+        repr_str += f'seg_pad_val={self.seg_pad_val})'
+        return repr_str
+
+
+@TRANSFORMS.register_module()
+class BioMedical3DRandomFlip(BaseTransform):
+    """Flip biomedical 3D images and segmentations.
+ + Modified from https://github.com/MIC-DKFZ/batchgenerators/blob/master/batchgenerators/transforms/spatial_transforms.py # noqa:E501 + + Copyright 2021 Division of + Medical Image Computing, German Cancer Research Center (DKFZ) and Applied + Computer Vision Lab, Helmholtz Imaging Platform. + Licensed under the Apache-2.0 License. + + Required Keys: + + - img (np.ndarry): Biomedical image with shape (N, Z, Y, X) by default, + N is the number of modalities. + - gt_seg_map (np.ndarray, optional): Biomedical seg map with shape + (Z, Y, X) by default. + + Modified Keys: + + - img (np.ndarry): Biomedical image with shape (N, Z, Y, X) by default, + N is the number of modalities. + - gt_seg_map (np.ndarray, optional): Biomedical seg map with shape + (Z, Y, X) by default. + + Added Keys: + + - do_flip + - flip_axes + + Args: + prob (float): Flipping probability. + axes (Tuple[int, ...]): Flipping axes with order 'ZXY'. + swap_label_pairs (Optional[List[Tuple[int, int]]]): + The segmentation label pairs that are swapped when flipping. + """ + + def __init__(self, + prob: float, + axes: Tuple[int, ...], + swap_label_pairs: Optional[List[Tuple[int, int]]] = None): + self.prob = prob + self.axes = axes + self.swap_label_pairs = swap_label_pairs + assert prob >= 0 and prob <= 1 + if axes is not None: + assert max(axes) <= 2 + + @staticmethod + def _flip(img, direction: Tuple[bool, bool, bool]) -> np.ndarray: + if direction[0]: + img[:, :] = img[:, ::-1] + if direction[1]: + img[:, :, :] = img[:, :, ::-1] + if direction[2]: + img[:, :, :, :] = img[:, :, :, ::-1] + return img + + def _do_flip(self, img: np.ndarray) -> Tuple[bool, bool, bool]: + """Call function to determine which axis to flip. + + Args: + img (np.ndarry): Image or segmentation map array. + Returns: + tuple: Flip action, whether to flip on the z, x, and y axes. + """ + flip_c, flip_x, flip_y = False, False, False + if self.axes is not None: + flip_c = 0 in self.axes and np.random.rand() < self.prob + flip_x = 1 in self.axes and np.random.rand() < self.prob + if len(img.shape) == 4: + flip_y = 2 in self.axes and np.random.rand() < self.prob + return flip_c, flip_x, flip_y + + def _swap_label(self, seg: np.ndarray) -> np.ndarray: + out = seg.copy() + for first, second in self.swap_label_pairs: + first_area = (seg == first) + second_area = (seg == second) + out[first_area] = second + out[second_area] = first + return out + + def transform(self, results: Dict) -> Dict: + """Call function to flip and swap pair labels. + + Args: + results (dict): Result dict. + Returns: + dict: Flipped results, 'do_flip', 'flip_axes' keys are added into + result dict. 
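As in the 2-D case, the 3-D flip above decides each axis once and applies it to the `(N, Z, Y, X)` image and the `(Z, Y, X)` mask together, offsetting the image axis by one for the modality dimension. A minimal NumPy sketch:

```python
import numpy as np

rng = np.random.default_rng(0)
img = rng.standard_normal((1, 4, 6, 8)).astype(np.float32)  # (N, Z, Y, X)
seg = rng.integers(0, 3, size=(4, 6, 8))                    # (Z, Y, X)

# Decide each spatial flip once, then apply it to image and mask alike.
flips = [bool(rng.random() < 0.5) for _ in range(3)]        # Z, Y, X
for ax, do_flip in enumerate(flips):
    if do_flip:
        img = np.flip(img, axis=ax + 1)  # image has a leading modality axis
        seg = np.flip(seg, axis=ax)
print(flips, img.shape, seg.shape)
```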
+ """ + # get actual flipped axis + if 'do_flip' not in results: + results['do_flip'] = self._do_flip(results['img']) + if 'flip_axes' not in results: + results['flip_axes'] = self.axes + # flip image + results['img'] = self._flip( + results['img'], direction=results['do_flip']) + # flip seg + if results['gt_seg_map'] is not None: + if results['gt_seg_map'].shape != results['img'].shape: + results['gt_seg_map'] = results['gt_seg_map'][None, :] + results['gt_seg_map'] = self._flip( + results['gt_seg_map'], direction=results['do_flip']) + results['gt_seg_map'] = results['gt_seg_map'].squeeze() + # swap label pairs + if self.swap_label_pairs is not None: + results['gt_seg_map'] = self._swap_label(results['gt_seg_map']) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, axes={self.axes}, ' \ + f'swap_label_pairs={self.swap_label_pairs})' + return repr_str + + +@TRANSFORMS.register_module() +class Albu(BaseTransform): + """Albumentation augmentation. Adds custom transformations from + Albumentations library. Please, visit + `https://albumentations.readthedocs.io` to get more information. An example + of ``transforms`` is as followed: + + .. code-block:: + [ + dict( + type='ShiftScaleRotate', + shift_limit=0.0625, + scale_limit=0.0, + rotate_limit=0, + interpolation=1, + p=0.5), + dict( + type='RandomBrightnessContrast', + brightness_limit=[0.1, 0.3], + contrast_limit=[0.1, 0.3], + p=0.2), + dict(type='ChannelShuffle', p=0.1), + dict( + type='OneOf', + transforms=[ + dict(type='Blur', blur_limit=3, p=1.0), + dict(type='MedianBlur', blur_limit=3, p=1.0) + ], + p=0.1), + ] + Args: + transforms (list[dict]): A list of albu transformations + keymap (dict): Contains {'input key':'albumentation-style key'} + update_pad_shape (bool): Whether to update padding shape according to \ + the output shape of the last transform + """ + + def __init__(self, + transforms: List[dict], + keymap: Optional[dict] = None, + update_pad_shape: bool = False): + if not ALBU_INSTALLED: + raise ImportError( + 'albumentations is not installed, ' + 'we suggest install albumentation by ' + '"pip install albumentations>=0.3.2 --no-binary qudida,albumentations"' # noqa + ) + + # Args will be modified later, copying it will be safer + transforms = copy.deepcopy(transforms) + + self.transforms = transforms + self.keymap = keymap + self.update_pad_shape = update_pad_shape + + self.aug = Compose([self.albu_builder(t) for t in self.transforms]) + + if not keymap: + self.keymap_to_albu = {'img': 'image', 'gt_seg_map': 'mask'} + else: + self.keymap_to_albu = copy.deepcopy(keymap) + self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()} + + def albu_builder(self, cfg: dict) -> object: + """Build a callable object from a dict containing albu arguments. + + Args: + cfg (dict): Config dict. It should at least contain the key "type". + + Returns: + Callable: A callable object. 
+ """ + + assert isinstance(cfg, dict) and 'type' in cfg + args = cfg.copy() + + obj_type = args.pop('type') + if mmengine.is_str(obj_type): + if not ALBU_INSTALLED: + raise ImportError( + 'albumentations is not installed, ' + 'we suggest install albumentation by ' + '"pip install albumentations>=0.3.2 --no-binary qudida,albumentations"' # noqa + ) + obj_cls = getattr(albumentations, obj_type) + elif inspect.isclass(obj_type): + obj_cls = obj_type + else: + raise TypeError( + f'type must be a valid type or str, but got {type(obj_type)}') + + if 'transforms' in args: + args['transforms'] = [ + self.albu_builder(t) for t in args['transforms'] + ] + + return obj_cls(**args) + + @staticmethod + def mapper(d: dict, keymap: dict): + """Dictionary mapper. + + Renames keys according to keymap provided. + Args: + d (dict): old dict + keymap (dict): {'old_key':'new_key'} + Returns: + dict: new dict. + """ + + updated_dict = {} + for k, _ in zip(d.keys(), d.values()): + new_k = keymap.get(k, k) + updated_dict[new_k] = d[k] + return updated_dict + + def transform(self, results): + # dict to albumentations format + results = self.mapper(results, self.keymap_to_albu) + + # Convert to RGB since Albumentations works with RGB images + results['image'] = cv2.cvtColor(results['image'], cv2.COLOR_BGR2RGB) + + results = self.aug(**results) + + # Convert back to BGR + results['image'] = cv2.cvtColor(results['image'], cv2.COLOR_RGB2BGR) + + # back to the original format + results = self.mapper(results, self.keymap_back) + + # update final shape + if self.update_pad_shape: + results['pad_shape'] = results['img'].shape + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + f'(transforms={self.transforms})' + return repr_str + + +@TRANSFORMS.register_module() +class ConcatCDInput(BaseTransform): + """Concat images for change detection. + + Required Keys: + + - img + - img2 + + Args: + input_keys (tuple): Input image keys for change detection. + Default: ('img', 'img2'). + """ + + def __init__(self, input_keys=('img', 'img2')): + self.input_keys = input_keys + + def transform(self, results: dict) -> dict: + img = [] + for input_key in self.input_keys: + img.append(results.pop(input_key)) + results['img'] = np.concatenate(img, axis=2) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(input_keys={self.input_keys}, ' + return repr_str + + +@TRANSFORMS.register_module() +class RandomDepthMix(BaseTransform): + """This class implements the RandomDepthMix transform. + + Args: + prob (float): Probability of applying the transformation. + Defaults to 0.25. + mix_scale_ratio (float): Ratio to scale the mix width. + Defaults to 0.75. 
+ """ + + def __init__( + self, + prob: float = 0.25, + mix_scale_ratio: float = 0.75, + ): + super().__init__() + + self.prob = prob + self.mix_scale_ratio = mix_scale_ratio + + def transform(self, results: dict) -> dict: + if random.random() > self.prob: + return results + + h, w = results['img_shape'][:2] + left = int(w * random.random()) + width_ratio = self.mix_scale_ratio * random.random() + width = int(max(1, (w - left) * width_ratio)) + + img = results['img'] + depth_rescale_factor = results.get('depth_rescale_factor', 1) + depth_map = results['gt_depth_map'] / depth_rescale_factor + + if img.ndim == 3: + for c in range(img.shape[-1]): + img[:, left:left + width, c] = depth_map[:, left:left + width] + elif img.ndim == 2: + img[:, left:left + width] = depth_map[:, left:left + width] + else: + raise ValueError(f'Invalid image shape ({img.shape})') + + results['img'] = img + return results diff --git a/mmseg/datasets/voc.py b/mmseg/datasets/voc.py new file mode 100644 index 0000000000000000000000000000000000000000..5e5d6025c03760953a82f80e337185afc51f1386 --- /dev/null +++ b/mmseg/datasets/voc.py @@ -0,0 +1,40 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +import mmengine.fileio as fileio + +from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset + + +@DATASETS.register_module() +class PascalVOCDataset(BaseSegDataset): + """Pascal VOC dataset. + + Args: + split (str): Split txt file for Pascal VOC. + """ + METAINFO = dict( + classes=('background', 'aeroplane', 'bicycle', 'bird', 'boat', + 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', + 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', + 'sofa', 'train', 'tvmonitor'), + palette=[[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], + [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128], + [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0], + [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128], + [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0], + [0, 64, 128]]) + + def __init__(self, + ann_file, + img_suffix='.jpg', + seg_map_suffix='.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + ann_file=ann_file, + **kwargs) + assert fileio.exists(self.data_prefix['img_path'], + self.backend_args) and osp.isfile(self.ann_file) diff --git a/mmseg/engine/__init__.py b/mmseg/engine/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..98139a0047fd2f076d659ba5aed2cd3452dbd235 --- /dev/null +++ b/mmseg/engine/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .hooks import SegVisualizationHook +from .optimizers import (ForceDefaultOptimWrapperConstructor, + LayerDecayOptimizerConstructor, + LearningRateDecayOptimizerConstructor) +from .schedulers import PolyLRRatio + +__all__ = [ + 'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor', + 'SegVisualizationHook', 'PolyLRRatio', + 'ForceDefaultOptimWrapperConstructor' +] diff --git a/mmseg/engine/__pycache__/__init__.cpython-39.pyc b/mmseg/engine/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad6ee2e430b628c295033b8b09e7ada90001f222 Binary files /dev/null and b/mmseg/engine/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/engine/hooks/__init__.py b/mmseg/engine/hooks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..03d3ce18e44d99b346494ce29f5851fd086f1e0b --- /dev/null +++ b/mmseg/engine/hooks/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .visualization_hook import SegVisualizationHook +from .snnet_hook import SNNetHook +__all__ = ['SegVisualizationHook', 'SNNetHook'] diff --git a/mmseg/engine/hooks/__pycache__/__init__.cpython-39.pyc b/mmseg/engine/hooks/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..899b473eaea8c91a42cf125f72782b90df3bf892 Binary files /dev/null and b/mmseg/engine/hooks/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/engine/hooks/__pycache__/snnet_hook.cpython-39.pyc b/mmseg/engine/hooks/__pycache__/snnet_hook.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..39cf5eb304778baefa3831e272d80741bab8eb0b Binary files /dev/null and b/mmseg/engine/hooks/__pycache__/snnet_hook.cpython-39.pyc differ diff --git a/mmseg/engine/hooks/__pycache__/visualization_hook.cpython-39.pyc b/mmseg/engine/hooks/__pycache__/visualization_hook.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c45f8a4aa0975308e8ced6707d5e1c485741b616 Binary files /dev/null and b/mmseg/engine/hooks/__pycache__/visualization_hook.cpython-39.pyc differ diff --git a/mmseg/engine/hooks/snnet_hook.py b/mmseg/engine/hooks/snnet_hook.py new file mode 100644 index 0000000000000000000000000000000000000000..061ce85899a28c1c8414842bb336b50a6bfd3134 --- /dev/null +++ b/mmseg/engine/hooks/snnet_hook.py @@ -0,0 +1,68 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Optional, Sequence + +from mmengine.hooks import Hook +from mmengine.model import is_model_wrapper + +from mmseg.registry import HOOKS +import torch +import json +import os + +def group_subnets_by_flops(data, flops_step=10): + sorted_data = {k: v for k, v in sorted(data.items(), key=lambda item: item[1])} + candidate_idx = [] + grouped_cands = [] + last_flops = 0 + for cfg_id, flops in sorted_data.items(): + # flops, _ = values + flops = flops / 1e9 + if abs(last_flops - flops) > flops_step: + if len(candidate_idx) > 0: + grouped_cands.append(candidate_idx) + candidate_idx = [int(cfg_id)] + last_flops = flops + else: + candidate_idx.append(int(cfg_id)) + + if len(candidate_idx) > 0: + grouped_cands.append(candidate_idx) + + return grouped_cands + + + +def initialize_model_stitching_layer(model, dataiter): + images = [] + total_samples = 50 + while len(images) < total_samples: + item = next(dataiter) + data = model.data_preprocessor(item, True) + images.extend(data['inputs']) + + images = torch.stack(images, dim=0) + samples = images.cuda() + model.backbone.initialize_stitching_weights(samples) + +@HOOKS.register_module() +class SNNetHook(Hook): + """Docstring for NewHook. + """ + + def before_train(self, runner) -> None: + if is_model_wrapper(runner.model): + model = runner.model.module + else: + model = runner.model + if not runner._resume: + initialize_model_stitching_layer(model, runner.train_loop.dataloader_iterator) + + # cfg = Config.fromfile(runner._cfg_file) + cfg_name = runner.cfg.filename.split('/')[-1].split('.')[0] + with open(os.path.join('./model_flops', f'snnet_flops_{cfg_name}.json'), 'r') as f: + flops_params = json.load(f) + + flops_step = 10 + grouped_subnet = group_subnets_by_flops(flops_params, flops_step) + model.backbone.flops_grouped_cfgs = grouped_subnet + diff --git a/mmseg/engine/hooks/visualization_hook.py b/mmseg/engine/hooks/visualization_hook.py new file mode 100644 index 0000000000000000000000000000000000000000..ea238c6969183eee8f31bf0bd97f81c89e73a327 --- /dev/null +++ b/mmseg/engine/hooks/visualization_hook.py @@ -0,0 +1,97 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import warnings +from typing import Optional, Sequence + +import mmcv +import mmengine.fileio as fileio +from mmengine.hooks import Hook +from mmengine.runner import Runner +from mmengine.visualization import Visualizer + +from mmseg.registry import HOOKS +from mmseg.structures import SegDataSample + + +@HOOKS.register_module() +class SegVisualizationHook(Hook): + """Segmentation Visualization Hook. Used to visualize validation and + testing process prediction results. + + In the testing phase: + + 1. If ``show`` is True, it means that only the prediction results are + visualized without storing data, so ``vis_backends`` needs to + be excluded. + + Args: + draw (bool): whether to draw prediction results. If it is False, + it means that no drawing will be done. Defaults to False. + interval (int): The interval of visualization. Defaults to 50. + show (bool): Whether to display the drawn image. Default to False. + wait_time (float): The interval of show (s). Defaults to 0. + backend_args (dict, Optional): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to None. + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. 
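`group_subnets_by_flops` above buckets stitch configs so that a new group starts whenever the FLOPs gap to the current group exceeds `flops_step` (in GFLOPs). A tiny worked example, assuming the function defined above is in scope and using made-up FLOPs values:

```python
# Configs are sorted by FLOPs; a new group opens when the gap to the
# previous group's reference FLOPs exceeds flops_step (10 GFLOPs here).
flops_params = {'0': 30e9, '1': 32e9, '2': 45e9, '3': 47e9, '4': 80e9}

print(group_subnets_by_flops(flops_params, flops_step=10))
# [[0, 1], [2, 3], [4]]
```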
+ """ + + def __init__(self, + draw: bool = False, + interval: int = 50, + show: bool = False, + wait_time: float = 0., + backend_args: Optional[dict] = None): + self._visualizer: Visualizer = Visualizer.get_current_instance() + self.interval = interval + self.show = show + if self.show: + # No need to think about vis backends. + self._visualizer._vis_backends = {} + warnings.warn('The show is True, it means that only ' + 'the prediction results are visualized ' + 'without storing data, so vis_backends ' + 'needs to be excluded.') + + self.wait_time = wait_time + self.backend_args = backend_args.copy() if backend_args else None + self.draw = draw + if not self.draw: + warnings.warn('The draw is False, it means that the ' + 'hook for visualization will not take ' + 'effect. The results will NOT be ' + 'visualized or stored.') + + def _after_iter(self, + runner: Runner, + batch_idx: int, + data_batch: dict, + outputs: Sequence[SegDataSample], + mode: str = 'val') -> None: + """Run after every ``self.interval`` validation iterations. + + Args: + runner (:obj:`Runner`): The runner of the validation process. + batch_idx (int): The index of the current batch in the val loop. + data_batch (dict): Data from dataloader. + outputs (Sequence[:obj:`SegDataSample`]): Outputs from model. + mode (str): mode (str): Current mode of runner. Defaults to 'val'. + """ + if self.draw is False or mode == 'train': + return + + if self.every_n_inner_iters(batch_idx, self.interval): + for output in outputs: + img_path = output.img_path + img_bytes = fileio.get( + img_path, backend_args=self.backend_args) + img = mmcv.imfrombytes(img_bytes, channel_order='rgb') + window_name = f'{mode}_{osp.basename(img_path)}' + + self._visualizer.add_datasample( + window_name, + img, + data_sample=output, + show=self.show, + wait_time=self.wait_time, + step=runner.iter) diff --git a/mmseg/engine/optimizers/__init__.py b/mmseg/engine/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e4cf58741febfc20ea33664ea8e1b1ac68bbb327 --- /dev/null +++ b/mmseg/engine/optimizers/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .force_default_constructor import ForceDefaultOptimWrapperConstructor
+from .layer_decay_optimizer_constructor import (
+    LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor)
+
+__all__ = [
+    'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor',
+    'ForceDefaultOptimWrapperConstructor'
+]
diff --git a/mmseg/engine/optimizers/__pycache__/__init__.cpython-39.pyc b/mmseg/engine/optimizers/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c413059b803ec9cf7709a228f485ea0ef3295fcc
Binary files /dev/null and b/mmseg/engine/optimizers/__pycache__/__init__.cpython-39.pyc differ
diff --git a/mmseg/engine/optimizers/__pycache__/force_default_constructor.cpython-39.pyc b/mmseg/engine/optimizers/__pycache__/force_default_constructor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..498e0a51702a57a6d08c22d8a8c1b64acfedf432
Binary files /dev/null and b/mmseg/engine/optimizers/__pycache__/force_default_constructor.cpython-39.pyc differ
diff --git a/mmseg/engine/optimizers/__pycache__/layer_decay_optimizer_constructor.cpython-39.pyc b/mmseg/engine/optimizers/__pycache__/layer_decay_optimizer_constructor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..34c6f101bc50831f9c05e1058e660ed05794628e
Binary files /dev/null and b/mmseg/engine/optimizers/__pycache__/layer_decay_optimizer_constructor.cpython-39.pyc differ
diff --git a/mmseg/engine/optimizers/force_default_constructor.py b/mmseg/engine/optimizers/force_default_constructor.py
new file mode 100644
index 0000000000000000000000000000000000000000..12c642ad411bfd547d63c894c84636e2f1896128
--- /dev/null
+++ b/mmseg/engine/optimizers/force_default_constructor.py
@@ -0,0 +1,255 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+from typing import List, Optional, Union
+
+import torch
+import torch.nn as nn
+from mmengine.logging import print_log
+from mmengine.optim import DefaultOptimWrapperConstructor
+from mmengine.utils.dl_utils import mmcv_full_available
+from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm, _InstanceNorm
+from torch.nn import GroupNorm, LayerNorm
+
+from mmseg.registry import OPTIM_WRAPPER_CONSTRUCTORS
+
+
+@OPTIM_WRAPPER_CONSTRUCTORS.register_module()
+class ForceDefaultOptimWrapperConstructor(DefaultOptimWrapperConstructor):
+    """Default constructor with forced optimizer settings.
+
+    This constructor extends the default constructor to add an option for
+    forcing default optimizer settings. This is useful for ensuring that
+    certain parameters or layers strictly adhere to pre-defined default
+    settings, regardless of any custom settings specified.
+
+    By default, each parameter shares the same optimizer settings, and we
+    provide an argument ``paramwise_cfg`` to specify parameter-wise settings.
+    It is a dict and may contain various fields like 'custom_keys',
+    'bias_lr_mult', etc., as well as the additional field
+    `force_default_settings` which allows for enforcing default settings on
+    optimizer parameters.
+
+    - ``custom_keys`` (dict): Specified parameter-wise settings by keys. If
+      one of the keys in ``custom_keys`` is a substring of the name of one
+      parameter, then the setting of the parameter will be specified by
+      ``custom_keys[key]`` and other settings like ``bias_lr_mult`` etc. will
+      be ignored. It should be noted that the aforementioned ``key`` is the
+      longest key that is a substring of the name of the parameter. If there
+      are multiple matched keys with the same length, then the key with lower
+      alphabetical order will be chosen.
+      ``custom_keys[key]`` should be a dict and may contain fields ``lr_mult``
+      and ``decay_mult``. See Example 2 below.
+    - ``bias_lr_mult`` (float): It will be multiplied to the learning
+      rate for all bias parameters (except for those in normalization
+      layers and offset layers of DCN).
+    - ``bias_decay_mult`` (float): It will be multiplied to the weight
+      decay for all bias parameters (except for those in
+      normalization layers, depthwise conv layers, offset layers of DCN).
+    - ``norm_decay_mult`` (float): It will be multiplied to the weight
+      decay for all weight and bias parameters of normalization
+      layers.
+    - ``flat_decay_mult`` (float): It will be multiplied to the weight
+      decay for all one-dimensional parameters.
+    - ``dwconv_decay_mult`` (float): It will be multiplied to the weight
+      decay for all weight and bias parameters of depthwise conv
+      layers.
+    - ``dcn_offset_lr_mult`` (float): It will be multiplied to the learning
+      rate for parameters of offset layer in the deformable convs
+      of a model.
+    - ``bypass_duplicate`` (bool): If true, the duplicate parameters
+      would not be added into optimizer. Defaults to False.
+    - ``force_default_settings`` (bool): If true, this will override any
+      custom settings defined by ``custom_keys`` and enforce the use of
+      default settings for optimizer parameters like ``bias_lr_mult``.
+      This is particularly useful when you want to ensure that certain layers
+      or parameters adhere strictly to the pre-defined default settings.
+
+    Note:
+
+    1. If the option ``dcn_offset_lr_mult`` is used, the constructor will
+       override the effect of ``bias_lr_mult`` in the bias of offset layer.
+       So be careful when using both ``bias_lr_mult`` and
+       ``dcn_offset_lr_mult``. If you wish to apply both of them to the offset
+       layer in deformable convs, set ``dcn_offset_lr_mult`` to the original
+       ``dcn_offset_lr_mult`` * ``bias_lr_mult``.
+
+    2. If the option ``dcn_offset_lr_mult`` is used, the constructor will
+       apply it to all the DCN layers in the model. So be careful when the
+       model contains multiple DCN layers in places other than backbone.
+
+    3. When the option ``force_default_settings`` is true, it will override
+       any custom settings provided in ``custom_keys``. This ensures that the
+       default settings for the optimizer parameters are used.
+
+    Args:
+        optim_wrapper_cfg (dict): The config dict of the optimizer wrapper.
+
+            Required fields of ``optim_wrapper_cfg`` are
+
+            - ``type``: class name of the OptimizerWrapper
+            - ``optimizer``: The configuration of optimizer.
+
+            Optional fields of ``optim_wrapper_cfg`` are
+
+            - any arguments of the corresponding optimizer wrapper type,
+              e.g., accumulative_counts, clip_grad, etc.
+
+            Required fields of ``optimizer`` are
+
+            - `type`: class name of the optimizer.
+
+            Optional fields of ``optimizer`` are
+
+            - any arguments of the corresponding optimizer type, e.g.,
+              lr, weight_decay, momentum, etc.
+
+        paramwise_cfg (dict, optional): Parameter-wise options.
+
+    Example 1:
+        >>> model = torch.nn.modules.Conv1d(1, 1, 1)
+        >>> optim_wrapper_cfg = dict(
+        >>>     type='OptimWrapper', optimizer=dict(type='SGD', lr=0.01,
+        >>>     momentum=0.9, weight_decay=0.0001))
+        >>> paramwise_cfg = dict(norm_decay_mult=0.)
+ >>> optim_wrapper_builder = DefaultOptimWrapperConstructor( + >>> optim_wrapper_cfg, paramwise_cfg) + >>> optim_wrapper = optim_wrapper_builder(model) + + Example 2: + >>> # assume model have attribute model.backbone and model.cls_head + >>> optim_wrapper_cfg = dict(type='OptimWrapper', optimizer=dict( + >>> type='SGD', lr=0.01, weight_decay=0.95)) + >>> paramwise_cfg = dict(custom_keys={ + >>> 'backbone': dict(lr_mult=0.1, decay_mult=0.9)}) + >>> optim_wrapper_builder = DefaultOptimWrapperConstructor( + >>> optim_wrapper_cfg, paramwise_cfg) + >>> optim_wrapper = optim_wrapper_builder(model) + >>> # Then the `lr` and `weight_decay` for model.backbone is + >>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for + >>> # model.cls_head is (0.01, 0.95). + """ + + def add_params(self, + params: List[dict], + module: nn.Module, + prefix: str = '', + is_dcn_module: Optional[Union[int, float]] = None) -> None: + """Add all parameters of module to the params list. + + The parameters of the given module will be added to the list of param + groups, with specific rules defined by paramwise_cfg. + + Args: + params (list[dict]): A list of param groups, it will be modified + in place. + module (nn.Module): The module to be added. + prefix (str): The prefix of the module + is_dcn_module (int|float|None): If the current module is a + submodule of DCN, `is_dcn_module` will be passed to + control conv_offset layer's learning rate. Defaults to None. + """ + # get param-wise options + custom_keys = self.paramwise_cfg.get('custom_keys', {}) + # first sort with alphabet order and then sort with reversed len of str + sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True) + + bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', None) + bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', None) + norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', None) + dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', None) + flat_decay_mult = self.paramwise_cfg.get('flat_decay_mult', None) + bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False) + dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', None) + force_default_settings = self.paramwise_cfg.get( + 'force_default_settings', False) + + # special rules for norm layers and depth-wise conv layers + is_norm = isinstance(module, + (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm)) + is_dwconv = ( + isinstance(module, torch.nn.Conv2d) + and module.in_channels == module.groups) + + for name, param in module.named_parameters(recurse=False): + param_group = {'params': [param]} + if bypass_duplicate and self._is_in(param_group, params): + print_log( + f'{prefix} is duplicate. It is skipped since ' + f'bypass_duplicate={bypass_duplicate}', + logger='current', + level=logging.WARNING) + continue + if not param.requires_grad: + params.append(param_group) + continue + + # if the parameter match one of the custom keys, ignore other rules + is_custom = False + for key in sorted_keys: + if key in f'{prefix}.{name}': + is_custom = True + lr_mult = custom_keys[key].get('lr_mult', 1.) + param_group['lr'] = self.base_lr * lr_mult + if self.base_wd is not None: + decay_mult = custom_keys[key].get('decay_mult', 1.) 
+ param_group['weight_decay'] = self.base_wd * decay_mult + # add custom settings to param_group + for k, v in custom_keys[key].items(): + param_group[k] = v + break + + if not is_custom or force_default_settings: + # bias_lr_mult affects all bias parameters + # except for norm.bias dcn.conv_offset.bias + if name == 'bias' and not ( + is_norm or is_dcn_module) and bias_lr_mult is not None: + param_group['lr'] = self.base_lr * bias_lr_mult + + if (prefix.find('conv_offset') != -1 and is_dcn_module + and dcn_offset_lr_mult is not None + and isinstance(module, torch.nn.Conv2d)): + # deal with both dcn_offset's bias & weight + param_group['lr'] = self.base_lr * dcn_offset_lr_mult + + # apply weight decay policies + if self.base_wd is not None: + # norm decay + if is_norm and norm_decay_mult is not None: + param_group[ + 'weight_decay'] = self.base_wd * norm_decay_mult + # bias lr and decay + elif (name == 'bias' and not is_dcn_module + and bias_decay_mult is not None): + param_group[ + 'weight_decay'] = self.base_wd * bias_decay_mult + # depth-wise conv + elif is_dwconv and dwconv_decay_mult is not None: + param_group[ + 'weight_decay'] = self.base_wd * dwconv_decay_mult + # flatten parameters except dcn offset + elif (param.ndim == 1 and not is_dcn_module + and flat_decay_mult is not None): + param_group[ + 'weight_decay'] = self.base_wd * flat_decay_mult + params.append(param_group) + for key, value in param_group.items(): + if key == 'params': + continue + full_name = f'{prefix}.{name}' if prefix else name + print_log( + f'paramwise_options -- {full_name}:{key}={value}', + logger='current') + + if mmcv_full_available(): + from mmcv.ops import DeformConv2d, ModulatedDeformConv2d + is_dcn_module = isinstance(module, + (DeformConv2d, ModulatedDeformConv2d)) + else: + is_dcn_module = False + for child_name, child_mod in module.named_children(): + child_prefix = f'{prefix}.{child_name}' if prefix else child_name + self.add_params( + params, + child_mod, + prefix=child_prefix, + is_dcn_module=is_dcn_module) diff --git a/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py b/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py new file mode 100644 index 0000000000000000000000000000000000000000..fdae3ca698c65879056b969f04185f80452ff8d0 --- /dev/null +++ b/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py @@ -0,0 +1,207 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json +import warnings + +from mmengine.dist import get_dist_info +from mmengine.logging import print_log +from mmengine.optim import DefaultOptimWrapperConstructor + +from mmseg.registry import OPTIM_WRAPPER_CONSTRUCTORS + + +def get_layer_id_for_convnext(var_name, max_layer_id): + """Get the layer id to set the different learning rates in ``layer_wise`` + decay_type. + + Args: + var_name (str): The key of the model. + max_layer_id (int): Maximum number of backbone layers. + + Returns: + int: The id number corresponding to different learning rate in + ``LearningRateDecayOptimizerConstructor``. 
+ """ + + if var_name in ('backbone.cls_token', 'backbone.mask_token', + 'backbone.pos_embed'): + return 0 + elif var_name.startswith('backbone.downsample_layers'): + stage_id = int(var_name.split('.')[2]) + if stage_id == 0: + layer_id = 0 + elif stage_id == 1: + layer_id = 2 + elif stage_id == 2: + layer_id = 3 + elif stage_id == 3: + layer_id = max_layer_id + return layer_id + elif var_name.startswith('backbone.stages'): + stage_id = int(var_name.split('.')[2]) + block_id = int(var_name.split('.')[3]) + if stage_id == 0: + layer_id = 1 + elif stage_id == 1: + layer_id = 2 + elif stage_id == 2: + layer_id = 3 + block_id // 3 + elif stage_id == 3: + layer_id = max_layer_id + return layer_id + else: + return max_layer_id + 1 + + +def get_stage_id_for_convnext(var_name, max_stage_id): + """Get the stage id to set the different learning rates in ``stage_wise`` + decay_type. + + Args: + var_name (str): The key of the model. + max_stage_id (int): Maximum number of backbone layers. + + Returns: + int: The id number corresponding to different learning rate in + ``LearningRateDecayOptimizerConstructor``. + """ + + if var_name in ('backbone.cls_token', 'backbone.mask_token', + 'backbone.pos_embed'): + return 0 + elif var_name.startswith('backbone.downsample_layers'): + return 0 + elif var_name.startswith('backbone.stages'): + stage_id = int(var_name.split('.')[2]) + return stage_id + 1 + else: + return max_stage_id - 1 + + +def get_layer_id_for_vit(var_name, max_layer_id): + """Get the layer id to set the different learning rates. + + Args: + var_name (str): The key of the model. + num_max_layer (int): Maximum number of backbone layers. + + Returns: + int: Returns the layer id of the key. + """ + + if var_name in ('backbone.cls_token', 'backbone.mask_token', + 'backbone.pos_embed'): + return 0 + elif var_name.startswith('backbone.patch_embed'): + return 0 + elif var_name.startswith('backbone.layers'): + layer_id = int(var_name.split('.')[2]) + return layer_id + 1 + else: + return max_layer_id - 1 + + +@OPTIM_WRAPPER_CONSTRUCTORS.register_module() +class LearningRateDecayOptimizerConstructor(DefaultOptimWrapperConstructor): + """Different learning rates are set for different layers of backbone. + + Note: Currently, this optimizer constructor is built for ConvNeXt, + BEiT and MAE. + """ + + def add_params(self, params, module, **kwargs): + """Add all parameters of module to the params list. + + The parameters of the given module will be added to the list of param + groups, with specific rules defined by paramwise_cfg. + + Args: + params (list[dict]): A list of param groups, it will be modified + in place. + module (nn.Module): The module to be added. + """ + + parameter_groups = {} + print_log(f'self.paramwise_cfg is {self.paramwise_cfg}') + num_layers = self.paramwise_cfg.get('num_layers') + 2 + decay_rate = self.paramwise_cfg.get('decay_rate') + decay_type = self.paramwise_cfg.get('decay_type', 'layer_wise') + print_log('Build LearningRateDecayOptimizerConstructor ' + f'{decay_type} {decay_rate} - {num_layers}') + weight_decay = self.base_wd + for name, param in module.named_parameters(): + if not param.requires_grad: + continue # frozen weights + if len(param.shape) == 1 or name.endswith('.bias') or name in ( + 'pos_embed', 'cls_token'): + group_name = 'no_decay' + this_weight_decay = 0. 
+ else: + group_name = 'decay' + this_weight_decay = weight_decay + if 'layer_wise' in decay_type: + if 'ConvNeXt' in module.backbone.__class__.__name__: + layer_id = get_layer_id_for_convnext( + name, self.paramwise_cfg.get('num_layers')) + print_log(f'set param {name} as id {layer_id}') + elif 'BEiT' in module.backbone.__class__.__name__ or \ + 'MAE' in module.backbone.__class__.__name__: + layer_id = get_layer_id_for_vit(name, num_layers) + print_log(f'set param {name} as id {layer_id}') + else: + raise NotImplementedError() + elif decay_type == 'stage_wise': + if 'ConvNeXt' in module.backbone.__class__.__name__: + layer_id = get_stage_id_for_convnext(name, num_layers) + print_log(f'set param {name} as id {layer_id}') + else: + raise NotImplementedError() + group_name = f'layer_{layer_id}_{group_name}' + + if group_name not in parameter_groups: + scale = decay_rate**(num_layers - layer_id - 1) + + parameter_groups[group_name] = { + 'weight_decay': this_weight_decay, + 'params': [], + 'param_names': [], + 'lr_scale': scale, + 'group_name': group_name, + 'lr': scale * self.base_lr, + } + + parameter_groups[group_name]['params'].append(param) + parameter_groups[group_name]['param_names'].append(name) + rank, _ = get_dist_info() + if rank == 0: + to_display = {} + for key in parameter_groups: + to_display[key] = { + 'param_names': parameter_groups[key]['param_names'], + 'lr_scale': parameter_groups[key]['lr_scale'], + 'lr': parameter_groups[key]['lr'], + 'weight_decay': parameter_groups[key]['weight_decay'], + } + print_log(f'Param groups = {json.dumps(to_display, indent=2)}') + params.extend(parameter_groups.values()) + + +@OPTIM_WRAPPER_CONSTRUCTORS.register_module() +class LayerDecayOptimizerConstructor(LearningRateDecayOptimizerConstructor): + """Different learning rates are set for different layers of backbone. + + Note: Currently, this optimizer constructor is built for BEiT, + and it will be deprecated. + Please use ``LearningRateDecayOptimizerConstructor`` instead. + """ + + def __init__(self, optim_wrapper_cfg, paramwise_cfg): + warnings.warn('DeprecationWarning: Original ' + 'LayerDecayOptimizerConstructor of BEiT ' + 'will be deprecated. Please use ' + 'LearningRateDecayOptimizerConstructor instead, ' + 'and set decay_type = layer_wise_vit in paramwise_cfg.') + paramwise_cfg.update({'decay_type': 'layer_wise_vit'}) + warnings.warn('DeprecationWarning: Layer_decay_rate will ' + 'be deleted, please use decay_rate instead.') + paramwise_cfg['decay_rate'] = paramwise_cfg.pop('layer_decay_rate') + super().__init__(optim_wrapper_cfg, paramwise_cfg) diff --git a/mmseg/engine/schedulers/__init__.py b/mmseg/engine/schedulers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3cd3f6211345bb3627b76d683291f48efd934a77 --- /dev/null +++ b/mmseg/engine/schedulers/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
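Both constructors above are selected via the `constructor` field of `optim_wrapper`. A hedged sketch of a layer-wise decay setup (the optimizer choice, learning rate, `decay_rate` and `num_layers` are placeholder values, not taken from this repo):

```python
# Illustrative optim_wrapper: each backbone layer i gets
# lr * decay_rate ** (num_layers + 2 - i - 1), per add_params above.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=1e-4, weight_decay=0.05),
    constructor='LearningRateDecayOptimizerConstructor',
    paramwise_cfg=dict(
        decay_rate=0.9, decay_type='layer_wise', num_layers=12))
```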
+from .poly_ratio_scheduler import PolyLRRatio
+
+__all__ = ['PolyLRRatio']
diff --git a/mmseg/engine/schedulers/__pycache__/__init__.cpython-39.pyc b/mmseg/engine/schedulers/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..41011230f01659c6bced8d7c3c66f9b7e4b5a544
Binary files /dev/null and b/mmseg/engine/schedulers/__pycache__/__init__.cpython-39.pyc differ
diff --git a/mmseg/engine/schedulers/__pycache__/poly_ratio_scheduler.cpython-39.pyc b/mmseg/engine/schedulers/__pycache__/poly_ratio_scheduler.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..23025de108d2302efd08dd20c3424f35a0219d77
Binary files /dev/null and b/mmseg/engine/schedulers/__pycache__/poly_ratio_scheduler.cpython-39.pyc differ
diff --git a/mmseg/engine/schedulers/poly_ratio_scheduler.py b/mmseg/engine/schedulers/poly_ratio_scheduler.py
new file mode 100644
index 0000000000000000000000000000000000000000..057203acc9cc9fc72306d2039669b90f35704436
--- /dev/null
+++ b/mmseg/engine/schedulers/poly_ratio_scheduler.py
@@ -0,0 +1,62 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Optional
+
+from mmengine.optim.scheduler import PolyLR
+
+from mmseg.registry import PARAM_SCHEDULERS
+
+
+@PARAM_SCHEDULERS.register_module()
+class PolyLRRatio(PolyLR):
+    """Implements polynomial learning rate decay with ratio.
+
+    This scheduler adjusts the learning rate of each parameter group
+    following a polynomial decay equation. The decay can occur in
+    conjunction with external parameter adjustments made outside this
+    scheduler.
+
+    Args:
+        optimizer (Optimizer or OptimWrapper): Wrapped optimizer.
+        eta_min (float): Minimum learning rate at the end of scheduling.
+            Defaults to 0.
+        eta_min_ratio (float, optional): The ratio of the minimum parameter
+            value to the base parameter value. Either `eta_min` or
+            `eta_min_ratio` should be specified. Defaults to None.
+        power (float): The power of the polynomial. Defaults to 1.0.
+        begin (int): Step at which to start updating the parameters.
+            Defaults to 0.
+        end (int): Step at which to stop updating the parameters.
+            Defaults to INF.
+        last_step (int): The index of last step. Used for resume without
+            state dict. Defaults to -1.
+        by_epoch (bool): Whether the scheduled parameters are updated by
+            epochs. Defaults to True.
+        verbose (bool): Whether to print the value for each update.
+            Defaults to False.
+    """
+
+    def __init__(self, eta_min_ratio: Optional[float] = None, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.eta_min_ratio = eta_min_ratio
+
+    def _get_value(self):
+        """Compute value using chainable form of the scheduler."""
+
+        if self.last_step == 0:
+            return [
+                group[self.param_name] for group in self.optimizer.param_groups
+            ]
+
+        param_groups_value = []
+        for base_value, param_group in zip(self.base_values,
+                                           self.optimizer.param_groups):
+            eta_min = self.eta_min if self.eta_min_ratio is None else \
+                base_value * self.eta_min_ratio
+            step_ratio = (1 - 1 /
+                          (self.total_iters - self.last_step + 1))**self.power
+            step_value = (param_group[self.param_name] -
+                          eta_min) * step_ratio + eta_min
+            param_groups_value.append(step_value)
+
+        return param_groups_value
diff --git a/mmseg/evaluation/__init__.py b/mmseg/evaluation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..82b3a8d68d3aefcc23542fc1006eaddde05ca2ab
--- /dev/null
+++ b/mmseg/evaluation/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
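Per `_get_value` above, each step rescales the gap above `eta_min` by `(1 - 1 / (total_iters - last_step + 1)) ** power`, with `eta_min = base_value * eta_min_ratio` when the ratio is given. A sketch of a scheduler entry (the iteration count and other numbers are placeholders):

```python
# Illustrative param_scheduler: polynomial decay down to 10% of the
# base learning rate over 160k iterations, stepped per iteration.
param_scheduler = [
    dict(type='PolyLRRatio', eta_min_ratio=0.1, power=0.9,
         begin=0, end=160000, by_epoch=False)
]
```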
+from .metrics import CityscapesMetric, DepthMetric, IoUMetric + +__all__ = ['IoUMetric', 'CityscapesMetric', 'DepthMetric'] diff --git a/mmseg/evaluation/__pycache__/__init__.cpython-39.pyc b/mmseg/evaluation/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de3bed0ed91167daa12fabdc1654900616902225 Binary files /dev/null and b/mmseg/evaluation/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/evaluation/metrics/__init__.py b/mmseg/evaluation/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..848d4713dc8c0b6a08569d536bb72bd04ca1b1cc --- /dev/null +++ b/mmseg/evaluation/metrics/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .citys_metric import CityscapesMetric +from .depth_metric import DepthMetric +from .iou_metric import IoUMetric + +__all__ = ['IoUMetric', 'CityscapesMetric', 'DepthMetric'] diff --git a/mmseg/evaluation/metrics/__pycache__/__init__.cpython-39.pyc b/mmseg/evaluation/metrics/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d82cee2c3a7c1a79c42568027558366501f84ff Binary files /dev/null and b/mmseg/evaluation/metrics/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/evaluation/metrics/__pycache__/citys_metric.cpython-39.pyc b/mmseg/evaluation/metrics/__pycache__/citys_metric.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ad8239bc09de2d20992ea02bb7698441f423c57 Binary files /dev/null and b/mmseg/evaluation/metrics/__pycache__/citys_metric.cpython-39.pyc differ diff --git a/mmseg/evaluation/metrics/__pycache__/depth_metric.cpython-39.pyc b/mmseg/evaluation/metrics/__pycache__/depth_metric.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e680fc5ad0f2ba30b72fe0e2e8e4af76daa45db Binary files /dev/null and b/mmseg/evaluation/metrics/__pycache__/depth_metric.cpython-39.pyc differ diff --git a/mmseg/evaluation/metrics/__pycache__/iou_metric.cpython-39.pyc b/mmseg/evaluation/metrics/__pycache__/iou_metric.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a85edc9dbc5a718d9db157e364407afa418da461 Binary files /dev/null and b/mmseg/evaluation/metrics/__pycache__/iou_metric.cpython-39.pyc differ diff --git a/mmseg/evaluation/metrics/citys_metric.py b/mmseg/evaluation/metrics/citys_metric.py new file mode 100644 index 0000000000000000000000000000000000000000..32984653c3fa9c13d8c6a7402033001012b5031f --- /dev/null +++ b/mmseg/evaluation/metrics/citys_metric.py @@ -0,0 +1,158 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import shutil +from collections import OrderedDict +from typing import Dict, Optional, Sequence + +try: + + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval # noqa + import cityscapesscripts.helpers.labels as CSLabels +except ImportError: + CSLabels = None + CSEval = None + +import numpy as np +from mmengine.dist import is_main_process, master_only +from mmengine.evaluator import BaseMetric +from mmengine.logging import MMLogger, print_log +from mmengine.utils import mkdir_or_exist +from PIL import Image + +from mmseg.registry import METRICS + + +@METRICS.register_module() +class CityscapesMetric(BaseMetric): + """Cityscapes evaluation metric. + + Args: + output_dir (str): The directory for output prediction + ignore_index (int): Index that will be ignored in evaluation. + Default: 255. 
+        format_only (bool): Only format the results for submission without
+            performing evaluation. It is useful when you want to format the
+            results into a specific format and submit them to the test server.
+            Defaults to False.
+        keep_results (bool): Whether to keep the results. When ``format_only``
+            is True, ``keep_results`` must be True. Defaults to False.
+        collect_device (str): Device name used for collecting results from
+            different ranks during distributed training. Must be 'cpu' or
+            'gpu'. Defaults to 'cpu'.
+        prefix (str, optional): The prefix that will be added in the metric
+            names to disambiguate homonymous metrics of different evaluators.
+            If prefix is not provided in the argument, self.default_prefix
+            will be used instead. Defaults to None.
+    """
+
+    def __init__(self,
+                 output_dir: str,
+                 ignore_index: int = 255,
+                 format_only: bool = False,
+                 keep_results: bool = False,
+                 collect_device: str = 'cpu',
+                 prefix: Optional[str] = None,
+                 **kwargs) -> None:
+        super().__init__(collect_device=collect_device, prefix=prefix)
+        if CSEval is None:
+            raise ImportError('Please run "pip install cityscapesscripts" to '
+                              'install cityscapesscripts first.')
+        self.output_dir = output_dir
+        self.ignore_index = ignore_index
+
+        self.format_only = format_only
+        if format_only:
+            assert keep_results, (
+                'When format_only is True, the results must be kept, please '
+                f'set keep_results to True, but got {keep_results}')
+        self.keep_results = keep_results
+        self.prefix = prefix
+        if is_main_process():
+            mkdir_or_exist(self.output_dir)
+
+    @master_only
+    def __del__(self) -> None:
+        """Clean up."""
+        if not self.keep_results:
+            shutil.rmtree(self.output_dir)
+
+    def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
+        """Process one batch of data and data_samples.
+
+        The processed results should be stored in ``self.results``, which will
+        be used to compute the metrics when all batches have been processed.
+
+        Args:
+            data_batch (dict): A batch of data from the dataloader.
+            data_samples (Sequence[dict]): A batch of outputs from the model.
+        """
+        mkdir_or_exist(self.output_dir)
+
+        for data_sample in data_samples:
+            pred_label = data_sample['pred_sem_seg']['data'][0].cpu().numpy()
+            # when evaluating with official cityscapesscripts,
+            # labelIds should be used
+            pred_label = self._convert_to_label_id(pred_label)
+            basename = osp.splitext(osp.basename(data_sample['img_path']))[0]
+            png_filename = osp.abspath(
+                osp.join(self.output_dir, f'{basename}.png'))
+            output = Image.fromarray(pred_label.astype(np.uint8)).convert('P')
+            output.save(png_filename)
+            if self.format_only:
+                # format_only always for test dataset without ground truth
+                gt_filename = ''
+            else:
+                # when evaluating with official cityscapesscripts,
+                # **_gtFine_labelIds.png is used
+                gt_filename = data_sample['seg_map_path'].replace(
+                    'labelTrainIds.png', 'labelIds.png')
+            self.results.append((png_filename, gt_filename))
+
+    def compute_metrics(self, results: list) -> Dict[str, float]:
+        """Compute the metrics from processed results.
+
+        Args:
+            results (list): Testing results of the dataset.
+
+        Returns:
+            dict[str, float]: Cityscapes evaluation results.
+ """ + logger: MMLogger = MMLogger.get_current_instance() + if self.format_only: + logger.info(f'results are saved to {osp.dirname(self.output_dir)}') + return OrderedDict() + + msg = 'Evaluating in Cityscapes style' + if logger is None: + msg = '\n' + msg + print_log(msg, logger=logger) + + eval_results = dict() + print_log( + f'Evaluating results under {self.output_dir} ...', logger=logger) + + CSEval.args.evalInstLevelScore = True + CSEval.args.predictionPath = osp.abspath(self.output_dir) + CSEval.args.evalPixelAccuracy = True + CSEval.args.JSONOutput = False + + pred_list, gt_list = zip(*results) + metric = dict() + eval_results.update( + CSEval.evaluateImgLists(pred_list, gt_list, CSEval.args)) + metric['averageScoreCategories'] = eval_results[ + 'averageScoreCategories'] + metric['averageScoreInstCategories'] = eval_results[ + 'averageScoreInstCategories'] + return metric + + @staticmethod + def _convert_to_label_id(result): + """Convert trainId to id for cityscapes.""" + if isinstance(result, str): + result = np.load(result) + result_copy = result.copy() + for trainId, label in CSLabels.trainId2label.items(): + result_copy[result == trainId] = label.id + + return result_copy diff --git a/mmseg/evaluation/metrics/depth_metric.py b/mmseg/evaluation/metrics/depth_metric.py new file mode 100644 index 0000000000000000000000000000000000000000..621d4a31c9fe69cdbf83790e8f320218f755557a --- /dev/null +++ b/mmseg/evaluation/metrics/depth_metric.py @@ -0,0 +1,212 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from collections import OrderedDict, defaultdict +from typing import Dict, List, Optional, Sequence + +import cv2 +import numpy as np +import torch +from mmengine.dist import is_main_process +from mmengine.evaluator import BaseMetric +from mmengine.logging import MMLogger, print_log +from mmengine.utils import mkdir_or_exist +from prettytable import PrettyTable +from torch import Tensor + +from mmseg.registry import METRICS + + +@METRICS.register_module() +class DepthMetric(BaseMetric): + """Depth estimation evaluation metric. + + Args: + depth_metrics (List[str], optional): List of metrics to compute. If + not specified, defaults to all metrics in self.METRICS. + min_depth_eval (float): Minimum depth value for evaluation. + Defaults to 0.0. + max_depth_eval (float): Maximum depth value for evaluation. + Defaults to infinity. + crop_type (str, optional): Specifies the type of cropping to be used + during evaluation. This option can affect how the evaluation mask + is generated. Currently, 'nyu_crop' is supported, but other + types can be added in future. Defaults to None if no cropping + should be applied. + depth_scale_factor (float): Factor to scale the depth values. + Defaults to 1.0. + collect_device (str): Device name used for collecting results from + different ranks during distributed training. Must be 'cpu' or + 'gpu'. Defaults to 'cpu'. + output_dir (str): The directory for output prediction. Defaults to + None. + format_only (bool): Only format result for results commit without + perform evaluation. It is useful when you want to save the result + to a specific format and submit it to the test server. + Defaults to False. + prefix (str, optional): The prefix that will be added in the metric + names to disambiguate homonymous metrics of different evaluators. + If prefix is not provided in the argument, self.default_prefix + will be used instead. Defaults to None. 
+ """ + METRICS = ('d1', 'd2', 'd3', 'abs_rel', 'sq_rel', 'rmse', 'rmse_log', + 'log10', 'silog') + + def __init__(self, + depth_metrics: Optional[List[str]] = None, + min_depth_eval: float = 0.0, + max_depth_eval: float = float('inf'), + crop_type: Optional[str] = None, + depth_scale_factor: float = 1.0, + collect_device: str = 'cpu', + output_dir: Optional[str] = None, + format_only: bool = False, + prefix: Optional[str] = None, + **kwargs) -> None: + super().__init__(collect_device=collect_device, prefix=prefix) + + if depth_metrics is None: + self.metrics = self.METRICS + elif isinstance(depth_metrics, [tuple, list]): + for metric in depth_metrics: + assert metric in self.METRICS, f'the metric {metric} is not ' \ + f'supported. Please use metrics in {self.METRICS}' + self.metrics = depth_metrics + + # Validate crop_type, if provided + assert crop_type in [ + None, 'nyu_crop' + ], (f'Invalid value for crop_type: {crop_type}. Supported values are ' + 'None or \'nyu_crop\'.') + self.crop_type = crop_type + self.min_depth_eval = min_depth_eval + self.max_depth_eval = max_depth_eval + self.output_dir = output_dir + if self.output_dir and is_main_process(): + mkdir_or_exist(self.output_dir) + self.format_only = format_only + self.depth_scale_factor = depth_scale_factor + + def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: + """Process one batch of data and data_samples. + + The processed results should be stored in ``self.results``, which will + be used to compute the metrics when all batches have been processed. + + Args: + data_batch (dict): A batch of data from the dataloader. + data_samples (Sequence[dict]): A batch of outputs from the model. + """ + for data_sample in data_samples: + pred_label = data_sample['pred_depth_map']['data'].squeeze() + # format_only always for test dataset without ground truth + if not self.format_only: + gt_depth = data_sample['gt_depth_map']['data'].squeeze().to( + pred_label) + + eval_mask = self._get_eval_mask(gt_depth) + self.results.append( + (gt_depth[eval_mask], pred_label[eval_mask])) + # format_result + if self.output_dir is not None: + basename = osp.splitext(osp.basename( + data_sample['img_path']))[0] + png_filename = osp.abspath( + osp.join(self.output_dir, f'{basename}.png')) + output_mask = pred_label.cpu().numpy( + ) * self.depth_scale_factor + + cv2.imwrite(png_filename, output_mask.astype(np.uint16), + [cv2.IMWRITE_PNG_COMPRESSION, 0]) + + def _get_eval_mask(self, gt_depth: Tensor): + """Generates an evaluation mask based on ground truth depth and + cropping. + + Args: + gt_depth (Tensor): Ground truth depth map. + + Returns: + Tensor: Boolean mask where evaluation should be performed. 
+ """ + valid_mask = torch.logical_and(gt_depth > self.min_depth_eval, + gt_depth < self.max_depth_eval) + + if self.crop_type == 'nyu_crop': + # this implementation is adapted from + # https://github.com/zhyever/Monocular-Depth-Estimation-Toolbox/blob/main/depth/datasets/nyu.py # noqa + crop_mask = torch.zeros_like(valid_mask) + crop_mask[45:471, 41:601] = 1 + else: + crop_mask = torch.ones_like(valid_mask) + + eval_mask = torch.logical_and(valid_mask, crop_mask) + return eval_mask + + @staticmethod + def _calc_all_metrics(gt_depth, pred_depth): + """Computes final evaluation metrics based on accumulated results.""" + assert gt_depth.shape == pred_depth.shape + + thresh = torch.max((gt_depth / pred_depth), (pred_depth / gt_depth)) + diff = pred_depth - gt_depth + diff_log = torch.log(pred_depth) - torch.log(gt_depth) + + d1 = torch.sum(thresh < 1.25).float() / len(thresh) + d2 = torch.sum(thresh < 1.25**2).float() / len(thresh) + d3 = torch.sum(thresh < 1.25**3).float() / len(thresh) + + abs_rel = torch.mean(torch.abs(diff) / gt_depth) + sq_rel = torch.mean(torch.pow(diff, 2) / gt_depth) + + rmse = torch.sqrt(torch.mean(torch.pow(diff, 2))) + rmse_log = torch.sqrt(torch.mean(torch.pow(diff_log, 2))) + + log10 = torch.mean( + torch.abs(torch.log10(pred_depth) - torch.log10(gt_depth))) + silog = torch.sqrt( + torch.pow(diff_log, 2).mean() - + 0.5 * torch.pow(diff_log.mean(), 2)) + + return { + 'd1': d1.item(), + 'd2': d2.item(), + 'd3': d3.item(), + 'abs_rel': abs_rel.item(), + 'sq_rel': sq_rel.item(), + 'rmse': rmse.item(), + 'rmse_log': rmse_log.item(), + 'log10': log10.item(), + 'silog': silog.item() + } + + def compute_metrics(self, results: list) -> Dict[str, float]: + """Compute the metrics from processed results. + + Args: + results (list): The processed results of each batch. + + Returns: + Dict[str, float]: The computed metrics. The keys are the names of + the metrics, and the values are corresponding results. The keys + are identical with self.metrics. + """ + logger: MMLogger = MMLogger.get_current_instance() + if self.format_only: + logger.info(f'results are saved to {osp.dirname(self.output_dir)}') + return OrderedDict() + + metrics = defaultdict(list) + for gt_depth, pred_depth in results: + for key, value in self._calc_all_metrics(gt_depth, + pred_depth).items(): + metrics[key].append(value) + metrics = {k: sum(metrics[k]) / len(metrics[k]) for k in self.metrics} + + table_data = PrettyTable() + for key, val in metrics.items(): + table_data.add_column(key, [round(val, 5)]) + + print_log('results:', logger) + print_log('\n' + table_data.get_string(), logger=logger) + + return metrics diff --git a/mmseg/evaluation/metrics/iou_metric.py b/mmseg/evaluation/metrics/iou_metric.py new file mode 100644 index 0000000000000000000000000000000000000000..16014c74001d7295f9fff8f03ef185077e3f613b --- /dev/null +++ b/mmseg/evaluation/metrics/iou_metric.py @@ -0,0 +1,286 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from collections import OrderedDict +from typing import Dict, List, Optional, Sequence + +import numpy as np +import torch +from mmengine.dist import is_main_process +from mmengine.evaluator import BaseMetric +from mmengine.logging import MMLogger, print_log +from mmengine.utils import mkdir_or_exist +from PIL import Image +from prettytable import PrettyTable + +from mmseg.registry import METRICS + + +@METRICS.register_module() +class IoUMetric(BaseMetric): + """IoU evaluation metric. 
+ + Args: + ignore_index (int): Index that will be ignored in evaluation. + Default: 255. + iou_metrics (list[str] | str): Metrics to be calculated, the options + includes 'mIoU', 'mDice' and 'mFscore'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + beta (int): Determines the weight of recall in the combined score. + Default: 1. + collect_device (str): Device name used for collecting results from + different ranks during distributed training. Must be 'cpu' or + 'gpu'. Defaults to 'cpu'. + output_dir (str): The directory for output prediction. Defaults to + None. + format_only (bool): Only format result for results commit without + perform evaluation. It is useful when you want to save the result + to a specific format and submit it to the test server. + Defaults to False. + prefix (str, optional): The prefix that will be added in the metric + names to disambiguate homonymous metrics of different evaluators. + If prefix is not provided in the argument, self.default_prefix + will be used instead. Defaults to None. + """ + + def __init__(self, + ignore_index: int = 255, + iou_metrics: List[str] = ['mIoU'], + nan_to_num: Optional[int] = None, + beta: int = 1, + collect_device: str = 'cpu', + output_dir: Optional[str] = None, + format_only: bool = False, + prefix: Optional[str] = None, + **kwargs) -> None: + super().__init__(collect_device=collect_device, prefix=prefix) + + self.ignore_index = ignore_index + self.metrics = iou_metrics + self.nan_to_num = nan_to_num + self.beta = beta + self.output_dir = output_dir + if self.output_dir and is_main_process(): + mkdir_or_exist(self.output_dir) + self.format_only = format_only + + def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: + """Process one batch of data and data_samples. + + The processed results should be stored in ``self.results``, which will + be used to compute the metrics when all batches have been processed. + + Args: + data_batch (dict): A batch of data from the dataloader. + data_samples (Sequence[dict]): A batch of outputs from the model. + """ + num_classes = len(self.dataset_meta['classes']) + for data_sample in data_samples: + pred_label = data_sample['pred_sem_seg']['data'].squeeze() + # format_only always for test dataset without ground truth + if not self.format_only: + label = data_sample['gt_sem_seg']['data'].squeeze().to( + pred_label) + self.results.append( + self.intersect_and_union(pred_label, label, num_classes, + self.ignore_index)) + # format_result + if self.output_dir is not None: + basename = osp.splitext(osp.basename( + data_sample['img_path']))[0] + png_filename = osp.abspath( + osp.join(self.output_dir, f'{basename}.png')) + output_mask = pred_label.cpu().numpy() + # The index range of official ADE20k dataset is from 0 to 150. + # But the index range of output is from 0 to 149. + # That is because we set reduce_zero_label=True. + if data_sample.get('reduce_zero_label', False): + output_mask = output_mask + 1 + output = Image.fromarray(output_mask.astype(np.uint8)) + output.save(png_filename) + + def compute_metrics(self, results: list) -> Dict[str, float]: + """Compute the metrics from processed results. + + Args: + results (list): The processed results of each batch. + + Returns: + Dict[str, float]: The computed metrics. The keys are the names of + the metrics, and the values are corresponding results. The key + mainly includes aAcc, mIoU, mAcc, mDice, mFscore, mPrecision, + mRecall. 
+ """ + logger: MMLogger = MMLogger.get_current_instance() + if self.format_only: + logger.info(f'results are saved to {osp.dirname(self.output_dir)}') + return OrderedDict() + # convert list of tuples to tuple of lists, e.g. + # [(A_1, B_1, C_1, D_1), ..., (A_n, B_n, C_n, D_n)] to + # ([A_1, ..., A_n], ..., [D_1, ..., D_n]) + results = tuple(zip(*results)) + assert len(results) == 4 + + total_area_intersect = sum(results[0]) + total_area_union = sum(results[1]) + total_area_pred_label = sum(results[2]) + total_area_label = sum(results[3]) + ret_metrics = self.total_area_to_metrics( + total_area_intersect, total_area_union, total_area_pred_label, + total_area_label, self.metrics, self.nan_to_num, self.beta) + + class_names = self.dataset_meta['classes'] + + # summary table + ret_metrics_summary = OrderedDict({ + ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + metrics = dict() + for key, val in ret_metrics_summary.items(): + if key == 'aAcc': + metrics[key] = val + else: + metrics['m' + key] = val + + # each class table + ret_metrics.pop('aAcc', None) + ret_metrics_class = OrderedDict({ + ret_metric: np.round(ret_metric_value * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + ret_metrics_class.update({'Class': class_names}) + ret_metrics_class.move_to_end('Class', last=False) + class_table_data = PrettyTable() + for key, val in ret_metrics_class.items(): + class_table_data.add_column(key, val) + + print_log('per class results:', logger) + print_log('\n' + class_table_data.get_string(), logger=logger) + + return metrics + + @staticmethod + def intersect_and_union(pred_label: torch.tensor, label: torch.tensor, + num_classes: int, ignore_index: int): + """Calculate Intersection and Union. + + Args: + pred_label (torch.tensor): Prediction segmentation map + or predict result filename. The shape is (H, W). + label (torch.tensor): Ground truth segmentation map + or label filename. The shape is (H, W). + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + + Returns: + torch.Tensor: The intersection of prediction and ground truth + histogram on all classes. + torch.Tensor: The union of prediction and ground truth histogram on + all classes. + torch.Tensor: The prediction histogram on all classes. + torch.Tensor: The ground truth histogram on all classes. + """ + + mask = (label != ignore_index) + pred_label = pred_label[mask] + label = label[mask] + + intersect = pred_label[pred_label == label] + area_intersect = torch.histc( + intersect.float(), bins=(num_classes), min=0, + max=num_classes - 1).cpu() + area_pred_label = torch.histc( + pred_label.float(), bins=(num_classes), min=0, + max=num_classes - 1).cpu() + area_label = torch.histc( + label.float(), bins=(num_classes), min=0, + max=num_classes - 1).cpu() + area_union = area_pred_label + area_label - area_intersect + return area_intersect, area_union, area_pred_label, area_label + + @staticmethod + def total_area_to_metrics(total_area_intersect: np.ndarray, + total_area_union: np.ndarray, + total_area_pred_label: np.ndarray, + total_area_label: np.ndarray, + metrics: List[str] = ['mIoU'], + nan_to_num: Optional[int] = None, + beta: int = 1): + """Calculate evaluation metrics + Args: + total_area_intersect (np.ndarray): The intersection of prediction + and ground truth histogram on all classes. 
+ total_area_union (np.ndarray): The union of prediction and ground + truth histogram on all classes. + total_area_pred_label (np.ndarray): The prediction histogram on + all classes. + total_area_label (np.ndarray): The ground truth histogram on + all classes. + metrics (List[str] | str): Metrics to be evaluated, 'mIoU' and + 'mDice'. + nan_to_num (int, optional): If specified, NaN values will be + replaced by the numbers defined by the user. Default: None. + beta (int): Determines the weight of recall in the combined score. + Default: 1. + Returns: + Dict[str, np.ndarray]: per category evaluation metrics, + shape (num_classes, ). + """ + + def f_score(precision, recall, beta=1): + """calculate the f-score value. + + Args: + precision (float | torch.Tensor): The precision value. + recall (float | torch.Tensor): The recall value. + beta (int): Determines the weight of recall in the combined + score. Default: 1. + + Returns: + [torch.tensor]: The f-score value. + """ + score = (1 + beta**2) * (precision * recall) / ( + (beta**2 * precision) + recall) + return score + + if isinstance(metrics, str): + metrics = [metrics] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metrics).issubset(set(allowed_metrics)): + raise KeyError(f'metrics {metrics} is not supported') + + all_acc = total_area_intersect.sum() / total_area_label.sum() + ret_metrics = OrderedDict({'aAcc': all_acc}) + for metric in metrics: + if metric == 'mIoU': + iou = total_area_intersect / total_area_union + acc = total_area_intersect / total_area_label + ret_metrics['IoU'] = iou + ret_metrics['Acc'] = acc + elif metric == 'mDice': + dice = 2 * total_area_intersect / ( + total_area_pred_label + total_area_label) + acc = total_area_intersect / total_area_label + ret_metrics['Dice'] = dice + ret_metrics['Acc'] = acc + elif metric == 'mFscore': + precision = total_area_intersect / total_area_pred_label + recall = total_area_intersect / total_area_label + f_value = torch.tensor([ + f_score(x[0], x[1], beta) for x in zip(precision, recall) + ]) + ret_metrics['Fscore'] = f_value + ret_metrics['Precision'] = precision + ret_metrics['Recall'] = recall + + ret_metrics = { + metric: value.numpy() + for metric, value in ret_metrics.items() + } + if nan_to_num is not None: + ret_metrics = OrderedDict({ + metric: np.nan_to_num(metric_value, nan=nan_to_num) + for metric, metric_value in ret_metrics.items() + }) + return ret_metrics diff --git a/mmseg/models/__init__.py b/mmseg/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a98951283c1ac4047c5f5ca3cdc827a43c42cf60 --- /dev/null +++ b/mmseg/models/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
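As a sanity check on the arithmetic in `intersect_and_union` above, a toy run with invented values:

```python
import torch

from mmseg.evaluation.metrics import IoUMetric

# 2x2 prediction vs. label, two classes, no ignored pixels.
pred = torch.tensor([[0, 0], [1, 1]])
label = torch.tensor([[0, 1], [1, 1]])
inter, union, _, _ = IoUMetric.intersect_and_union(
    pred, label, num_classes=2, ignore_index=255)
print(inter / union)  # tensor([0.5000, 0.6667]) -> per-class IoU
```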
+from .assigners import * # noqa: F401,F403 +from .backbones import * # noqa: F401,F403 +from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, + build_head, build_loss, build_segmentor) +from .data_preprocessor import SegDataPreProcessor +from .decode_heads import * # noqa: F401,F403 +from .losses import * # noqa: F401,F403 +from .necks import * # noqa: F401,F403 +from .segmentors import * # noqa: F401,F403 +from .text_encoder import * # noqa: F401,F403 + +__all__ = [ + 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', + 'build_head', 'build_loss', 'build_segmentor', 'SegDataPreProcessor' +] diff --git a/mmseg/models/__pycache__/__init__.cpython-39.pyc b/mmseg/models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d477164b12d261a2eb7fe7a546c0675234eb5bda Binary files /dev/null and b/mmseg/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/models/__pycache__/builder.cpython-39.pyc b/mmseg/models/__pycache__/builder.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07a30c48262f97e090174461e0f30c9d4ea8837d Binary files /dev/null and b/mmseg/models/__pycache__/builder.cpython-39.pyc differ diff --git a/mmseg/models/__pycache__/data_preprocessor.cpython-39.pyc b/mmseg/models/__pycache__/data_preprocessor.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7de87fa6f728a048ed4cffad5cce709fbc8f2e5 Binary files /dev/null and b/mmseg/models/__pycache__/data_preprocessor.cpython-39.pyc differ diff --git a/mmseg/models/assigners/__init__.py b/mmseg/models/assigners/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d49b1b18b9e3e6d4e3b19c48eb1c80cbb1205f69 --- /dev/null +++ b/mmseg/models/assigners/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
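The builder functions re-exported above follow the usual MMEngine registry pattern; a hedged sketch (the config path is a stand-in, not a file from this diff):

```python
from mmengine import Config

from mmseg.models import build_segmentor

cfg = Config.fromfile('some_segmentor_config.py')  # hypothetical path
model = build_segmentor(cfg.model)
```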
+from .base_assigner import BaseAssigner
+from .hungarian_assigner import HungarianAssigner
+from .match_cost import ClassificationCost, CrossEntropyLossCost, DiceCost
+
+__all__ = [
+    'BaseAssigner',
+    'HungarianAssigner',
+    'ClassificationCost',
+    'CrossEntropyLossCost',
+    'DiceCost',
+]
diff --git a/mmseg/models/assigners/__pycache__/__init__.cpython-39.pyc b/mmseg/models/assigners/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..afca105e825b2f7b85d230fd834b7269b6859c93
Binary files /dev/null and b/mmseg/models/assigners/__pycache__/__init__.cpython-39.pyc differ
diff --git a/mmseg/models/assigners/__pycache__/base_assigner.cpython-39.pyc b/mmseg/models/assigners/__pycache__/base_assigner.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2a55edbfd0dcdda392ad42fac9058c513ec5b52a
Binary files /dev/null and b/mmseg/models/assigners/__pycache__/base_assigner.cpython-39.pyc differ
diff --git a/mmseg/models/assigners/__pycache__/hungarian_assigner.cpython-39.pyc b/mmseg/models/assigners/__pycache__/hungarian_assigner.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..945a97c9150eb2cd5754cb715cb8957e3edf7826
Binary files /dev/null and b/mmseg/models/assigners/__pycache__/hungarian_assigner.cpython-39.pyc differ
diff --git a/mmseg/models/assigners/__pycache__/match_cost.cpython-39.pyc b/mmseg/models/assigners/__pycache__/match_cost.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..968005e173dbb05c9b8a221bd941f5f0df9cd1bb
Binary files /dev/null and b/mmseg/models/assigners/__pycache__/match_cost.cpython-39.pyc differ
diff --git a/mmseg/models/assigners/base_assigner.py b/mmseg/models/assigners/base_assigner.py
new file mode 100644
index 0000000000000000000000000000000000000000..97895cdac2789a62c3e8a381caaf944679f1e5a4
--- /dev/null
+++ b/mmseg/models/assigners/base_assigner.py
@@ -0,0 +1,18 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod
+from typing import Optional
+
+from mmengine.structures import InstanceData
+
+
+class BaseAssigner(metaclass=ABCMeta):
+    """Base assigner that assigns masks to ground truth class labels."""
+
+    @abstractmethod
+    def assign(self,
+               pred_instances: InstanceData,
+               gt_instances: InstanceData,
+               gt_instances_ignore: Optional[InstanceData] = None,
+               **kwargs):
+        """Assign masks to either a ground truth class label or a negative
+        label."""
diff --git a/mmseg/models/assigners/hungarian_assigner.py b/mmseg/models/assigners/hungarian_assigner.py
new file mode 100644
index 0000000000000000000000000000000000000000..28868f0a04e7feaf3de20e39fac5059d789047d3
--- /dev/null
+++ b/mmseg/models/assigners/hungarian_assigner.py
@@ -0,0 +1,86 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Union
+
+import torch
+from mmengine import ConfigDict
+from mmengine.structures import InstanceData
+from scipy.optimize import linear_sum_assignment
+from torch.cuda.amp import autocast
+
+from mmseg.registry import TASK_UTILS
+from .base_assigner import BaseAssigner
+
+
+@TASK_UTILS.register_module()
+class HungarianAssigner(BaseAssigner):
+    """Computes one-to-one matching between prediction masks and ground truth.
+
+    This class uses bipartite matching-based assignment to compute an
+    assignment between the prediction masks and the ground truth. The
+    assignment result is based on the weighted sum of match costs. The
+    Hungarian algorithm is used to calculate the best matching with the
+    minimum cost. The prediction masks that are not matched are classified
+    as background.
+
+    Args:
+        match_costs (ConfigDict|List[ConfigDict]): Match cost configs.
+    """
+
+    def __init__(
+        self, match_costs: Union[List[Union[dict, ConfigDict]], dict,
+                                 ConfigDict]
+    ) -> None:
+
+        if isinstance(match_costs, dict):
+            match_costs = [match_costs]
+        elif isinstance(match_costs, list):
+            assert len(match_costs) > 0, \
+                'match_costs must not be an empty list.'
+
+        self.match_costs = [
+            TASK_UTILS.build(match_cost) for match_cost in match_costs
+        ]
+
+    def assign(self, pred_instances: InstanceData, gt_instances: InstanceData,
+               **kwargs):
+        """Computes one-to-one matching based on the weighted costs.
+
+        This method assigns each query prediction to a ground truth or
+        background. The assignment first calculates the cost for each
+        category assigned to each query mask, and then uses the
+        Hungarian algorithm to calculate the minimum cost as the best
+        match.
+
+        Args:
+            pred_instances (InstanceData): Instances of model
+                predictions. It includes "masks", with shape
+                (n, h, w) or (n, l), and "cls", with shape (n, num_classes+1)
+            gt_instances (InstanceData): Ground truth of instance
+                annotations. It includes "labels", with shape (k, ),
+                and "masks", with shape (k, h, w) or (k, l).
+
+        Returns:
+            matched_query_inds (Tensor): The indexes of matched queries.
+            matched_label_inds (Tensor): The indexes of matched labels.
+        """
+        # compute weighted cost
+        cost_list = []
+        with autocast(enabled=False):
+            for match_cost in self.match_costs:
+                cost = match_cost(
+                    pred_instances=pred_instances, gt_instances=gt_instances)
+                cost_list.append(cost)
+        cost = torch.stack(cost_list).sum(dim=0)
+
+        device = cost.device
+        # do Hungarian matching on CPU using linear_sum_assignment
+        cost = cost.detach().cpu()
+        if linear_sum_assignment is None:
+            raise ImportError('Please run "pip install scipy" '
+                              'to install scipy first.')
+
+        matched_query_inds, matched_label_inds = linear_sum_assignment(cost)
+        matched_query_inds = torch.from_numpy(matched_query_inds).to(device)
+        matched_label_inds = torch.from_numpy(matched_label_inds).to(device)
+
+        return matched_query_inds, matched_label_inds
diff --git a/mmseg/models/assigners/match_cost.py b/mmseg/models/assigners/match_cost.py
new file mode 100644
index 0000000000000000000000000000000000000000..560df852902fa7a2167cc7cfdf86595bf8d6e3f8
--- /dev/null
+++ b/mmseg/models/assigners/match_cost.py
@@ -0,0 +1,231 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import abstractmethod
+from typing import Union
+
+import torch
+import torch.nn.functional as F
+from mmengine.structures import InstanceData
+from torch import Tensor
+
+from mmseg.registry import TASK_UTILS
+
+
+class BaseMatchCost:
+    """Base match cost class.
+
+    Args:
+        weight (Union[float, int]): Cost weight. Defaults to 1.
+    """
+
+    def __init__(self, weight: Union[float, int] = 1.) -> None:
+        self.weight = weight
+
+    @abstractmethod
+    def __call__(self, pred_instances: InstanceData,
+                 gt_instances: InstanceData, **kwargs) -> Tensor:
+        """Compute match cost.
+
+        Args:
+            pred_instances (InstanceData): Instances of model predictions.
+                It often includes "labels" and "scores".
+            gt_instances (InstanceData): Ground truth of instance
+                annotations. It usually includes "labels".
+
+        Returns:
+            Tensor: Match Cost matrix of shape (num_preds, num_gts).
+ """ + pass + + +@TASK_UTILS.register_module() +class ClassificationCost(BaseMatchCost): + """ClsSoftmaxCost. + + Args: + weight (Union[float, int]): Cost weight. Defaults to 1. + + Examples: + >>> from mmseg.models.assigners import ClassificationCost + >>> import torch + >>> self = ClassificationCost() + >>> cls_pred = torch.rand(4, 3) + >>> gt_labels = torch.tensor([0, 1, 2]) + >>> factor = torch.tensor([10, 8, 10, 8]) + >>> self(cls_pred, gt_labels) + tensor([[-0.3430, -0.3525, -0.3045], + [-0.3077, -0.2931, -0.3992], + [-0.3664, -0.3455, -0.2881], + [-0.3343, -0.2701, -0.3956]]) + """ + + def __init__(self, weight: Union[float, int] = 1) -> None: + super().__init__(weight=weight) + + def __call__(self, pred_instances: InstanceData, + gt_instances: InstanceData, **kwargs) -> Tensor: + """Compute match cost. + + Args: + pred_instances (InstanceData): "scores" inside is + predicted classification logits, of shape + (num_queries, num_class). + gt_instances (InstanceData): "labels" inside should have + shape (num_gt, ). + + Returns: + Tensor: Match Cost matrix of shape (num_preds, num_gts). + """ + assert hasattr(pred_instances, 'scores'), \ + "pred_instances must contain 'scores'" + assert hasattr(gt_instances, 'labels'), \ + "gt_instances must contain 'labels'" + pred_scores = pred_instances.scores + gt_labels = gt_instances.labels + + pred_scores = pred_scores.softmax(-1) + cls_cost = -pred_scores[:, gt_labels] + + return cls_cost * self.weight + + +@TASK_UTILS.register_module() +class DiceCost(BaseMatchCost): + """Cost of mask assignments based on dice losses. + + Args: + pred_act (bool): Whether to apply sigmoid to mask_pred. + Defaults to False. + eps (float): Defaults to 1e-3. + naive_dice (bool): If True, use the naive dice loss + in which the power of the number in the denominator is + the first power. If False, use the second power that + is adopted by K-Net and SOLO. Defaults to True. + weight (Union[float, int]): Cost weight. Defaults to 1. + """ + + def __init__(self, + pred_act: bool = False, + eps: float = 1e-3, + naive_dice: bool = True, + weight: Union[float, int] = 1.) -> None: + super().__init__(weight=weight) + self.pred_act = pred_act + self.eps = eps + self.naive_dice = naive_dice + + def _binary_mask_dice_loss(self, mask_preds: Tensor, + gt_masks: Tensor) -> Tensor: + """ + Args: + mask_preds (Tensor): Mask prediction in shape (num_queries, *). + gt_masks (Tensor): Ground truth in shape (num_gt, *) + store 0 or 1, 0 for negative class and 1 for + positive class. + + Returns: + Tensor: Dice cost matrix in shape (num_queries, num_gt). + """ + mask_preds = mask_preds.flatten(1) + gt_masks = gt_masks.flatten(1).float() + numerator = 2 * torch.einsum('nc,mc->nm', mask_preds, gt_masks) + if self.naive_dice: + denominator = mask_preds.sum(-1)[:, None] + \ + gt_masks.sum(-1)[None, :] + else: + denominator = mask_preds.pow(2).sum(1)[:, None] + \ + gt_masks.pow(2).sum(1)[None, :] + loss = 1 - (numerator + self.eps) / (denominator + self.eps) + return loss + + def __call__(self, pred_instances: InstanceData, + gt_instances: InstanceData, **kwargs) -> Tensor: + """Compute match cost. + + Args: + pred_instances (InstanceData): Predicted instances which + must contain "masks". + gt_instances (InstanceData): Ground truth which must contain + "mask". + + Returns: + Tensor: Match Cost matrix of shape (num_preds, num_gts). 
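The dice cost above can be checked by hand on a tiny example. A sketch with invented values, using the naive_dice (first-power) denominator:

    import torch

    pred = torch.tensor([[0.8, 0.2, 0.6, 0.4]])  # (num_queries=1, H*W=4), flattened
    gt = torch.tensor([[1., 0., 1., 0.]])        # (num_gt=1, H*W=4)
    eps = 1e-3
    numerator = 2 * torch.einsum('nc,mc->nm', pred, gt)        # 2 * 1.4 = 2.8
    denominator = pred.sum(-1)[:, None] + gt.sum(-1)[None, :]  # 2.0 + 2.0 = 4.0
    cost = 1 - (numerator + eps) / (denominator + eps)         # ~0.30; lower = better match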
+ """ + assert hasattr(pred_instances, 'masks'), \ + "pred_instances must contain 'masks'" + assert hasattr(gt_instances, 'masks'), \ + "gt_instances must contain 'masks'" + pred_masks = pred_instances.masks + gt_masks = gt_instances.masks + + if self.pred_act: + pred_masks = pred_masks.sigmoid() + dice_cost = self._binary_mask_dice_loss(pred_masks, gt_masks) + return dice_cost * self.weight + + +@TASK_UTILS.register_module() +class CrossEntropyLossCost(BaseMatchCost): + """CrossEntropyLossCost. + + Args: + use_sigmoid (bool): Whether the prediction uses sigmoid + of softmax. Defaults to True. + weight (Union[float, int]): Cost weight. Defaults to 1. + """ + + def __init__(self, + use_sigmoid: bool = True, + weight: Union[float, int] = 1.) -> None: + super().__init__(weight=weight) + self.use_sigmoid = use_sigmoid + + def _binary_cross_entropy(self, cls_pred: Tensor, + gt_labels: Tensor) -> Tensor: + """ + Args: + cls_pred (Tensor): The prediction with shape (num_queries, 1, *) or + (num_queries, *). + gt_labels (Tensor): The learning label of prediction with + shape (num_gt, *). + + Returns: + Tensor: Cross entropy cost matrix in shape (num_queries, num_gt). + """ + cls_pred = cls_pred.flatten(1).float() + gt_labels = gt_labels.flatten(1).float() + n = cls_pred.shape[1] + pos = F.binary_cross_entropy_with_logits( + cls_pred, torch.ones_like(cls_pred), reduction='none') + neg = F.binary_cross_entropy_with_logits( + cls_pred, torch.zeros_like(cls_pred), reduction='none') + cls_cost = torch.einsum('nc,mc->nm', pos, gt_labels) + \ + torch.einsum('nc,mc->nm', neg, 1 - gt_labels) + cls_cost = cls_cost / n + + return cls_cost + + def __call__(self, pred_instances: InstanceData, + gt_instances: InstanceData, **kwargs) -> Tensor: + """Compute match cost. + + Args: + pred_instances (:obj:`InstanceData`): Predicted instances which + must contain ``masks``. + gt_instances (:obj:`InstanceData`): Ground truth which must contain + ``masks``. + + Returns: + Tensor: Match Cost matrix of shape (num_preds, num_gts). + """ + assert hasattr(pred_instances, 'masks'), \ + "pred_instances must contain 'masks'" + assert hasattr(gt_instances, 'masks'), \ + "gt_instances must contain 'masks'" + pred_masks = pred_instances.masks + gt_masks = gt_instances.masks + if self.use_sigmoid: + cls_cost = self._binary_cross_entropy(pred_masks, gt_masks) + else: + raise NotImplementedError + + return cls_cost * self.weight diff --git a/mmseg/models/backbones/__init__.py b/mmseg/models/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4f0661725f947176b6ae18cb9eaa684f94753a93 --- /dev/null +++ b/mmseg/models/backbones/__init__.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
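The einsum trick in `CrossEntropyLossCost._binary_cross_entropy` above precomputes per-pixel BCE against all-ones and all-zeros targets once, then mixes them per (query, gt) pair. A sketch with invented shapes showing the equivalence:

    import torch
    import torch.nn.functional as F

    logits = torch.randn(2, 5)                # (num_queries, H*W)
    gt = torch.randint(0, 2, (3, 5)).float()  # (num_gt, H*W)
    pos = F.binary_cross_entropy_with_logits(
        logits, torch.ones_like(logits), reduction='none')
    neg = F.binary_cross_entropy_with_logits(
        logits, torch.zeros_like(logits), reduction='none')
    cost = (torch.einsum('nc,mc->nm', pos, gt) +
            torch.einsum('nc,mc->nm', neg, 1 - gt)) / logits.shape[1]
    # cost[i, j] equals binary_cross_entropy_with_logits(logits[i], gt[j],
    # reduction='mean') for every pair (i, j).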
+from .beit import BEiT
+from .bisenetv1 import BiSeNetV1
+from .bisenetv2 import BiSeNetV2
+from .cgnet import CGNet
+from .ddrnet import DDRNet
+from .erfnet import ERFNet
+from .fast_scnn import FastSCNN
+from .hrnet import HRNet
+from .icnet import ICNet
+from .mae import MAE
+from .mit import MixVisionTransformer
+from .mobilenet_v2 import MobileNetV2
+from .mobilenet_v3 import MobileNetV3
+from .mscan import MSCAN
+from .pidnet import PIDNet
+from .resnest import ResNeSt
+from .resnet import ResNet, ResNetV1c, ResNetV1d
+from .resnext import ResNeXt
+from .stdc import STDCContextPathNet, STDCNet
+from .swin import SwinTransformer
+from .timm_backbone import TIMMBackbone
+from .twins import PCPVT, SVT
+from .unet import UNet
+from .vit import VisionTransformer
+from .vpd import VPD
+from .snnet import SNNetv1, SNNetv2
+
+__all__ = [
+    'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN',
+    'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3',
+    'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer',
+    'BiSeNetV1', 'BiSeNetV2', 'ICNet', 'TIMMBackbone', 'ERFNet', 'PCPVT',
+    'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE', 'PIDNet', 'MSCAN',
+    'DDRNet', 'VPD', 'SNNetv1', 'SNNetv2',
+]
diff --git a/mmseg/models/backbones/beit.py
b/mmseg/models/backbones/beit.py new file mode 100644 index 0000000000000000000000000000000000000000..e5da71e729256a9dd12b70d32886c9db27d9fa3c --- /dev/null +++ b/mmseg/models/backbones/beit.py @@ -0,0 +1,554 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import build_norm_layer +from mmcv.cnn.bricks.drop import build_dropout +from mmengine.model import BaseModule, ModuleList +from mmengine.model.weight_init import (constant_init, kaiming_init, + trunc_normal_) +from mmengine.runner.checkpoint import _load_checkpoint +from scipy import interpolate +from torch.nn.modules.batchnorm import _BatchNorm +from torch.nn.modules.utils import _pair as to_2tuple + +from mmseg.registry import MODELS +from ..utils import PatchEmbed +from .vit import TransformerEncoderLayer as VisionTransformerEncoderLayer + + +class BEiTAttention(BaseModule): + """Window based multi-head self-attention (W-MSA) module with relative + position bias. + + Args: + embed_dims (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (tuple[int]): The height and width of the window. + bias (bool): The option to add leanable bias for q, k, v. If bias is + True, it will add leanable bias. If bias is 'qv_bias', it will only + add leanable bias for q, v. If bias is False, it will not add bias + for q, k, v. Default to 'qv_bias'. + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + attn_drop_rate (float): Dropout ratio of attention weight. + Default: 0.0 + proj_drop_rate (float): Dropout ratio of output. Default: 0. + init_cfg (dict | None, optional): The Config for initialization. + Default: None. 
+ """ + + def __init__(self, + embed_dims, + num_heads, + window_size, + bias='qv_bias', + qk_scale=None, + attn_drop_rate=0., + proj_drop_rate=0., + init_cfg=None, + **kwargs): + super().__init__(init_cfg=init_cfg) + self.embed_dims = embed_dims + self.num_heads = num_heads + head_embed_dims = embed_dims // num_heads + self.bias = bias + self.scale = qk_scale or head_embed_dims**-0.5 + + qkv_bias = bias + if bias == 'qv_bias': + self._init_qv_bias() + qkv_bias = False + + self.window_size = window_size + self._init_rel_pos_embedding() + + self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop_rate) + self.proj = nn.Linear(embed_dims, embed_dims) + self.proj_drop = nn.Dropout(proj_drop_rate) + + def _init_qv_bias(self): + self.q_bias = nn.Parameter(torch.zeros(self.embed_dims)) + self.v_bias = nn.Parameter(torch.zeros(self.embed_dims)) + + def _init_rel_pos_embedding(self): + Wh, Ww = self.window_size + # cls to token & token 2 cls & cls to cls + self.num_relative_distance = (2 * Wh - 1) * (2 * Ww - 1) + 3 + # relative_position_bias_table shape is (2*Wh-1 * 2*Ww-1 + 3, nH) + self.relative_position_bias_table = nn.Parameter( + torch.zeros(self.num_relative_distance, self.num_heads)) + + # get pair-wise relative position index for + # each token inside the window + coords_h = torch.arange(Wh) + coords_w = torch.arange(Ww) + # coords shape is (2, Wh, Ww) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) + # coords_flatten shape is (2, Wh*Ww) + coords_flatten = torch.flatten(coords, 1) + relative_coords = ( + coords_flatten[:, :, None] - coords_flatten[:, None, :]) + # relative_coords shape is (Wh*Ww, Wh*Ww, 2) + relative_coords = relative_coords.permute(1, 2, 0).contiguous() + # shift to start from 0 + relative_coords[:, :, 0] += Wh - 1 + relative_coords[:, :, 1] += Ww - 1 + relative_coords[:, :, 0] *= 2 * Ww - 1 + relative_position_index = torch.zeros( + size=(Wh * Ww + 1, ) * 2, dtype=relative_coords.dtype) + # relative_position_index shape is (Wh*Ww, Wh*Ww) + relative_position_index[1:, 1:] = relative_coords.sum(-1) + relative_position_index[0, 0:] = self.num_relative_distance - 3 + relative_position_index[0:, 0] = self.num_relative_distance - 2 + relative_position_index[0, 0] = self.num_relative_distance - 1 + + self.register_buffer('relative_position_index', + relative_position_index) + + def init_weights(self): + trunc_normal_(self.relative_position_bias_table, std=0.02) + + def forward(self, x): + """ + Args: + x (tensor): input features with shape of (num_windows*B, N, C). 
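To see what `_init_rel_pos_embedding` above actually builds, here is the same index bookkeeping on a toy 2x2 window (sizes invented):

    import torch

    Wh, Ww = 2, 2
    coords = torch.stack(torch.meshgrid([torch.arange(Wh), torch.arange(Ww)]))
    flat = torch.flatten(coords, 1)                    # (2, Wh*Ww)
    rel = (flat[:, :, None] - flat[:, None, :]).permute(1, 2, 0).contiguous()
    rel[:, :, 0] += Wh - 1        # shift offsets to start from 0
    rel[:, :, 1] += Ww - 1
    rel[:, :, 0] *= 2 * Ww - 1    # row-major bucket id
    index = rel.sum(-1)           # (4, 4), ids in [0, 8]; 3 extra cls buckets follow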
+ """ + B, N, C = x.shape + + if self.bias == 'qv_bias': + k_bias = torch.zeros_like(self.v_bias, requires_grad=False) + qkv_bias = torch.cat((self.q_bias, k_bias, self.v_bias)) + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + else: + qkv = self.qkv(x) + + qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + if self.relative_position_bias_table is not None: + Wh = self.window_size[0] + Ww = self.window_size[1] + relative_position_bias = self.relative_position_bias_table[ + self.relative_position_index.view(-1)].view( + Wh * Ww + 1, Wh * Ww + 1, -1) + relative_position_bias = relative_position_bias.permute( + 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class BEiTTransformerEncoderLayer(VisionTransformerEncoderLayer): + """Implements one encoder layer in Vision Transformer. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + attn_drop_rate (float): The drop out rate for attention layer. + Default: 0.0. + drop_path_rate (float): Stochastic depth rate. Default 0.0. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + bias (bool): The option to add leanable bias for q, k, v. If bias is + True, it will add leanable bias. If bias is 'qv_bias', it will only + add leanable bias for q, v. If bias is False, it will not add bias + for q, k, v. Default to 'qv_bias'. + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + window_size (tuple[int], optional): The height and width of the window. + Default: None. + init_values (float, optional): Initialize the values of BEiTAttention + and FFN with learnable scaling. Default: None. 
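The 'qv_bias' branch of the forward above, in isolation (shapes invented): the packed qkv projection receives a bias whose key slice is pinned to zero, so only the q and v biases are learned:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    embed_dims = 8
    qkv = nn.Linear(embed_dims, embed_dims * 3, bias=False)
    q_bias = nn.Parameter(torch.zeros(embed_dims))
    v_bias = nn.Parameter(torch.zeros(embed_dims))
    k_bias = torch.zeros_like(v_bias, requires_grad=False)  # never trained
    x = torch.randn(2, 4, embed_dims)
    out = F.linear(x, qkv.weight, torch.cat((q_bias, k_bias, v_bias)))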
+ """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + attn_drop_rate=0., + drop_path_rate=0., + num_fcs=2, + bias='qv_bias', + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + window_size=None, + attn_cfg=dict(), + ffn_cfg=dict(add_identity=False), + init_values=None): + attn_cfg.update(dict(window_size=window_size, qk_scale=None)) + + super().__init__( + embed_dims=embed_dims, + num_heads=num_heads, + feedforward_channels=feedforward_channels, + attn_drop_rate=attn_drop_rate, + drop_path_rate=0., + drop_rate=0., + num_fcs=num_fcs, + qkv_bias=bias, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + attn_cfg=attn_cfg, + ffn_cfg=ffn_cfg) + + # NOTE: drop path for stochastic depth, we shall see if + # this is better than dropout here + dropout_layer = dict(type='DropPath', drop_prob=drop_path_rate) + self.drop_path = build_dropout( + dropout_layer) if dropout_layer else nn.Identity() + self.gamma_1 = nn.Parameter( + init_values * torch.ones(embed_dims), requires_grad=True) + self.gamma_2 = nn.Parameter( + init_values * torch.ones(embed_dims), requires_grad=True) + + def build_attn(self, attn_cfg): + self.attn = BEiTAttention(**attn_cfg) + + def forward(self, x): + x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x))) + x = x + self.drop_path(self.gamma_2 * self.ffn(self.norm2(x))) + return x + + +@MODELS.register_module() +class BEiT(BaseModule): + """BERT Pre-Training of Image Transformers. + + Args: + img_size (int | tuple): Input image size. Default: 224. + patch_size (int): The patch size. Default: 16. + in_channels (int): Number of input channels. Default: 3. + embed_dims (int): Embedding dimension. Default: 768. + num_layers (int): Depth of transformer. Default: 12. + num_heads (int): Number of attention heads. Default: 12. + mlp_ratio (int): Ratio of mlp hidden dim to embedding dim. + Default: 4. + out_indices (list | tuple | int): Output from which stages. + Default: -1. + qv_bias (bool): Enable bias for qv if True. Default: True. + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): Stochastic depth rate. Default 0.0. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + patch_norm (bool): Whether to add a norm in PatchEmbed Block. + Default: False. + final_norm (bool): Whether to add a additional layer to normalize + final feature map. Default: False. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + pretrained (str, optional): Model pretrained path. Default: None. + init_values (float): Initialize the values of BEiTAttention and FFN + with learnable scaling. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
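The scaled-residual pattern implemented by `gamma_1`/`gamma_2` above, reduced to a single branch (shapes invented; `branch` stands in for the attention or FFN sub-layer):

    import torch
    import torch.nn as nn

    dim, init_values = 8, 0.1
    gamma_1 = nn.Parameter(init_values * torch.ones(dim))
    branch = nn.Linear(dim, dim)
    x = torch.randn(2, 4, dim)
    x = x + gamma_1 * branch(x)  # per-channel scaling keeps early updates small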
+ """ + + def __init__(self, + img_size=224, + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=-1, + qv_bias=True, + attn_drop_rate=0., + drop_path_rate=0., + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + patch_norm=False, + final_norm=False, + num_fcs=2, + norm_eval=False, + pretrained=None, + init_values=0.1, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + if isinstance(img_size, int): + img_size = to_2tuple(img_size) + elif isinstance(img_size, tuple): + if len(img_size) == 1: + img_size = to_2tuple(img_size[0]) + assert len(img_size) == 2, \ + f'The size of image should have length 1 or 2, ' \ + f'but got {len(img_size)}' + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + + self.in_channels = in_channels + self.img_size = img_size + self.patch_size = patch_size + self.norm_eval = norm_eval + self.pretrained = pretrained + self.num_layers = num_layers + self.embed_dims = embed_dims + self.num_heads = num_heads + self.mlp_ratio = mlp_ratio + self.attn_drop_rate = attn_drop_rate + self.drop_path_rate = drop_path_rate + self.num_fcs = num_fcs + self.qv_bias = qv_bias + self.act_cfg = act_cfg + self.norm_cfg = norm_cfg + self.patch_norm = patch_norm + self.init_values = init_values + self.window_size = (img_size[0] // patch_size, + img_size[1] // patch_size) + self.patch_shape = self.window_size + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims)) + + self._build_patch_embedding() + self._build_layers() + + if isinstance(out_indices, int): + if out_indices == -1: + out_indices = num_layers - 1 + self.out_indices = [out_indices] + elif isinstance(out_indices, list) or isinstance(out_indices, tuple): + self.out_indices = out_indices + else: + raise TypeError('out_indices must be type of int, list or tuple') + + self.final_norm = final_norm + if final_norm: + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, embed_dims, postfix=1) + self.add_module(self.norm1_name, norm1) + + def _build_patch_embedding(self): + """Build patch embedding layer.""" + self.patch_embed = PatchEmbed( + in_channels=self.in_channels, + embed_dims=self.embed_dims, + conv_type='Conv2d', + kernel_size=self.patch_size, + stride=self.patch_size, + padding=0, + norm_cfg=self.norm_cfg if self.patch_norm else None, + init_cfg=None) + + def _build_layers(self): + """Build transformer encoding layers.""" + + dpr = [ + x.item() + for x in torch.linspace(0, self.drop_path_rate, self.num_layers) + ] + self.layers = ModuleList() + for i in range(self.num_layers): + self.layers.append( + BEiTTransformerEncoderLayer( + embed_dims=self.embed_dims, + num_heads=self.num_heads, + feedforward_channels=self.mlp_ratio * self.embed_dims, + attn_drop_rate=self.attn_drop_rate, + drop_path_rate=dpr[i], + num_fcs=self.num_fcs, + bias='qv_bias' if self.qv_bias else False, + act_cfg=self.act_cfg, + norm_cfg=self.norm_cfg, + window_size=self.window_size, + init_values=self.init_values)) + + @property + def norm1(self): + return getattr(self, self.norm1_name) + + def _geometric_sequence_interpolation(self, src_size, dst_size, sequence, + num): + """Get new sequence via geometric sequence interpolation. 
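The stochastic-depth schedule built in `_build_layers` above is a linear ramp over depth; a sketch:

    import torch

    drop_path_rate, num_layers = 0.1, 12
    dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)]
    # dpr[0] == 0.0 and dpr[-1] == 0.1: deeper layers drop their residual
    # branches more often during training.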
+ + Args: + src_size (int): Pos_embedding size in pre-trained model. + dst_size (int): Pos_embedding size in the current model. + sequence (tensor): The relative position bias of the pretrain + model after removing the extra tokens. + num (int): Number of attention heads. + Returns: + new_sequence (tensor): Geometric sequence interpolate the + pre-trained relative position bias to the size of + the current model. + """ + + def geometric_progression(a, r, n): + return a * (1.0 - r**n) / (1.0 - r) + + # Here is a binary function. + left, right = 1.01, 1.5 + while right - left > 1e-6: + q = (left + right) / 2.0 + gp = geometric_progression(1, q, src_size // 2) + if gp > dst_size // 2: + right = q + else: + left = q + # The position of each interpolated point is determined + # by the ratio obtained by dichotomy. + dis = [] + cur = 1 + for i in range(src_size // 2): + dis.append(cur) + cur += q**(i + 1) + r_ids = [-_ for _ in reversed(dis)] + x = r_ids + [0] + dis + y = r_ids + [0] + dis + t = dst_size // 2.0 + dx = np.arange(-t, t + 0.1, 1.0) + dy = np.arange(-t, t + 0.1, 1.0) + # Interpolation functions are being executed and called. + new_sequence = [] + for i in range(num): + z = sequence[:, i].view(src_size, src_size).float().numpy() + f = interpolate.interp2d(x, y, z, kind='cubic') + new_sequence.append( + torch.Tensor(f(dx, dy)).contiguous().view(-1, 1).to(sequence)) + new_sequence = torch.cat(new_sequence, dim=-1) + return new_sequence + + def resize_rel_pos_embed(self, checkpoint): + """Resize relative pos_embed weights. + + This function is modified from + https://github.com/microsoft/unilm/blob/master/beit/semantic_segmentation/mmcv_custom/checkpoint.py. # noqa: E501 + Copyright (c) Microsoft Corporation + Licensed under the MIT License + Args: + checkpoint (dict): Key and value of the pretrain model. + Returns: + state_dict (dict): Interpolate the relative pos_embed weights + in the pre-train model to the current model size. + """ + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + all_keys = list(state_dict.keys()) + for key in all_keys: + if 'relative_position_index' in key: + state_dict.pop(key) + # In order to keep the center of pos_bias as consistent as + # possible after interpolation, and vice versa in the edge + # area, the geometric sequence interpolation method is adopted. + if 'relative_position_bias_table' in key: + rel_pos_bias = state_dict[key] + src_num_pos, num_attn_heads = rel_pos_bias.size() + dst_num_pos, _ = self.state_dict()[key].size() + dst_patch_shape = self.patch_shape + if dst_patch_shape[0] != dst_patch_shape[1]: + raise NotImplementedError() + # Count the number of extra tokens. 
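+                # (dst_num_pos is read from the current model's bias table,
+                #  whose size is (2*Wh-1)*(2*Ww-1) + 3, so num_extra_tokens
+                #  works out to the 3 cls-related entries added in
+                #  _init_rel_pos_embedding.)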
+ num_extra_tokens = dst_num_pos - ( + dst_patch_shape[0] * 2 - 1) * ( + dst_patch_shape[1] * 2 - 1) + src_size = int((src_num_pos - num_extra_tokens)**0.5) + dst_size = int((dst_num_pos - num_extra_tokens)**0.5) + if src_size != dst_size: + extra_tokens = rel_pos_bias[-num_extra_tokens:, :] + rel_pos_bias = rel_pos_bias[:-num_extra_tokens, :] + new_rel_pos_bias = self._geometric_sequence_interpolation( + src_size, dst_size, rel_pos_bias, num_attn_heads) + new_rel_pos_bias = torch.cat( + (new_rel_pos_bias, extra_tokens), dim=0) + state_dict[key] = new_rel_pos_bias + + return state_dict + + def init_weights(self): + + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + self.apply(_init_weights) + + if (isinstance(self.init_cfg, dict) + and self.init_cfg.get('type') == 'Pretrained'): + checkpoint = _load_checkpoint( + self.init_cfg['checkpoint'], logger=None, map_location='cpu') + state_dict = self.resize_rel_pos_embed(checkpoint) + self.load_state_dict(state_dict, False) + elif self.init_cfg is not None: + super().init_weights() + else: + # We only implement the 'jax_impl' initialization implemented at + # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 + # Copyright 2019 Ross Wightman + # Licensed under the Apache License, Version 2.0 (the "License") + trunc_normal_(self.cls_token, std=.02) + for n, m in self.named_modules(): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + if 'ffn' in n: + nn.init.normal_(m.bias, mean=0., std=1e-6) + else: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Conv2d): + kaiming_init(m, mode='fan_in', bias=0.) + elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): + constant_init(m, val=1.0, bias=0.) + + def forward(self, inputs): + B = inputs.shape[0] + + x, hw_shape = self.patch_embed(inputs) + + # stole cls_tokens impl from Phil Wang, thanks + cls_tokens = self.cls_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, x), dim=1) + + outs = [] + for i, layer in enumerate(self.layers): + x = layer(x) + if i == len(self.layers) - 1: + if self.final_norm: + x = self.norm1(x) + if i in self.out_indices: + # Remove class token and reshape token for decoder head + out = x[:, 1:] + B, _, C = out.shape + out = out.reshape(B, hw_shape[0], hw_shape[1], + C).permute(0, 3, 1, 2).contiguous() + outs.append(out) + + return tuple(outs) + + def train(self, mode=True): + super().train(mode) + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, nn.LayerNorm): + m.eval() diff --git a/mmseg/models/backbones/bisenetv1.py b/mmseg/models/backbones/bisenetv1.py new file mode 100644 index 0000000000000000000000000000000000000000..ca58bf9c597836937bc384739ff77001b5402942 --- /dev/null +++ b/mmseg/models/backbones/bisenetv1.py @@ -0,0 +1,332 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule + +from mmseg.registry import MODELS +from ..utils import resize + + +class SpatialPath(BaseModule): + """Spatial Path to preserve the spatial size of the original input image + and encode affluent spatial information. + + Args: + in_channels(int): The number of channels of input + image. Default: 3. 
+ num_channels (Tuple[int]): The number of channels of + each layers in Spatial Path. + Default: (64, 64, 64, 128). + Returns: + x (torch.Tensor): Feature map for Feature Fusion Module. + """ + + def __init__(self, + in_channels=3, + num_channels=(64, 64, 64, 128), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + assert len(num_channels) == 4, 'Length of input channels \ + of Spatial Path must be 4!' + + self.layers = [] + for i in range(len(num_channels)): + layer_name = f'layer{i + 1}' + self.layers.append(layer_name) + if i == 0: + self.add_module( + layer_name, + ConvModule( + in_channels=in_channels, + out_channels=num_channels[i], + kernel_size=7, + stride=2, + padding=3, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + elif i == len(num_channels) - 1: + self.add_module( + layer_name, + ConvModule( + in_channels=num_channels[i - 1], + out_channels=num_channels[i], + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + else: + self.add_module( + layer_name, + ConvModule( + in_channels=num_channels[i - 1], + out_channels=num_channels[i], + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, x): + for i, layer_name in enumerate(self.layers): + layer_stage = getattr(self, layer_name) + x = layer_stage(x) + return x + + +class AttentionRefinementModule(BaseModule): + """Attention Refinement Module (ARM) to refine the features of each stage. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + Returns: + x_out (torch.Tensor): Feature map of Attention Refinement Module. + """ + + def __init__(self, + in_channels, + out_channel, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.conv_layer = ConvModule( + in_channels=in_channels, + out_channels=out_channel, + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.atten_conv_layer = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + ConvModule( + in_channels=out_channel, + out_channels=out_channel, + kernel_size=1, + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None), nn.Sigmoid()) + + def forward(self, x): + x = self.conv_layer(x) + x_atten = self.atten_conv_layer(x) + x_out = x * x_atten + return x_out + + +class ContextPath(BaseModule): + """Context Path to provide sufficient receptive field. + + Args: + backbone_cfg:(dict): Config of backbone of + Context Path. + context_channels (Tuple[int]): The number of channel numbers + of various modules in Context Path. + Default: (128, 256, 512). + align_corners (bool, optional): The align_corners argument of + resize operation. Default: False. + Returns: + x_16_up, x_32_up (torch.Tensor, torch.Tensor): Two feature maps + undergoing upsampling from 1/16 and 1/32 downsampling + feature maps. These two feature maps are used for Feature + Fusion Module and Auxiliary Head. + """ + + def __init__(self, + backbone_cfg, + context_channels=(128, 256, 512), + align_corners=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + assert len(context_channels) == 3, 'Length of input channels \ + of Context Path must be 3!' 
+ + self.backbone = MODELS.build(backbone_cfg) + + self.align_corners = align_corners + self.arm16 = AttentionRefinementModule(context_channels[1], + context_channels[0]) + self.arm32 = AttentionRefinementModule(context_channels[2], + context_channels[0]) + self.conv_head32 = ConvModule( + in_channels=context_channels[0], + out_channels=context_channels[0], + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv_head16 = ConvModule( + in_channels=context_channels[0], + out_channels=context_channels[0], + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.gap_conv = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + ConvModule( + in_channels=context_channels[2], + out_channels=context_channels[0], + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, x): + x_4, x_8, x_16, x_32 = self.backbone(x) + x_gap = self.gap_conv(x_32) + + x_32_arm = self.arm32(x_32) + x_32_sum = x_32_arm + x_gap + x_32_up = resize(input=x_32_sum, size=x_16.shape[2:], mode='nearest') + x_32_up = self.conv_head32(x_32_up) + + x_16_arm = self.arm16(x_16) + x_16_sum = x_16_arm + x_32_up + x_16_up = resize(input=x_16_sum, size=x_8.shape[2:], mode='nearest') + x_16_up = self.conv_head16(x_16_up) + + return x_16_up, x_32_up + + +class FeatureFusionModule(BaseModule): + """Feature Fusion Module to fuse low level output feature of Spatial Path + and high level output feature of Context Path. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + Returns: + x_out (torch.Tensor): Feature map of Feature Fusion Module. + """ + + def __init__(self, + in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.conv1 = ConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.gap = nn.AdaptiveAvgPool2d((1, 1)) + self.conv_atten = nn.Sequential( + ConvModule( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), nn.Sigmoid()) + + def forward(self, x_sp, x_cp): + x_concat = torch.cat([x_sp, x_cp], dim=1) + x_fuse = self.conv1(x_concat) + x_atten = self.gap(x_fuse) + # Note: No BN and more 1x1 conv in paper. + x_atten = self.conv_atten(x_atten) + x_atten = x_fuse * x_atten + x_out = x_atten + x_fuse + return x_out + + +@MODELS.register_module() +class BiSeNetV1(BaseModule): + """BiSeNetV1 backbone. + + This backbone is the implementation of `BiSeNet: Bilateral + Segmentation Network for Real-time Semantic + Segmentation `_. + + Args: + backbone_cfg:(dict): Config of backbone of + Context Path. + in_channels (int): The number of channels of input + image. Default: 3. + spatial_channels (Tuple[int]): Size of channel numbers of + various layers in Spatial Path. + Default: (64, 64, 64, 128). + context_channels (Tuple[int]): Size of channel numbers of + various modules in Context Path. + Default: (128, 256, 512). + out_indices (Tuple[int] | int, optional): Output from which stages. + Default: (0, 1, 2). + align_corners (bool, optional): The align_corners argument of + resize operation in Bilateral Guided Aggregation Layer. + Default: False. 
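The channel-gating pattern shared by `AttentionRefinementModule` and `FeatureFusionModule` above, reduced to plain modules (a sketch; the real blocks wrap their convs in ConvModule with BN):

    import torch
    import torch.nn as nn

    channels = 16
    x = torch.randn(1, channels, 32, 32)
    gate = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                         nn.Conv2d(channels, channels, 1), nn.Sigmoid())
    refined = x * gate(x)  # ARM: per-channel reweighting
    fused = refined + x    # FFM additionally adds the fused input back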
+ out_channels(int): The number of channels of output. + It must be the same with `in_channels` of decode_head. + Default: 256. + """ + + def __init__(self, + backbone_cfg, + in_channels=3, + spatial_channels=(64, 64, 64, 128), + context_channels=(128, 256, 512), + out_indices=(0, 1, 2), + align_corners=False, + out_channels=256, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='ReLU'), + init_cfg=None): + + super().__init__(init_cfg=init_cfg) + assert len(spatial_channels) == 4, 'Length of input channels \ + of Spatial Path must be 4!' + + assert len(context_channels) == 3, 'Length of input channels \ + of Context Path must be 3!' + + self.out_indices = out_indices + self.align_corners = align_corners + self.context_path = ContextPath(backbone_cfg, context_channels, + self.align_corners) + self.spatial_path = SpatialPath(in_channels, spatial_channels) + self.ffm = FeatureFusionModule(context_channels[1], out_channels) + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + + def forward(self, x): + # stole refactoring code from Coin Cheung, thanks + x_context8, x_context16 = self.context_path(x) + x_spatial = self.spatial_path(x) + x_fuse = self.ffm(x_spatial, x_context8) + + outs = [x_fuse, x_context8, x_context16] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/mmseg/models/backbones/bisenetv2.py b/mmseg/models/backbones/bisenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..32aa49822f7d0c3bd4839b3796a15689e1f4cbc0 --- /dev/null +++ b/mmseg/models/backbones/bisenetv2.py @@ -0,0 +1,622 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, + build_activation_layer, build_norm_layer) +from mmengine.model import BaseModule + +from mmseg.registry import MODELS +from ..utils import resize + + +class DetailBranch(BaseModule): + """Detail Branch with wide channels and shallow layers to capture low-level + details and generate high-resolution feature representation. + + Args: + detail_channels (Tuple[int]): Size of channel numbers of each stage + in Detail Branch, in paper it has 3 stages. + Default: (64, 64, 128). + in_channels (int): Number of channels of input image. Default: 3. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + x (torch.Tensor): Feature map of Detail Branch. 
+ """ + + def __init__(self, + detail_channels=(64, 64, 128), + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + detail_branch = [] + for i in range(len(detail_channels)): + if i == 0: + detail_branch.append( + nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=detail_channels[i], + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=detail_channels[i], + out_channels=detail_channels[i], + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg))) + else: + detail_branch.append( + nn.Sequential( + ConvModule( + in_channels=detail_channels[i - 1], + out_channels=detail_channels[i], + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=detail_channels[i], + out_channels=detail_channels[i], + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=detail_channels[i], + out_channels=detail_channels[i], + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg))) + self.detail_branch = nn.ModuleList(detail_branch) + + def forward(self, x): + for stage in self.detail_branch: + x = stage(x) + return x + + +class StemBlock(BaseModule): + """Stem Block at the beginning of Semantic Branch. + + Args: + in_channels (int): Number of input channels. + Default: 3. + out_channels (int): Number of output channels. + Default: 16. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + x (torch.Tensor): First feature map in Semantic Branch. + """ + + def __init__(self, + in_channels=3, + out_channels=16, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + self.conv_first = ConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.convs = nn.Sequential( + ConvModule( + in_channels=out_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=out_channels // 2, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.pool = nn.MaxPool2d( + kernel_size=3, stride=2, padding=1, ceil_mode=False) + self.fuse_last = ConvModule( + in_channels=out_channels * 2, + out_channels=out_channels, + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, x): + x = self.conv_first(x) + x_left = self.convs(x) + x_right = self.pool(x) + x = self.fuse_last(torch.cat([x_left, x_right], dim=1)) + return x + + +class GELayer(BaseModule): + """Gather-and-Expansion Layer. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + exp_ratio (int): Expansion ratio for middle channels. + Default: 6. + stride (int): Stride of GELayer. 
Default: 1 + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + x (torch.Tensor): Intermediate feature map in + Semantic Branch. + """ + + def __init__(self, + in_channels, + out_channels, + exp_ratio=6, + stride=1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + mid_channel = in_channels * exp_ratio + self.conv1 = ConvModule( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if stride == 1: + self.dwconv = nn.Sequential( + # ReLU in ConvModule not shown in paper + ConvModule( + in_channels=in_channels, + out_channels=mid_channel, + kernel_size=3, + stride=stride, + padding=1, + groups=in_channels, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.shortcut = None + else: + self.dwconv = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=mid_channel, + kernel_size=3, + stride=stride, + padding=1, + groups=in_channels, + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None), + # ReLU in ConvModule not shown in paper + ConvModule( + in_channels=mid_channel, + out_channels=mid_channel, + kernel_size=3, + stride=1, + padding=1, + groups=mid_channel, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ) + self.shortcut = nn.Sequential( + DepthwiseSeparableConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + padding=1, + dw_norm_cfg=norm_cfg, + dw_act_cfg=None, + pw_norm_cfg=norm_cfg, + pw_act_cfg=None, + )) + + self.conv2 = nn.Sequential( + ConvModule( + in_channels=mid_channel, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None, + )) + + self.act = build_activation_layer(act_cfg) + + def forward(self, x): + identity = x + x = self.conv1(x) + x = self.dwconv(x) + x = self.conv2(x) + if self.shortcut is not None: + shortcut = self.shortcut(identity) + x = x + shortcut + else: + x = x + identity + x = self.act(x) + return x + + +class CEBlock(BaseModule): + """Context Embedding Block for large receptive filed in Semantic Branch. + + Args: + in_channels (int): Number of input channels. + Default: 3. + out_channels (int): Number of output channels. + Default: 16. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + x (torch.Tensor): Last feature map in Semantic Branch. 
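A sketch of the stride-1 gather-and-expansion path of `GELayer` above, with norm and activation layers omitted (channel sizes invented):

    import torch
    import torch.nn as nn

    in_channels, exp_ratio = 32, 6
    mid = in_channels * exp_ratio
    core = nn.Sequential(
        nn.Conv2d(in_channels, in_channels, 3, padding=1),              # gather
        nn.Conv2d(in_channels, mid, 3, padding=1, groups=in_channels),  # depthwise expand
        nn.Conv2d(mid, in_channels, 1),                                 # project back
    )
    x = torch.randn(1, in_channels, 64, 64)
    out = x + core(x)  # residual connection, as in the stride-1 branch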
+ """ + + def __init__(self, + in_channels=3, + out_channels=16, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.in_channels = in_channels + self.out_channels = out_channels + self.gap = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + build_norm_layer(norm_cfg, self.in_channels)[1]) + self.conv_gap = ConvModule( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + # Note: in paper here is naive conv2d, no bn-relu + self.conv_last = ConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, x): + identity = x + x = self.gap(x) + x = self.conv_gap(x) + x = identity + x + x = self.conv_last(x) + return x + + +class SemanticBranch(BaseModule): + """Semantic Branch which is lightweight with narrow channels and deep + layers to obtain high-level semantic context. + + Args: + semantic_channels(Tuple[int]): Size of channel numbers of + various stages in Semantic Branch. + Default: (16, 32, 64, 128). + in_channels (int): Number of channels of input image. Default: 3. + exp_ratio (int): Expansion ratio for middle channels. + Default: 6. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + semantic_outs (List[torch.Tensor]): List of several feature maps + for auxiliary heads (Booster) and Bilateral + Guided Aggregation Layer. + """ + + def __init__(self, + semantic_channels=(16, 32, 64, 128), + in_channels=3, + exp_ratio=6, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.in_channels = in_channels + self.semantic_channels = semantic_channels + self.semantic_stages = [] + for i in range(len(semantic_channels)): + stage_name = f'stage{i + 1}' + self.semantic_stages.append(stage_name) + if i == 0: + self.add_module( + stage_name, + StemBlock(self.in_channels, semantic_channels[i])) + elif i == (len(semantic_channels) - 1): + self.add_module( + stage_name, + nn.Sequential( + GELayer(semantic_channels[i - 1], semantic_channels[i], + exp_ratio, 2), + GELayer(semantic_channels[i], semantic_channels[i], + exp_ratio, 1), + GELayer(semantic_channels[i], semantic_channels[i], + exp_ratio, 1), + GELayer(semantic_channels[i], semantic_channels[i], + exp_ratio, 1))) + else: + self.add_module( + stage_name, + nn.Sequential( + GELayer(semantic_channels[i - 1], semantic_channels[i], + exp_ratio, 2), + GELayer(semantic_channels[i], semantic_channels[i], + exp_ratio, 1))) + + self.add_module(f'stage{len(semantic_channels)}_CEBlock', + CEBlock(semantic_channels[-1], semantic_channels[-1])) + self.semantic_stages.append(f'stage{len(semantic_channels)}_CEBlock') + + def forward(self, x): + semantic_outs = [] + for stage_name in self.semantic_stages: + semantic_stage = getattr(self, stage_name) + x = semantic_stage(x) + semantic_outs.append(x) + return semantic_outs + + +class BGALayer(BaseModule): + """Bilateral Guided Aggregation Layer to fuse the complementary information + from both Detail Branch and Semantic Branch. + + Args: + out_channels (int): Number of output channels. + Default: 128. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). 
+ act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + output (torch.Tensor): Output feature map for Segment heads. + """ + + def __init__(self, + out_channels=128, + align_corners=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.out_channels = out_channels + self.align_corners = align_corners + self.detail_dwconv = nn.Sequential( + DepthwiseSeparableConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + dw_norm_cfg=norm_cfg, + dw_act_cfg=None, + pw_norm_cfg=None, + pw_act_cfg=None, + )) + self.detail_down = nn.Sequential( + ConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=2, + padding=1, + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None), + nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)) + self.semantic_conv = nn.Sequential( + ConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None)) + self.semantic_dwconv = nn.Sequential( + DepthwiseSeparableConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + dw_norm_cfg=norm_cfg, + dw_act_cfg=None, + pw_norm_cfg=None, + pw_act_cfg=None, + )) + self.conv = ConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + inplace=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + + def forward(self, x_d, x_s): + detail_dwconv = self.detail_dwconv(x_d) + detail_down = self.detail_down(x_d) + semantic_conv = self.semantic_conv(x_s) + semantic_dwconv = self.semantic_dwconv(x_s) + semantic_conv = resize( + input=semantic_conv, + size=detail_dwconv.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + fuse_1 = detail_dwconv * torch.sigmoid(semantic_conv) + fuse_2 = detail_down * torch.sigmoid(semantic_dwconv) + fuse_2 = resize( + input=fuse_2, + size=fuse_1.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + output = self.conv(fuse_1 + fuse_2) + return output + + +@MODELS.register_module() +class BiSeNetV2(BaseModule): + """BiSeNetV2: Bilateral Network with Guided Aggregation for + Real-time Semantic Segmentation. + + This backbone is the implementation of + `BiSeNetV2 `_. + + Args: + in_channels (int): Number of channel of input image. Default: 3. + detail_channels (Tuple[int], optional): Channels of each stage + in Detail Branch. Default: (64, 64, 128). + semantic_channels (Tuple[int], optional): Channels of each stage + in Semantic Branch. Default: (16, 32, 64, 128). + See Table 1 and Figure 3 of paper for more details. + semantic_expansion_ratio (int, optional): The expansion factor + expanding channel number of middle channels in Semantic Branch. + Default: 6. + bga_channels (int, optional): Number of middle channels in + Bilateral Guided Aggregation Layer. Default: 128. + out_indices (Tuple[int] | int, optional): Output from which stages. + Default: (0, 1, 2, 3, 4). + align_corners (bool, optional): The align_corners argument of + resize operation in Bilateral Guided Aggregation Layer. + Default: False. + conv_cfg (dict | None): Config of conv layers. + Default: None. 
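The bilateral gating in `BGALayer.forward` above, stripped to its arithmetic (shapes invented; the real layer computes the gates and the downsampling with convs):

    import torch
    import torch.nn.functional as F

    x_d = torch.randn(1, 128, 64, 64)  # detail branch, high resolution
    x_s = torch.randn(1, 128, 16, 16)  # semantic branch, low resolution
    up = F.interpolate(x_s, size=x_d.shape[2:], mode='bilinear',
                       align_corners=False)
    fuse_1 = x_d * torch.sigmoid(up)                    # detail gated by semantics
    fuse_2 = F.avg_pool2d(x_d, 4) * torch.sigmoid(x_s)  # downsampled detail, gated
    out = fuse_1 + F.interpolate(fuse_2, size=fuse_1.shape[2:],
                                 mode='bilinear', align_corners=False)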
+ norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels=3, + detail_channels=(64, 64, 128), + semantic_channels=(16, 32, 64, 128), + semantic_expansion_ratio=6, + bga_channels=128, + out_indices=(0, 1, 2, 3, 4), + align_corners=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + if init_cfg is None: + init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm']) + ] + super().__init__(init_cfg=init_cfg) + self.in_channels = in_channels + self.out_indices = out_indices + self.detail_channels = detail_channels + self.semantic_channels = semantic_channels + self.semantic_expansion_ratio = semantic_expansion_ratio + self.bga_channels = bga_channels + self.align_corners = align_corners + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + + self.detail = DetailBranch(self.detail_channels, self.in_channels) + self.semantic = SemanticBranch(self.semantic_channels, + self.in_channels, + self.semantic_expansion_ratio) + self.bga = BGALayer(self.bga_channels, self.align_corners) + + def forward(self, x): + # stole refactoring code from Coin Cheung, thanks + x_detail = self.detail(x) + x_semantic_lst = self.semantic(x) + x_head = self.bga(x_detail, x_semantic_lst[-1]) + outs = [x_head] + x_semantic_lst[:-1] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/mmseg/models/backbones/cgnet.py b/mmseg/models/backbones/cgnet.py new file mode 100644 index 0000000000000000000000000000000000000000..b74b494f53466d1c608e50d088632aa952a5e534 --- /dev/null +++ b/mmseg/models/backbones/cgnet.py @@ -0,0 +1,372 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import ConvModule, build_conv_layer, build_norm_layer +from mmengine.model import BaseModule +from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm + +from mmseg.registry import MODELS + + +class GlobalContextExtractor(nn.Module): + """Global Context Extractor for CGNet. + + This class is employed to refine the joint feature of both local feature + and surrounding context. + + Args: + channel (int): Number of input feature channels. + reduction (int): Reductions for global context extractor. Default: 16. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, channel, reduction=16, with_cp=False): + super().__init__() + self.channel = channel + self.reduction = reduction + assert reduction >= 1 and channel >= reduction + self.with_cp = with_cp + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), nn.Sigmoid()) + + def forward(self, x): + + def _inner_forward(x): + num_batch, num_channel = x.size()[:2] + y = self.avg_pool(x).view(num_batch, num_channel) + y = self.fc(y).view(num_batch, num_channel, 1, 1) + return x * y + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class ContextGuidedBlock(nn.Module): + """Context Guided Block for CGNet. 
+ + This class consists of four components: local feature extractor, + surrounding feature extractor, joint feature extractor and global + context extractor. + + Args: + in_channels (int): Number of input feature channels. + out_channels (int): Number of output feature channels. + dilation (int): Dilation rate for surrounding context extractor. + Default: 2. + reduction (int): Reduction for global context extractor. Default: 16. + skip_connect (bool): Add input to output or not. Default: True. + downsample (bool): Downsample the input to 1/2 or not. Default: False. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + in_channels, + out_channels, + dilation=2, + reduction=16, + skip_connect=True, + downsample=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + with_cp=False): + super().__init__() + self.with_cp = with_cp + self.downsample = downsample + + channels = out_channels if downsample else out_channels // 2 + if 'type' in act_cfg and act_cfg['type'] == 'PReLU': + act_cfg['num_parameters'] = channels + kernel_size = 3 if downsample else 1 + stride = 2 if downsample else 1 + padding = (kernel_size - 1) // 2 + + self.conv1x1 = ConvModule( + in_channels, + channels, + kernel_size, + stride, + padding, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + self.f_loc = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=1, + groups=channels, + bias=False) + self.f_sur = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=dilation, + groups=channels, + dilation=dilation, + bias=False) + + self.bn = build_norm_layer(norm_cfg, 2 * channels)[1] + self.activate = nn.PReLU(2 * channels) + + if downsample: + self.bottleneck = build_conv_layer( + conv_cfg, + 2 * channels, + out_channels, + kernel_size=1, + bias=False) + + self.skip_connect = skip_connect and not downsample + self.f_glo = GlobalContextExtractor(out_channels, reduction, with_cp) + + def forward(self, x): + + def _inner_forward(x): + out = self.conv1x1(x) + loc = self.f_loc(out) + sur = self.f_sur(out) + + joi_feat = torch.cat([loc, sur], 1) # the joint feature + joi_feat = self.bn(joi_feat) + joi_feat = self.activate(joi_feat) + if self.downsample: + joi_feat = self.bottleneck(joi_feat) # channel = out_channels + # f_glo is employed to refine the joint feature + out = self.f_glo(joi_feat) + + if self.skip_connect: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InputInjection(nn.Module): + """Downsampling module for CGNet.""" + + def __init__(self, num_downsampling): + super().__init__() + self.pool = nn.ModuleList() + for i in range(num_downsampling): + self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) + + def forward(self, x): + for pool in self.pool: + x = pool(x) + return x + + +@MODELS.register_module() +class CGNet(BaseModule): + """CGNet backbone. + + This backbone is the implementation of `A Light-weight Context Guided + Network for Semantic Segmentation `_. 
+ + Args: + in_channels (int): Number of input image channels. Normally 3. + num_channels (tuple[int]): Numbers of feature channels at each stages. + Default: (32, 64, 128). + num_blocks (tuple[int]): Numbers of CG blocks at stage 1 and stage 2. + Default: (3, 21). + dilations (tuple[int]): Dilation rate for surrounding context + extractors at stage 1 and stage 2. Default: (2, 4). + reductions (tuple[int]): Reductions for global context extractors at + stage 1 and stage 2. Default: (8, 16). + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + pretrained (str, optional): model pretrained path. Default: None + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None + """ + + def __init__(self, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16), + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + norm_eval=False, + with_cp=False, + pretrained=None, + init_cfg=None): + + super().__init__(init_cfg) + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer=['Conv2d', 'Linear']), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']), + dict(type='Constant', val=0, layer='PReLU') + ] + else: + raise TypeError('pretrained must be a str or None') + + self.in_channels = in_channels + self.num_channels = num_channels + assert isinstance(self.num_channels, tuple) and len( + self.num_channels) == 3 + self.num_blocks = num_blocks + assert isinstance(self.num_blocks, tuple) and len(self.num_blocks) == 2 + self.dilations = dilations + assert isinstance(self.dilations, tuple) and len(self.dilations) == 2 + self.reductions = reductions + assert isinstance(self.reductions, tuple) and len(self.reductions) == 2 + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + if 'type' in self.act_cfg and self.act_cfg['type'] == 'PReLU': + self.act_cfg['num_parameters'] = num_channels[0] + self.norm_eval = norm_eval + self.with_cp = with_cp + + cur_channels = in_channels + self.stem = nn.ModuleList() + for i in range(3): + self.stem.append( + ConvModule( + cur_channels, + num_channels[0], + 3, + 2 if i == 0 else 1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + cur_channels = num_channels[0] + + self.inject_2x = InputInjection(1) # down-sample for Input, factor=2 + self.inject_4x = InputInjection(2) # down-sample for Input, factor=4 + + cur_channels += in_channels + self.norm_prelu_0 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + # stage 1 + self.level1 = nn.ModuleList() + for i in 
range(num_blocks[0]):
+            self.level1.append(
+                ContextGuidedBlock(
+                    cur_channels if i == 0 else num_channels[1],
+                    num_channels[1],
+                    dilations[0],
+                    reductions[0],
+                    downsample=(i == 0),
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg,
+                    with_cp=with_cp))  # CG block
+
+        cur_channels = 2 * num_channels[1] + in_channels
+        self.norm_prelu_1 = nn.Sequential(
+            build_norm_layer(norm_cfg, cur_channels)[1],
+            nn.PReLU(cur_channels))
+
+        # stage 2
+        self.level2 = nn.ModuleList()
+        for i in range(num_blocks[1]):
+            self.level2.append(
+                ContextGuidedBlock(
+                    cur_channels if i == 0 else num_channels[2],
+                    num_channels[2],
+                    dilations[1],
+                    reductions[1],
+                    downsample=(i == 0),
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg,
+                    with_cp=with_cp))  # CG block
+
+        cur_channels = 2 * num_channels[2]
+        self.norm_prelu_2 = nn.Sequential(
+            build_norm_layer(norm_cfg, cur_channels)[1],
+            nn.PReLU(cur_channels))
+
+    def forward(self, x):
+        output = []
+
+        # stage 0
+        inp_2x = self.inject_2x(x)
+        inp_4x = self.inject_4x(x)
+        for layer in self.stem:
+            x = layer(x)
+        x = self.norm_prelu_0(torch.cat([x, inp_2x], 1))
+        output.append(x)
+
+        # stage 1
+        for i, layer in enumerate(self.level1):
+            x = layer(x)
+            if i == 0:
+                down1 = x
+        x = self.norm_prelu_1(torch.cat([x, down1, inp_4x], 1))
+        output.append(x)
+
+        # stage 2
+        for i, layer in enumerate(self.level2):
+            x = layer(x)
+            if i == 0:
+                down2 = x
+        x = self.norm_prelu_2(torch.cat([down2, x], 1))
+        output.append(x)
+
+        return output
+
+    def train(self, mode=True):
+        """Convert the model into training mode while keeping the
+        normalization layers frozen."""
+        super().train(mode)
+        if mode and self.norm_eval:
+            for m in self.modules():
+                # trick: eval() has an effect on BatchNorm only
+                if isinstance(m, _BatchNorm):
+                    m.eval()
diff --git a/mmseg/models/backbones/ddrnet.py b/mmseg/models/backbones/ddrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..4508aade82b484abfcca593825649031db7cbdd0
--- /dev/null
+++ b/mmseg/models/backbones/ddrnet.py
@@ -0,0 +1,222 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+from mmcv.cnn import ConvModule, build_norm_layer
+from mmengine.model import BaseModule
+
+from mmseg.models.utils import DAPPM, BasicBlock, Bottleneck, resize
+from mmseg.registry import MODELS
+from mmseg.utils import OptConfigType
+
+
+@MODELS.register_module()
+class DDRNet(BaseModule):
+    """DDRNet backbone.
+
+    This backbone is the implementation of `Deep Dual-resolution Networks for
+    Real-time and Accurate Semantic Segmentation of Road Scenes
+    <http://arxiv.org/abs/2101.06085>`_.
+    Modified from https://github.com/ydhongHIT/DDRNet.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        channels (int): The base channels of DDRNet. Default: 32.
+        ppm_channels (int): The channels of the PPM module. Default: 128.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        norm_cfg (dict): Config dict to build norm layer.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict, optional): Initialization config dict.
+            Default: None.
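+
+    Example:
+        >>> # A minimal usage sketch, mirroring the HRNet docstring example
+        >>> # later in this diff. It assumes DDRNet is exported from
+        >>> # mmseg.models like the other backbones here. With the default
+        >>> # channels=32, eval mode returns a single fused map at 1/8
+        >>> # resolution with channels * 4 = 128 channels; train mode
+        >>> # returns a (context, fused) tuple instead.
+        >>> import torch
+        >>> from mmseg.models import DDRNet
+        >>> self = DDRNet(in_channels=3, channels=32).eval()
+        >>> inputs = torch.rand(1, 3, 64, 64)
+        >>> output = self.forward(inputs)
+        >>> print(tuple(output.shape))
+        (1, 128, 8, 8)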
+ """ + + def __init__(self, + in_channels: int = 3, + channels: int = 32, + ppm_channels: int = 128, + align_corners: bool = False, + norm_cfg: OptConfigType = dict(type='BN', requires_grad=True), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + + self.in_channels = in_channels + self.ppm_channels = ppm_channels + + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + + # stage 0-2 + self.stem = self._make_stem_layer(in_channels, channels, num_blocks=2) + self.relu = nn.ReLU() + + # low resolution(context) branch + self.context_branch_layers = nn.ModuleList() + for i in range(3): + self.context_branch_layers.append( + self._make_layer( + block=BasicBlock if i < 2 else Bottleneck, + inplanes=channels * 2**(i + 1), + planes=channels * 8 if i > 0 else channels * 4, + num_blocks=2 if i < 2 else 1, + stride=2)) + + # bilateral fusion + self.compression_1 = ConvModule( + channels * 4, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.down_1 = ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + + self.compression_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=1, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.down_2 = nn.Sequential( + ConvModule( + channels * 2, + channels * 4, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels * 4, + channels * 8, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=None)) + + # high resolution(spatial) branch + self.spatial_branch_layers = nn.ModuleList() + for i in range(3): + self.spatial_branch_layers.append( + self._make_layer( + block=BasicBlock if i < 2 else Bottleneck, + inplanes=channels * 2, + planes=channels * 2, + num_blocks=2 if i < 2 else 1, + )) + + self.spp = DAPPM( + channels * 16, ppm_channels, channels * 4, num_scales=5) + + def _make_stem_layer(self, in_channels, channels, num_blocks): + layers = [ + ConvModule( + in_channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + ] + + layers.extend([ + self._make_layer(BasicBlock, channels, channels, num_blocks), + nn.ReLU(), + self._make_layer( + BasicBlock, channels, channels * 2, num_blocks, stride=2), + nn.ReLU(), + ]) + + return nn.Sequential(*layers) + + def _make_layer(self, block, inplanes, planes, num_blocks, stride=1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [ + block( + in_channels=inplanes, + channels=planes, + stride=stride, + downsample=downsample) + ] + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + in_channels=inplanes, + channels=planes, + stride=1, + norm_cfg=self.norm_cfg, + act_cfg_out=None if i == num_blocks - 1 else self.act_cfg)) + + return nn.Sequential(*layers) + + def forward(self, x): + """Forward function.""" + out_size = (x.shape[-2] // 8, x.shape[-1] // 8) + + # stage 0-2 + x = self.stem(x) + + # stage3 + x_c = self.context_branch_layers[0](x) + x_s = 
self.spatial_branch_layers[0](x) + comp_c = self.compression_1(self.relu(x_c)) + x_c += self.down_1(self.relu(x_s)) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + if self.training: + temp_context = x_s.clone() + + # stage4 + x_c = self.context_branch_layers[1](self.relu(x_c)) + x_s = self.spatial_branch_layers[1](self.relu(x_s)) + comp_c = self.compression_2(self.relu(x_c)) + x_c += self.down_2(self.relu(x_s)) + x_s += resize( + comp_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + + # stage5 + x_s = self.spatial_branch_layers[2](self.relu(x_s)) + x_c = self.context_branch_layers[2](self.relu(x_c)) + x_c = self.spp(x_c) + x_c = resize( + x_c, + size=out_size, + mode='bilinear', + align_corners=self.align_corners) + + return (temp_context, x_s + x_c) if self.training else x_s + x_c diff --git a/mmseg/models/backbones/erfnet.py b/mmseg/models/backbones/erfnet.py new file mode 100644 index 0000000000000000000000000000000000000000..2c5ec672a086b5d67568514140023ce402eef92f --- /dev/null +++ b/mmseg/models/backbones/erfnet.py @@ -0,0 +1,329 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import build_activation_layer, build_conv_layer, build_norm_layer +from mmengine.model import BaseModule + +from mmseg.registry import MODELS +from ..utils import resize + + +class DownsamplerBlock(BaseModule): + """Downsampler block of ERFNet. + + This module is a little different from basical ConvModule. + The features from Conv and MaxPool layers are + concatenated before BatchNorm. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN', eps=1e-3), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + + self.conv = build_conv_layer( + self.conv_cfg, + in_channels, + out_channels - in_channels, + kernel_size=3, + stride=2, + padding=1) + self.pool = nn.MaxPool2d(kernel_size=2, stride=2) + self.bn = build_norm_layer(self.norm_cfg, out_channels)[1] + self.act = build_activation_layer(self.act_cfg) + + def forward(self, input): + conv_out = self.conv(input) + pool_out = self.pool(input) + pool_out = resize( + input=pool_out, + size=conv_out.size()[2:], + mode='bilinear', + align_corners=False) + output = torch.cat([conv_out, pool_out], 1) + output = self.bn(output) + output = self.act(output) + return output + + +class NonBottleneck1d(BaseModule): + """Non-bottleneck block of ERFNet. + + Args: + channels (int): Number of channels in Non-bottleneck block. + drop_rate (float): Probability of an element to be zeroed. + Default 0. + dilation (int): Dilation rate for last two conv layers. + Default 1. + num_conv_layer (int): Number of 3x1 and 1x3 convolution layers. + Default 2. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). 
+ init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + channels, + drop_rate=0, + dilation=1, + num_conv_layer=2, + conv_cfg=None, + norm_cfg=dict(type='BN', eps=1e-3), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.act = build_activation_layer(self.act_cfg) + + self.convs_layers = nn.ModuleList() + for conv_layer in range(num_conv_layer): + first_conv_padding = (1, 0) if conv_layer == 0 else (dilation, 0) + first_conv_dilation = 1 if conv_layer == 0 else (dilation, 1) + second_conv_padding = (0, 1) if conv_layer == 0 else (0, dilation) + second_conv_dilation = 1 if conv_layer == 0 else (1, dilation) + + self.convs_layers.append( + build_conv_layer( + self.conv_cfg, + channels, + channels, + kernel_size=(3, 1), + stride=1, + padding=first_conv_padding, + bias=True, + dilation=first_conv_dilation)) + self.convs_layers.append(self.act) + self.convs_layers.append( + build_conv_layer( + self.conv_cfg, + channels, + channels, + kernel_size=(1, 3), + stride=1, + padding=second_conv_padding, + bias=True, + dilation=second_conv_dilation)) + self.convs_layers.append( + build_norm_layer(self.norm_cfg, channels)[1]) + if conv_layer == 0: + self.convs_layers.append(self.act) + else: + self.convs_layers.append(nn.Dropout(p=drop_rate)) + + def forward(self, input): + output = input + for conv in self.convs_layers: + output = conv(output) + output = self.act(output + input) + return output + + +class UpsamplerBlock(BaseModule): + """Upsampler block of ERFNet. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN', eps=1e-3), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + + self.conv = nn.ConvTranspose2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + bias=True) + self.bn = build_norm_layer(self.norm_cfg, out_channels)[1] + self.act = build_activation_layer(self.act_cfg) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + output = self.act(output) + return output + + +@MODELS.register_module() +class ERFNet(BaseModule): + """ERFNet backbone. + + This backbone is the implementation of `ERFNet: Efficient Residual + Factorized ConvNet for Real-time SemanticSegmentation + `_. + + Args: + in_channels (int): The number of channels of input + image. Default: 3. + enc_downsample_channels (Tuple[int]): Size of channel + numbers of various Downsampler block in encoder. + Default: (16, 64, 128). + enc_stage_non_bottlenecks (Tuple[int]): Number of stages of + Non-bottleneck block in encoder. + Default: (5, 8). + enc_non_bottleneck_dilations (Tuple[int]): Dilation rate of each + stage of Non-bottleneck block of encoder. + Default: (2, 4, 8, 16). 
+        enc_non_bottleneck_channels (Tuple[int]): Size of channel
+            numbers of various Non-bottleneck blocks in encoder.
+            Default: (64, 128).
+        dec_upsample_channels (Tuple[int]): Size of channel numbers of
+            various Deconvolution blocks in decoder.
+            Default: (64, 16).
+        dec_stages_non_bottleneck (Tuple[int]): Number of stages of
+            Non-bottleneck block in decoder.
+            Default: (2, 2).
+        dec_non_bottleneck_channels (Tuple[int]): Size of channel
+            numbers of various Non-bottleneck blocks in decoder.
+            Default: (64, 16).
+        dropout_ratio (float): Probability of an element to be zeroed.
+            Default 0.1.
+        conv_cfg (dict | None): Config of conv layers.
+            Default: None.
+        norm_cfg (dict | None): Config of norm layers.
+            Default: dict(type='BN', requires_grad=True).
+        act_cfg (dict): Config of activation layers.
+            Default: dict(type='ReLU').
+        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels=3,
+                 enc_downsample_channels=(16, 64, 128),
+                 enc_stage_non_bottlenecks=(5, 8),
+                 enc_non_bottleneck_dilations=(2, 4, 8, 16),
+                 enc_non_bottleneck_channels=(64, 128),
+                 dec_upsample_channels=(64, 16),
+                 dec_stages_non_bottleneck=(2, 2),
+                 dec_non_bottleneck_channels=(64, 16),
+                 dropout_ratio=0.1,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN', requires_grad=True),
+                 act_cfg=dict(type='ReLU'),
+                 init_cfg=None):
+
+        super().__init__(init_cfg=init_cfg)
+        assert len(enc_downsample_channels) \
+            == len(dec_upsample_channels) + 1, (
+                'Number of downsample blocks of encoder does not '
+                'match number of upsample blocks of decoder!')
+        assert len(enc_downsample_channels) \
+            == len(enc_stage_non_bottlenecks) + 1, (
+                'Number of downsample blocks of encoder does not match '
+                'number of Non-bottleneck stages of encoder!')
+        assert len(enc_downsample_channels) \
+            == len(enc_non_bottleneck_channels) + 1, (
+                'Number of downsample blocks of encoder does not match '
+                'number of channels of Non-bottleneck blocks of encoder!')
+        assert enc_stage_non_bottlenecks[-1] \
+            % len(enc_non_bottleneck_dilations) == 0, (
+                'Number of Non-bottleneck blocks in the last encoder stage '
+                'must be divisible by the number of dilation rates!')
+        assert len(dec_upsample_channels) \
+            == len(dec_stages_non_bottleneck), (
+                'Number of upsample blocks of decoder does not match '
+                'number of Non-bottleneck stages of decoder!')
+        assert len(dec_stages_non_bottleneck) \
+            == len(dec_non_bottleneck_channels), (
+                'Number of Non-bottleneck stages of decoder does not match '
+                'number of channels of Non-bottleneck blocks of decoder!')
+
+        self.in_channels = in_channels
+        self.enc_downsample_channels = enc_downsample_channels
+        self.enc_stage_non_bottlenecks = enc_stage_non_bottlenecks
+        self.enc_non_bottleneck_dilations = enc_non_bottleneck_dilations
+        self.enc_non_bottleneck_channels = enc_non_bottleneck_channels
+        self.dec_upsample_channels = dec_upsample_channels
+        self.dec_stages_non_bottleneck = dec_stages_non_bottleneck
+        self.dec_non_bottleneck_channels = dec_non_bottleneck_channels
+        self.dropout_ratio = dropout_ratio
+
+        self.encoder = nn.ModuleList()
+        self.decoder = nn.ModuleList()
+
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+
+        self.encoder.append(
+            DownsamplerBlock(self.in_channels, enc_downsample_channels[0]))
+
+        for i in range(len(enc_downsample_channels) - 1):
+            self.encoder.append(
+                DownsamplerBlock(enc_downsample_channels[i],
+                                 enc_downsample_channels[i + 1]))
+            # The last part of the encoder is a stack of dilated
+            # NonBottleneck1d blocks.
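+            # The dilation schedule enc_non_bottleneck_dilations is cycled
+            # until enc_stage_non_bottlenecks[-1] blocks have been stacked,
+            # which is why that count must be divisible by the number of
+            # dilation rates (enforced by the assert above).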
+ if i == len(enc_downsample_channels) - 2: + iteration_times = int(enc_stage_non_bottlenecks[-1] / + len(enc_non_bottleneck_dilations)) + for j in range(iteration_times): + for k in range(len(enc_non_bottleneck_dilations)): + self.encoder.append( + NonBottleneck1d(enc_downsample_channels[-1], + self.dropout_ratio, + enc_non_bottleneck_dilations[k])) + else: + for j in range(enc_stage_non_bottlenecks[i]): + self.encoder.append( + NonBottleneck1d(enc_downsample_channels[i + 1], + self.dropout_ratio)) + + for i in range(len(dec_upsample_channels)): + if i == 0: + self.decoder.append( + UpsamplerBlock(enc_downsample_channels[-1], + dec_non_bottleneck_channels[i])) + else: + self.decoder.append( + UpsamplerBlock(dec_non_bottleneck_channels[i - 1], + dec_non_bottleneck_channels[i])) + for j in range(dec_stages_non_bottleneck[i]): + self.decoder.append( + NonBottleneck1d(dec_non_bottleneck_channels[i])) + + def forward(self, x): + for enc in self.encoder: + x = enc(x) + for dec in self.decoder: + x = dec(x) + return [x] diff --git a/mmseg/models/backbones/fast_scnn.py b/mmseg/models/backbones/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..6ff7a3191d2fee904c5200e0a526214a65f58b32 --- /dev/null +++ b/mmseg/models/backbones/fast_scnn.py @@ -0,0 +1,408 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule +from mmengine.model import BaseModule + +from mmseg.models.decode_heads.psp_head import PPM +from mmseg.registry import MODELS +from ..utils import InvertedResidual, resize + + +class LearningToDownsample(nn.Module): + """Learning to downsample module. + + Args: + in_channels (int): Number of input channels. + dw_channels (tuple[int]): Number of output channels of the first and + the second depthwise conv (dwconv) layers. + out_channels (int): Number of output channels of the whole + 'learning to downsample' module. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + dw_act_cfg (dict): In DepthwiseSeparableConvModule, activation config + of depthwise ConvModule. If it is 'default', it will be the same + as `act_cfg`. Default: None. + """ + + def __init__(self, + in_channels, + dw_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + dw_act_cfg=None): + super().__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.dw_act_cfg = dw_act_cfg + dw_channels1 = dw_channels[0] + dw_channels2 = dw_channels[1] + + self.conv = ConvModule( + in_channels, + dw_channels1, + 3, + stride=2, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.dsconv1 = DepthwiseSeparableConvModule( + dw_channels1, + dw_channels2, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + dw_act_cfg=self.dw_act_cfg) + + self.dsconv2 = DepthwiseSeparableConvModule( + dw_channels2, + out_channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + dw_act_cfg=self.dw_act_cfg) + + def forward(self, x): + x = self.conv(x) + x = self.dsconv1(x) + x = self.dsconv2(x) + return x + + +class GlobalFeatureExtractor(nn.Module): + """Global feature extractor module. + + Args: + in_channels (int): Number of input channels of the GFE module. 
+ Default: 64 + block_channels (tuple[int]): Tuple of ints. Each int specifies the + number of output channels of each Inverted Residual module. + Default: (64, 96, 128) + out_channels(int): Number of output channels of the GFE module. + Default: 128 + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + Default: 6 + num_blocks (tuple[int]): Tuple of ints. Each int specifies the + number of times each Inverted Residual module is repeated. + The repeated Inverted Residual modules are called a 'group'. + Default: (3, 3, 3) + strides (tuple[int]): Tuple of ints. Each int specifies + the downsampling factor of each 'group'. + Default: (2, 2, 1) + pool_scales (tuple[int]): Tuple of ints. Each int specifies + the parameter required in 'global average pooling' within PPM. + Default: (1, 2, 3, 6) + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__(self, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + expand_ratio=6, + num_blocks=(3, 3, 3), + strides=(2, 2, 1), + pool_scales=(1, 2, 3, 6), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + super().__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + assert len(block_channels) == len(num_blocks) == 3 + self.bottleneck1 = self._make_layer(in_channels, block_channels[0], + num_blocks[0], strides[0], + expand_ratio) + self.bottleneck2 = self._make_layer(block_channels[0], + block_channels[1], num_blocks[1], + strides[1], expand_ratio) + self.bottleneck3 = self._make_layer(block_channels[1], + block_channels[2], num_blocks[2], + strides[2], expand_ratio) + self.ppm = PPM( + pool_scales, + block_channels[2], + block_channels[2] // 4, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=align_corners) + + self.out = ConvModule( + block_channels[2] * 2, + out_channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _make_layer(self, + in_channels, + out_channels, + blocks, + stride=1, + expand_ratio=6): + layers = [ + InvertedResidual( + in_channels, + out_channels, + stride, + expand_ratio, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + ] + for i in range(1, blocks): + layers.append( + InvertedResidual( + out_channels, + out_channels, + 1, + expand_ratio, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = torch.cat([x, *self.ppm(x)], dim=1) + x = self.out(x) + return x + + +class FeatureFusionModule(nn.Module): + """Feature fusion module. + + Args: + higher_in_channels (int): Number of input channels of the + higher-resolution branch. + lower_in_channels (int): Number of input channels of the + lower-resolution branch. + out_channels (int): Number of output channels. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + dwconv_act_cfg (dict): Config of activation layers in 3x3 conv. + Default: dict(type='ReLU'). + conv_act_cfg (dict): Config of activation layers in the two 1x1 conv. + Default: None. 
+ align_corners (bool): align_corners argument of F.interpolate. + Default: False. + """ + + def __init__(self, + higher_in_channels, + lower_in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dwconv_act_cfg=dict(type='ReLU'), + conv_act_cfg=None, + align_corners=False): + super().__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.dwconv_act_cfg = dwconv_act_cfg + self.conv_act_cfg = conv_act_cfg + self.align_corners = align_corners + self.dwconv = ConvModule( + lower_in_channels, + out_channels, + 3, + padding=1, + groups=out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.dwconv_act_cfg) + self.conv_lower_res = ConvModule( + out_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.conv_act_cfg) + + self.conv_higher_res = ConvModule( + higher_in_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.conv_act_cfg) + + self.relu = nn.ReLU(True) + + def forward(self, higher_res_feature, lower_res_feature): + lower_res_feature = resize( + lower_res_feature, + size=higher_res_feature.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + lower_res_feature = self.dwconv(lower_res_feature) + lower_res_feature = self.conv_lower_res(lower_res_feature) + + higher_res_feature = self.conv_higher_res(higher_res_feature) + out = higher_res_feature + lower_res_feature + return self.relu(out) + + +@MODELS.register_module() +class FastSCNN(BaseModule): + """Fast-SCNN Backbone. + + This backbone is the implementation of `Fast-SCNN: Fast Semantic + Segmentation Network `_. + + Args: + in_channels (int): Number of input image channels. Default: 3. + downsample_dw_channels (tuple[int]): Number of output channels after + the first conv layer & the second conv layer in + Learning-To-Downsample (LTD) module. + Default: (32, 48). + global_in_channels (int): Number of input channels of + Global Feature Extractor(GFE). + Equal to number of output channels of LTD. + Default: 64. + global_block_channels (tuple[int]): Tuple of integers that describe + the output channels for each of the MobileNet-v2 bottleneck + residual blocks in GFE. + Default: (64, 96, 128). + global_block_strides (tuple[int]): Tuple of integers + that describe the strides (downsampling factors) for each of the + MobileNet-v2 bottleneck residual blocks in GFE. + Default: (2, 2, 1). + global_out_channels (int): Number of output channels of GFE. + Default: 128. + higher_in_channels (int): Number of input channels of the higher + resolution branch in FFM. + Equal to global_in_channels. + Default: 64. + lower_in_channels (int): Number of input channels of the lower + resolution branch in FFM. + Equal to global_out_channels. + Default: 128. + fusion_out_channels (int): Number of output channels of FFM. + Default: 128. + out_indices (tuple): Tuple of indices of list + [higher_res_features, lower_res_features, fusion_output]. + Often set to (0,1,2) to enable aux. heads. + Default: (0, 1, 2). + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + dw_act_cfg (dict): In DepthwiseSeparableConvModule, activation config + of depthwise ConvModule. If it is 'default', it will be the same + as `act_cfg`. Default: None. 
+ init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None + """ + + def __init__(self, + in_channels=3, + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + dw_act_cfg=None, + init_cfg=None): + + super().__init__(init_cfg) + + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm']) + ] + + if global_in_channels != higher_in_channels: + raise AssertionError('Global Input Channels must be the same \ + with Higher Input Channels!') + elif global_out_channels != lower_in_channels: + raise AssertionError('Global Output Channels must be the same \ + with Lower Input Channels!') + + self.in_channels = in_channels + self.downsample_dw_channels1 = downsample_dw_channels[0] + self.downsample_dw_channels2 = downsample_dw_channels[1] + self.global_in_channels = global_in_channels + self.global_block_channels = global_block_channels + self.global_block_strides = global_block_strides + self.global_out_channels = global_out_channels + self.higher_in_channels = higher_in_channels + self.lower_in_channels = lower_in_channels + self.fusion_out_channels = fusion_out_channels + self.out_indices = out_indices + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.learning_to_downsample = LearningToDownsample( + in_channels, + downsample_dw_channels, + global_in_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + dw_act_cfg=dw_act_cfg) + self.global_feature_extractor = GlobalFeatureExtractor( + global_in_channels, + global_block_channels, + global_out_channels, + strides=self.global_block_strides, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.feature_fusion = FeatureFusionModule( + higher_in_channels, + lower_in_channels, + fusion_out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + dwconv_act_cfg=self.act_cfg, + align_corners=self.align_corners) + + def forward(self, x): + higher_res_features = self.learning_to_downsample(x) + lower_res_features = self.global_feature_extractor(higher_res_features) + fusion_output = self.feature_fusion(higher_res_features, + lower_res_features) + + outs = [higher_res_features, lower_res_features, fusion_output] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/mmseg/models/backbones/hrnet.py b/mmseg/models/backbones/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..2da755e731cfea911d47729f455c54c3d38a68e4 --- /dev/null +++ b/mmseg/models/backbones/hrnet.py @@ -0,0 +1,642 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn +from mmcv.cnn import build_conv_layer, build_norm_layer +from mmengine.model import BaseModule, ModuleList, Sequential +from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm + +from mmseg.registry import MODELS +from ..utils import Upsample, resize +from .resnet import BasicBlock, Bottleneck + + +class HRModule(BaseModule): + """High-Resolution Module for HRNet. + + In this module, every branch has 4 BasicBlocks/Bottlenecks. 
Fusion/Exchange + is in this module. + """ + + def __init__(self, + num_branches, + blocks, + num_blocks, + in_channels, + num_channels, + multiscale_output=True, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + block_init_cfg=None, + init_cfg=None): + super().__init__(init_cfg) + self.block_init_cfg = block_init_cfg + self._check_branches(num_branches, num_blocks, in_channels, + num_channels) + + self.in_channels = in_channels + self.num_branches = num_branches + + self.multiscale_output = multiscale_output + self.norm_cfg = norm_cfg + self.conv_cfg = conv_cfg + self.with_cp = with_cp + self.branches = self._make_branches(num_branches, blocks, num_blocks, + num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(inplace=False) + + def _check_branches(self, num_branches, num_blocks, in_channels, + num_channels): + """Check branches configuration.""" + if num_branches != len(num_blocks): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' \ + f'{len(num_blocks)})' + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' \ + f'{len(num_channels)})' + raise ValueError(error_msg) + + if num_branches != len(in_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' \ + f'{len(in_channels)})' + raise ValueError(error_msg) + + def _make_one_branch(self, + branch_index, + block, + num_blocks, + num_channels, + stride=1): + """Build one branch.""" + downsample = None + if stride != 1 or \ + self.in_channels[branch_index] != \ + num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + self.in_channels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, num_channels[branch_index] * + block.expansion)[1]) + + layers = [] + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + init_cfg=self.block_init_cfg)) + self.in_channels[branch_index] = \ + num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + init_cfg=self.block_init_cfg)) + + return Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + """Build multiple branch.""" + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels)) + + return ModuleList(branches) + + def _make_fuse_layers(self): + """Build fuse layer.""" + if self.num_branches == 1: + return None + + num_branches = self.num_branches + in_channels = self.in_channels + fuse_layers = [] + num_out_branches = num_branches if self.multiscale_output else 1 + for i in range(num_out_branches): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, in_channels[i])[1], + # we set align_corners=False for HRNet + Upsample( + scale_factor=2**(j - i), + mode='bilinear', + align_corners=False))) + elif j == i: + 
fuse_layer.append(None) + else: + conv_downsamples = [] + for k in range(i - j): + if k == i - j - 1: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[i])[1])) + else: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[j], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[j])[1], + nn.ReLU(inplace=False))) + fuse_layer.append(nn.Sequential(*conv_downsamples)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def forward(self, x): + """Forward function.""" + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + for i in range(len(self.fuse_layers)): + y = 0 + for j in range(self.num_branches): + if i == j: + y += x[j] + elif j > i: + y = y + resize( + self.fuse_layers[i][j](x[j]), + size=x[i].shape[2:], + mode='bilinear', + align_corners=False) + else: + y += self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + return x_fuse + + +@MODELS.register_module() +class HRNet(BaseModule): + """HRNet backbone. + + This backbone is the implementation of `High-Resolution Representations + for Labeling Pixels and Regions `_. + + Args: + extra (dict): Detailed configuration for each stage of HRNet. + There must be 4 stages, the configuration for each stage must have + 5 keys: + + - num_modules (int): The number of HRModule in this stage. + - num_branches (int): The number of branches in the HRModule. + - block (str): The type of convolution block. + - num_blocks (tuple): The number of blocks in each branch. + The length must be equal to num_branches. + - num_channels (tuple): The number of channels in each branch. + The length must be equal to num_branches. + in_channels (int): Number of input image channels. Normally 3. + conv_cfg (dict): Dictionary to construct and config conv layer. + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Use `BN` by default. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. Default: -1. + zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. Default: False. + multiscale_output (bool): Whether to output multi-level features + produced by multiple branches. If False, only the first level + feature will be output. Default: True. + pretrained (str, optional): Model pretrained path. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
+ + Example: + >>> from mmseg.models import HRNet + >>> import torch + >>> extra = dict( + >>> stage1=dict( + >>> num_modules=1, + >>> num_branches=1, + >>> block='BOTTLENECK', + >>> num_blocks=(4, ), + >>> num_channels=(64, )), + >>> stage2=dict( + >>> num_modules=1, + >>> num_branches=2, + >>> block='BASIC', + >>> num_blocks=(4, 4), + >>> num_channels=(32, 64)), + >>> stage3=dict( + >>> num_modules=4, + >>> num_branches=3, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4), + >>> num_channels=(32, 64, 128)), + >>> stage4=dict( + >>> num_modules=3, + >>> num_branches=4, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4, 4), + >>> num_channels=(32, 64, 128, 256))) + >>> self = HRNet(extra, in_channels=1) + >>> self.eval() + >>> inputs = torch.rand(1, 1, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 32, 8, 8) + (1, 64, 4, 4) + (1, 128, 2, 2) + (1, 256, 1, 1) + """ + + blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck} + + def __init__(self, + extra, + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + with_cp=False, + frozen_stages=-1, + zero_init_residual=False, + multiscale_output=True, + pretrained=None, + init_cfg=None): + super().__init__(init_cfg) + + self.pretrained = pretrained + self.zero_init_residual = zero_init_residual + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + else: + raise TypeError('pretrained must be a str or None') + + # Assert configurations of 4 stages are in extra + assert 'stage1' in extra and 'stage2' in extra \ + and 'stage3' in extra and 'stage4' in extra + # Assert whether the length of `num_blocks` and `num_channels` are + # equal to `num_branches` + for i in range(4): + cfg = extra[f'stage{i + 1}'] + assert len(cfg['num_blocks']) == cfg['num_branches'] and \ + len(cfg['num_channels']) == cfg['num_branches'] + + self.extra = extra + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + self.frozen_stages = frozen_stages + + # stem net + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) + self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) + + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + self.conv_cfg, + 64, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.relu = nn.ReLU(inplace=True) + + # stage 1 + self.stage1_cfg = self.extra['stage1'] + num_channels = self.stage1_cfg['num_channels'][0] + block_type = self.stage1_cfg['block'] + num_blocks = self.stage1_cfg['num_blocks'][0] + + block = self.blocks_dict[block_type] + stage1_out_channels = num_channels * block.expansion + self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) + + # stage 2 + self.stage2_cfg = self.extra['stage2'] + num_channels = self.stage2_cfg['num_channels'] + block_type = 
self.stage2_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition1 = self._make_transition_layer([stage1_out_channels], + num_channels) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels) + + # stage 3 + self.stage3_cfg = self.extra['stage3'] + num_channels = self.stage3_cfg['num_channels'] + block_type = self.stage3_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition2 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels) + + # stage 4 + self.stage4_cfg = self.extra['stage4'] + num_channels = self.stage4_cfg['num_channels'] + block_type = self.stage4_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition3 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels, multiscale_output=multiscale_output) + + self._freeze_stages() + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: the normalization layer named "norm2" """ + return getattr(self, self.norm2_name) + + def _make_transition_layer(self, num_channels_pre_layer, + num_channels_cur_layer): + """Make transition layer.""" + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + num_channels_pre_layer[i], + num_channels_cur_layer[i], + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + num_channels_cur_layer[i])[1], + nn.ReLU(inplace=True))) + else: + transition_layers.append(None) + else: + conv_downsamples = [] + for j in range(i + 1 - num_branches_pre): + in_channels = num_channels_pre_layer[-1] + out_channels = num_channels_cur_layer[i] \ + if j == i - num_branches_pre else in_channels + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + out_channels, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, out_channels)[1], + nn.ReLU(inplace=True))) + transition_layers.append(nn.Sequential(*conv_downsamples)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + """Make each layer.""" + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [] + block_init_cfg = None + if self.pretrained is None and not hasattr( + self, 'init_cfg') and self.zero_init_residual: + if block is BasicBlock: + block_init_cfg = dict( + type='Constant', val=0, override=dict(name='norm2')) + elif block is Bottleneck: + block_init_cfg = dict( + type='Constant', val=0, override=dict(name='norm3')) + + layers.append( + block( + inplanes, + planes, 
+ stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + init_cfg=block_init_cfg)) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + inplanes, + planes, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + init_cfg=block_init_cfg)) + + return Sequential(*layers) + + def _make_stage(self, layer_config, in_channels, multiscale_output=True): + """Make each stage.""" + num_modules = layer_config['num_modules'] + num_branches = layer_config['num_branches'] + num_blocks = layer_config['num_blocks'] + num_channels = layer_config['num_channels'] + block = self.blocks_dict[layer_config['block']] + + hr_modules = [] + block_init_cfg = None + if self.pretrained is None and not hasattr( + self, 'init_cfg') and self.zero_init_residual: + if block is BasicBlock: + block_init_cfg = dict( + type='Constant', val=0, override=dict(name='norm2')) + elif block is Bottleneck: + block_init_cfg = dict( + type='Constant', val=0, override=dict(name='norm3')) + + for i in range(num_modules): + # multi_scale_output is only used for the last module + if not multiscale_output and i == num_modules - 1: + reset_multiscale_output = False + else: + reset_multiscale_output = True + + hr_modules.append( + HRModule( + num_branches, + block, + num_blocks, + in_channels, + num_channels, + reset_multiscale_output, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + block_init_cfg=block_init_cfg)) + + return Sequential(*hr_modules), in_channels + + def _freeze_stages(self): + """Freeze stages param and norm stats.""" + if self.frozen_stages >= 0: + + self.norm1.eval() + self.norm2.eval() + for m in [self.conv1, self.norm1, self.conv2, self.norm2]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + if i == 1: + m = getattr(self, f'layer{i}') + t = getattr(self, f'transition{i}') + elif i == 4: + m = getattr(self, f'stage{i}') + else: + m = getattr(self, f'stage{i}') + t = getattr(self, f'transition{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + t.eval() + for param in t.parameters(): + param.requires_grad = False + + def forward(self, x): + """Forward function.""" + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.norm2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['num_branches']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['num_branches']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['num_branches']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + return y_list + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super().train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git a/mmseg/models/backbones/icnet.py b/mmseg/models/backbones/icnet.py new file mode 100644 index 
0000000000000000000000000000000000000000..8ff3448569c5a3ec82a12726767fcbb48b3870d2 --- /dev/null +++ b/mmseg/models/backbones/icnet.py @@ -0,0 +1,166 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule + +from mmseg.registry import MODELS +from ..decode_heads.psp_head import PPM +from ..utils import resize + + +@MODELS.register_module() +class ICNet(BaseModule): + """ICNet for Real-Time Semantic Segmentation on High-Resolution Images. + + This backbone is the implementation of + `ICNet `_. + + Args: + backbone_cfg (dict): Config dict to build backbone. Usually it is + ResNet but it can also be other backbones. + in_channels (int): The number of input image channels. Default: 3. + layer_channels (Sequence[int]): The numbers of feature channels at + layer 2 and layer 4 in ResNet. It can also be other backbones. + Default: (512, 2048). + light_branch_middle_channels (int): The number of channels of the + middle layer in light branch. Default: 32. + psp_out_channels (int): The number of channels of the output of PSP + module. Default: 512. + out_channels (Sequence[int]): The numbers of output feature channels + at each branches. Default: (64, 256, 256). + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. Default: (1, 2, 3, 6). + conv_cfg (dict): Dictionary to construct and config conv layer. + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Default: dict(type='BN'). + act_cfg (dict): Dictionary to construct and config act layer. + Default: dict(type='ReLU'). + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + backbone_cfg, + in_channels=3, + layer_channels=(512, 2048), + light_branch_middle_channels=32, + psp_out_channels=512, + out_channels=(64, 256, 256), + pool_scales=(1, 2, 3, 6), + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='ReLU'), + align_corners=False, + init_cfg=None): + if backbone_cfg is None: + raise TypeError('backbone_cfg must be passed from config file!') + if init_cfg is None: + init_cfg = [ + dict(type='Kaiming', mode='fan_out', layer='Conv2d'), + dict(type='Constant', val=1, layer='_BatchNorm'), + dict(type='Normal', mean=0.01, layer='Linear') + ] + super().__init__(init_cfg=init_cfg) + self.align_corners = align_corners + self.backbone = MODELS.build(backbone_cfg) + + # Note: Default `ceil_mode` is false in nn.MaxPool2d, set + # `ceil_mode=True` to keep information in the corner of feature map. 
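+        # Illustrative arithmetic (editor's sketch): with kernel_size=3,
+        # stride=2, padding=1, an input of spatial size H yields
+        #   floor((H + 2 - 3) / 2) + 1 rows with ceil_mode=False, but
+        #   ceil((H + 2 - 3) / 2) + 1 rows with ceil_mode=True.
+        # E.g. H = 10 gives 5 vs. 6: the extra row/column keeps the border
+        # pixels that floor mode would otherwise discard.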
+ self.backbone.maxpool = nn.MaxPool2d( + kernel_size=3, stride=2, padding=1, ceil_mode=True) + + self.psp_modules = PPM( + pool_scales=pool_scales, + in_channels=layer_channels[1], + channels=psp_out_channels, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + align_corners=align_corners) + + self.psp_bottleneck = ConvModule( + layer_channels[1] + len(pool_scales) * psp_out_channels, + psp_out_channels, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + self.conv_sub1 = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=light_branch_middle_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg), + ConvModule( + in_channels=light_branch_middle_channels, + out_channels=light_branch_middle_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg), + ConvModule( + in_channels=light_branch_middle_channels, + out_channels=out_channels[0], + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg)) + + self.conv_sub2 = ConvModule( + layer_channels[0], + out_channels[1], + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg) + + self.conv_sub4 = ConvModule( + psp_out_channels, + out_channels[2], + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg) + + def forward(self, x): + output = [] + + # sub 1 + output.append(self.conv_sub1(x)) + + # sub 2 + x = resize( + x, + scale_factor=0.5, + mode='bilinear', + align_corners=self.align_corners) + x = self.backbone.stem(x) + x = self.backbone.maxpool(x) + x = self.backbone.layer1(x) + x = self.backbone.layer2(x) + output.append(self.conv_sub2(x)) + + # sub 4 + x = resize( + x, + scale_factor=0.5, + mode='bilinear', + align_corners=self.align_corners) + x = self.backbone.layer3(x) + x = self.backbone.layer4(x) + psp_outs = self.psp_modules(x) + [x] + psp_outs = torch.cat(psp_outs, dim=1) + x = self.psp_bottleneck(psp_outs) + + output.append(self.conv_sub4(x)) + + return output diff --git a/mmseg/models/backbones/lora.py b/mmseg/models/backbones/lora.py new file mode 100644 index 0000000000000000000000000000000000000000..cf38653d8b63a90b5f7135858fb057a246992e41 --- /dev/null +++ b/mmseg/models/backbones/lora.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import math + +class LoRALayer(): + def __init__( + self, + r: int, + lora_alpha: int, + lora_dropout: float, + merge_weights: bool, + ): + self.r = r + self.lora_alpha = lora_alpha + # Optional dropout + if lora_dropout > 0.: + self.lora_dropout = nn.Dropout(p=lora_dropout) + else: + self.lora_dropout = lambda x: x + # Mark the weight as unmerged + self.merged = False + self.merge_weights = merge_weights + +class Linear(nn.Linear, LoRALayer): + # LoRA implemented in a dense layer + def __init__( + self, + in_features: int, + out_features: int, + r: int = 0, + lora_alpha: int = 1, + lora_dropout: float = 0., + fan_in_fan_out: bool = False, # Set this to True if the layer to replace stores weight like (fan_in, fan_out) + merge_weights: bool = True, + **kwargs + ): + nn.Linear.__init__(self, in_features, out_features, **kwargs) + LoRALayer.__init__(self, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, + merge_weights=merge_weights) + + self.fan_in_fan_out = fan_in_fan_out + # Actual trainable parameters + if r > 0: + self.lora_A = nn.Parameter(self.weight.new_zeros((r, in_features))) + self.lora_B = nn.Parameter(self.weight.new_zeros((out_features, r))) + self.scaling = self.lora_alpha / self.r + # Freezing the 
pre-trained weight matrix + self.weight.requires_grad = False + self.reset_parameters() + if fan_in_fan_out: + self.weight.data = self.weight.data.transpose(0, 1) + + def reset_parameters(self): + nn.Linear.reset_parameters(self) + if hasattr(self, 'lora_A'): + # initialize A the same way as the default for nn.Linear and B to zero + nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5)) + nn.init.zeros_(self.lora_B) + + def train(self, mode: bool = True): + def T(w): + return w.transpose(0, 1) if self.fan_in_fan_out else w + nn.Linear.train(self, mode) + if mode: + if self.merge_weights and self.merged: + # Make sure that the weights are not merged + if self.r > 0: + self.weight.data -= T(self.lora_B @ self.lora_A) * self.scaling + self.merged = False + else: + if self.merge_weights and not self.merged: + # Merge the weights and mark it + if self.r > 0: + self.weight.data += T(self.lora_B @ self.lora_A) * self.scaling + self.merged = True + + def forward(self, x: torch.Tensor): + def T(w): + return w.transpose(0, 1) if self.fan_in_fan_out else w + if self.r > 0 and not self.merged: + result = F.linear(x, T(self.weight), bias=self.bias) + if self.r > 0: + result += (self.lora_dropout(x) @ self.lora_A.transpose(0, 1) @ self.lora_B.transpose(0, 1)) * self.scaling + return result + else: + return F.linear(x, T(self.weight), bias=self.bias) + + + +class Conv2d(nn.Conv2d, LoRALayer): + # LoRA implemented in a dense layer + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + r: int = 0, + lora_alpha: int = 1, + lora_dropout: float = 0., + merge_weights: bool = True, + **kwargs + ): + nn.Conv2d.__init__(self, in_channels, out_channels, kernel_size, **kwargs) + LoRALayer.__init__(self, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, + merge_weights=merge_weights) + # assert type(kernel_size) is int + if type(kernel_size) is tuple: + temp_ks = kernel_size[0] + # Actual trainable parameters + if r > 0: + self.lora_A = nn.Parameter( + self.weight.new_zeros((r*temp_ks, in_channels*temp_ks)) + ) + self.lora_B = nn.Parameter( + self.weight.new_zeros((out_channels*temp_ks, r*temp_ks)) + ) + self.scaling = self.lora_alpha / self.r + # Freezing the pre-trained weight matrix + self.weight.requires_grad = False + self.reset_parameters() + + def reset_parameters(self): + nn.Conv2d.reset_parameters(self) + if hasattr(self, 'lora_A'): + # initialize A the same way as the default for nn.Linear and B to zero + nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5)) + nn.init.zeros_(self.lora_B) + + def train(self, mode: bool = True): + nn.Conv2d.train(self, mode) + if mode: + if self.merge_weights and self.merged: + # Make sure that the weights are not merged + self.weight.data -= (self.lora_B @ self.lora_A).view(self.weight.shape) * self.scaling + self.merged = False + else: + if self.merge_weights and not self.merged: + # Merge the weights and mark it + self.weight.data += (self.lora_B @ self.lora_A).view(self.weight.shape) * self.scaling + self.merged = True + + def forward(self, x: torch.Tensor): + if self.r > 0 and not self.merged: + return F.conv2d( + x, + self.weight + (self.lora_B @ self.lora_A).view(self.weight.shape) * self.scaling, + self.bias, self.stride, self.padding, self.dilation, self.groups + ) + return nn.Conv2d.forward(self, x) + +def wrap_model_with_lora(module, rank=4): + for name, child in module.named_children(): + if isinstance(child, (Linear, Conv2d)): + continue + + if 'stitch' in name: + pass + + if isinstance(child, nn.Linear): + setattr(module, 
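+            # Editor's sketch (illustrative; the shapes below are
+            # hypothetical): each replaced layer keeps its frozen weight W
+            # and learns a low-rank update, so the effective weight becomes
+            #   W' = W + (lora_B @ lora_A) * (lora_alpha / r),
+            # with lora_A of shape (r, in_features) and lora_B of shape
+            # (out_features, r). For in_features = out_features = 768 and
+            # r = 4 this trains 2 * 4 * 768 = 6,144 parameters instead of
+            # 768 * 768 = 589,824 (~1% of the dense layer). `train(mode)`
+            # above merges/un-merges the update in place, so a model in
+            # eval() pays for a single dense matmul per layer.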
name, Linear(in_features=child.in_features, out_features=child.out_features, bias=child.bias is not None, r=rank)) + elif isinstance(child, nn.Conv2d): + setattr(module, name, Conv2d(in_channels=child.in_channels, out_channels=child.out_channels, kernel_size=child.kernel_size, stride=child.stride, padding=child.padding, dilation=child.dilation, groups=child.groups, bias=child.bias is not None, r=rank)) + else: + wrap_model_with_lora(child, rank) diff --git a/mmseg/models/backbones/mae.py b/mmseg/models/backbones/mae.py new file mode 100644 index 0000000000000000000000000000000000000000..de7957895ab67902016e8a5950d14f47ed43a03c --- /dev/null +++ b/mmseg/models/backbones/mae.py @@ -0,0 +1,497 @@ +# Copyright (c) OpenMMLab. All rights reserved.import math +import math + +import torch +import torch.nn as nn +from mmengine.model import ModuleList +from mmengine.model.weight_init import (constant_init, kaiming_init, + trunc_normal_) +from mmengine.runner.checkpoint import _load_checkpoint +from torch.nn.modules.batchnorm import _BatchNorm +from ..utils import resize +from mmseg.registry import MODELS +from .beit import BEiT, BEiTAttention, BEiTTransformerEncoderLayer + + +class MAEAttention(BEiTAttention): + """Multi-head self-attention with relative position bias used in MAE. + + This module is different from ``BEiTAttention`` by initializing the + relative bias table with zeros. + """ + + def init_weights(self): + """Initialize relative position bias with zeros.""" + + # As MAE initializes relative position bias as zeros and this class + # inherited from BEiT which initializes relative position bias + # with `trunc_normal`, `init_weights` here does + # nothing and just passes directly + + pass + + +class MAETransformerEncoderLayer(BEiTTransformerEncoderLayer): + """Implements one encoder layer in Vision Transformer. + + This module is different from ``BEiTTransformerEncoderLayer`` by replacing + ``BEiTAttention`` with ``MAEAttention``. + """ + + def build_attn(self, attn_cfg): + self.attn = MAEAttention(**attn_cfg) + + +@MODELS.register_module() +class MAE(BEiT): + """VisionTransformer with support for patch. + + Args: + img_size (int | tuple): Input image size. Default: 224. + patch_size (int): The patch size. Default: 16. + in_channels (int): Number of input channels. Default: 3. + embed_dims (int): embedding dimension. Default: 768. + num_layers (int): depth of transformer. Default: 12. + num_heads (int): number of attention heads. Default: 12. + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + out_indices (list | tuple | int): Output from which stages. + Default: -1. + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): stochastic depth rate. Default 0.0. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + patch_norm (bool): Whether to add a norm in PatchEmbed Block. + Default: False. + final_norm (bool): Whether to add a additional layer to normalize + final feature map. Default: False. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + pretrained (str, optional): model pretrained path. Default: None. + init_values (float): Initialize the values of Attention and FFN + with learnable scaling. 
Defaults to 0.1. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=-1, + attn_drop_rate=0., + drop_path_rate=0., + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + patch_norm=False, + final_norm=False, + num_fcs=2, + norm_eval=False, + pretrained=None, + init_values=0.1, + is_deit_3=False, + is_anchor=False, + with_cls_token=True, + interpolate_mode='bicubic', + init_cfg=None): + super().__init__( + img_size=img_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dims=embed_dims, + num_layers=num_layers, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + out_indices=out_indices, + qv_bias=False, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rate, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + patch_norm=patch_norm, + final_norm=final_norm, + num_fcs=num_fcs, + norm_eval=norm_eval, + pretrained=pretrained, + init_values=init_values, + init_cfg=init_cfg) + self.is_anchor = is_anchor + self.interpolate_mode = interpolate_mode + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims)) + self.with_cls_token = with_cls_token + self.num_patches = self.patch_shape[0] * self.patch_shape[1] + + self.is_deit_3 = is_deit_3 + if self.is_deit_3: + self.pos_embed = nn.Parameter( + torch.zeros(1, self.num_patches, embed_dims)) + else: + self.pos_embed = nn.Parameter( + torch.zeros(1, self.num_patches + 1, embed_dims)) + + def _build_layers(self): + dpr = [ + x.item() + for x in torch.linspace(0, self.drop_path_rate, self.num_layers) + ] + self.layers = ModuleList() + for i in range(self.num_layers): + self.layers.append( + MAETransformerEncoderLayer( + embed_dims=self.embed_dims, + num_heads=self.num_heads, + feedforward_channels=self.mlp_ratio * self.embed_dims, + attn_drop_rate=self.attn_drop_rate, + drop_path_rate=dpr[i], + num_fcs=self.num_fcs, + bias=True, + act_cfg=self.act_cfg, + norm_cfg=self.norm_cfg, + window_size=self.patch_shape, + init_values=self.init_values)) + + def fix_init_weight(self): + """Rescale the initialization according to layer id. + + This function is copied from https://github.com/microsoft/unilm/blob/master/beit/modeling_pretrain.py. 
# noqa: E501
+        Copyright (c) Microsoft Corporation
+        Licensed under the MIT License
+        """
+
+        def rescale(param, layer_id):
+            param.div_(math.sqrt(2.0 * layer_id))
+
+        for layer_id, layer in enumerate(self.layers):
+            rescale(layer.attn.proj.weight.data, layer_id + 1)
+            rescale(layer.ffn.layers[1].weight.data, layer_id + 1)
+
+    def init_weights(self):
+
+        def _init_weights(m):
+            if isinstance(m, nn.Linear):
+                trunc_normal_(m.weight, std=.02)
+                if isinstance(m, nn.Linear) and m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.LayerNorm):
+                nn.init.constant_(m.bias, 0)
+                nn.init.constant_(m.weight, 1.0)
+
+        self.apply(_init_weights)
+        self.fix_init_weight()
+
+        if (isinstance(self.init_cfg, dict)
+                and self.init_cfg.get('type') == 'Pretrained'):
+            checkpoint = _load_checkpoint(
+                self.init_cfg['checkpoint'], logger=None, map_location='cpu')
+            state_dict = self.resize_rel_pos_embed(checkpoint)
+            state_dict = self.resize_abs_pos_embed(state_dict)
+            self.load_state_dict(state_dict, False)
+        elif self.init_cfg is not None:
+            super().init_weights()
+        else:
+            # We only implement the 'jax_impl' initialization implemented at
+            # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501
+            # Copyright 2019 Ross Wightman
+            # Licensed under the Apache License, Version 2.0 (the "License")
+            trunc_normal_(self.cls_token, std=.02)
+            for n, m in self.named_modules():
+                if isinstance(m, nn.Linear):
+                    trunc_normal_(m.weight, std=.02)
+                    if m.bias is not None:
+                        if 'ffn' in n:
+                            nn.init.normal_(m.bias, mean=0., std=1e-6)
+                        else:
+                            nn.init.constant_(m.bias, 0)
+                elif isinstance(m, nn.Conv2d):
+                    kaiming_init(m, mode='fan_in', bias=0.)
+                elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)):
+                    constant_init(m, val=1.0, bias=0.)
+
+    @staticmethod
+    def resize_pos_embed(pos_embed, input_shape, pos_shape, mode):
+        """Resize pos_embed weights.
+
+        Resize pos_embed using bicubic interpolate method.
+        Args:
+            pos_embed (torch.Tensor): Position embedding weights.
+            input_shape (tuple): Tuple for (downsampled input image height,
+                downsampled input image width).
+            pos_shape (tuple): The resolution of downsampled origin training
+                image.
+            mode (str): Algorithm used for upsampling:
+                ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |
+                ``'trilinear'``. Default: ``'nearest'``.
+        Returns:
+            torch.Tensor: The resized pos_embed of shape [B, L_new, C].
+        """
+        assert pos_embed.ndim == 3, 'shape of pos_embed must be [B, L, C]'
+        pos_h, pos_w = pos_shape
+        # keep dim for easy deployment
+        pos_embed_weight = pos_embed.reshape(
+            1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2)
+        pos_embed_weight = resize(
+            pos_embed_weight, size=input_shape, align_corners=False, mode=mode)
+        pos_embed_weight = torch.flatten(pos_embed_weight, 2).transpose(1, 2)
+        return pos_embed_weight
+
+
+    def _pos_embeding(self, patched_img, hw_shape, pos_embed):
+        """Position embedding method.
+
+        Resize the pos_embed if the input image size doesn't match
+        the training size.
+        Args:
+            patched_img (torch.Tensor): The patched image, it should be
+                shape of [B, L1, C].
+            hw_shape (tuple): The downsampled image resolution.
+            pos_embed (torch.Tensor): The pos_embed weights, it should be
+                shape of [B, L2, C].
+        Returns:
+            torch.Tensor: The pos encoded image feature.
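+        Example (illustrative, editor's addition): a checkpoint trained at
+        512x512 with patch size 16 stores a 32x32 grid of position tokens.
+        For a 640x640 input the patched sequence is 40x40 tokens, so the
+        stored grid is reshaped to [1, C, 32, 32], interpolated to 40x40
+        (bicubic by default), and flattened back to [1, 1600, C] before
+        being added.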
+ """ + assert patched_img.ndim == 3 and pos_embed.ndim == 3, \ + 'the shapes of patched_img and pos_embed must be [B, L, C]' + x_len, pos_len = patched_img.shape[1], pos_embed.shape[1] + if x_len != pos_len: + pos_h = self.img_size[0] // self.patch_size + pos_w = self.img_size[1] // self.patch_size + if not self.is_deit_3: + pos_embed = self.resize_pos_embed_with_cls( + pos_embed, hw_shape, (pos_h, pos_w), self.interpolate_mode) + else: + pos_embed = self.resize_pos_embed(pos_embed, hw_shape, + (pos_h, pos_w), + self.interpolate_mode) + return patched_img + pos_embed + + def resize_abs_pos_embed(self, state_dict): + if 'pos_embed' in state_dict: + pos_embed_checkpoint = state_dict['pos_embed'] + embedding_size = pos_embed_checkpoint.shape[-1] + num_extra_tokens = self.pos_embed.shape[-2] - self.num_patches + # height (== width) for the checkpoint position embedding + orig_size = int( + (pos_embed_checkpoint.shape[-2] - num_extra_tokens)**0.5) + # height (== width) for the new position embedding + new_size = int(self.num_patches**0.5) + # class_token and dist_token are kept unchanged + if orig_size != new_size: + extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens] + # only the position tokens are interpolated + pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:] + pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, + embedding_size).permute( + 0, 3, 1, 2) + pos_tokens = torch.nn.functional.interpolate( + pos_tokens, + size=(new_size, new_size), + mode='bicubic', + align_corners=False) + pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2) + new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1) + state_dict['pos_embed'] = new_pos_embed + return state_dict + + def extract_block_features(self, inputs): + B = inputs.shape[0] + + x, hw_shape = self.patch_embed(inputs) + cls_tokens = self.cls_token.expand(B, -1, -1) + + if self.is_deit_3: + x = self._pos_embeding(x, hw_shape, self.pos_embed) + x = torch.cat((cls_tokens, x), dim=1) + else: + x = torch.cat((cls_tokens, x), dim=1) + x = self._pos_embeding(x, hw_shape, self.pos_embed) + + if hasattr(self, 'norm_pre'): + x = self.norm_pre(x) + + if not self.with_cls_token: + # Remove class token for transformer encoder input + x = x[:, 1:] + + outs = {} + + for i, layer in enumerate(self.layers): + x = layer(x) + outs[i] = x.detach() + return outs + + + def forward_until(self, inputs, blk_id): + B = inputs.shape[0] + + x, hw_shape = self.patch_embed(inputs) + cls_tokens = self.cls_token.expand(B, -1, -1) + + if self.is_deit_3: + x = self._pos_embeding(x, hw_shape, self.pos_embed) + x = torch.cat((cls_tokens, x), dim=1) + else: + x = torch.cat((cls_tokens, x), dim=1) + x = self._pos_embeding(x, hw_shape, self.pos_embed) + + if hasattr(self, 'norm_pre'): + x = self.norm_pre(x) + + if not self.with_cls_token: + # Remove class token for transformer encoder input + x = x[:, 1:] + + outs = [] + for i, layer in enumerate(self.layers): + x = layer(x) + if i in self.out_indices: + outs.append(x) + + if i == blk_id: + break + + return x, outs, hw_shape + + def patch_embed_params(self): + total_params = 0 + total_params += sum([p.numel() for p in self.patch_embed.parameters()]) + + if hasattr(self, 'norm_pre'): + total_params += self.norm_pre.numel() + return total_params + + def selective_params(self, begin, end): + total_params = 0 + for i, layer in enumerate(self.layers): + if i < begin: + continue + if i > end: + break + total_params += sum([p.numel() for p in layer.parameters()]) + return total_params + + + def 
forward_patch_embed(self, x): + B = x.shape[0] + + x, hw_shape = self.patch_embed(x) + cls_tokens = self.cls_token.expand(B, -1, -1) + + if self.is_deit_3: + x = self._pos_embeding(x, hw_shape, self.pos_embed) + x = torch.cat((cls_tokens, x), dim=1) + else: + x = torch.cat((cls_tokens, x), dim=1) + x = self._pos_embeding(x, hw_shape, self.pos_embed) + + if hasattr(self, 'norm_pre'): + x = self.norm_pre(x) + + if not self.with_cls_token: + # Remove class token for transformer encoder input + x = x[:, 1:] + return x, hw_shape + + + def selective_forward(self, x, begin, end): + outs = [] + for i, layer in enumerate(self.layers): + if i < begin: + continue + if i > end: + break + x = layer(x) + if i in self.out_indices: + outs.append(x) + return x, outs + + def forward_from(self, x, blk_id): + outs = [] + for i, layer in enumerate(self.layers): + if i < blk_id: + continue + x = layer(x) + if i in self.out_indices: + outs.append(x) + return outs + + + def forward(self, inputs): + B = inputs.shape[0] + + x, hw_shape = self.patch_embed(inputs) + cls_tokens = self.cls_token.expand(B, -1, -1) + + if self.is_deit_3: + x = self._pos_embeding(x, hw_shape, self.pos_embed) + x = torch.cat((cls_tokens, x), dim=1) + else: + x = torch.cat((cls_tokens, x), dim=1) + x = self._pos_embeding(x, hw_shape, self.pos_embed) + + if hasattr(self, 'norm_pre'): + x = self.norm_pre(x) + + if not self.with_cls_token: + # Remove class token for transformer encoder input + x = x[:, 1:] + + outs = [] + for i, layer in enumerate(self.layers): + x = layer(x) + if i == len(self.layers) - 1: + if self.final_norm: + x = self.norm1(x) + if i in self.out_indices: + if self.is_anchor: + outs.append(x) + else: + if self.with_cls_token: + # Remove class token and reshape token for decoder head + out = x[:, 1:] + else: + out = x + B, _, C = out.shape + out = out.reshape(B, hw_shape[0], hw_shape[1], + C).permute(0, 3, 1, 2).contiguous() + outs.append(out) + + if self.is_anchor: + return outs, hw_shape + else: + return outs + + + # def forward(self, inputs): + # B = inputs.shape[0] + # + # x, hw_shape = self.patch_embed(inputs) + # + # # stole cls_tokens impl from Phil Wang, thanks + # cls_tokens = self.cls_token.expand(B, -1, -1) + # x = torch.cat((cls_tokens, x), dim=1) + # x = x + self.pos_embed + # + # outs = [] + # for i, layer in enumerate(self.layers): + # x = layer(x) + # if i == len(self.layers) - 1: + # if self.final_norm: + # x = self.norm1(x) + # if i in self.out_indices: + # out = x[:, 1:] + # B, _, C = out.shape + # out = out.reshape(B, hw_shape[0], hw_shape[1], + # C).permute(0, 3, 1, 2).contiguous() + # outs.append(out) + # + # return tuple(outs) diff --git a/mmseg/models/backbones/mit.py b/mmseg/models/backbones/mit.py new file mode 100644 index 0000000000000000000000000000000000000000..66556bdfca2b0bcb180afd23c2923c68b9ff3a69 --- /dev/null +++ b/mmseg/models/backbones/mit.py @@ -0,0 +1,450 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
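+# Editor's note (illustrative arithmetic): EfficientMultiheadAttention below
+# shrinks the key/value sequence with a strided conv before attention. For a
+# 512x512 input, stage 1 operates on a 128x128 grid (N = 16384 tokens); with
+# sr_ratio = 8 the keys/values reduce to a 16x16 grid (256 tokens), so the
+# attention map holds 16384 * 256 ~= 4.2M entries rather than 16384^2 ~=
+# 268M -- a sr_ratio^2 = 64x saving in attention compute and memory.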
+import math +import warnings + +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import Conv2d, build_activation_layer, build_norm_layer +from mmcv.cnn.bricks.drop import build_dropout +from mmcv.cnn.bricks.transformer import MultiheadAttention +from mmengine.model import BaseModule, ModuleList, Sequential +from mmengine.model.weight_init import (constant_init, normal_init, + trunc_normal_init) + +from mmseg.registry import MODELS +from ..utils import PatchEmbed, nchw_to_nlc, nlc_to_nchw + + +class MixFFN(BaseModule): + """An implementation of MixFFN of Segformer. + + The differences between MixFFN & FFN: + 1. Use 1X1 Conv to replace Linear layer. + 2. Introduce 3X3 Conv to encode positional information. + Args: + embed_dims (int): The feature dimension. Same as + `MultiheadAttention`. Defaults: 256. + feedforward_channels (int): The hidden dimension of FFNs. + Defaults: 1024. + act_cfg (dict, optional): The activation config for FFNs. + Default: dict(type='ReLU') + ffn_drop (float, optional): Probability of an element to be + zeroed in FFN. Default 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + def __init__(self, + embed_dims, + feedforward_channels, + act_cfg=dict(type='GELU'), + ffn_drop=0., + dropout_layer=None, + init_cfg=None): + super().__init__(init_cfg) + + self.embed_dims = embed_dims + self.feedforward_channels = feedforward_channels + self.act_cfg = act_cfg + self.activate = build_activation_layer(act_cfg) + + in_channels = embed_dims + fc1 = Conv2d( + in_channels=in_channels, + out_channels=feedforward_channels, + kernel_size=1, + stride=1, + bias=True) + # 3x3 depth wise conv to provide positional encode information + pe_conv = Conv2d( + in_channels=feedforward_channels, + out_channels=feedforward_channels, + kernel_size=3, + stride=1, + padding=(3 - 1) // 2, + bias=True, + groups=feedforward_channels) + fc2 = Conv2d( + in_channels=feedforward_channels, + out_channels=in_channels, + kernel_size=1, + stride=1, + bias=True) + drop = nn.Dropout(ffn_drop) + layers = [fc1, pe_conv, self.activate, drop, fc2, drop] + self.layers = Sequential(*layers) + self.dropout_layer = build_dropout( + dropout_layer) if dropout_layer else torch.nn.Identity() + + def forward(self, x, hw_shape, identity=None): + out = nlc_to_nchw(x, hw_shape) + out = self.layers(out) + out = nchw_to_nlc(out) + if identity is None: + identity = x + return identity + self.dropout_layer(out) + + +class EfficientMultiheadAttention(MultiheadAttention): + """An implementation of Efficient Multi-head Attention of Segformer. + + This module is modified from MultiheadAttention which is a module from + mmcv.cnn.bricks.transformer. + Args: + embed_dims (int): The embedding dimension. + num_heads (int): Parallel attention heads. + attn_drop (float): A Dropout layer on attn_output_weights. + Default: 0.0. + proj_drop (float): A Dropout layer after `nn.MultiheadAttention`. + Default: 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. Default: None. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dim) + or (n, batch, embed_dim). Default: False. + qkv_bias (bool): enable bias for qkv if True. Default True. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). 
+ sr_ratio (int): The ratio of spatial reduction of Efficient Multi-head + Attention of Segformer. Default: 1. + """ + + def __init__(self, + embed_dims, + num_heads, + attn_drop=0., + proj_drop=0., + dropout_layer=None, + init_cfg=None, + batch_first=True, + qkv_bias=False, + norm_cfg=dict(type='LN'), + sr_ratio=1): + super().__init__( + embed_dims, + num_heads, + attn_drop, + proj_drop, + dropout_layer=dropout_layer, + init_cfg=init_cfg, + batch_first=batch_first, + bias=qkv_bias) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = Conv2d( + in_channels=embed_dims, + out_channels=embed_dims, + kernel_size=sr_ratio, + stride=sr_ratio) + # The ret[0] of build_norm_layer is norm name. + self.norm = build_norm_layer(norm_cfg, embed_dims)[1] + + # handle the BC-breaking from https://github.com/open-mmlab/mmcv/pull/1418 # noqa + from mmseg import digit_version, mmcv_version + if mmcv_version < digit_version('1.3.17'): + warnings.warn('The legacy version of forward function in' + 'EfficientMultiheadAttention is deprecated in' + 'mmcv>=1.3.17 and will no longer support in the' + 'future. Please upgrade your mmcv.') + self.forward = self.legacy_forward + + def forward(self, x, hw_shape, identity=None): + + x_q = x + if self.sr_ratio > 1: + x_kv = nlc_to_nchw(x, hw_shape) + x_kv = self.sr(x_kv) + x_kv = nchw_to_nlc(x_kv) + x_kv = self.norm(x_kv) + else: + x_kv = x + + if identity is None: + identity = x_q + + # Because the dataflow('key', 'query', 'value') of + # ``torch.nn.MultiheadAttention`` is (num_query, batch, + # embed_dims), We should adjust the shape of dataflow from + # batch_first (batch, num_query, embed_dims) to num_query_first + # (num_query ,batch, embed_dims), and recover ``attn_output`` + # from num_query_first to batch_first. + if self.batch_first: + x_q = x_q.transpose(0, 1) + x_kv = x_kv.transpose(0, 1) + + out = self.attn(query=x_q, key=x_kv, value=x_kv)[0] + + if self.batch_first: + out = out.transpose(0, 1) + + return identity + self.dropout_layer(self.proj_drop(out)) + + def legacy_forward(self, x, hw_shape, identity=None): + """multi head attention forward in mmcv version < 1.3.17.""" + + x_q = x + if self.sr_ratio > 1: + x_kv = nlc_to_nchw(x, hw_shape) + x_kv = self.sr(x_kv) + x_kv = nchw_to_nlc(x_kv) + x_kv = self.norm(x_kv) + else: + x_kv = x + + if identity is None: + identity = x_q + + # `need_weights=True` will let nn.MultiHeadAttention + # `return attn_output, attn_output_weights.sum(dim=1) / num_heads` + # The `attn_output_weights.sum(dim=1)` may cause cuda error. So, we set + # `need_weights=False` to ignore `attn_output_weights.sum(dim=1)`. + # This issue - `https://github.com/pytorch/pytorch/issues/37583` report + # the error that large scale tensor sum operation may cause cuda error. + out = self.attn(query=x_q, key=x_kv, value=x_kv, need_weights=False)[0] + + return identity + self.dropout_layer(self.proj_drop(out)) + + +class TransformerEncoderLayer(BaseModule): + """Implements one encoder layer in Segformer. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + drop_rate (float): Probability of an element to be zeroed. + after the feed forward layer. Default 0.0. + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0. + drop_path_rate (float): stochastic depth rate. Default 0.0. + qkv_bias (bool): enable bias for qkv if True. + Default: True. + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). 
+ norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dim) + or (n, batch, embed_dim). Default: False. + init_cfg (dict, optional): Initialization config dict. + Default:None. + sr_ratio (int): The ratio of spatial reduction of Efficient Multi-head + Attention of Segformer. Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + qkv_bias=True, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + batch_first=True, + sr_ratio=1, + with_cp=False): + super().__init__() + + # The ret[0] of build_norm_layer is norm name. + self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1] + + self.attn = EfficientMultiheadAttention( + embed_dims=embed_dims, + num_heads=num_heads, + attn_drop=attn_drop_rate, + proj_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + batch_first=batch_first, + qkv_bias=qkv_bias, + norm_cfg=norm_cfg, + sr_ratio=sr_ratio) + + # The ret[0] of build_norm_layer is norm name. + self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1] + + self.ffn = MixFFN( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + act_cfg=act_cfg) + + self.with_cp = with_cp + + def forward(self, x, hw_shape): + + def _inner_forward(x): + x = self.attn(self.norm1(x), hw_shape, identity=x) + x = self.ffn(self.norm2(x), hw_shape, identity=x) + return x + + if self.with_cp and x.requires_grad: + x = cp.checkpoint(_inner_forward, x) + else: + x = _inner_forward(x) + return x + + +@MODELS.register_module() +class MixVisionTransformer(BaseModule): + """The backbone of Segformer. + + This backbone is the implementation of `SegFormer: Simple and + Efficient Design for Semantic Segmentation with + Transformers `_. + Args: + in_channels (int): Number of input channels. Default: 3. + embed_dims (int): Embedding dimension. Default: 768. + num_stags (int): The num of stages. Default: 4. + num_layers (Sequence[int]): The layer number of each transformer encode + layer. Default: [3, 4, 6, 3]. + num_heads (Sequence[int]): The attention heads of each transformer + encode layer. Default: [1, 2, 4, 8]. + patch_sizes (Sequence[int]): The patch_size of each overlapped patch + embedding. Default: [7, 3, 3, 3]. + strides (Sequence[int]): The stride of each overlapped patch embedding. + Default: [4, 2, 2, 2]. + sr_ratios (Sequence[int]): The spatial reduction rate of each + transformer encode layer. Default: [8, 4, 2, 1]. + out_indices (Sequence[int] | int): Output from which stages. + Default: (0, 1, 2, 3). + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + qkv_bias (bool): Enable bias for qkv if True. Default: True. + drop_rate (float): Probability of an element to be zeroed. + Default 0.0 + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): stochastic depth rate. Default 0.0 + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + pretrained (str, optional): model pretrained path. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. 
+ Default: None. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + in_channels=3, + embed_dims=64, + num_stages=4, + num_layers=[3, 4, 6, 3], + num_heads=[1, 2, 4, 8], + patch_sizes=[7, 3, 3, 3], + strides=[4, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], + out_indices=(0, 1, 2, 3), + mlp_ratio=4, + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN', eps=1e-6), + pretrained=None, + init_cfg=None, + with_cp=False): + super().__init__(init_cfg=init_cfg) + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + + self.embed_dims = embed_dims + self.num_stages = num_stages + self.num_layers = num_layers + self.num_heads = num_heads + self.patch_sizes = patch_sizes + self.strides = strides + self.sr_ratios = sr_ratios + self.with_cp = with_cp + assert num_stages == len(num_layers) == len(num_heads) \ + == len(patch_sizes) == len(strides) == len(sr_ratios) + + self.out_indices = out_indices + assert max(out_indices) < self.num_stages + + # transformer encoder + dpr = [ + x.item() + for x in torch.linspace(0, drop_path_rate, sum(num_layers)) + ] # stochastic num_layer decay rule + + cur = 0 + self.layers = ModuleList() + for i, num_layer in enumerate(num_layers): + embed_dims_i = embed_dims * num_heads[i] + patch_embed = PatchEmbed( + in_channels=in_channels, + embed_dims=embed_dims_i, + kernel_size=patch_sizes[i], + stride=strides[i], + padding=patch_sizes[i] // 2, + norm_cfg=norm_cfg) + layer = ModuleList([ + TransformerEncoderLayer( + embed_dims=embed_dims_i, + num_heads=num_heads[i], + feedforward_channels=mlp_ratio * embed_dims_i, + drop_rate=drop_rate, + attn_drop_rate=attn_drop_rate, + drop_path_rate=dpr[cur + idx], + qkv_bias=qkv_bias, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + sr_ratio=sr_ratios[i]) for idx in range(num_layer) + ]) + in_channels = embed_dims_i + # The ret[0] of build_norm_layer is norm name. + norm = build_norm_layer(norm_cfg, embed_dims_i)[1] + self.layers.append(ModuleList([patch_embed, layer, norm])) + cur += num_layer + + def init_weights(self): + if self.init_cfg is None: + for m in self.modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=.02, bias=0.) + elif isinstance(m, nn.LayerNorm): + constant_init(m, val=1.0, bias=0.) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[ + 1] * m.out_channels + fan_out //= m.groups + normal_init( + m, mean=0, std=math.sqrt(2.0 / fan_out), bias=0) + else: + super().init_weights() + + def forward(self, x): + outs = [] + + for i, layer in enumerate(self.layers): + x, hw_shape = layer[0](x) + for block in layer[1]: + x = block(x, hw_shape) + x = layer[2](x) + x = nlc_to_nchw(x, hw_shape) + if i in self.out_indices: + outs.append(x) + + return outs diff --git a/mmseg/models/backbones/mobilenet_v2.py b/mmseg/models/backbones/mobilenet_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..1c21b5df97dade148136e8b0e6b039512f9e03f9 --- /dev/null +++ b/mmseg/models/backbones/mobilenet_v2.py @@ -0,0 +1,197 @@ +# Copyright (c) OpenMMLab. 
All rights reserved. +import warnings + +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule +from torch.nn.modules.batchnorm import _BatchNorm + +from mmseg.registry import MODELS +from ..utils import InvertedResidual, make_divisible + + +@MODELS.register_module() +class MobileNetV2(BaseModule): + """MobileNetV2 backbone. + + This backbone is the implementation of + `MobileNetV2: Inverted Residuals and Linear Bottlenecks + `_. + + Args: + widen_factor (float): Width multiplier, multiply number of + channels in each layer by this amount. Default: 1.0. + strides (Sequence[int], optional): Strides of the first block of each + layer. If not specified, default config in ``arch_setting`` will + be used. + dilations (Sequence[int]): Dilation of each layer. + out_indices (None or Sequence[int]): Output from which stages. + Default: (7, ). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + pretrained (str, optional): model pretrained path. Default: None + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None + """ + + # Parameters to build layers. 3 parameters are needed to construct a + # layer, from left to right: expand_ratio, channel, num_blocks. + arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4], + [6, 96, 3], [6, 160, 3], [6, 320, 1]] + + def __init__(self, + widen_factor=1., + strides=(1, 2, 2, 2, 1, 2, 1), + dilations=(1, 1, 1, 1, 1, 1, 1), + out_indices=(1, 2, 4, 6), + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + norm_eval=False, + with_cp=False, + pretrained=None, + init_cfg=None): + super().__init__(init_cfg) + + self.pretrained = pretrained + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + else: + raise TypeError('pretrained must be a str or None') + + self.widen_factor = widen_factor + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == len(self.arch_settings) + self.out_indices = out_indices + for index in out_indices: + if index not in range(0, 7): + raise ValueError('the item in out_indices must in ' + f'range(0, 7). But received {index}') + + if frozen_stages not in range(-1, 7): + raise ValueError('frozen_stages must be in range(-1, 7). 
' + f'But received {frozen_stages}') + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + + self.in_channels = make_divisible(32 * widen_factor, 8) + + self.conv1 = ConvModule( + in_channels=3, + out_channels=self.in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.layers = [] + + for i, layer_cfg in enumerate(self.arch_settings): + expand_ratio, channel, num_blocks = layer_cfg + stride = self.strides[i] + dilation = self.dilations[i] + out_channels = make_divisible(channel * widen_factor, 8) + inverted_res_layer = self.make_layer( + out_channels=out_channels, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + expand_ratio=expand_ratio) + layer_name = f'layer{i + 1}' + self.add_module(layer_name, inverted_res_layer) + self.layers.append(layer_name) + + def make_layer(self, out_channels, num_blocks, stride, dilation, + expand_ratio): + """Stack InvertedResidual blocks to build a layer for MobileNetV2. + + Args: + out_channels (int): out_channels of block. + num_blocks (int): Number of blocks. + stride (int): Stride of the first block. + dilation (int): Dilation of the first block. + expand_ratio (int): Expand the number of channels of the + hidden layer in InvertedResidual by this ratio. + """ + layers = [] + for i in range(num_blocks): + layers.append( + InvertedResidual( + self.in_channels, + out_channels, + stride if i == 0 else 1, + expand_ratio=expand_ratio, + dilation=dilation if i == 0 else 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + with_cp=self.with_cp)) + self.in_channels = out_channels + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def _freeze_stages(self): + if self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for i in range(1, self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super().train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/mmseg/models/backbones/mobilenet_v3.py b/mmseg/models/backbones/mobilenet_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..1efb6e097472d53a5269e52a39ff2cae48e834db --- /dev/null +++ b/mmseg/models/backbones/mobilenet_v3.py @@ -0,0 +1,267 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +from mmcv.cnn import ConvModule +from mmcv.cnn.bricks import Conv2dAdaptivePadding +from mmengine.model import BaseModule +from mmengine.utils import is_tuple_of +from torch.nn.modules.batchnorm import _BatchNorm + +from mmseg.registry import MODELS +from ..utils import InvertedResidualV3 as InvertedResidual + + +@MODELS.register_module() +class MobileNetV3(BaseModule): + """MobileNetV3 backbone. + + This backbone is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + arch (str): Architecture of mobilnetv3, from {'small', 'large'}. + Default: 'small'. 
+ conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + out_indices (tuple[int]): Output from which layer. + Default: (0, 1, 12). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. + Default: False. + pretrained (str, optional): model pretrained path. Default: None + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None + """ + # Parameters to build each block: + # [kernel size, mid channels, out channels, with_se, act type, stride] + arch_settings = { + 'small': [[3, 16, 16, True, 'ReLU', 2], # block0 layer1 os=4 + [3, 72, 24, False, 'ReLU', 2], # block1 layer2 os=8 + [3, 88, 24, False, 'ReLU', 1], + [5, 96, 40, True, 'HSwish', 2], # block2 layer4 os=16 + [5, 240, 40, True, 'HSwish', 1], + [5, 240, 40, True, 'HSwish', 1], + [5, 120, 48, True, 'HSwish', 1], # block3 layer7 os=16 + [5, 144, 48, True, 'HSwish', 1], + [5, 288, 96, True, 'HSwish', 2], # block4 layer9 os=32 + [5, 576, 96, True, 'HSwish', 1], + [5, 576, 96, True, 'HSwish', 1]], + 'large': [[3, 16, 16, False, 'ReLU', 1], # block0 layer1 os=2 + [3, 64, 24, False, 'ReLU', 2], # block1 layer2 os=4 + [3, 72, 24, False, 'ReLU', 1], + [5, 72, 40, True, 'ReLU', 2], # block2 layer4 os=8 + [5, 120, 40, True, 'ReLU', 1], + [5, 120, 40, True, 'ReLU', 1], + [3, 240, 80, False, 'HSwish', 2], # block3 layer7 os=16 + [3, 200, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 480, 112, True, 'HSwish', 1], # block4 layer11 os=16 + [3, 672, 112, True, 'HSwish', 1], + [5, 672, 160, True, 'HSwish', 2], # block5 layer13 os=32 + [5, 960, 160, True, 'HSwish', 1], + [5, 960, 160, True, 'HSwish', 1]] + } # yapf: disable + + def __init__(self, + arch='small', + conv_cfg=None, + norm_cfg=dict(type='BN'), + out_indices=(0, 1, 12), + frozen_stages=-1, + reduction_factor=1, + norm_eval=False, + with_cp=False, + pretrained=None, + init_cfg=None): + super().__init__(init_cfg) + + self.pretrained = pretrained + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + else: + raise TypeError('pretrained must be a str or None') + + assert arch in self.arch_settings + assert isinstance(reduction_factor, int) and reduction_factor > 0 + assert is_tuple_of(out_indices, int) + for index in out_indices: + if index not in range(0, len(self.arch_settings[arch]) + 2): + raise ValueError( + 'the item in out_indices must in ' + f'range(0, {len(self.arch_settings[arch])+2}). ' + f'But received {index}') + + if frozen_stages not in range(-1, len(self.arch_settings[arch]) + 2): + raise ValueError('frozen_stages must be in range(-1, ' + f'{len(self.arch_settings[arch])+2}). 
' + f'But received {frozen_stages}') + self.arch = arch + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.reduction_factor = reduction_factor + self.norm_eval = norm_eval + self.with_cp = with_cp + self.layers = self._make_layer() + + def _make_layer(self): + layers = [] + + # build the first layer (layer0) + in_channels = 16 + layer = ConvModule( + in_channels=3, + out_channels=in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=dict(type='Conv2dAdaptivePadding'), + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + self.add_module('layer0', layer) + layers.append('layer0') + + layer_setting = self.arch_settings[self.arch] + for i, params in enumerate(layer_setting): + (kernel_size, mid_channels, out_channels, with_se, act, + stride) = params + + if self.arch == 'large' and i >= 12 or self.arch == 'small' and \ + i >= 8: + mid_channels = mid_channels // self.reduction_factor + out_channels = out_channels // self.reduction_factor + + if with_se: + se_cfg = dict( + channels=mid_channels, + ratio=4, + act_cfg=(dict(type='ReLU'), + dict(type='HSigmoid', bias=3.0, divisor=6.0))) + else: + se_cfg = None + + layer = InvertedResidual( + in_channels=in_channels, + out_channels=out_channels, + mid_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + se_cfg=se_cfg, + with_expand_conv=(in_channels != mid_channels), + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type=act), + with_cp=self.with_cp) + in_channels = out_channels + layer_name = f'layer{i + 1}' + self.add_module(layer_name, layer) + layers.append(layer_name) + + # build the last layer + # block5 layer12 os=32 for small model + # block6 layer16 os=32 for large model + layer = ConvModule( + in_channels=in_channels, + out_channels=576 if self.arch == 'small' else 960, + kernel_size=1, + stride=1, + dilation=4, + padding=0, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + layer_name = f'layer{len(layer_setting) + 1}' + self.add_module(layer_name, layer) + layers.append(layer_name) + + # next, convert backbone MobileNetV3 to a semantic segmentation version + if self.arch == 'small': + self.layer4.depthwise_conv.conv.stride = (1, 1) + self.layer9.depthwise_conv.conv.stride = (1, 1) + for i in range(4, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 9: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + else: + self.layer7.depthwise_conv.conv.stride = (1, 1) + self.layer13.depthwise_conv.conv.stride = (1, 1) + for i in range(7, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 13: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + + return layers + + def forward(self, x): + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = 
layer(x) + if i in self.out_indices: + outs.append(x) + return outs + + def _freeze_stages(self): + for i in range(self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super().train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/mmseg/models/backbones/mscan.py b/mmseg/models/backbones/mscan.py new file mode 100644 index 0000000000000000000000000000000000000000..7150cb7a1c13d11dcdcc6fbbc72931154853929e --- /dev/null +++ b/mmseg/models/backbones/mscan.py @@ -0,0 +1,467 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Originally from https://github.com/visual-attention-network/segnext +# Licensed under the Apache License, Version 2.0 (the "License") +import math +import warnings + +import torch +import torch.nn as nn +from mmcv.cnn import build_activation_layer, build_norm_layer +from mmcv.cnn.bricks import DropPath +from mmengine.model import BaseModule +from mmengine.model.weight_init import (constant_init, normal_init, + trunc_normal_init) + +from mmseg.registry import MODELS + + +class Mlp(BaseModule): + """Multi Layer Perceptron (MLP) Module. + + Args: + in_features (int): The dimension of input features. + hidden_features (int): The dimension of hidden features. + Defaults: None. + out_features (int): The dimension of output features. + Defaults: None. + act_cfg (dict): Config dict for activation layer in block. + Default: dict(type='GELU'). + drop (float): The number of dropout rate in MLP block. + Defaults: 0.0. + """ + + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_cfg=dict(type='GELU'), + drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Conv2d(in_features, hidden_features, 1) + self.dwconv = nn.Conv2d( + hidden_features, + hidden_features, + 3, + 1, + 1, + bias=True, + groups=hidden_features) + self.act = build_activation_layer(act_cfg) + self.fc2 = nn.Conv2d(hidden_features, out_features, 1) + self.drop = nn.Dropout(drop) + + def forward(self, x): + """Forward function.""" + + x = self.fc1(x) + + x = self.dwconv(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + + return x + + +class StemConv(BaseModule): + """Stem Block at the beginning of Semantic Branch. + + Args: + in_channels (int): The dimension of input channels. + out_channels (int): The dimension of output channels. + act_cfg (dict): Config dict for activation layer in block. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Defaults: dict(type='SyncBN', requires_grad=True). 
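+    Example (illustrative, editor's addition): the two stride-2 convs
+    reduce a [B, 3, 512, 512] image to a [B, out_channels, 128, 128]
+    feature map (H/4 x W/4); ``forward`` then flattens it to a token
+    sequence of shape [B, 128 * 128, out_channels] and returns
+    ``(x, H, W)`` so later blocks can restore the spatial layout.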
+ """ + + def __init__(self, + in_channels, + out_channels, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='SyncBN', requires_grad=True)): + super().__init__() + + self.proj = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels // 2, + kernel_size=(3, 3), + stride=(2, 2), + padding=(1, 1)), + build_norm_layer(norm_cfg, out_channels // 2)[1], + build_activation_layer(act_cfg), + nn.Conv2d( + out_channels // 2, + out_channels, + kernel_size=(3, 3), + stride=(2, 2), + padding=(1, 1)), + build_norm_layer(norm_cfg, out_channels)[1], + ) + + def forward(self, x): + """Forward function.""" + + x = self.proj(x) + _, _, H, W = x.size() + x = x.flatten(2).transpose(1, 2) + return x, H, W + + +class MSCAAttention(BaseModule): + """Attention Module in Multi-Scale Convolutional Attention Module (MSCA). + + Args: + channels (int): The dimension of channels. + kernel_sizes (list): The size of attention + kernel. Defaults: [5, [1, 7], [1, 11], [1, 21]]. + paddings (list): The number of + corresponding padding value in attention module. + Defaults: [2, [0, 3], [0, 5], [0, 10]]. + """ + + def __init__(self, + channels, + kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + paddings=[2, [0, 3], [0, 5], [0, 10]]): + super().__init__() + self.conv0 = nn.Conv2d( + channels, + channels, + kernel_size=kernel_sizes[0], + padding=paddings[0], + groups=channels) + for i, (kernel_size, + padding) in enumerate(zip(kernel_sizes[1:], paddings[1:])): + kernel_size_ = [kernel_size, kernel_size[::-1]] + padding_ = [padding, padding[::-1]] + conv_name = [f'conv{i}_1', f'conv{i}_2'] + for i_kernel, i_pad, i_conv in zip(kernel_size_, padding_, + conv_name): + self.add_module( + i_conv, + nn.Conv2d( + channels, + channels, + tuple(i_kernel), + padding=i_pad, + groups=channels)) + self.conv3 = nn.Conv2d(channels, channels, 1) + + def forward(self, x): + """Forward function.""" + + u = x.clone() + + attn = self.conv0(x) + + # Multi-Scale Feature extraction + attn_0 = self.conv0_1(attn) + attn_0 = self.conv0_2(attn_0) + + attn_1 = self.conv1_1(attn) + attn_1 = self.conv1_2(attn_1) + + attn_2 = self.conv2_1(attn) + attn_2 = self.conv2_2(attn_2) + + attn = attn + attn_0 + attn_1 + attn_2 + # Channel Mixing + attn = self.conv3(attn) + + # Convolutional Attention + x = attn * u + + return x + + +class MSCASpatialAttention(BaseModule): + """Spatial Attention Module in Multi-Scale Convolutional Attention Module + (MSCA). + + Args: + in_channels (int): The dimension of channels. + attention_kernel_sizes (list): The size of attention + kernel. Defaults: [5, [1, 7], [1, 11], [1, 21]]. + attention_kernel_paddings (list): The number of + corresponding padding value in attention module. + Defaults: [2, [0, 3], [0, 5], [0, 10]]. + act_cfg (dict): Config dict for activation layer in block. + Default: dict(type='GELU'). 
+ """ + + def __init__(self, + in_channels, + attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]], + act_cfg=dict(type='GELU')): + super().__init__() + self.proj_1 = nn.Conv2d(in_channels, in_channels, 1) + self.activation = build_activation_layer(act_cfg) + self.spatial_gating_unit = MSCAAttention(in_channels, + attention_kernel_sizes, + attention_kernel_paddings) + self.proj_2 = nn.Conv2d(in_channels, in_channels, 1) + + def forward(self, x): + """Forward function.""" + + shorcut = x.clone() + x = self.proj_1(x) + x = self.activation(x) + x = self.spatial_gating_unit(x) + x = self.proj_2(x) + x = x + shorcut + return x + + +class MSCABlock(BaseModule): + """Basic Multi-Scale Convolutional Attention Block. It leverage the large- + kernel attention (LKA) mechanism to build both channel and spatial + attention. In each branch, it uses two depth-wise strip convolutions to + approximate standard depth-wise convolutions with large kernels. The kernel + size for each branch is set to 7, 11, and 21, respectively. + + Args: + channels (int): The dimension of channels. + attention_kernel_sizes (list): The size of attention + kernel. Defaults: [5, [1, 7], [1, 11], [1, 21]]. + attention_kernel_paddings (list): The number of + corresponding padding value in attention module. + Defaults: [2, [0, 3], [0, 5], [0, 10]]. + mlp_ratio (float): The ratio of multiple input dimension to + calculate hidden feature in MLP layer. Defaults: 4.0. + drop (float): The number of dropout rate in MLP block. + Defaults: 0.0. + drop_path (float): The ratio of drop paths. + Defaults: 0.0. + act_cfg (dict): Config dict for activation layer in block. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Defaults: dict(type='SyncBN', requires_grad=True). + """ + + def __init__(self, + channels, + attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]], + mlp_ratio=4., + drop=0., + drop_path=0., + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='SyncBN', requires_grad=True)): + super().__init__() + self.norm1 = build_norm_layer(norm_cfg, channels)[1] + self.attn = MSCASpatialAttention(channels, attention_kernel_sizes, + attention_kernel_paddings, act_cfg) + self.drop_path = DropPath( + drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = build_norm_layer(norm_cfg, channels)[1] + mlp_hidden_channels = int(channels * mlp_ratio) + self.mlp = Mlp( + in_features=channels, + hidden_features=mlp_hidden_channels, + act_cfg=act_cfg, + drop=drop) + layer_scale_init_value = 1e-2 + self.layer_scale_1 = nn.Parameter( + layer_scale_init_value * torch.ones(channels), requires_grad=True) + self.layer_scale_2 = nn.Parameter( + layer_scale_init_value * torch.ones(channels), requires_grad=True) + + def forward(self, x, H, W): + """Forward function.""" + + B, N, C = x.shape + x = x.permute(0, 2, 1).view(B, C, H, W) + x = x + self.drop_path( + self.layer_scale_1.unsqueeze(-1).unsqueeze(-1) * + self.attn(self.norm1(x))) + x = x + self.drop_path( + self.layer_scale_2.unsqueeze(-1).unsqueeze(-1) * + self.mlp(self.norm2(x))) + x = x.view(B, C, N).permute(0, 2, 1) + return x + + +class OverlapPatchEmbed(BaseModule): + """Image to Patch Embedding. + + Args: + patch_size (int): The patch size. + Defaults: 7. + stride (int): Stride of the convolutional layer. + Default: 4. + in_channels (int): The number of input channels. + Defaults: 3. + embed_dims (int): The dimensions of embedding. 
+ Defaults: 768. + norm_cfg (dict): Config dict for normalization layer. + Defaults: dict(type='SyncBN', requires_grad=True). + """ + + def __init__(self, + patch_size=7, + stride=4, + in_channels=3, + embed_dim=768, + norm_cfg=dict(type='SyncBN', requires_grad=True)): + super().__init__() + + self.proj = nn.Conv2d( + in_channels, + embed_dim, + kernel_size=patch_size, + stride=stride, + padding=patch_size // 2) + self.norm = build_norm_layer(norm_cfg, embed_dim)[1] + + def forward(self, x): + """Forward function.""" + + x = self.proj(x) + _, _, H, W = x.shape + x = self.norm(x) + + x = x.flatten(2).transpose(1, 2) + + return x, H, W + + +@MODELS.register_module() +class MSCAN(BaseModule): + """SegNeXt Multi-Scale Convolutional Attention Network (MCSAN) backbone. + + This backbone is the implementation of `SegNeXt: Rethinking + Convolutional Attention Design for Semantic + Segmentation `_. + Inspiration from https://github.com/visual-attention-network/segnext. + + Args: + in_channels (int): The number of input channels. Defaults: 3. + embed_dims (list[int]): Embedding dimension. + Defaults: [64, 128, 256, 512]. + mlp_ratios (list[int]): Ratio of mlp hidden dim to embedding dim. + Defaults: [4, 4, 4, 4]. + drop_rate (float): Dropout rate. Defaults: 0. + drop_path_rate (float): Stochastic depth rate. Defaults: 0. + depths (list[int]): Depths of each Swin Transformer stage. + Default: [3, 4, 6, 3]. + num_stages (int): MSCAN stages. Default: 4. + attention_kernel_sizes (list): Size of attention kernel in + Attention Module (Figure 2(b) of original paper). + Defaults: [5, [1, 7], [1, 11], [1, 21]]. + attention_kernel_paddings (list): Size of attention paddings + in Attention Module (Figure 2(b) of original paper). + Defaults: [2, [0, 3], [0, 5], [0, 10]]. + norm_cfg (dict): Config of norm layers. + Defaults: dict(type='SyncBN', requires_grad=True). + pretrained (str, optional): model pretrained path. + Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
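+
+    Example (a minimal forward pass with MSCAN-T-like settings, assuming
+    plain ``BN`` in place of the default ``SyncBN`` so it runs on a single
+    device):
+        >>> import torch
+        >>> self = MSCAN(embed_dims=[32, 64, 160, 256],
+        ...              depths=[3, 3, 5, 2],
+        ...              norm_cfg=dict(type='BN', requires_grad=True))
+        >>> self.eval()
+        >>> inputs = torch.rand(1, 3, 128, 128)
+        >>> level_outputs = self.forward(inputs)
+        >>> for level_out in level_outputs:
+        ...     print(tuple(level_out.shape))
+        (1, 32, 32, 32)
+        (1, 64, 16, 16)
+        (1, 160, 8, 8)
+        (1, 256, 4, 4)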
+ """ + + def __init__(self, + in_channels=3, + embed_dims=[64, 128, 256, 512], + mlp_ratios=[4, 4, 4, 4], + drop_rate=0., + drop_path_rate=0., + depths=[3, 4, 6, 3], + num_stages=4, + attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]], + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='SyncBN', requires_grad=True), + pretrained=None, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + + self.depths = depths + self.num_stages = num_stages + + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + cur = 0 + + for i in range(num_stages): + if i == 0: + patch_embed = StemConv(3, embed_dims[0], norm_cfg=norm_cfg) + else: + patch_embed = OverlapPatchEmbed( + patch_size=7 if i == 0 else 3, + stride=4 if i == 0 else 2, + in_channels=in_channels if i == 0 else embed_dims[i - 1], + embed_dim=embed_dims[i], + norm_cfg=norm_cfg) + + block = nn.ModuleList([ + MSCABlock( + channels=embed_dims[i], + attention_kernel_sizes=attention_kernel_sizes, + attention_kernel_paddings=attention_kernel_paddings, + mlp_ratio=mlp_ratios[i], + drop=drop_rate, + drop_path=dpr[cur + j], + act_cfg=act_cfg, + norm_cfg=norm_cfg) for j in range(depths[i]) + ]) + norm = nn.LayerNorm(embed_dims[i]) + cur += depths[i] + + setattr(self, f'patch_embed{i + 1}', patch_embed) + setattr(self, f'block{i + 1}', block) + setattr(self, f'norm{i + 1}', norm) + + def init_weights(self): + """Initialize modules of MSCAN.""" + + print('init cfg', self.init_cfg) + if self.init_cfg is None: + for m in self.modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=.02, bias=0.) + elif isinstance(m, nn.LayerNorm): + constant_init(m, val=1.0, bias=0.) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[ + 1] * m.out_channels + fan_out //= m.groups + normal_init( + m, mean=0, std=math.sqrt(2.0 / fan_out), bias=0) + else: + super().init_weights() + + def forward(self, x): + """Forward function.""" + + B = x.shape[0] + outs = [] + + for i in range(self.num_stages): + patch_embed = getattr(self, f'patch_embed{i + 1}') + block = getattr(self, f'block{i + 1}') + norm = getattr(self, f'norm{i + 1}') + x, H, W = patch_embed(x) + for blk in block: + x = blk(x, H, W) + x = norm(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + return outs diff --git a/mmseg/models/backbones/pidnet.py b/mmseg/models/backbones/pidnet.py new file mode 100644 index 0000000000000000000000000000000000000000..0b711a373701c0771c5c5997bbb8e5b345d70924 --- /dev/null +++ b/mmseg/models/backbones/pidnet.py @@ -0,0 +1,522 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule +from mmengine.runner import CheckpointLoader +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.utils import OptConfigType +from ..utils import DAPPM, PAPPM, BasicBlock, Bottleneck + + +class PagFM(BaseModule): + """Pixel-attention-guided fusion module. + + Args: + in_channels (int): The number of input channels. + channels (int): The number of channels. + after_relu (bool): Whether to use ReLU before attention. + Default: False. + with_channel (bool): Whether to use channel attention. + Default: False. + upsample_mode (str): The mode of upsample. Default: 'bilinear'. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(typ='ReLU', inplace=True). + init_cfg (dict): Config dict for initialization. Default: None. + """ + + def __init__(self, + in_channels: int, + channels: int, + after_relu: bool = False, + with_channel: bool = False, + upsample_mode: str = 'bilinear', + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(typ='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.after_relu = after_relu + self.with_channel = with_channel + self.upsample_mode = upsample_mode + self.f_i = ConvModule( + in_channels, channels, 1, norm_cfg=norm_cfg, act_cfg=None) + self.f_p = ConvModule( + in_channels, channels, 1, norm_cfg=norm_cfg, act_cfg=None) + if with_channel: + self.up = ConvModule( + channels, in_channels, 1, norm_cfg=norm_cfg, act_cfg=None) + if after_relu: + self.relu = MODELS.build(act_cfg) + + def forward(self, x_p: Tensor, x_i: Tensor) -> Tensor: + """Forward function. + + Args: + x_p (Tensor): The featrue map from P branch. + x_i (Tensor): The featrue map from I branch. + + Returns: + Tensor: The feature map with pixel-attention-guided fusion. + """ + if self.after_relu: + x_p = self.relu(x_p) + x_i = self.relu(x_i) + + f_i = self.f_i(x_i) + f_i = F.interpolate( + f_i, + size=x_p.shape[2:], + mode=self.upsample_mode, + align_corners=False) + + f_p = self.f_p(x_p) + + if self.with_channel: + sigma = torch.sigmoid(self.up(f_p * f_i)) + else: + sigma = torch.sigmoid(torch.sum(f_p * f_i, dim=1).unsqueeze(1)) + + x_i = F.interpolate( + x_i, + size=x_p.shape[2:], + mode=self.upsample_mode, + align_corners=False) + + out = sigma * x_i + (1 - sigma) * x_p + return out + + +class Bag(BaseModule): + """Boundary-attention-guided fusion module. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + kernel_size (int): The kernel size of the convolution. Default: 3. + padding (int): The padding of the convolution. Default: 1. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). + conv_cfg (dict): Config dict for convolution layer. + Default: dict(order=('norm', 'act', 'conv')). + init_cfg (dict): Config dict for initialization. Default: None. 
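+
+    Example (a minimal sketch of boundary-attention-guided fusion; random
+    tensors stand in for the P-, I- and D-branch features):
+        >>> import torch
+        >>> bag = Bag(128, 128)
+        >>> x_p = x_i = x_d = torch.rand(1, 128, 16, 16)
+        >>> bag(x_p, x_i, x_d).shape  # sigmoid(x_d) gates P against I
+        torch.Size([1, 128, 16, 16])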
+ """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + padding: int = 1, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + conv_cfg: OptConfigType = dict(order=('norm', 'act', 'conv')), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + + self.conv = ConvModule( + in_channels, + out_channels, + kernel_size, + padding=padding, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + + def forward(self, x_p: Tensor, x_i: Tensor, x_d: Tensor) -> Tensor: + """Forward function. + + Args: + x_p (Tensor): The featrue map from P branch. + x_i (Tensor): The featrue map from I branch. + x_d (Tensor): The featrue map from D branch. + + Returns: + Tensor: The feature map with boundary-attention-guided fusion. + """ + sigma = torch.sigmoid(x_d) + return self.conv(sigma * x_p + (1 - sigma) * x_i) + + +class LightBag(BaseModule): + """Light Boundary-attention-guided fusion module. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. Default: None. + init_cfg (dict): Config dict for initialization. Default: None. + """ + + def __init__(self, + in_channels: int, + out_channels: int, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = None, + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.f_p = ConvModule( + in_channels, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.f_i = ConvModule( + in_channels, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, x_p: Tensor, x_i: Tensor, x_d: Tensor) -> Tensor: + """Forward function. + Args: + x_p (Tensor): The featrue map from P branch. + x_i (Tensor): The featrue map from I branch. + x_d (Tensor): The featrue map from D branch. + + Returns: + Tensor: The feature map with light boundary-attention-guided + fusion. + """ + sigma = torch.sigmoid(x_d) + + f_p = self.f_p((1 - sigma) * x_i + x_p) + f_i = self.f_i(x_i + sigma * x_p) + + return f_p + f_i + + +@MODELS.register_module() +class PIDNet(BaseModule): + """PIDNet backbone. + + This backbone is the implementation of `PIDNet: A Real-time Semantic + Segmentation Network Inspired from PID Controller + `_. + Modified from https://github.com/XuJiacong/PIDNet. + + Licensed under the MIT License. + + Args: + in_channels (int): The number of input channels. Default: 3. + channels (int): The number of channels in the stem layer. Default: 64. + ppm_channels (int): The number of channels in the PPM layer. + Default: 96. + num_stem_blocks (int): The number of blocks in the stem layer. + Default: 2. + num_branch_blocks (int): The number of blocks in the branch layer. + Default: 3. + align_corners (bool): The align_corners argument of F.interpolate. + Default: False. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). + init_cfg (dict): Config dict for initialization. Default: None. 
+ """ + + def __init__(self, + in_channels: int = 3, + channels: int = 64, + ppm_channels: int = 96, + num_stem_blocks: int = 2, + num_branch_blocks: int = 3, + align_corners: bool = False, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None, + **kwargs): + super().__init__(init_cfg) + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + + # stem layer + self.stem = self._make_stem_layer(in_channels, channels, + num_stem_blocks) + self.relu = nn.ReLU() + + # I Branch + self.i_branch_layers = nn.ModuleList() + for i in range(3): + self.i_branch_layers.append( + self._make_layer( + block=BasicBlock if i < 2 else Bottleneck, + in_channels=channels * 2**(i + 1), + channels=channels * 8 if i > 0 else channels * 4, + num_blocks=num_branch_blocks if i < 2 else 2, + stride=2)) + + # P Branch + self.p_branch_layers = nn.ModuleList() + for i in range(3): + self.p_branch_layers.append( + self._make_layer( + block=BasicBlock if i < 2 else Bottleneck, + in_channels=channels * 2, + channels=channels * 2, + num_blocks=num_stem_blocks if i < 2 else 1)) + self.compression_1 = ConvModule( + channels * 4, + channels * 2, + kernel_size=1, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None) + self.compression_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=1, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None) + self.pag_1 = PagFM(channels * 2, channels) + self.pag_2 = PagFM(channels * 2, channels) + + # D Branch + if num_stem_blocks == 2: + self.d_branch_layers = nn.ModuleList([ + self._make_single_layer(BasicBlock, channels * 2, channels), + self._make_layer(Bottleneck, channels, channels, 1) + ]) + channel_expand = 1 + spp_module = PAPPM + dfm_module = LightBag + act_cfg_dfm = None + else: + self.d_branch_layers = nn.ModuleList([ + self._make_single_layer(BasicBlock, channels * 2, + channels * 2), + self._make_single_layer(BasicBlock, channels * 2, channels * 2) + ]) + channel_expand = 2 + spp_module = DAPPM + dfm_module = Bag + act_cfg_dfm = act_cfg + + self.diff_1 = ConvModule( + channels * 4, + channels * channel_expand, + kernel_size=3, + padding=1, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None) + self.diff_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=3, + padding=1, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None) + + self.spp = spp_module( + channels * 16, ppm_channels, channels * 4, num_scales=5) + self.dfm = dfm_module( + channels * 4, channels * 4, norm_cfg=norm_cfg, act_cfg=act_cfg_dfm) + + self.d_branch_layers.append( + self._make_layer(Bottleneck, channels * 2, channels * 2, 1)) + + def _make_stem_layer(self, in_channels: int, channels: int, + num_blocks: int) -> nn.Sequential: + """Make stem layer. + + Args: + in_channels (int): Number of input channels. + channels (int): Number of output channels. + num_blocks (int): Number of blocks. + + Returns: + nn.Sequential: The stem layer. 
+ """ + + layers = [ + ConvModule( + in_channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + ] + + layers.append( + self._make_layer(BasicBlock, channels, channels, num_blocks)) + layers.append(nn.ReLU()) + layers.append( + self._make_layer( + BasicBlock, channels, channels * 2, num_blocks, stride=2)) + layers.append(nn.ReLU()) + + return nn.Sequential(*layers) + + def _make_layer(self, + block: BasicBlock, + in_channels: int, + channels: int, + num_blocks: int, + stride: int = 1) -> nn.Sequential: + """Make layer for PIDNet backbone. + Args: + block (BasicBlock): Basic block. + in_channels (int): Number of input channels. + channels (int): Number of output channels. + num_blocks (int): Number of blocks. + stride (int): Stride of the first block. Default: 1. + + Returns: + nn.Sequential: The Branch Layer. + """ + downsample = None + if stride != 1 or in_channels != channels * block.expansion: + downsample = ConvModule( + in_channels, + channels * block.expansion, + kernel_size=1, + stride=stride, + norm_cfg=self.norm_cfg, + act_cfg=None) + + layers = [block(in_channels, channels, stride, downsample)] + in_channels = channels * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + in_channels, + channels, + stride=1, + act_cfg_out=None if i == num_blocks - 1 else self.act_cfg)) + return nn.Sequential(*layers) + + def _make_single_layer(self, + block: Union[BasicBlock, Bottleneck], + in_channels: int, + channels: int, + stride: int = 1) -> nn.Module: + """Make single layer for PIDNet backbone. + Args: + block (BasicBlock or Bottleneck): Basic block or Bottleneck. + in_channels (int): Number of input channels. + channels (int): Number of output channels. + stride (int): Stride of the first block. Default: 1. + + Returns: + nn.Module + """ + + downsample = None + if stride != 1 or in_channels != channels * block.expansion: + downsample = ConvModule( + in_channels, + channels * block.expansion, + kernel_size=1, + stride=stride, + norm_cfg=self.norm_cfg, + act_cfg=None) + return block( + in_channels, channels, stride, downsample, act_cfg_out=None) + + def init_weights(self): + """Initialize the weights in backbone. + + Since the D branch is not initialized by the pre-trained model, we + initialize it with the same method as the ResNet. + """ + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + if self.init_cfg is not None: + assert 'checkpoint' in self.init_cfg, f'Only support ' \ + f'specify `Pretrained` in ' \ + f'`init_cfg` in ' \ + f'{self.__class__.__name__} ' + ckpt = CheckpointLoader.load_checkpoint( + self.init_cfg['checkpoint'], map_location='cpu') + self.load_state_dict(ckpt, strict=False) + + def forward(self, x: Tensor) -> Union[Tensor, Tuple[Tensor]]: + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tensor or tuple[Tensor]: If self.training is True, return + tuple[Tensor], else return Tensor. 
+ """ + w_out = x.shape[-1] // 8 + h_out = x.shape[-2] // 8 + + # stage 0-2 + x = self.stem(x) + + # stage 3 + x_i = self.relu(self.i_branch_layers[0](x)) + x_p = self.p_branch_layers[0](x) + x_d = self.d_branch_layers[0](x) + + comp_i = self.compression_1(x_i) + x_p = self.pag_1(x_p, comp_i) + diff_i = self.diff_1(x_i) + x_d += F.interpolate( + diff_i, + size=[h_out, w_out], + mode='bilinear', + align_corners=self.align_corners) + if self.training: + temp_p = x_p.clone() + + # stage 4 + x_i = self.relu(self.i_branch_layers[1](x_i)) + x_p = self.p_branch_layers[1](self.relu(x_p)) + x_d = self.d_branch_layers[1](self.relu(x_d)) + + comp_i = self.compression_2(x_i) + x_p = self.pag_2(x_p, comp_i) + diff_i = self.diff_2(x_i) + x_d += F.interpolate( + diff_i, + size=[h_out, w_out], + mode='bilinear', + align_corners=self.align_corners) + if self.training: + temp_d = x_d.clone() + + # stage 5 + x_i = self.i_branch_layers[2](x_i) + x_p = self.p_branch_layers[2](self.relu(x_p)) + x_d = self.d_branch_layers[2](self.relu(x_d)) + + x_i = self.spp(x_i) + x_i = F.interpolate( + x_i, + size=[h_out, w_out], + mode='bilinear', + align_corners=self.align_corners) + out = self.dfm(x_p, x_i, x_d) + return (temp_p, out, temp_d) if self.training else out diff --git a/mmseg/models/backbones/resnest.py b/mmseg/models/backbones/resnest.py new file mode 100644 index 0000000000000000000000000000000000000000..3cc380b4460915f476ffc1febcfc145a94fc7c7a --- /dev/null +++ b/mmseg/models/backbones/resnest.py @@ -0,0 +1,318 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from mmcv.cnn import build_conv_layer, build_norm_layer + +from mmseg.registry import MODELS +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNetV1d + + +class RSoftmax(nn.Module): + """Radix Softmax module in ``SplitAttentionConv2d``. + + Args: + radix (int): Radix of input. + groups (int): Groups of input. + """ + + def __init__(self, radix, groups): + super().__init__() + self.radix = radix + self.groups = groups + + def forward(self, x): + batch = x.size(0) + if self.radix > 1: + x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2) + x = F.softmax(x, dim=1) + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x + + +class SplitAttentionConv2d(nn.Module): + """Split-Attention Conv2d in ResNeSt. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int | tuple[int]): Same as nn.Conv2d. + stride (int | tuple[int]): Same as nn.Conv2d. + padding (int | tuple[int]): Same as nn.Conv2d. + dilation (int | tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels. Default: 4. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + dcn (dict): Config dict for DCN. Default: None. 
+ """ + + def __init__(self, + in_channels, + channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + radix=2, + reduction_factor=4, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None): + super().__init__() + inter_channels = max(in_channels * radix // reduction_factor, 32) + self.radix = radix + self.groups = groups + self.channels = channels + self.with_dcn = dcn is not None + self.dcn = dcn + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if self.with_dcn and not fallback_on_stride: + assert conv_cfg is None, 'conv_cfg must be None for DCN' + conv_cfg = dcn + self.conv = build_conv_layer( + conv_cfg, + in_channels, + channels * radix, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups * radix, + bias=False) + self.norm0_name, norm0 = build_norm_layer( + norm_cfg, channels * radix, postfix=0) + self.add_module(self.norm0_name, norm0) + self.relu = nn.ReLU(inplace=True) + self.fc1 = build_conv_layer( + None, channels, inter_channels, 1, groups=self.groups) + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, inter_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.fc2 = build_conv_layer( + None, inter_channels, channels * radix, 1, groups=self.groups) + self.rsoftmax = RSoftmax(radix, groups) + + @property + def norm0(self): + """nn.Module: the normalization layer named "norm0" """ + return getattr(self, self.norm0_name) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def forward(self, x): + x = self.conv(x) + x = self.norm0(x) + x = self.relu(x) + + batch, rchannel = x.shape[:2] + batch = x.size(0) + if self.radix > 1: + splits = x.view(batch, self.radix, -1, *x.shape[2:]) + gap = splits.sum(dim=1) + else: + gap = x + gap = F.adaptive_avg_pool2d(gap, 1) + gap = self.fc1(gap) + + gap = self.norm1(gap) + gap = self.relu(gap) + + atten = self.fc2(gap) + atten = self.rsoftmax(atten).view(batch, -1, 1, 1) + + if self.radix > 1: + attens = atten.view(batch, self.radix, -1, *atten.shape[2:]) + out = torch.sum(attens * splits, dim=1) + else: + out = atten * x + return out.contiguous() + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeSt. + + Args: + inplane (int): Input planes of this block. + planes (int): Middle planes of this block. + groups (int): Groups of conv2. + width_per_group (int): Width per group of conv2. 64x4d indicates + ``groups=64, width_per_group=4`` and 32x8d indicates + ``groups=32, width_per_group=8``. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Key word arguments for base class. 
+ """ + expansion = 4 + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + """Bottleneck block for ResNeSt.""" + super().__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.avg_down_stride = avg_down_stride and self.conv2_stride > 1 + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + self.with_modulated_dcn = False + self.conv2 = SplitAttentionConv2d( + width, + width, + kernel_size=3, + stride=1 if self.avg_down_stride else self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + radix=radix, + reduction_factor=reduction_factor, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + dcn=self.dcn) + delattr(self, self.norm2_name) + + if self.avg_down_stride: + self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) + + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + def forward(self, x): + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + + if self.avg_down_stride: + out = self.avd_layer(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@MODELS.register_module() +class ResNeSt(ResNetV1d): + """ResNeSt backbone. + + This backbone is the implementation of `ResNeSt: + Split-Attention Networks `_. + + Args: + groups (int): Number of groups of Bottleneck. Default: 1 + base_width (int): Base width of Bottleneck. Default: 4 + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Keyword arguments for ResNet. 
+ """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)), + 200: (Bottleneck, (3, 24, 36, 3)) + } + + def __init__(self, + groups=1, + base_width=4, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + self.groups = groups + self.base_width = base_width + self.radix = radix + self.reduction_factor = reduction_factor + self.avg_down_stride = avg_down_stride + super().__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + radix=self.radix, + reduction_factor=self.reduction_factor, + avg_down_stride=self.avg_down_stride, + **kwargs) diff --git a/mmseg/models/backbones/resnet.py b/mmseg/models/backbones/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..9226c90d85c938e76f322e58643ee9d7b17ba27b --- /dev/null +++ b/mmseg/models/backbones/resnet.py @@ -0,0 +1,712 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import build_conv_layer, build_norm_layer, build_plugin_layer +from mmengine.model import BaseModule +from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm + +from mmseg.registry import MODELS +from ..utils import ResLayer + + +class BasicBlock(BaseModule): + """Basic block for ResNet.""" + + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + init_cfg=None): + super().__init__(init_cfg) + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + 3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + conv_cfg, planes, planes, 3, padding=1, bias=False) + self.add_module(self.norm2_name, norm2) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +class Bottleneck(BaseModule): + """Bottleneck block for ResNet. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. 
+ """ + + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + init_cfg=None): + super().__init__(init_cfg) + assert style in ['pytorch', 'caffe'] + assert dcn is None or isinstance(dcn, dict) + assert plugins is None or isinstance(plugins, list) + if plugins is not None: + allowed_position = ['after_conv1', 'after_conv2', 'after_conv3'] + assert all(p['position'] in allowed_position for p in plugins) + + self.inplanes = inplanes + self.planes = planes + self.stride = stride + self.dilation = dilation + self.style = style + self.with_cp = with_cp + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.dcn = dcn + self.with_dcn = dcn is not None + self.plugins = plugins + self.with_plugins = plugins is not None + + if self.with_plugins: + # collect plugins for conv1/conv2/conv3 + self.after_conv1_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv1' + ] + self.after_conv2_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv2' + ] + self.after_conv3_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv3' + ] + + if self.style == 'pytorch': + self.conv1_stride = 1 + self.conv2_stride = stride + else: + self.conv1_stride = stride + self.conv2_stride = 1 + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + norm_cfg, planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + conv_cfg, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + dcn, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + conv_cfg, + planes, + planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + + if self.with_plugins: + self.after_conv1_plugin_names = self.make_block_plugins( + planes, self.after_conv1_plugins) + self.after_conv2_plugin_names = self.make_block_plugins( + planes, self.after_conv2_plugins) + self.after_conv3_plugin_names = self.make_block_plugins( + planes * self.expansion, self.after_conv3_plugins) + + def make_block_plugins(self, in_channels, plugins): + """make plugins for block. + + Args: + in_channels (int): Input channels of plugin. + plugins (list[dict]): List of plugins cfg to build. + + Returns: + list[str]: List of the names of plugin. 
+ """ + assert isinstance(plugins, list) + plugin_names = [] + for plugin in plugins: + plugin = plugin.copy() + name, layer = build_plugin_layer( + plugin, + in_channels=in_channels, + postfix=plugin.pop('postfix', '')) + assert not hasattr(self, name), f'duplicate plugin {name}' + self.add_module(name, layer) + plugin_names.append(name) + return plugin_names + + def forward_plugin(self, x, plugin_names): + """Forward function for plugins.""" + out = x + for name in plugin_names: + out = getattr(self, name)(x) + return out + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + @property + def norm3(self): + """nn.Module: normalization layer after the third convolution layer""" + return getattr(self, self.norm3_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + out = self.norm2(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@MODELS.register_module() +class ResNet(BaseModule): + """ResNet backbone. + + This backbone is the improved implementation of `Deep Residual Learning + for Image Recognition `_. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Default: 3. + stem_channels (int): Number of stem channels. Default: 64. + base_channels (int): Number of base channels of res layer. Default: 64. + num_stages (int): Resnet stages, normally 4. Default: 4. + strides (Sequence[int]): Strides of the first block of each stage. + Default: (1, 2, 2, 2). + dilations (Sequence[int]): Dilation of each stage. + Default: (1, 1, 1, 1). + out_indices (Sequence[int]): Output from which stages. + Default: (0, 1, 2, 3). + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. Default: 'pytorch'. + deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv. + Default: False. + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. Default: False. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. Default: -1. + conv_cfg (dict | None): Dictionary to construct and config conv layer. + When conv_cfg is None, cfg will be set to dict(type='Conv2d'). + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Default: dict(type='BN', requires_grad=True). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. 
+ dcn (dict | None): Dictionary to construct and config DCN conv layer. + When dcn is not None, conv_cfg must be None. Default: None. + stage_with_dcn (Sequence[bool]): Whether to set DCN conv for each + stage. The length of stage_with_dcn is equal to num_stages. + Default: (False, False, False, False). + plugins (list[dict]): List of plugins for stages, each dict contains: + + - cfg (dict, required): Cfg dict to build plugin. + + - position (str, required): Position inside block to insert plugin, + options: 'after_conv1', 'after_conv2', 'after_conv3'. + + - stages (tuple[bool], optional): Stages to apply plugin, length + should be same as 'num_stages'. + Default: None. + multi_grid (Sequence[int]|None): Multi grid dilation rates of last + stage. Default: None. + contract_dilation (bool): Whether contract first dilation of each layer + Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. Default: True. + pretrained (str, optional): model pretrained path. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + + Example: + >>> from mmseg.models import ResNet + >>> import torch + >>> self = ResNet(depth=18) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 64, 8, 8) + (1, 128, 4, 4) + (1, 256, 2, 2) + (1, 512, 1, 1) + """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + in_channels=3, + stem_channels=64, + base_channels=64, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + deep_stem=False, + avg_down=False, + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + dcn=None, + stage_with_dcn=(False, False, False, False), + plugins=None, + multi_grid=None, + contract_dilation=False, + with_cp=False, + zero_init_residual=True, + pretrained=None, + init_cfg=None): + super().__init__(init_cfg) + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + + self.pretrained = pretrained + self.zero_init_residual = zero_init_residual + block_init_cfg = None + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + block = self.arch_settings[depth][0] + if self.zero_init_residual: + if block is BasicBlock: + block_init_cfg = dict( + type='Constant', + val=0, + override=dict(name='norm2')) + elif block is Bottleneck: + block_init_cfg = dict( + type='Constant', + val=0, + override=dict(name='norm3')) + else: + raise TypeError('pretrained must be a str or None') + + self.depth = depth + self.stem_channels = stem_channels + self.base_channels = base_channels + 
self.num_stages = num_stages + assert num_stages >= 1 and num_stages <= 4 + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == num_stages + self.out_indices = out_indices + assert max(out_indices) < num_stages + self.style = style + self.deep_stem = deep_stem + self.avg_down = avg_down + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.with_cp = with_cp + self.norm_eval = norm_eval + self.dcn = dcn + self.stage_with_dcn = stage_with_dcn + if dcn is not None: + assert len(stage_with_dcn) == num_stages + self.plugins = plugins + self.multi_grid = multi_grid + self.contract_dilation = contract_dilation + self.block, stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + self.inplanes = stem_channels + + self._make_stem_layer(in_channels, stem_channels) + + self.res_layers = [] + for i, num_blocks in enumerate(self.stage_blocks): + stride = strides[i] + dilation = dilations[i] + dcn = self.dcn if self.stage_with_dcn[i] else None + if plugins is not None: + stage_plugins = self.make_stage_plugins(plugins, i) + else: + stage_plugins = None + # multi grid is applied to last layer only + stage_multi_grid = multi_grid if i == len( + self.stage_blocks) - 1 else None + planes = base_channels * 2**i + res_layer = self.make_res_layer( + block=self.block, + inplanes=self.inplanes, + planes=planes, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + avg_down=self.avg_down, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + dcn=dcn, + plugins=stage_plugins, + multi_grid=stage_multi_grid, + contract_dilation=contract_dilation, + init_cfg=block_init_cfg) + self.inplanes = planes * self.block.expansion + layer_name = f'layer{i+1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self._freeze_stages() + + self.feat_dim = self.block.expansion * base_channels * 2**( + len(self.stage_blocks) - 1) + + def make_stage_plugins(self, plugins, stage_idx): + """make plugins for ResNet 'stage_idx'th stage . + + Currently we support to insert 'context_block', + 'empirical_attention_block', 'nonlocal_block' into the backbone like + ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of + Bottleneck. + + An example of plugins format could be : + >>> plugins=[ + ... dict(cfg=dict(type='xxx', arg1='xxx'), + ... stages=(False, True, True, True), + ... position='after_conv2'), + ... dict(cfg=dict(type='yyy'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='1'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='2'), + ... stages=(True, True, True, True), + ... position='after_conv3') + ... ] + >>> self = ResNet(depth=18) + >>> stage_plugins = self.make_stage_plugins(plugins, 0) + >>> assert len(stage_plugins) == 3 + + Suppose 'stage_idx=0', the structure of blocks in the stage would be: + conv1-> conv2->conv3->yyy->zzz1->zzz2 + Suppose 'stage_idx=1', the structure of blocks in the stage would be: + conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2 + + If stages is missing, the plugin would be applied to all stages. + + Args: + plugins (list[dict]): List of plugins cfg to build. The postfix is + required if multiple same type plugins are inserted. 
+ stage_idx (int): Index of stage to build + + Returns: + list[dict]: Plugins for current stage + """ + stage_plugins = [] + for plugin in plugins: + plugin = plugin.copy() + stages = plugin.pop('stages', None) + assert stages is None or len(stages) == self.num_stages + # whether to insert plugin into current stage + if stages is None or stages[stage_idx]: + stage_plugins.append(plugin) + + return stage_plugins + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer(**kwargs) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def _make_stem_layer(self, in_channels, stem_channels): + """Make stem layer for ResNet.""" + if self.deep_stem: + self.stem = nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels // 2, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels)[1], + nn.ReLU(inplace=True)) + else: + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, stem_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + def _freeze_stages(self): + """Freeze stages param and norm stats.""" + if self.frozen_stages >= 0: + if self.deep_stem: + self.stem.eval() + for param in self.stem.parameters(): + param.requires_grad = False + else: + self.norm1.eval() + for m in [self.conv1, self.norm1]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def forward(self, x): + """Forward function.""" + if self.deep_stem: + x = self.stem(x) + else: + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super().train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + +@MODELS.register_module() +class ResNetV1c(ResNet): + """ResNetV1c variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv in + the input stem with three 3x3 convs. For more details please refer to `Bag + of Tricks for Image Classification with Convolutional Neural Networks + `_. + """ + + def __init__(self, **kwargs): + super().__init__(deep_stem=True, avg_down=False, **kwargs) + + +@MODELS.register_module() +class ResNetV1d(ResNet): + """ResNetV1d variant described in [1]_. 
+ + Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in + the input stem with three 3x3 convs. And in the downsampling block, a 2x2 + avg_pool with stride 2 is added before conv, whose stride is changed to 1. + """ + + def __init__(self, **kwargs): + super().__init__(deep_stem=True, avg_down=True, **kwargs) diff --git a/mmseg/models/backbones/resnext.py b/mmseg/models/backbones/resnext.py new file mode 100644 index 0000000000000000000000000000000000000000..67a244a12f61b78ee12e89e8b45868781208614c --- /dev/null +++ b/mmseg/models/backbones/resnext.py @@ -0,0 +1,150 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +from mmcv.cnn import build_conv_layer, build_norm_layer + +from mmseg.registry import MODELS +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNet + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeXt. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. + """ + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + **kwargs): + super().__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm2_name, norm2 = build_norm_layer( + self.norm_cfg, width, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + self.with_modulated_dcn = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + self.conv_cfg, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + self.dcn, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + +@MODELS.register_module() +class ResNeXt(ResNet): + """ResNeXt backbone. + + This backbone is the implementation of `Aggregated + Residual Transformations for Deep Neural + Networks `_. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Normally 3. + num_stages (int): Resnet stages, normally 4. + groups (int): Group of resnext. + base_width (int): Base width of resnext. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). 
-1 means + not freezing any parameters. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from mmseg.models import ResNeXt + >>> import torch + >>> self = ResNeXt(depth=50) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 256, 8, 8) + (1, 512, 4, 4) + (1, 1024, 2, 2) + (1, 2048, 1, 1) + """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, groups=1, base_width=4, **kwargs): + self.groups = groups + self.base_width = base_width + super().__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + **kwargs) diff --git a/mmseg/models/backbones/snnet.py b/mmseg/models/backbones/snnet.py new file mode 100644 index 0000000000000000000000000000000000000000..7c92af665a7f5caff4c401f8e978b22ebc93053d --- /dev/null +++ b/mmseg/models/backbones/snnet.py @@ -0,0 +1,521 @@ +# Copyright (c) OpenMMLab. All rights reserved.import math +import json +import math +import torch +import torch.nn as nn +from mmengine.model.weight_init import (constant_init, kaiming_init, + trunc_normal_) +from mmengine.model import ModuleList +from mmengine.runner.checkpoint import _load_checkpoint +from torch.nn.modules.batchnorm import _BatchNorm + +from ..builder import BACKBONES +from .mae import MAE +from mmengine.model import BaseModule +import numpy as np +from .lora import wrap_model_with_lora, Linear + +def rearrange_activations(activations): + n_channels = activations.shape[-1] + activations = activations.reshape(-1, n_channels) + return activations + +def ps_inv(x1, x2): + '''Least-squares solver given feature maps from two anchors. 
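+
+    Given flattened activations ``x1`` (produced by the front anchor) and
+    ``x2`` (expected by the rear anchor), solves for the affine map
+    ``(w, b)`` minimising ``||x1 @ w.T + b - x2||^2`` via the Moore-Penrose
+    pseudo-inverse; the pair is then used to initialise a
+    ``StitchingLayer`` with this closed-form least-squares fit.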
+    '''
+    x1 = rearrange_activations(x1)
+    x2 = rearrange_activations(x2)
+
+    if x1.shape[0] != x2.shape[0]:
+        raise ValueError('Spatial size of compared neurons must match when '
+                         'calculating pseudo inverse matrix.')
+
+    # Get transformation matrix shape
+    shape = list(x1.shape)
+    shape[-1] += 1
+
+    # Calculate pseudo inverse
+    x1_ones = torch.ones(shape)
+    x1_ones[:, :-1] = x1
+    A_ones = torch.matmul(torch.linalg.pinv(x1_ones), x2.to(x1_ones.device)).T
+
+    # Get weights and bias
+    w = A_ones[..., :-1]
+    b = A_ones[..., -1]
+
+    return w, b
+
+def reset_out_indices(front_depth=12, end_depth=24, out_indices=(9, 14, 19, 23)):
+    """Map the out indices of the large anchor onto the small anchor."""
+    block_ids = torch.tensor(list(range(front_depth)))
+    block_ids = block_ids[None, None, :].float()
+    end_mapping_ids = torch.nn.functional.interpolate(block_ids, end_depth)
+    end_mapping_ids = end_mapping_ids.squeeze().long().tolist()
+
+    small_out_indices = []
+    for i, idx in enumerate(end_mapping_ids):
+        if i in out_indices:
+            small_out_indices.append(idx)
+
+    return small_out_indices
+
+
+def get_stitch_configs_general_unequal(depths):
+    depths = sorted(depths)
+
+    total_configs = []
+
+    # anchor configurations
+    total_configs.append({'comb_id': [0], })
+    total_configs.append({'comb_id': [1], })
+
+    num_stitches = depths[0]
+    for i, blk_id in enumerate(range(num_stitches)):
+        if i == depths[0] - 1:
+            break
+        total_configs.append({
+            'comb_id': (0, 1),
+            'stitch_cfgs': (i, (i + 1) * (depths[1] // depths[0]))
+        })
+    return total_configs, num_stitches
+
+
+def get_stitch_configs_bidirection(depths):
+    depths = sorted(depths)
+
+    total_configs = []
+
+    # anchor configurations
+    total_configs.append({'comb_id': [0], })
+    total_configs.append({'comb_id': [1], })
+
+    num_stitches = depths[0]
+
+    # small --> large
+    sl_configs = []
+    for i, blk_id in enumerate(range(num_stitches)):
+        sl_configs.append({
+            'comb_id': [0, 1],
+            'stitch_cfgs': [
+                [i, (i + 1) * (depths[1] // depths[0])]
+            ],
+            'stitch_layer_ids': [i]
+        })
+
+    ls_configs = []
+    lsl_configs = []
+    block_ids = torch.tensor(list(range(depths[0])))
+    block_ids = block_ids[None, None, :].float()
+    end_mapping_ids = torch.nn.functional.interpolate(block_ids, depths[1])
+    end_mapping_ids = end_mapping_ids.squeeze().long().tolist()
+
+    # large --> small
+    for i in range(depths[1]):
+        if depths[1] != depths[0]:
+            if i % 2 == 1 and i < (depths[1] - 1):
+                ls_configs.append({
+                    'comb_id': [1, 0],
+                    'stitch_cfgs': [[i, end_mapping_ids[i] + 1]],
+                    'stitch_layer_ids': [i // (depths[1] // depths[0])]
+                })
+        else:
+            if i < (depths[1] - 1):
+                ls_configs.append({
+                    'comb_id': [1, 0],
+                    'stitch_cfgs': [[i, end_mapping_ids[i] + 1]],
+                    'stitch_layer_ids': [i // (depths[1] // depths[0])]
+                })
+
+    # large --> small --> large
+    for ls_cfg in ls_configs:
+        for sl_cfg in sl_configs:
+            if sl_cfg['stitch_layer_ids'][0] == depths[0] - 1:
+                continue
+            if sl_cfg['stitch_cfgs'][0][0] >= ls_cfg['stitch_cfgs'][0][1]:
+                lsl_configs.append({
+                    'comb_id': [1, 0, 1],
+                    'stitch_cfgs': [ls_cfg['stitch_cfgs'][0], sl_cfg['stitch_cfgs'][0]],
+                    'stitch_layer_ids': ls_cfg['stitch_layer_ids'] + sl_cfg['stitch_layer_ids']
+                })
+
+    # small --> large --> small
+    sls_configs = []
+    for sl_cfg in sl_configs:
+        for ls_cfg in ls_configs:
+            if ls_cfg['stitch_cfgs'][0][0] >= sl_cfg['stitch_cfgs'][0][1]:
+                sls_configs.append({
+                    'comb_id': [0, 1, 0],
+                    'stitch_cfgs': [sl_cfg['stitch_cfgs'][0], ls_cfg['stitch_cfgs'][0]],
+                    'stitch_layer_ids': sl_cfg['stitch_layer_ids'] + ls_cfg['stitch_layer_ids']
+                })
+
+    total_configs += sl_configs + ls_configs + lsl_configs + 
sls_configs + + anchor_ids = [] + sl_ids = [] + ls_ids = [] + lsl_ids = [] + sls_ids = [] + + for i, cfg in enumerate(total_configs): + comb_id = cfg['comb_id'] + + if len(comb_id) == 1: + anchor_ids.append(i) + continue + + if len(comb_id) == 2: + route = [] + front, end = cfg['stitch_cfgs'][0] + route.append([0, front]) + route.append([end, depths[comb_id[-1]]]) + cfg['route'] = route + if comb_id == [0, 1] and front != 11: + sl_ids.append(i) + elif comb_id == [1, 0]: + ls_ids.append(i) + + if len(comb_id) == 3: + route = [] + front_1, end_1 = cfg['stitch_cfgs'][0] + front_2, end_2 = cfg['stitch_cfgs'][1] + route.append([0, front_1]) + route.append([end_1, front_2]) + route.append([end_2, depths[comb_id[-1]]]) + cfg['route'] = route + + if comb_id == [1, 0, 1]: + lsl_ids.append(i) + elif comb_id == [0, 1, 0]: + sls_ids.append(i) + + cfg['stitch_layer_ids'].append(-1) + + model_combos = [(0, 1), (1, 0)] + return total_configs, model_combos, [len(sl_configs), len(ls_configs)], anchor_ids, sl_ids, ls_ids, lsl_ids, sls_ids + + +def format_out_features(outs, with_cls_token, hw_shape): + if len(outs[0].shape) == 4: + for i in range(len(outs)): + outs[i] = outs[i].permute(0, 3, 1, 2).contiguous() + else: + B, _, C = outs[0].shape + for i in range(len(outs)): + if with_cls_token: + # Remove class token and reshape token for decoder head + outs[i] = outs[i][:, 1:].reshape(B, hw_shape[0], hw_shape[1], + C).permute(0, 3, 1, 2).contiguous() + else: + outs[i] = outs[i].reshape(B, hw_shape[0], hw_shape[1], + C).permute(0, 3, 1, 2).contiguous() + return outs + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().zero_() + return module + +# import loralib as lora + + +class StitchingLayer(BaseModule): + def __init__(self, in_features=None, out_features=None, r=0): + super().__init__() + self.transform = Linear(in_features, out_features, r) + + def init_stitch_weights_bias(self, weight, bias): + self.transform.weight.data.copy_(weight) + self.transform.bias.data.copy_(bias) + + def forward(self, x): + out = self.transform(x) + return out + + +@BACKBONES.register_module() +class SNNetv1(BaseModule): + + def __init__(self, anchors=None): + super(SNNetv1, self).__init__() + self.anchors = nn.ModuleList() + for cfg in anchors: + mod = MAE(**cfg) + self.anchors.append(mod) + + self.with_cls_token = self.anchors[0].with_cls_token + + self.depths = [anc.num_layers for anc in self.anchors] + + # reset out indices of small + self.anchors[0].out_indices = reset_out_indices(self.depths[0], self.depths[1], self.anchors[1].out_indices) + + total_configs, num_stitches = get_stitch_configs_general_unequal(self.depths) + self.stitch_layers = nn.ModuleList([StitchingLayer(self.anchors[0].embed_dims, self.anchors[1].embed_dims) for _ in range(num_stitches)]) + + self.stitch_configs = {i: cfg for i, cfg in enumerate(total_configs)} + self.all_cfgs = list(self.stitch_configs.keys()) + self.num_configs = len(total_configs) + self.stitch_config_id = 0 + + def reset_stitch_id(self, stitch_config_id): + self.stitch_config_id = stitch_config_id + + def initialize_stitching_weights(self, x): + # logger = get_root_logger() + front, end = 0, 1 + with torch.no_grad(): + front_features = self.anchors[front].extract_block_features(x) + end_features = self.anchors[end].extract_block_features(x) + + for i, blk_id in enumerate(range(self.depths[0])): + front_id, end_id = i, (i + 1) * (self.depths[1] // self.depths[0]) + front_blk_feat = 
front_features[front_id] + end_blk_feat = end_features[end_id - 1] + w, b = ps_inv(front_blk_feat, end_blk_feat) + self.stitch_layers[i].init_stitch_weights_bias(w, b) + print(f'Initialized Stitching Model {front} to Model {end}, Layer {i}') + + def init_weights(self): + for anc in self.anchors: + anc.init_weights() + + def forward(self, x): + + # randomly sample a stitch at each training iteration + if self.training: + stitch_cfg_id = np.random.randint(0, self.num_configs) + else: + stitch_cfg_id = self.stitch_config_id + + comb_id = self.stitch_configs[stitch_cfg_id]['comb_id'] + + if len(comb_id) == 1: + outs, hw_shape = self.anchors[comb_id[0]](x) + # in case forwarding the smaller anchor + if comb_id[0] == 0: + for i, out_idx in enumerate(self.anchors[comb_id[0]].out_indices): + outs[i] = self.stitch_layers[out_idx](outs[i]) + + + else: + cfg = self.stitch_configs[stitch_cfg_id]['stitch_cfgs'] + x, outs, hw_shape = self.anchors[comb_id[0]].forward_until(x, blk_id=cfg[0]) + + for i, out_idx in enumerate(self.anchors[comb_id[0]].out_indices): + if out_idx < cfg[0]: + outs[i] = self.stitch_layers[out_idx](outs[i]) + + x = self.stitch_layers[cfg[0]](x) + if cfg[0] in self.anchors[comb_id[0]].out_indices: + outs[-1] = x + + B, _, C = x.shape + outs_2 = self.anchors[comb_id[1]].forward_from(x, blk_id=cfg[1]) + outs += outs_2 + + outs = format_out_features(outs, self.with_cls_token, hw_shape) + + return outs + + +@BACKBONES.register_module() +class SNNetv2(BaseModule): + + def __init__(self, anchors=None, selected_ids=[], include_sl=True, include_ls=True, include_lsl=True, include_sls=True, lora_r=0, pretrained=None): + super(SNNetv2, self).__init__() + + self.lora_r = lora_r + self.anchors = nn.ModuleList() + for cfg in anchors: + mod = MAE(**cfg) + self.anchors.append(mod) + + self.with_cls_token = self.anchors[0].with_cls_token + self.depths = [anc.num_layers for anc in self.anchors] + + # reset out indices of small + self.anchors[0].out_indices = reset_out_indices(self.depths[0], self.depths[1], self.anchors[1].out_indices) + + total_configs, model_combos, num_stitches, anchor_ids, sl_ids, ls_ids, lsl_ids, sls_ids = get_stitch_configs_bidirection(self.depths) + + self.stitch_layers = nn.ModuleList() + self.stitching_map_id = {} + + for i, (comb, num_sth) in enumerate(zip(model_combos, num_stitches)): + front, end = comb + temp = nn.ModuleList( + [StitchingLayer(self.anchors[front].embed_dims, self.anchors[end].embed_dims, lora_r) for _ in range(num_sth)]) + temp.append(nn.Identity()) + self.stitch_layers.append(temp) + + self.stitch_configs = {i: cfg for i, cfg in enumerate(total_configs)} + self.stitch_init_configs = {i: cfg for i, cfg in enumerate(total_configs) if len(cfg['comb_id']) == 2} + + self.selected_ids = selected_ids + if len(selected_ids) == 0: + self.all_cfgs = anchor_ids + + if include_sl: + self.all_cfgs += sl_ids + + if include_ls: + self.all_cfgs += ls_ids + + if include_lsl: + self.all_cfgs += lsl_ids + + if include_sls: + self.all_cfgs += sls_ids + else: + self.all_cfgs = selected_ids + + + self.trained_cfgs = {} + for idx in self.all_cfgs: + self.trained_cfgs[idx] = self.stitch_configs[idx] + + print(str(self.all_cfgs)) + self.num_configs = len(self.stitch_configs) + self.stitch_config_id = 0 + + def reset_stitch_id(self, stitch_config_id): + self.stitch_config_id = stitch_config_id + + def initialize_stitching_weights(self, x): + anchor_features = [] + for anchor in self.anchors: + with torch.no_grad(): + temp = anchor.extract_block_features(x) + 
anchor_features.append(temp) + + for idx, cfg in self.stitch_init_configs.items(): + comb_id = cfg['comb_id'] + if len(comb_id) == 2: + front_id, end_id = cfg['stitch_cfgs'][0] + stitch_layer_id = cfg['stitch_layer_ids'][0] + front_blk_feat = anchor_features[comb_id[0]][front_id] + end_blk_feat = anchor_features[comb_id[1]][end_id - 1] + w, b = ps_inv(front_blk_feat, end_blk_feat) + self.stitch_layers[comb_id[0]][stitch_layer_id].init_stitch_weights_bias(w, b) + print(f'Initialized Stitching Layer {cfg}') + + def resize_abs_pos_embed(self, state_dict): + pos_keys = [k for k in state_dict.keys() if 'pos_embed' in k] + + for pos_k in pos_keys: + anchor_id = int(pos_k.split('.')[1]) + # if 'pos_embed' in state_dict: + pos_embed_checkpoint = state_dict[pos_k] + embedding_size = pos_embed_checkpoint.shape[-1] + num_extra_tokens = self.anchors[anchor_id].pos_embed.shape[-2] - self.anchors[anchor_id].num_patches + # height (== width) for the checkpoint position embedding + orig_size = int( + (pos_embed_checkpoint.shape[-2] - num_extra_tokens)**0.5) + # height (== width) for the new position embedding + new_size = int(self.anchors[anchor_id].num_patches**0.5) + # class_token and dist_token are kept unchanged + if orig_size != new_size: + extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens] + # only the position tokens are interpolated + pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:] + pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, + embedding_size).permute( + 0, 3, 1, 2) + pos_tokens = torch.nn.functional.interpolate( + pos_tokens, + size=(new_size, new_size), + mode=self.anchors[anchor_id].interpolate_mode, + align_corners=False) + pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2) + new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1) + state_dict[pos_k] = new_pos_embed + return state_dict + + def init_weights(self): + for anc in self.anchors: + anc.init_weights() + def sampling_stitch_config(self): + flops_id = np.random.choice(len(self.flops_grouped_cfgs)) + self.stitch_config_id = np.random.choice(self.flops_grouped_cfgs[flops_id]) + + def get_stitch_parameters(self): + stitch_cfg_id = self.stitch_config_id + + comb_id = self.stitch_configs[stitch_cfg_id]['comb_id'] + + total_params = 0 + + # forward by a single anchor + if len(comb_id) == 1: + total_params += sum(p.numel() for p in self.anchors[comb_id[0]].parameters()) + # outs, hw_shape = self.anchors[comb_id[0]](x) + # in case forwarding the smaller anchor + if comb_id[0] == 0: + for i, out_idx in enumerate(self.anchors[comb_id[0]].out_indices): + total_params += sum([p.numel() for p in self.stitch_layers[0][out_idx].parameters()]) + + return total_params + + # forward among anchors + route = self.stitch_configs[stitch_cfg_id]['route'] + stitch_layer_ids = self.stitch_configs[stitch_cfg_id]['stitch_layer_ids'] + + # patch embeding + total_params += self.anchors[comb_id[0]].patch_embed_params() + + for i, (model_id, cfg) in enumerate(zip(comb_id, route)): + total_params += self.anchors[model_id].selective_params(cfg[0], cfg[1]) + + if model_id == 0: + mapping_idx = [idx for idx in self.anchors[model_id].out_indices if cfg[0] <= idx <= cfg[1]] + for j, out_idx in enumerate(mapping_idx): + total_params += sum([p.numel() for p in self.stitch_layers[model_id][out_idx].parameters()]) + + total_params += sum([p.numel() for p in self.stitch_layers[model_id][stitch_layer_ids[i]].parameters()]) + + return total_params + + def forward(self, x): + + if self.training: + self.sampling_stitch_config() + + 
stitch_cfg_id = self.stitch_config_id + + comb_id = self.stitch_configs[stitch_cfg_id]['comb_id'] + + # forward by a single anchor + if len(comb_id) == 1: + outs, hw_shape = self.anchors[comb_id[0]](x) + # in case forwarding the smaller anchor + if comb_id[0] == 0: + for i, out_idx in enumerate(self.anchors[comb_id[0]].out_indices): + outs[i] = self.stitch_layers[0][out_idx](outs[i]) + outs = format_out_features(outs, self.with_cls_token, hw_shape) + return outs + + # forward among anchors + route = self.stitch_configs[stitch_cfg_id]['route'] + stitch_layer_ids = self.stitch_configs[stitch_cfg_id]['stitch_layer_ids'] + + # patch embeding + x, hw_shape = self.anchors[comb_id[0]].forward_patch_embed(x) + final_outs = [] + + for i, (model_id, cfg) in enumerate(zip(comb_id, route)): + x, outs = self.anchors[model_id].selective_forward(x, cfg[0], cfg[1]) + if model_id == 0: + mapping_idx = [idx for idx in self.anchors[model_id].out_indices if cfg[0] <= idx <= cfg[1]] + for j, out_idx in enumerate(mapping_idx): + outs[j] = self.stitch_layers[model_id][out_idx](outs[j]) + + final_outs += outs + x = self.stitch_layers[model_id][stitch_layer_ids[i]](x) + + final_outs = format_out_features(final_outs, self.with_cls_token, hw_shape) + return final_outs + diff --git a/mmseg/models/backbones/stdc.py b/mmseg/models/backbones/stdc.py new file mode 100644 index 0000000000000000000000000000000000000000..758a3c92e07dc8d2051f670adf00d163019d758c --- /dev/null +++ b/mmseg/models/backbones/stdc.py @@ -0,0 +1,422 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Modified from https://github.com/MichaelFan01/STDC-Seg.""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule, ModuleList, Sequential + +from mmseg.registry import MODELS +from ..utils import resize +from .bisenetv1 import AttentionRefinementModule + + +class STDCModule(BaseModule): + """STDCModule. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels before scaling. + stride (int): The number of stride for the first conv layer. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): The activation config for conv layers. + num_convs (int): Numbers of conv layers. + fusion_type (str): Type of fusion operation. Default: 'add'. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
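+
+    Example:
+        An illustrative sketch (values are arbitrary): a stride-2 ``add``
+        fusion module halves the resolution while expanding the channels.
+        >>> import torch
+        >>> m = STDCModule(64, 256, stride=2, norm_cfg=dict(type='BN'))
+        >>> tuple(m(torch.rand(1, 64, 32, 32)).shape)
+        (1, 256, 16, 16)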
+ """ + + def __init__(self, + in_channels, + out_channels, + stride, + norm_cfg=None, + act_cfg=None, + num_convs=4, + fusion_type='add', + init_cfg=None): + super().__init__(init_cfg=init_cfg) + assert num_convs > 1 + assert fusion_type in ['add', 'cat'] + self.stride = stride + self.with_downsample = True if self.stride == 2 else False + self.fusion_type = fusion_type + + self.layers = ModuleList() + conv_0 = ConvModule( + in_channels, out_channels // 2, kernel_size=1, norm_cfg=norm_cfg) + + if self.with_downsample: + self.downsample = ConvModule( + out_channels // 2, + out_channels // 2, + kernel_size=3, + stride=2, + padding=1, + groups=out_channels // 2, + norm_cfg=norm_cfg, + act_cfg=None) + + if self.fusion_type == 'add': + self.layers.append(nn.Sequential(conv_0, self.downsample)) + self.skip = Sequential( + ConvModule( + in_channels, + in_channels, + kernel_size=3, + stride=2, + padding=1, + groups=in_channels, + norm_cfg=norm_cfg, + act_cfg=None), + ConvModule( + in_channels, + out_channels, + 1, + norm_cfg=norm_cfg, + act_cfg=None)) + else: + self.layers.append(conv_0) + self.skip = nn.AvgPool2d(kernel_size=3, stride=2, padding=1) + else: + self.layers.append(conv_0) + + for i in range(1, num_convs): + out_factor = 2**(i + 1) if i != num_convs - 1 else 2**i + self.layers.append( + ConvModule( + out_channels // 2**i, + out_channels // out_factor, + kernel_size=3, + stride=1, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, inputs): + if self.fusion_type == 'add': + out = self.forward_add(inputs) + else: + out = self.forward_cat(inputs) + return out + + def forward_add(self, inputs): + layer_outputs = [] + x = inputs.clone() + for layer in self.layers: + x = layer(x) + layer_outputs.append(x) + if self.with_downsample: + inputs = self.skip(inputs) + + return torch.cat(layer_outputs, dim=1) + inputs + + def forward_cat(self, inputs): + x0 = self.layers[0](inputs) + layer_outputs = [x0] + for i, layer in enumerate(self.layers[1:]): + if i == 0: + if self.with_downsample: + x = layer(self.downsample(x0)) + else: + x = layer(x0) + else: + x = layer(x) + layer_outputs.append(x) + if self.with_downsample: + layer_outputs[0] = self.skip(x0) + return torch.cat(layer_outputs, dim=1) + + +class FeatureFusionModule(BaseModule): + """Feature Fusion Module. This module is different from FeatureFusionModule + in BiSeNetV1. It uses two ConvModules in `self.attention` whose inter + channel number is calculated by given `scale_factor`, while + FeatureFusionModule in BiSeNetV1 only uses one ConvModule in + `self.conv_atten`. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + scale_factor (int): The number of channel scale factor. + Default: 4. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): The activation config for conv layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
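+
+    Example:
+        An illustrative sketch (values are arbitrary): the two inputs are
+        concatenated, so ``in_channels`` is the sum of their channels.
+        >>> import torch
+        >>> ffm = FeatureFusionModule(in_channels=512, out_channels=256)
+        >>> spatial = torch.rand(1, 256, 64, 64)
+        >>> context = torch.rand(1, 256, 64, 64)
+        >>> tuple(ffm(spatial, context).shape)
+        (1, 256, 64, 64)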
+ """ + + def __init__(self, + in_channels, + out_channels, + scale_factor=4, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + channels = out_channels // scale_factor + self.conv0 = ConvModule( + in_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg) + self.attention = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + ConvModule( + out_channels, + channels, + 1, + norm_cfg=None, + bias=False, + act_cfg=act_cfg), + ConvModule( + channels, + out_channels, + 1, + norm_cfg=None, + bias=False, + act_cfg=None), nn.Sigmoid()) + + def forward(self, spatial_inputs, context_inputs): + inputs = torch.cat([spatial_inputs, context_inputs], dim=1) + x = self.conv0(inputs) + attn = self.attention(x) + x_attn = x * attn + return x_attn + x + + +@MODELS.register_module() +class STDCNet(BaseModule): + """This backbone is the implementation of `Rethinking BiSeNet For Real-time + Semantic Segmentation `_. + + Args: + stdc_type (int): The type of backbone structure, + `STDCNet1` and`STDCNet2` denotes two main backbones in paper, + whose FLOPs is 813M and 1446M, respectively. + in_channels (int): The num of input_channels. + channels (tuple[int]): The output channels for each stage. + bottleneck_type (str): The type of STDC Module type, the value must + be 'add' or 'cat'. + norm_cfg (dict): Config dict for normalization layer. + act_cfg (dict): The activation config for conv layers. + num_convs (int): Numbers of conv layer at each STDC Module. + Default: 4. + with_final_conv (bool): Whether add a conv layer at the Module output. + Default: True. + pretrained (str, optional): Model pretrained path. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + + Example: + >>> import torch + >>> stdc_type = 'STDCNet1' + >>> in_channels = 3 + >>> channels = (32, 64, 256, 512, 1024) + >>> bottleneck_type = 'cat' + >>> inputs = torch.rand(1, 3, 1024, 2048) + >>> self = STDCNet(stdc_type, in_channels, + ... channels, bottleneck_type).eval() + >>> outputs = self.forward(inputs) + >>> for i in range(len(outputs)): + ... print(f'outputs[{i}].shape = {outputs[i].shape}') + outputs[0].shape = torch.Size([1, 256, 128, 256]) + outputs[1].shape = torch.Size([1, 512, 64, 128]) + outputs[2].shape = torch.Size([1, 1024, 32, 64]) + """ + + arch_settings = { + 'STDCNet1': [(2, 1), (2, 1), (2, 1)], + 'STDCNet2': [(2, 1, 1, 1), (2, 1, 1, 1, 1), (2, 1, 1)] + } + + def __init__(self, + stdc_type, + in_channels, + channels, + bottleneck_type, + norm_cfg, + act_cfg, + num_convs=4, + with_final_conv=False, + pretrained=None, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + assert stdc_type in self.arch_settings, \ + f'invalid structure {stdc_type} for STDCNet.' + assert bottleneck_type in ['add', 'cat'],\ + f'bottleneck_type must be `add` or `cat`, got {bottleneck_type}' + + assert len(channels) == 5,\ + f'invalid channels length {len(channels)} for STDCNet.' 
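+        # Note (illustrative): with channels=(32, 64, 256, 512, 1024), the
+        # two stem ConvModules below each halve the resolution, and every
+        # appended STDC stage halves it once more, so the three deep outputs
+        # returned by ``forward`` sit at strides 8, 16 and 32.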
+
+        self.in_channels = in_channels
+        self.channels = channels
+        self.stage_strides = self.arch_settings[stdc_type]
+        self.pretrained = pretrained
+        self.num_convs = num_convs
+        self.with_final_conv = with_final_conv
+
+        self.stages = ModuleList([
+            ConvModule(
+                self.in_channels,
+                self.channels[0],
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg),
+            ConvModule(
+                self.channels[0],
+                self.channels[1],
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg)
+        ])
+        # `self.num_shallow_features` is the number of shallow modules in
+        # `STDCNet`, which are noted as `Stage1` and `Stage2` in the original
+        # paper. Neither is used by later modules such as the Attention
+        # Refinement Module and the Feature Fusion Module, so they are cut
+        # from `outs`. Please refer to Figure 4 of the original paper for
+        # more details.
+        self.num_shallow_features = len(self.stages)
+
+        for strides in self.stage_strides:
+            idx = len(self.stages) - 1
+            self.stages.append(
+                self._make_stage(self.channels[idx], self.channels[idx + 1],
+                                 strides, norm_cfg, act_cfg, bottleneck_type))
+        # After appending, `self.stages` is a ModuleList including several
+        # shallow modules and STDCModules.
+        # (len(self.stages) ==
+        # self.num_shallow_features + len(self.stage_strides))
+        if self.with_final_conv:
+            self.final_conv = ConvModule(
+                self.channels[-1],
+                max(1024, self.channels[-1]),
+                1,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg)
+
+    def _make_stage(self, in_channels, out_channels, strides, norm_cfg,
+                    act_cfg, bottleneck_type):
+        layers = []
+        for i, stride in enumerate(strides):
+            layers.append(
+                STDCModule(
+                    in_channels if i == 0 else out_channels,
+                    out_channels,
+                    stride,
+                    norm_cfg,
+                    act_cfg,
+                    num_convs=self.num_convs,
+                    fusion_type=bottleneck_type))
+        return Sequential(*layers)
+
+    def forward(self, x):
+        outs = []
+        for stage in self.stages:
+            x = stage(x)
+            outs.append(x)
+        if self.with_final_conv:
+            outs[-1] = self.final_conv(outs[-1])
+        outs = outs[self.num_shallow_features:]
+        return tuple(outs)
+
+
+@MODELS.register_module()
+class STDCContextPathNet(BaseModule):
+    """STDCNet with Context Path. The `outs` below is a list of three feature
+    maps from deep to shallow, whose heights and widths increase from small to
+    big, respectively. The biggest feature map of `outs` is output to
+    `STDCHead`, where the Detail Loss is calculated against the Detail
+    Ground-truth. The other two feature maps are fed to the Attention
+    Refinement Modules, respectively. Besides, the biggest feature map of
+    `outs` and the last output of the Attention Refinement Module are
+    concatenated by the Feature Fusion Module. This fusion feature map
+    `feat_fuse` is then output to `decode_head`. For more details, please
+    refer to Figure 4 of the original paper.
+
+    Args:
+        backbone_cfg (dict): Config dict for the stdc backbone.
+        last_in_channels (tuple(int)): The number of channels of the last
+            two feature maps from the stdc backbone. Default: (1024, 512).
+        out_channels (int): The channels of output feature maps.
+            Default: 128.
+        ffm_cfg (dict): Config dict for the Feature Fusion Module. Default:
+            `dict(in_channels=512, out_channels=256, scale_factor=4)`.
+        upsample_mode (str): Algorithm used for upsampling:
+            ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |
+            ``'trilinear'``. Default: ``'nearest'``.
+        align_corners (bool, optional): align_corners argument of
+            F.interpolate. It must be `None` if upsample_mode is
+            ``'nearest'``. Default: None.
+        norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='BN'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + + Return: + outputs (tuple): The tuple of list of output feature map for + auxiliary heads and decoder head. + """ + + def __init__(self, + backbone_cfg, + last_in_channels=(1024, 512), + out_channels=128, + ffm_cfg=dict( + in_channels=512, out_channels=256, scale_factor=4), + upsample_mode='nearest', + align_corners=None, + norm_cfg=dict(type='BN'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.backbone = MODELS.build(backbone_cfg) + self.arms = ModuleList() + self.convs = ModuleList() + for channels in last_in_channels: + self.arms.append(AttentionRefinementModule(channels, out_channels)) + self.convs.append( + ConvModule( + out_channels, + out_channels, + 3, + padding=1, + norm_cfg=norm_cfg)) + self.conv_avg = ConvModule( + last_in_channels[0], out_channels, 1, norm_cfg=norm_cfg) + + self.ffm = FeatureFusionModule(**ffm_cfg) + + self.upsample_mode = upsample_mode + self.align_corners = align_corners + + def forward(self, x): + outs = list(self.backbone(x)) + avg = F.adaptive_avg_pool2d(outs[-1], 1) + avg_feat = self.conv_avg(avg) + + feature_up = resize( + avg_feat, + size=outs[-1].shape[2:], + mode=self.upsample_mode, + align_corners=self.align_corners) + arms_out = [] + for i in range(len(self.arms)): + x_arm = self.arms[i](outs[len(outs) - 1 - i]) + feature_up + feature_up = resize( + x_arm, + size=outs[len(outs) - 1 - i - 1].shape[2:], + mode=self.upsample_mode, + align_corners=self.align_corners) + feature_up = self.convs[i](feature_up) + arms_out.append(feature_up) + + feat_fuse = self.ffm(outs[0], arms_out[1]) + + # The `outputs` has four feature maps. + # `outs[0]` is outputted for `STDCHead` auxiliary head. + # Two feature maps of `arms_out` are outputted for auxiliary head. + # `feat_fuse` is outputted for decoder head. + outputs = [outs[0]] + list(arms_out) + [feat_fuse] + return tuple(outputs) diff --git a/mmseg/models/backbones/swin.py b/mmseg/models/backbones/swin.py new file mode 100644 index 0000000000000000000000000000000000000000..67b28a96e15fe81e8213d67518d664383a4fd255 --- /dev/null +++ b/mmseg/models/backbones/swin.py @@ -0,0 +1,757 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from collections import OrderedDict +from copy import deepcopy + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from mmcv.cnn import build_norm_layer +from mmcv.cnn.bricks.transformer import FFN, build_dropout +from mmengine.logging import print_log +from mmengine.model import BaseModule, ModuleList +from mmengine.model.weight_init import (constant_init, trunc_normal_, + trunc_normal_init) +from mmengine.runner import CheckpointLoader +from mmengine.utils import to_2tuple + +from mmseg.registry import MODELS +from ..utils.embed import PatchEmbed, PatchMerging + + +class WindowMSA(BaseModule): + """Window based multi-head self-attention (W-MSA) module with relative + position bias. + + Args: + embed_dims (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (tuple[int]): The height and width of the window. + qkv_bias (bool, optional): If True, add a learnable bias to q, k, v. + Default: True. + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + attn_drop_rate (float, optional): Dropout ratio of attention weight. 
+ Default: 0.0 + proj_drop_rate (float, optional): Dropout ratio of output. Default: 0. + init_cfg (dict | None, optional): The Config for initialization. + Default: None. + """ + + def __init__(self, + embed_dims, + num_heads, + window_size, + qkv_bias=True, + qk_scale=None, + attn_drop_rate=0., + proj_drop_rate=0., + init_cfg=None): + + super().__init__(init_cfg=init_cfg) + self.embed_dims = embed_dims + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_embed_dims = embed_dims // num_heads + self.scale = qk_scale or head_embed_dims**-0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), + num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # About 2x faster than original impl + Wh, Ww = self.window_size + rel_index_coords = self.double_step_seq(2 * Ww - 1, Wh, 1, Ww) + rel_position_index = rel_index_coords + rel_index_coords.T + rel_position_index = rel_position_index.flip(1).contiguous() + self.register_buffer('relative_position_index', rel_position_index) + + self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop_rate) + self.proj = nn.Linear(embed_dims, embed_dims) + self.proj_drop = nn.Dropout(proj_drop_rate) + + self.softmax = nn.Softmax(dim=-1) + + def init_weights(self): + trunc_normal_(self.relative_position_bias_table, std=0.02) + + def forward(self, x, mask=None): + """ + Args: + + x (tensor): input features with shape of (num_windows*B, N, C) + mask (tensor | None, Optional): mask with shape of (num_windows, + Wh*Ww, Wh*Ww), value should be between (-inf, 0]. + """ + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, + C // self.num_heads).permute(2, 0, 3, 1, 4) + # make torchscript happy (cannot use tensor as tuple) + q, k, v = qkv[0], qkv[1], qkv[2] + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[ + self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], + self.window_size[0] * self.window_size[1], + -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B // nW, nW, self.num_heads, N, + N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + @staticmethod + def double_step_seq(step1, len1, step2, len2): + seq1 = torch.arange(0, step1 * len1, step1) + seq2 = torch.arange(0, step2 * len2, step2) + return (seq1[:, None] + seq2[None, :]).reshape(1, -1) + + +class ShiftWindowMSA(BaseModule): + """Shifted Window Multihead Self-Attention Module. + + Args: + embed_dims (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): The height and width of the window. + shift_size (int, optional): The shift step of each window towards + right-bottom. If zero, act as regular window-msa. Defaults to 0. + qkv_bias (bool, optional): If True, add a learnable bias to q, k, v. + Default: True + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Defaults: None. + attn_drop_rate (float, optional): Dropout ratio of attention weight. 
+ Defaults: 0. + proj_drop_rate (float, optional): Dropout ratio of output. + Defaults: 0. + dropout_layer (dict, optional): The dropout_layer used before output. + Defaults: dict(type='DropPath', drop_prob=0.). + init_cfg (dict, optional): The extra config for initialization. + Default: None. + """ + + def __init__(self, + embed_dims, + num_heads, + window_size, + shift_size=0, + qkv_bias=True, + qk_scale=None, + attn_drop_rate=0, + proj_drop_rate=0, + dropout_layer=dict(type='DropPath', drop_prob=0.), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + self.window_size = window_size + self.shift_size = shift_size + assert 0 <= self.shift_size < self.window_size + + self.w_msa = WindowMSA( + embed_dims=embed_dims, + num_heads=num_heads, + window_size=to_2tuple(window_size), + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop_rate=attn_drop_rate, + proj_drop_rate=proj_drop_rate, + init_cfg=None) + + self.drop = build_dropout(dropout_layer) + + def forward(self, query, hw_shape): + B, L, C = query.shape + H, W = hw_shape + assert L == H * W, 'input feature has wrong size' + query = query.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + query = F.pad(query, (0, 0, 0, pad_r, 0, pad_b)) + H_pad, W_pad = query.shape[1], query.shape[2] + + # cyclic shift + if self.shift_size > 0: + shifted_query = torch.roll( + query, + shifts=(-self.shift_size, -self.shift_size), + dims=(1, 2)) + + # calculate attention mask for SW-MSA + img_mask = torch.zeros((1, H_pad, W_pad, 1), device=query.device) + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, + -self.shift_size), slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, + -self.shift_size), slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + # nW, window_size, window_size, 1 + mask_windows = self.window_partition(img_mask) + mask_windows = mask_windows.view( + -1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, + float(-100.0)).masked_fill( + attn_mask == 0, float(0.0)) + else: + shifted_query = query + attn_mask = None + + # nW*B, window_size, window_size, C + query_windows = self.window_partition(shifted_query) + # nW*B, window_size*window_size, C + query_windows = query_windows.view(-1, self.window_size**2, C) + + # W-MSA/SW-MSA (nW*B, window_size*window_size, C) + attn_windows = self.w_msa(query_windows, mask=attn_mask) + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, + self.window_size, C) + + # B H' W' C + shifted_x = self.window_reverse(attn_windows, H_pad, W_pad) + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll( + shifted_x, + shifts=(self.shift_size, self.shift_size), + dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + x = self.drop(x) + return x + + def window_reverse(self, windows, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + window_size = self.window_size + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, 
window_size, + window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + def window_partition(self, x): + """ + Args: + x: (B, H, W, C) + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + window_size = self.window_size + x = x.view(B, H // window_size, window_size, W // window_size, + window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous() + windows = windows.view(-1, window_size, window_size, C) + return windows + + +class SwinBlock(BaseModule): + """" + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + window_size (int, optional): The local window scale. Default: 7. + shift (bool, optional): whether to shift window or not. Default False. + qkv_bias (bool, optional): enable bias for qkv if True. Default: True. + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + drop_rate (float, optional): Dropout rate. Default: 0. + attn_drop_rate (float, optional): Attention dropout rate. Default: 0. + drop_path_rate (float, optional): Stochastic depth rate. Default: 0. + act_cfg (dict, optional): The config dict of activation function. + Default: dict(type='GELU'). + norm_cfg (dict, optional): The config dict of normalization. + Default: dict(type='LN'). + with_cp (bool, optional): Use checkpoint or not. Using checkpoint + will save some memory while slowing down the training speed. + Default: False. + init_cfg (dict | list | None, optional): The init config. + Default: None. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + window_size=7, + shift=False, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + with_cp=False, + init_cfg=None): + + super().__init__(init_cfg=init_cfg) + + self.with_cp = with_cp + + self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1] + self.attn = ShiftWindowMSA( + embed_dims=embed_dims, + num_heads=num_heads, + window_size=window_size, + shift_size=window_size // 2 if shift else 0, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop_rate=attn_drop_rate, + proj_drop_rate=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + init_cfg=None) + + self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1] + self.ffn = FFN( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + num_fcs=2, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + act_cfg=act_cfg, + add_identity=True, + init_cfg=None) + + def forward(self, x, hw_shape): + + def _inner_forward(x): + identity = x + x = self.norm1(x) + x = self.attn(x, hw_shape) + + x = x + identity + + identity = x + x = self.norm2(x) + x = self.ffn(x, identity=identity) + + return x + + if self.with_cp and x.requires_grad: + x = cp.checkpoint(_inner_forward, x) + else: + x = _inner_forward(x) + + return x + + +class SwinBlockSequence(BaseModule): + """Implements one stage in Swin Transformer. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + depth (int): The number of blocks in this stage. + window_size (int, optional): The local window scale. Default: 7. + qkv_bias (bool, optional): enable bias for qkv if True. Default: True. 
+ qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + drop_rate (float, optional): Dropout rate. Default: 0. + attn_drop_rate (float, optional): Attention dropout rate. Default: 0. + drop_path_rate (float | list[float], optional): Stochastic depth + rate. Default: 0. + downsample (BaseModule | None, optional): The downsample operation + module. Default: None. + act_cfg (dict, optional): The config dict of activation function. + Default: dict(type='GELU'). + norm_cfg (dict, optional): The config dict of normalization. + Default: dict(type='LN'). + with_cp (bool, optional): Use checkpoint or not. Using checkpoint + will save some memory while slowing down the training speed. + Default: False. + init_cfg (dict | list | None, optional): The init config. + Default: None. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + depth, + window_size=7, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + downsample=None, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + with_cp=False, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + if isinstance(drop_path_rate, list): + drop_path_rates = drop_path_rate + assert len(drop_path_rates) == depth + else: + drop_path_rates = [deepcopy(drop_path_rate) for _ in range(depth)] + + self.blocks = ModuleList() + for i in range(depth): + block = SwinBlock( + embed_dims=embed_dims, + num_heads=num_heads, + feedforward_channels=feedforward_channels, + window_size=window_size, + shift=False if i % 2 == 0 else True, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop_rate=drop_rate, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rates[i], + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + init_cfg=None) + self.blocks.append(block) + + self.downsample = downsample + + def forward(self, x, hw_shape): + for block in self.blocks: + x = block(x, hw_shape) + + if self.downsample: + x_down, down_hw_shape = self.downsample(x, hw_shape) + return x_down, down_hw_shape, x, hw_shape + else: + return x, hw_shape, x, hw_shape + + +@MODELS.register_module() +class SwinTransformer(BaseModule): + """Swin Transformer backbone. + + This backbone is the implementation of `Swin Transformer: + Hierarchical Vision Transformer using Shifted + Windows `_. + Inspiration from https://github.com/microsoft/Swin-Transformer. + + Args: + pretrain_img_size (int | tuple[int]): The size of input image when + pretrain. Defaults: 224. + in_channels (int): The num of input channels. + Defaults: 3. + embed_dims (int): The feature dimension. Default: 96. + patch_size (int | tuple[int]): Patch size. Default: 4. + window_size (int): Window size. Default: 7. + mlp_ratio (int | float): Ratio of mlp hidden dim to embedding dim. + Default: 4. + depths (tuple[int]): Depths of each Swin Transformer stage. + Default: (2, 2, 6, 2). + num_heads (tuple[int]): Parallel attention heads of each Swin + Transformer stage. Default: (3, 6, 12, 24). + strides (tuple[int]): The patch merging or patch embedding stride of + each Swin Transformer stage. (In swin, we set kernel size equal to + stride.) Default: (4, 2, 2, 2). + out_indices (tuple[int]): Output from which stages. + Default: (0, 1, 2, 3). + qkv_bias (bool, optional): If True, add a learnable bias to query, key, + value. Default: True + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. 
+        patch_norm (bool): Whether to add a norm layer for patch embed and
+            patch merging. Default: True.
+        drop_rate (float): Dropout rate. Default: 0.
+        attn_drop_rate (float): Attention dropout rate. Default: 0.
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1.
+        use_abs_pos_embed (bool): If True, add absolute position embedding to
+            the patch embedding. Default: False.
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='GELU').
+        norm_cfg (dict): Config dict for normalization layer at
+            output of backbone. Default: dict(type='LN').
+        with_cp (bool, optional): Use checkpoint or not. Using checkpoint
+            will save some memory while slowing down the training speed.
+            Default: False.
+        pretrained (str, optional): Model pretrained path. Default: None.
+        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+            -1 means not freezing any parameters.
+        init_cfg (dict, optional): The Config for initialization.
+            Defaults to None.
+    """
+
+    def __init__(self,
+                 pretrain_img_size=224,
+                 in_channels=3,
+                 embed_dims=96,
+                 patch_size=4,
+                 window_size=7,
+                 mlp_ratio=4,
+                 depths=(2, 2, 6, 2),
+                 num_heads=(3, 6, 12, 24),
+                 strides=(4, 2, 2, 2),
+                 out_indices=(0, 1, 2, 3),
+                 qkv_bias=True,
+                 qk_scale=None,
+                 patch_norm=True,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.1,
+                 use_abs_pos_embed=False,
+                 act_cfg=dict(type='GELU'),
+                 norm_cfg=dict(type='LN'),
+                 with_cp=False,
+                 pretrained=None,
+                 frozen_stages=-1,
+                 init_cfg=None):
+        self.frozen_stages = frozen_stages
+
+        if isinstance(pretrain_img_size, int):
+            pretrain_img_size = to_2tuple(pretrain_img_size)
+        elif isinstance(pretrain_img_size, tuple):
+            if len(pretrain_img_size) == 1:
+                pretrain_img_size = to_2tuple(pretrain_img_size[0])
+            assert len(pretrain_img_size) == 2, \
+                'The size of image should have length 1 or 2, ' \
+                f'but got {len(pretrain_img_size)}'
+
+        assert not (init_cfg and pretrained), \
+            'init_cfg and pretrained cannot be specified at the same time'
+        if isinstance(pretrained, str):
+            warnings.warn('DeprecationWarning: pretrained is deprecated, '
+                          'please use "init_cfg" instead')
+            init_cfg = dict(type='Pretrained', checkpoint=pretrained)
+        elif pretrained is None:
+            init_cfg = init_cfg
+        else:
+            raise TypeError('pretrained must be a str or None')
+
+        super().__init__(init_cfg=init_cfg)
+
+        num_layers = len(depths)
+        self.out_indices = out_indices
+        self.use_abs_pos_embed = use_abs_pos_embed
+
+        assert strides[0] == patch_size, 'Use non-overlapping patch embed.'
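+
+        # Note (illustrative): with the defaults (patch_size=4,
+        # strides=(4, 2, 2, 2), depths=(2, 2, 6, 2)), a 224x224 input yields
+        # a 56x56 token grid; each PatchMerging halves it, so the four stages
+        # operate on 56x56, 28x28, 14x14 and 7x7 grids.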
+ + self.patch_embed = PatchEmbed( + in_channels=in_channels, + embed_dims=embed_dims, + conv_type='Conv2d', + kernel_size=patch_size, + stride=strides[0], + padding='corner', + norm_cfg=norm_cfg if patch_norm else None, + init_cfg=None) + + if self.use_abs_pos_embed: + patch_row = pretrain_img_size[0] // patch_size + patch_col = pretrain_img_size[1] // patch_size + num_patches = patch_row * patch_col + self.absolute_pos_embed = nn.Parameter( + torch.zeros((1, num_patches, embed_dims))) + + self.drop_after_pos = nn.Dropout(p=drop_rate) + + # set stochastic depth decay rule + total_depth = sum(depths) + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, total_depth) + ] + + self.stages = ModuleList() + in_channels = embed_dims + for i in range(num_layers): + if i < num_layers - 1: + downsample = PatchMerging( + in_channels=in_channels, + out_channels=2 * in_channels, + stride=strides[i + 1], + norm_cfg=norm_cfg if patch_norm else None, + init_cfg=None) + else: + downsample = None + + stage = SwinBlockSequence( + embed_dims=in_channels, + num_heads=num_heads[i], + feedforward_channels=int(mlp_ratio * in_channels), + depth=depths[i], + window_size=window_size, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop_rate=drop_rate, + attn_drop_rate=attn_drop_rate, + drop_path_rate=dpr[sum(depths[:i]):sum(depths[:i + 1])], + downsample=downsample, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + init_cfg=None) + self.stages.append(stage) + if downsample: + in_channels = downsample.out_channels + + self.num_features = [int(embed_dims * 2**i) for i in range(num_layers)] + # Add a norm layer for each output + for i in out_indices: + layer = build_norm_layer(norm_cfg, self.num_features[i])[1] + layer_name = f'norm{i}' + self.add_module(layer_name, layer) + + def train(self, mode=True): + """Convert the model into training mode while keep layers freezed.""" + super().train(mode) + self._freeze_stages() + + def _freeze_stages(self): + if self.frozen_stages >= 0: + self.patch_embed.eval() + for param in self.patch_embed.parameters(): + param.requires_grad = False + if self.use_abs_pos_embed: + self.absolute_pos_embed.requires_grad = False + self.drop_after_pos.eval() + + for i in range(1, self.frozen_stages + 1): + + if (i - 1) in self.out_indices: + norm_layer = getattr(self, f'norm{i-1}') + norm_layer.eval() + for param in norm_layer.parameters(): + param.requires_grad = False + + m = self.stages[i - 1] + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self): + if self.init_cfg is None: + print_log(f'No pre-trained weights for ' + f'{self.__class__.__name__}, ' + f'training start from scratch') + if self.use_abs_pos_embed: + trunc_normal_(self.absolute_pos_embed, std=0.02) + for m in self.modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=.02, bias=0.) + elif isinstance(m, nn.LayerNorm): + constant_init(m, val=1.0, bias=0.) 
+ else: + assert 'checkpoint' in self.init_cfg, f'Only support ' \ + f'specify `Pretrained` in ' \ + f'`init_cfg` in ' \ + f'{self.__class__.__name__} ' + ckpt = CheckpointLoader.load_checkpoint( + self.init_cfg['checkpoint'], logger=None, map_location='cpu') + if 'state_dict' in ckpt: + _state_dict = ckpt['state_dict'] + elif 'model' in ckpt: + _state_dict = ckpt['model'] + else: + _state_dict = ckpt + + state_dict = OrderedDict() + for k, v in _state_dict.items(): + if k.startswith('backbone.'): + state_dict[k[9:]] = v + else: + state_dict[k] = v + + # strip prefix of state_dict + if list(state_dict.keys())[0].startswith('module.'): + state_dict = {k[7:]: v for k, v in state_dict.items()} + + # reshape absolute position embedding + if state_dict.get('absolute_pos_embed') is not None: + absolute_pos_embed = state_dict['absolute_pos_embed'] + N1, L, C1 = absolute_pos_embed.size() + N2, C2, H, W = self.absolute_pos_embed.size() + if N1 != N2 or C1 != C2 or L != H * W: + print_log('Error in loading absolute_pos_embed, pass') + else: + state_dict['absolute_pos_embed'] = absolute_pos_embed.view( + N2, H, W, C2).permute(0, 3, 1, 2).contiguous() + + # interpolate position bias table if needed + relative_position_bias_table_keys = [ + k for k in state_dict.keys() + if 'relative_position_bias_table' in k + ] + for table_key in relative_position_bias_table_keys: + table_pretrained = state_dict[table_key] + if table_key in self.state_dict(): + table_current = self.state_dict()[table_key] + L1, nH1 = table_pretrained.size() + L2, nH2 = table_current.size() + if nH1 != nH2: + print_log(f'Error in loading {table_key}, pass') + elif L1 != L2: + S1 = int(L1**0.5) + S2 = int(L2**0.5) + table_pretrained_resized = F.interpolate( + table_pretrained.permute(1, 0).reshape( + 1, nH1, S1, S1), + size=(S2, S2), + mode='bicubic') + state_dict[table_key] = table_pretrained_resized.view( + nH2, L2).permute(1, 0).contiguous() + + # load state_dict + self.load_state_dict(state_dict, strict=False) + + def forward(self, x): + x, hw_shape = self.patch_embed(x) + + if self.use_abs_pos_embed: + x = x + self.absolute_pos_embed + x = self.drop_after_pos(x) + + outs = [] + for i, stage in enumerate(self.stages): + x, hw_shape, out, out_hw_shape = stage(x, hw_shape) + if i in self.out_indices: + norm_layer = getattr(self, f'norm{i}') + out = norm_layer(out) + out = out.view(-1, *out_hw_shape, + self.num_features[i]).permute(0, 3, 1, + 2).contiguous() + outs.append(out) + + return outs diff --git a/mmseg/models/backbones/timm_backbone.py b/mmseg/models/backbones/timm_backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..1eef302bddeac3cee71412bcb481b68b796e515f --- /dev/null +++ b/mmseg/models/backbones/timm_backbone.py @@ -0,0 +1,63 @@ +# Copyright (c) OpenMMLab. All rights reserved. +try: + import timm +except ImportError: + timm = None + +from mmengine.model import BaseModule +from mmengine.registry import MODELS as MMENGINE_MODELS + +from mmseg.registry import MODELS + + +@MODELS.register_module() +class TIMMBackbone(BaseModule): + """Wrapper to use backbones from timm library. More details can be found in + `timm `_ . + + Args: + model_name (str): Name of timm model to instantiate. + pretrained (bool): Load pretrained weights if True. + checkpoint_path (str): Path of checkpoint to load after + model is initialized. + in_channels (int): Number of input image channels. Default: 3. + init_cfg (dict, optional): Initialization config dict + **kwargs: Other timm & model specific arguments. 
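+
+    Example:
+        An illustrative sketch (requires ``timm``; the model name is
+        arbitrary):
+        >>> import torch
+        >>> backbone = TIMMBackbone(model_name='resnet18', pretrained=False)
+        >>> feats = backbone(torch.rand(1, 3, 224, 224))
+        >>> [f.shape[1] for f in feats]
+        [64, 64, 128, 256, 512]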
+ """ + + def __init__( + self, + model_name, + features_only=True, + pretrained=True, + checkpoint_path='', + in_channels=3, + init_cfg=None, + **kwargs, + ): + if timm is None: + raise RuntimeError('timm is not installed') + super().__init__(init_cfg) + if 'norm_layer' in kwargs: + kwargs['norm_layer'] = MMENGINE_MODELS.get(kwargs['norm_layer']) + self.timm_model = timm.create_model( + model_name=model_name, + features_only=features_only, + pretrained=pretrained, + in_chans=in_channels, + checkpoint_path=checkpoint_path, + **kwargs, + ) + + # Make unused parameters None + self.timm_model.global_pool = None + self.timm_model.fc = None + self.timm_model.classifier = None + + # Hack to use pretrained weights from timm + if pretrained or checkpoint_path: + self._is_init = True + + def forward(self, x): + features = self.timm_model(x) + return features diff --git a/mmseg/models/backbones/twins.py b/mmseg/models/backbones/twins.py new file mode 100644 index 0000000000000000000000000000000000000000..b6a6eea795cf53bee6b52ece80d5d90ecc969970 --- /dev/null +++ b/mmseg/models/backbones/twins.py @@ -0,0 +1,588 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +import warnings + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import build_norm_layer +from mmcv.cnn.bricks.drop import build_dropout +from mmcv.cnn.bricks.transformer import FFN +from mmengine.model import BaseModule, ModuleList +from mmengine.model.weight_init import (constant_init, normal_init, + trunc_normal_init) +from torch.nn.modules.batchnorm import _BatchNorm + +from mmseg.models.backbones.mit import EfficientMultiheadAttention +from mmseg.registry import MODELS +from ..utils.embed import PatchEmbed + + +class GlobalSubsampledAttention(EfficientMultiheadAttention): + """Global Sub-sampled Attention (Spatial Reduction Attention) + + This module is modified from EfficientMultiheadAttention, + which is a module from mmseg.models.backbones.mit.py. + Specifically, there is no difference between + `GlobalSubsampledAttention` and `EfficientMultiheadAttention`, + `GlobalSubsampledAttention` is built as a brand new class + because it is renamed as `Global sub-sampled attention (GSA)` + in paper. + + + Args: + embed_dims (int): The embedding dimension. + num_heads (int): Parallel attention heads. + attn_drop (float): A Dropout layer on attn_output_weights. + Default: 0.0. + proj_drop (float): A Dropout layer after `nn.MultiheadAttention`. + Default: 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. Default: None. + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dims) + or (n, batch, embed_dims). Default: False. + qkv_bias (bool): enable bias for qkv if True. Default: True. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + sr_ratio (int): The ratio of spatial reduction of GSA of PCPVT. + Default: 1. + init_cfg (dict, optional): The Config for initialization. + Defaults to None. 
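+
+    Example:
+        An illustrative sketch (values are arbitrary): the output keeps the
+        input token shape; ``sr_ratio`` only subsamples keys and values
+        internally.
+        >>> import torch
+        >>> gsa = GlobalSubsampledAttention(embed_dims=64, num_heads=2,
+        ...                                 sr_ratio=2)
+        >>> x = torch.rand(1, 16 * 16, 64)
+        >>> tuple(gsa(x, hw_shape=(16, 16)).shape)
+        (1, 256, 64)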
+ """ + + def __init__(self, + embed_dims, + num_heads, + attn_drop=0., + proj_drop=0., + dropout_layer=None, + batch_first=True, + qkv_bias=True, + norm_cfg=dict(type='LN'), + sr_ratio=1, + init_cfg=None): + super().__init__( + embed_dims, + num_heads, + attn_drop=attn_drop, + proj_drop=proj_drop, + dropout_layer=dropout_layer, + batch_first=batch_first, + qkv_bias=qkv_bias, + norm_cfg=norm_cfg, + sr_ratio=sr_ratio, + init_cfg=init_cfg) + + +class GSAEncoderLayer(BaseModule): + """Implements one encoder layer with GSA. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + drop_rate (float): Probability of an element to be zeroed + after the feed forward layer. Default: 0.0. + attn_drop_rate (float): The drop out rate for attention layer. + Default: 0.0. + drop_path_rate (float): Stochastic depth rate. Default 0.0. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + qkv_bias (bool): Enable bias for qkv if True. Default: True + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + sr_ratio (float): Kernel_size of conv in Attention modules. Default: 1. + init_cfg (dict, optional): The Config for initialization. + Defaults to None. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + num_fcs=2, + qkv_bias=True, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + sr_ratio=1., + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1] + self.attn = GlobalSubsampledAttention( + embed_dims=embed_dims, + num_heads=num_heads, + attn_drop=attn_drop_rate, + proj_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + qkv_bias=qkv_bias, + norm_cfg=norm_cfg, + sr_ratio=sr_ratio) + + self.norm2 = build_norm_layer(norm_cfg, embed_dims, postfix=2)[1] + self.ffn = FFN( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + num_fcs=num_fcs, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + act_cfg=act_cfg, + add_identity=False) + + self.drop_path = build_dropout( + dict(type='DropPath', drop_prob=drop_path_rate) + ) if drop_path_rate > 0. else nn.Identity() + + def forward(self, x, hw_shape): + x = x + self.drop_path(self.attn(self.norm1(x), hw_shape, identity=0.)) + x = x + self.drop_path(self.ffn(self.norm2(x))) + return x + + +class LocallyGroupedSelfAttention(BaseModule): + """Locally-grouped Self Attention (LSA) module. + + Args: + embed_dims (int): Number of input channels. + num_heads (int): Number of attention heads. Default: 8 + qkv_bias (bool, optional): If True, add a learnable bias to q, k, v. + Default: False. + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + attn_drop_rate (float, optional): Dropout ratio of attention weight. + Default: 0.0 + proj_drop_rate (float, optional): Dropout ratio of output. Default: 0. + window_size(int): Window size of LSA. Default: 1. + init_cfg (dict, optional): The Config for initialization. + Defaults to None. 
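+
+    Example:
+        An illustrative sketch (values are arbitrary): attention is computed
+        within non-overlapping ``window_size`` x ``window_size`` groups
+        (padding as needed), and the token shape is preserved.
+        >>> import torch
+        >>> lsa = LocallyGroupedSelfAttention(embed_dims=64, num_heads=4,
+        ...                                   window_size=7)
+        >>> x = torch.rand(1, 16 * 16, 64)
+        >>> tuple(lsa(x, hw_shape=(16, 16)).shape)
+        (1, 256, 64)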
+ """ + + def __init__(self, + embed_dims, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop_rate=0., + proj_drop_rate=0., + window_size=1, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + assert embed_dims % num_heads == 0, f'dim {embed_dims} should be ' \ + f'divided by num_heads ' \ + f'{num_heads}.' + self.embed_dims = embed_dims + self.num_heads = num_heads + head_dim = embed_dims // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop_rate) + self.proj = nn.Linear(embed_dims, embed_dims) + self.proj_drop = nn.Dropout(proj_drop_rate) + self.window_size = window_size + + def forward(self, x, hw_shape): + b, n, c = x.shape + h, w = hw_shape + x = x.view(b, h, w, c) + + # pad feature maps to multiples of Local-groups + pad_l = pad_t = 0 + pad_r = (self.window_size - w % self.window_size) % self.window_size + pad_b = (self.window_size - h % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + + # calculate attention mask for LSA + Hp, Wp = x.shape[1:-1] + _h, _w = Hp // self.window_size, Wp // self.window_size + mask = torch.zeros((1, Hp, Wp), device=x.device) + mask[:, -pad_b:, :].fill_(1) + mask[:, :, -pad_r:].fill_(1) + + # [B, _h, _w, window_size, window_size, C] + x = x.reshape(b, _h, self.window_size, _w, self.window_size, + c).transpose(2, 3) + mask = mask.reshape(1, _h, self.window_size, _w, + self.window_size).transpose(2, 3).reshape( + 1, _h * _w, + self.window_size * self.window_size) + # [1, _h*_w, window_size*window_size, window_size*window_size] + attn_mask = mask.unsqueeze(2) - mask.unsqueeze(3) + attn_mask = attn_mask.masked_fill(attn_mask != 0, + float(-1000.0)).masked_fill( + attn_mask == 0, float(0.0)) + + # [3, B, _w*_h, nhead, window_size*window_size, dim] + qkv = self.qkv(x).reshape(b, _h * _w, + self.window_size * self.window_size, 3, + self.num_heads, c // self.num_heads).permute( + 3, 0, 1, 4, 2, 5) + q, k, v = qkv[0], qkv[1], qkv[2] + # [B, _h*_w, n_head, window_size*window_size, window_size*window_size] + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn + attn_mask.unsqueeze(2) + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + attn = (attn @ v).transpose(2, 3).reshape(b, _h, _w, self.window_size, + self.window_size, c) + x = attn.transpose(2, 3).reshape(b, _h * self.window_size, + _w * self.window_size, c) + if pad_r > 0 or pad_b > 0: + x = x[:, :h, :w, :].contiguous() + + x = x.reshape(b, n, c) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class LSAEncoderLayer(BaseModule): + """Implements one encoder layer in Twins-SVT. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + drop_rate (float): Probability of an element to be zeroed + after the feed forward layer. Default: 0.0. + attn_drop_rate (float, optional): Dropout ratio of attention weight. + Default: 0.0 + drop_path_rate (float): Stochastic depth rate. Default 0.0. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + qkv_bias (bool): Enable bias for qkv if True. Default: True + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). 
+ window_size (int): Window size of LSA. Default: 1. + init_cfg (dict, optional): The Config for initialization. + Defaults to None. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + num_fcs=2, + qkv_bias=True, + qk_scale=None, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + window_size=1, + init_cfg=None): + + super().__init__(init_cfg=init_cfg) + + self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1] + self.attn = LocallyGroupedSelfAttention(embed_dims, num_heads, + qkv_bias, qk_scale, + attn_drop_rate, drop_rate, + window_size) + + self.norm2 = build_norm_layer(norm_cfg, embed_dims, postfix=2)[1] + self.ffn = FFN( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + num_fcs=num_fcs, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + act_cfg=act_cfg, + add_identity=False) + + self.drop_path = build_dropout( + dict(type='DropPath', drop_prob=drop_path_rate) + ) if drop_path_rate > 0. else nn.Identity() + + def forward(self, x, hw_shape): + x = x + self.drop_path(self.attn(self.norm1(x), hw_shape)) + x = x + self.drop_path(self.ffn(self.norm2(x))) + return x + + +class ConditionalPositionEncoding(BaseModule): + """The Conditional Position Encoding (CPE) module. + + The CPE is the implementation of 'Conditional Positional Encodings + for Vision Transformers '_. + + Args: + in_channels (int): Number of input channels. + embed_dims (int): The feature dimension. Default: 768. + stride (int): Stride of conv layer. Default: 1. + """ + + def __init__(self, in_channels, embed_dims=768, stride=1, init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.proj = nn.Conv2d( + in_channels, + embed_dims, + kernel_size=3, + stride=stride, + padding=1, + bias=True, + groups=embed_dims) + self.stride = stride + + def forward(self, x, hw_shape): + b, n, c = x.shape + h, w = hw_shape + feat_token = x + cnn_feat = feat_token.transpose(1, 2).view(b, c, h, w) + if self.stride == 1: + x = self.proj(cnn_feat) + cnn_feat + else: + x = self.proj(cnn_feat) + x = x.flatten(2).transpose(1, 2) + return x + + +@MODELS.register_module() +class PCPVT(BaseModule): + """The backbone of Twins-PCPVT. + + This backbone is the implementation of `Twins: Revisiting the Design + of Spatial Attention in Vision Transformers + `_. + + Args: + in_channels (int): Number of input channels. Default: 3. + embed_dims (list): Embedding dimension. Default: [64, 128, 256, 512]. + patch_sizes (list): The patch sizes. Default: [4, 2, 2, 2]. + strides (list): The strides. Default: [4, 2, 2, 2]. + num_heads (int): Number of attention heads. Default: [1, 2, 4, 8]. + mlp_ratios (int): Ratio of mlp hidden dim to embedding dim. + Default: [4, 4, 4, 4]. + out_indices (tuple[int]): Output from which stages. + Default: (0, 1, 2, 3). + qkv_bias (bool): Enable bias for qkv if True. Default: False. + drop_rate (float): Probability of an element to be zeroed. + Default 0. + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): Stochastic depth rate. Default 0.0 + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + depths (list): Depths of each stage. Default [3, 4, 6, 3] + sr_ratios (list): Kernel_size of conv in each Attn module in + Transformer encoder layer. Default: [8, 4, 2, 1]. + norm_after_stage(bool): Add extra norm. Default False. + init_cfg (dict, optional): The Config for initialization. 
+ Defaults to None. + """ + + def __init__(self, + in_channels=3, + embed_dims=[64, 128, 256, 512], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + num_heads=[1, 2, 4, 8], + mlp_ratios=[4, 4, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=False, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_cfg=dict(type='LN'), + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + norm_after_stage=False, + pretrained=None, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + self.depths = depths + + # patch_embed + self.patch_embeds = ModuleList() + self.position_encoding_drops = ModuleList() + self.layers = ModuleList() + + for i in range(len(depths)): + self.patch_embeds.append( + PatchEmbed( + in_channels=in_channels if i == 0 else embed_dims[i - 1], + embed_dims=embed_dims[i], + conv_type='Conv2d', + kernel_size=patch_sizes[i], + stride=strides[i], + padding='corner', + norm_cfg=norm_cfg)) + + self.position_encoding_drops.append(nn.Dropout(p=drop_rate)) + + self.position_encodings = ModuleList([ + ConditionalPositionEncoding(embed_dim, embed_dim) + for embed_dim in embed_dims + ]) + + # transformer encoder + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + cur = 0 + + for k in range(len(depths)): + _block = ModuleList([ + GSAEncoderLayer( + embed_dims=embed_dims[k], + num_heads=num_heads[k], + feedforward_channels=mlp_ratios[k] * embed_dims[k], + attn_drop_rate=attn_drop_rate, + drop_rate=drop_rate, + drop_path_rate=dpr[cur + i], + num_fcs=2, + qkv_bias=qkv_bias, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + sr_ratio=sr_ratios[k]) for i in range(depths[k]) + ]) + self.layers.append(_block) + cur += depths[k] + + self.norm_name, norm = build_norm_layer( + norm_cfg, embed_dims[-1], postfix=1) + + self.out_indices = out_indices + self.norm_after_stage = norm_after_stage + if self.norm_after_stage: + self.norm_list = ModuleList() + for dim in embed_dims: + self.norm_list.append(build_norm_layer(norm_cfg, dim)[1]) + + def init_weights(self): + if self.init_cfg is not None: + super().init_weights() + else: + for m in self.modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=.02, bias=0.) + elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): + constant_init(m, val=1.0, bias=0.) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[ + 1] * m.out_channels + fan_out //= m.groups + normal_init( + m, mean=0, std=math.sqrt(2.0 / fan_out), bias=0) + + def forward(self, x): + outputs = list() + + b = x.shape[0] + + for i in range(len(self.depths)): + x, hw_shape = self.patch_embeds[i](x) + h, w = hw_shape + x = self.position_encoding_drops[i](x) + for j, blk in enumerate(self.layers[i]): + x = blk(x, hw_shape) + if j == 0: + x = self.position_encodings[i](x, hw_shape) + if self.norm_after_stage: + x = self.norm_list[i](x) + x = x.reshape(b, h, w, -1).permute(0, 3, 1, 2).contiguous() + + if i in self.out_indices: + outputs.append(x) + + return tuple(outputs) + + +@MODELS.register_module() +class SVT(PCPVT): + """The backbone of Twins-SVT. 
+ + This backbone is the implementation of `Twins: Revisiting the Design + of Spatial Attention in Vision Transformers + `_. + + Args: + in_channels (int): Number of input channels. Default: 3. + embed_dims (list): Embedding dimension. Default: [64, 128, 256, 512]. + patch_sizes (list): The patch sizes. Default: [4, 2, 2, 2]. + strides (list): The strides. Default: [4, 2, 2, 2]. + num_heads (int): Number of attention heads. Default: [1, 2, 4]. + mlp_ratios (int): Ratio of mlp hidden dim to embedding dim. + Default: [4, 4, 4]. + out_indices (tuple[int]): Output from which stages. + Default: (0, 1, 2, 3). + qkv_bias (bool): Enable bias for qkv if True. Default: False. + drop_rate (float): Dropout rate. Default 0. + attn_drop_rate (float): Dropout ratio of attention weight. + Default 0.0 + drop_path_rate (float): Stochastic depth rate. Default 0.2. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + depths (list): Depths of each stage. Default [4, 4, 4]. + sr_ratios (list): Kernel_size of conv in each Attn module in + Transformer encoder layer. Default: [4, 2, 1]. + windiow_sizes (list): Window size of LSA. Default: [7, 7, 7], + input_features_slice(bool): Input features need slice. Default: False. + norm_after_stage(bool): Add extra norm. Default False. + strides (list): Strides in patch-Embedding modules. Default: (2, 2, 2) + init_cfg (dict, optional): The Config for initialization. + Defaults to None. + """ + + def __init__(self, + in_channels=3, + embed_dims=[64, 128, 256], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + num_heads=[1, 2, 4], + mlp_ratios=[4, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=False, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + norm_cfg=dict(type='LN'), + depths=[4, 4, 4], + sr_ratios=[4, 2, 1], + windiow_sizes=[7, 7, 7], + norm_after_stage=True, + pretrained=None, + init_cfg=None): + super().__init__(in_channels, embed_dims, patch_sizes, strides, + num_heads, mlp_ratios, out_indices, qkv_bias, + drop_rate, attn_drop_rate, drop_path_rate, norm_cfg, + depths, sr_ratios, norm_after_stage, pretrained, + init_cfg) + # transformer encoder + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + + for k in range(len(depths)): + for i in range(depths[k]): + if i % 2 == 0: + self.layers[k][i] = \ + LSAEncoderLayer( + embed_dims=embed_dims[k], + num_heads=num_heads[k], + feedforward_channels=mlp_ratios[k] * embed_dims[k], + drop_rate=drop_rate, + attn_drop_rate=attn_drop_rate, + drop_path_rate=dpr[sum(depths[:k])+i], + qkv_bias=qkv_bias, + window_size=windiow_sizes[k]) diff --git a/mmseg/models/backbones/unet.py b/mmseg/models/backbones/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..545921db8e14668e454f5834f9a1618fe0c04ffe --- /dev/null +++ b/mmseg/models/backbones/unet.py @@ -0,0 +1,436 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer +from mmengine.model import BaseModule +from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm + +from mmseg.registry import MODELS +from ..utils import UpConvBlock, Upsample + + +class BasicConvBlock(nn.Module): + """Basic convolutional block for UNet. + + This module consists of several plain convolutional layers. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. 
+ num_convs (int): Number of convolutional layers. Default: 2. + stride (int): Whether use stride convolution to downsample + the input feature map. If stride=2, it only uses stride convolution + in the first convolutional layer to downsample the input feature + map. Options are 1 or 2. Default: 1. + dilation (int): Whether use dilated convolution to expand the + receptive field. Set dilation rate of each convolutional layer and + the dilation rate of the first convolutional layer is always 1. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + dcn=None, + plugins=None): + super().__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.with_cp = with_cp + convs = [] + for i in range(num_convs): + convs.append( + ConvModule( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride if i == 0 else 1, + dilation=1 if i == 0 else dilation, + padding=1 if i == 0 else dilation, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + self.convs = nn.Sequential(*convs) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.convs, x) + else: + out = self.convs(x) + return out + + +@MODELS.register_module() +class DeconvModule(nn.Module): + """Deconvolution upsample module in decoder for UNet (2X upsample). + + This module uses deconvolution to upsample feature map in the decoder + of UNet. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + kernel_size (int): Kernel size of the convolutional layer. Default: 4. + """ + + def __init__(self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + kernel_size=4, + scale_factor=2): + super().__init__() + + assert (kernel_size - scale_factor >= 0) and\ + (kernel_size - scale_factor) % 2 == 0,\ + f'kernel_size should be greater than or equal to scale_factor '\ + f'and (kernel_size - scale_factor) should be even numbers, '\ + f'while the kernel size is {kernel_size} and scale_factor is '\ + f'{scale_factor}.' 
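+        # The transposed-conv output size is
+        #   H_out = (H_in - 1) * stride - 2 * padding + kernel_size,
+        # so the defaults kernel_size=4 and scale_factor=2 give stride=2,
+        # padding=1 and H_out = (H_in - 1) * 2 - 2 + 4 = 2 * H_in: an exact
+        # 2x upsample, which is why the evenness check above is required.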
+ + stride = scale_factor + padding = (kernel_size - scale_factor) // 2 + self.with_cp = with_cp + deconv = nn.ConvTranspose2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding) + + norm_name, norm = build_norm_layer(norm_cfg, out_channels) + activate = build_activation_layer(act_cfg) + self.deconv_upsamping = nn.Sequential(deconv, norm, activate) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.deconv_upsamping, x) + else: + out = self.deconv_upsamping(x) + return out + + +@MODELS.register_module() +class InterpConv(nn.Module): + """Interpolation upsample module in decoder for UNet. + + This module uses interpolation to upsample feature map in the decoder + of UNet. It consists of one interpolation upsample layer and one + convolutional layer. It can be one interpolation upsample layer followed + by one convolutional layer (conv_first=False) or one convolutional layer + followed by one interpolation upsample layer (conv_first=True). + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + conv_first (bool): Whether convolutional layer or interpolation + upsample layer first. Default: False. It means interpolation + upsample layer followed by one convolutional layer. + kernel_size (int): Kernel size of the convolutional layer. Default: 1. + stride (int): Stride of the convolutional layer. Default: 1. + padding (int): Padding of the convolutional layer. Default: 1. + upsample_cfg (dict): Interpolation config of the upsample layer. + Default: dict( + scale_factor=2, mode='bilinear', align_corners=False). + """ + + def __init__(self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + conv_cfg=None, + conv_first=False, + kernel_size=1, + stride=1, + padding=0, + upsample_cfg=dict( + scale_factor=2, mode='bilinear', align_corners=False)): + super().__init__() + + self.with_cp = with_cp + conv = ConvModule( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + upsample = Upsample(**upsample_cfg) + if conv_first: + self.interp_upsample = nn.Sequential(conv, upsample) + else: + self.interp_upsample = nn.Sequential(upsample, conv) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.interp_upsample, x) + else: + out = self.interp_upsample(x) + return out + + +@MODELS.register_module() +class UNet(BaseModule): + """UNet backbone. + + This backbone is the implementation of `U-Net: Convolutional Networks + for Biomedical Image Segmentation `_. + + Args: + in_channels (int): Number of input image channels. Default" 3. + base_channels (int): Number of base channels of each stage. + The output channels of the first stage. Default: 64. + num_stages (int): Number of stages in encoder, normally 5. Default: 5. + strides (Sequence[int 1 | 2]): Strides of each stage in encoder. + len(strides) is equal to num_stages. 
Normally the stride of the + first stage in encoder is 1. If strides[i]=2, it uses stride + convolution to downsample in the correspondence encoder stage. + Default: (1, 1, 1, 1, 1). + enc_num_convs (Sequence[int]): Number of convolutional layers in the + convolution block of the correspondence encoder stage. + Default: (2, 2, 2, 2, 2). + dec_num_convs (Sequence[int]): Number of convolutional layers in the + convolution block of the correspondence decoder stage. + Default: (2, 2, 2, 2). + downsamples (Sequence[int]): Whether use MaxPool to downsample the + feature map after the first stage of encoder + (stages: [1, num_stages)). If the correspondence encoder stage use + stride convolution (strides[i]=2), it will never use MaxPool to + downsample, even downsamples[i-1]=True. + Default: (True, True, True, True). + enc_dilations (Sequence[int]): Dilation rate of each stage in encoder. + Default: (1, 1, 1, 1, 1). + dec_dilations (Sequence[int]): Dilation rate of each stage in decoder. + Default: (1, 1, 1, 1). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + pretrained (str, optional): model pretrained path. Default: None + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None + + Notice: + The input image size should be divisible by the whole downsample rate + of the encoder. More detail of the whole downsample rate can be found + in UNet._check_input_divisible. + """ + + def __init__(self, + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + dcn=None, + plugins=None, + pretrained=None, + init_cfg=None): + super().__init__(init_cfg) + + self.pretrained = pretrained + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + else: + raise TypeError('pretrained must be a str or None') + + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' 
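+        # Stage i of the encoder works on base_channels * 2**i channels
+        # (64, 128, 256, 512, 1024 with the defaults), and each enabled
+        # downsample halves the resolution, so the default
+        # downsamples=(True, True, True, True) requires inputs divisible
+        # by 2**4 = 16 (checked in _check_input_divisible below).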
+ assert len(strides) == num_stages, \ + 'The length of strides should be equal to num_stages, '\ + f'while the strides is {strides}, the length of '\ + f'strides is {len(strides)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_num_convs) == num_stages, \ + 'The length of enc_num_convs should be equal to num_stages, '\ + f'while the enc_num_convs is {enc_num_convs}, the length of '\ + f'enc_num_convs is {len(enc_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(dec_num_convs) == (num_stages-1), \ + 'The length of dec_num_convs should be equal to (num_stages-1), '\ + f'while the dec_num_convs is {dec_num_convs}, the length of '\ + f'dec_num_convs is {len(dec_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(downsamples) == (num_stages-1), \ + 'The length of downsamples should be equal to (num_stages-1), '\ + f'while the downsamples is {downsamples}, the length of '\ + f'downsamples is {len(downsamples)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_dilations) == num_stages, \ + 'The length of enc_dilations should be equal to num_stages, '\ + f'while the enc_dilations is {enc_dilations}, the length of '\ + f'enc_dilations is {len(enc_dilations)}, and the num_stages is '\ + f'{num_stages}.' + assert len(dec_dilations) == (num_stages-1), \ + 'The length of dec_dilations should be equal to (num_stages-1), '\ + f'while the dec_dilations is {dec_dilations}, the length of '\ + f'dec_dilations is {len(dec_dilations)}, and the num_stages is '\ + f'{num_stages}.' + self.num_stages = num_stages + self.strides = strides + self.downsamples = downsamples + self.norm_eval = norm_eval + self.base_channels = base_channels + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + for i in range(num_stages): + enc_conv_block = [] + if i != 0: + if strides[i] == 1 and downsamples[i - 1]: + enc_conv_block.append(nn.MaxPool2d(kernel_size=2)) + upsample = (strides[i] != 1 or downsamples[i - 1]) + self.decoder.append( + UpConvBlock( + conv_block=BasicConvBlock, + in_channels=base_channels * 2**i, + skip_channels=base_channels * 2**(i - 1), + out_channels=base_channels * 2**(i - 1), + num_convs=dec_num_convs[i - 1], + stride=1, + dilation=dec_dilations[i - 1], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + upsample_cfg=upsample_cfg if upsample else None, + dcn=None, + plugins=None)) + + enc_conv_block.append( + BasicConvBlock( + in_channels=in_channels, + out_channels=base_channels * 2**i, + num_convs=enc_num_convs[i], + stride=strides[i], + dilation=enc_dilations[i], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None)) + self.encoder.append(nn.Sequential(*enc_conv_block)) + in_channels = base_channels * 2**i + + def forward(self, x): + self._check_input_divisible(x) + enc_outs = [] + for enc in self.encoder: + x = enc(x) + enc_outs.append(x) + dec_outs = [x] + for i in reversed(range(len(self.decoder))): + x = self.decoder[i](enc_outs[i], x) + dec_outs.append(x) + + return dec_outs + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super().train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + def _check_input_divisible(self, x): + h, w = x.shape[-2:] + whole_downsample_rate = 1 + for i in range(1, self.num_stages): + if self.strides[i] == 2 or self.downsamples[i - 
1]: + whole_downsample_rate *= 2 + assert (h % whole_downsample_rate == 0) \ + and (w % whole_downsample_rate == 0),\ + f'The input image size {(h, w)} should be divisible by the whole '\ + f'downsample rate {whole_downsample_rate}, when num_stages is '\ + f'{self.num_stages}, strides is {self.strides}, and downsamples '\ + f'is {self.downsamples}.' diff --git a/mmseg/models/backbones/vit.py b/mmseg/models/backbones/vit.py new file mode 100644 index 0000000000000000000000000000000000000000..dd0f688fcc46680b13904a26f14269b3d19d6ce3 --- /dev/null +++ b/mmseg/models/backbones/vit.py @@ -0,0 +1,501 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +import warnings + +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import build_norm_layer +from mmcv.cnn.bricks.transformer import FFN, MultiheadAttention +from mmengine.logging import print_log +from mmengine.model import BaseModule, ModuleList +from mmengine.model.weight_init import (constant_init, kaiming_init, + trunc_normal_) +from mmengine.runner.checkpoint import CheckpointLoader, load_state_dict +from torch.nn.modules.batchnorm import _BatchNorm +from torch.nn.modules.utils import _pair as to_2tuple + +from mmseg.registry import MODELS +from ..utils import PatchEmbed, resize + + +class TransformerEncoderLayer(BaseModule): + """Implements one encoder layer in Vision Transformer. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + drop_rate (float): Probability of an element to be zeroed + after the feed forward layer. Default: 0.0. + attn_drop_rate (float): The drop out rate for attention layer. + Default: 0.0. + drop_path_rate (float): stochastic depth rate. Default 0.0. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + qkv_bias (bool): enable bias for qkv if True. Default: True + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dim) + or (n, batch, embed_dim). Default: True. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. Default: False. 
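+
+    Example (illustrative sketch; the sizes below are assumptions):
+
+        >>> import torch
+        >>> layer = TransformerEncoderLayer(
+        ...     embed_dims=768, num_heads=12, feedforward_channels=3072)
+        >>> tokens = torch.randn(2, 197, 768)  # 196 patches + 1 cls token
+        >>> layer(tokens).shape  # residual attention + FFN keep the shape
+        torch.Size([2, 197, 768])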
+ """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + num_fcs=2, + qkv_bias=True, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + batch_first=True, + attn_cfg=dict(), + ffn_cfg=dict(), + with_cp=False): + super().__init__() + + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, embed_dims, postfix=1) + self.add_module(self.norm1_name, norm1) + + attn_cfg.update( + dict( + embed_dims=embed_dims, + num_heads=num_heads, + attn_drop=attn_drop_rate, + proj_drop=drop_rate, + batch_first=batch_first, + bias=qkv_bias)) + + self.build_attn(attn_cfg) + + self.norm2_name, norm2 = build_norm_layer( + norm_cfg, embed_dims, postfix=2) + self.add_module(self.norm2_name, norm2) + + ffn_cfg.update( + dict( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + num_fcs=num_fcs, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate) + if drop_path_rate > 0 else None, + act_cfg=act_cfg)) + self.build_ffn(ffn_cfg) + self.with_cp = with_cp + + def build_attn(self, attn_cfg): + self.attn = MultiheadAttention(**attn_cfg) + + def build_ffn(self, ffn_cfg): + self.ffn = FFN(**ffn_cfg) + + @property + def norm1(self): + return getattr(self, self.norm1_name) + + @property + def norm2(self): + return getattr(self, self.norm2_name) + + def forward(self, x): + + def _inner_forward(x): + x = self.attn(self.norm1(x), identity=x) + x = self.ffn(self.norm2(x), identity=x) + return x + + if self.with_cp and x.requires_grad: + x = cp.checkpoint(_inner_forward, x) + else: + x = _inner_forward(x) + return x + + +@MODELS.register_module() +class VisionTransformer(BaseModule): + """Vision Transformer. + + This backbone is the implementation of `An Image is Worth 16x16 Words: + Transformers for Image Recognition at + Scale `_. + + Args: + img_size (int | tuple): Input image size. Default: 224. + patch_size (int): The patch size. Default: 16. + patch_pad (str | int | None): The padding method in patch embedding. + Default: 'corner'. + in_channels (int): Number of input channels. Default: 3. + embed_dims (int): embedding dimension. Default: 768. + num_layers (int): depth of transformer. Default: 12. + num_heads (int): number of attention heads. Default: 12. + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + out_origin (bool): Whether to output the original input embedding. + Default: False + out_indices (list | tuple | int): Output from which stages. + Default: -1. + qkv_bias (bool): enable bias for qkv if True. Default: True. + drop_rate (float): Probability of an element to be zeroed. + Default 0.0 + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): stochastic depth rate. Default 0.0 + with_cls_token (bool): Whether concatenating class token into image + tokens as transformer input. Default: True. + output_cls_token (bool): Whether output the cls_token. If set True, + `with_cls_token` must be True. Default: False. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + patch_bias (dict): Whether use bias in convolution of PatchEmbed Block. + Default: True. + patch_norm (bool): Whether to add a norm in PatchEmbed Block. + Default: False. + pre_norm (bool): Whether to add a norm before Transformer Layers. + Default: False. 
+ final_norm (bool): Whether to add a additional layer to normalize + final feature map. Default: False. + interpolate_mode (str): Select the interpolate mode for position + embeding vector resize. Default: bicubic. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. Default: False. + frozen_exclude (List): List of parameters that are not to be frozen. + Default: ["all"], "all" means there are no frozen parameters. + pretrained (str, optional): model pretrained path. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + img_size=224, + patch_size=16, + patch_pad='corner', + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_origin=False, + out_indices=-1, + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + with_cls_token=True, + output_cls_token=False, + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + patch_norm=False, + patch_bias=False, + pre_norm=False, + final_norm=False, + interpolate_mode='bicubic', + num_fcs=2, + norm_eval=False, + with_cp=False, + frozen_exclude=['all'], + pretrained=None, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + if isinstance(img_size, int): + img_size = to_2tuple(img_size) + elif isinstance(img_size, tuple): + if len(img_size) == 1: + img_size = to_2tuple(img_size[0]) + assert len(img_size) == 2, \ + f'The size of image should have length 1 or 2, ' \ + f'but got {len(img_size)}' + + if output_cls_token: + assert with_cls_token is True, f'with_cls_token must be True if' \ + f'set output_cls_token to True, but got {with_cls_token}' + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + + self.img_size = img_size + self.patch_size = patch_size + self.interpolate_mode = interpolate_mode + self.norm_eval = norm_eval + self.with_cp = with_cp + self.pretrained = pretrained + self.out_origin = out_origin + self.frozen_exclude = frozen_exclude + + self.patch_embed = PatchEmbed( + in_channels=in_channels, + embed_dims=embed_dims, + conv_type='Conv2d', + kernel_size=patch_size, + stride=patch_size, + padding=patch_pad, + bias=patch_bias, + norm_cfg=norm_cfg if patch_norm else None, + init_cfg=None, + ) + + num_patches = (img_size[0] // patch_size) * \ + (img_size[1] // patch_size) + + self.with_cls_token = with_cls_token + self.output_cls_token = output_cls_token + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims)) + self.pos_embed = nn.Parameter( + torch.zeros(1, num_patches + 1, embed_dims)) + self.drop_after_pos = nn.Dropout(p=drop_rate) + self.pre_norm = pre_norm + + if self.pre_norm: + self.pre_ln_name, pre_ln = build_norm_layer( + norm_cfg, embed_dims, postfix='_pre') + self.add_module(self.pre_ln_name, pre_ln) + + if isinstance(out_indices, int): + if out_indices == -1: + out_indices = num_layers - 1 + self.out_indices = [out_indices] + elif 
isinstance(out_indices, list) or isinstance(out_indices, tuple): + self.out_indices = out_indices + else: + raise TypeError('out_indices must be type of int, list or tuple') + + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, num_layers) + ] # stochastic depth decay rule + + self.layers = ModuleList() + for i in range(num_layers): + self.layers.append( + TransformerEncoderLayer( + embed_dims=embed_dims, + num_heads=num_heads, + feedforward_channels=mlp_ratio * embed_dims, + attn_drop_rate=attn_drop_rate, + drop_rate=drop_rate, + drop_path_rate=dpr[i], + num_fcs=num_fcs, + qkv_bias=qkv_bias, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + batch_first=True)) + + self.final_norm = final_norm + if final_norm: + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, embed_dims, postfix=1) + self.add_module(self.norm1_name, norm1) + + self._freeze() + + @property + def pre_ln(self): + return getattr(self, self.pre_ln_name) + + @property + def norm1(self): + return getattr(self, self.norm1_name) + + def init_weights(self): + if isinstance(self.init_cfg, dict) and \ + self.init_cfg.get('type') in ['Pretrained', 'Pretrained_Part']: + checkpoint = CheckpointLoader.load_checkpoint( + self.init_cfg['checkpoint'], logger=None, map_location='cpu') + + if self.init_cfg.get('type') == 'Pretrained': + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + elif self.init_cfg.get('type') == 'Pretrained_Part': + state_dict = checkpoint.copy() + para_prefix = 'image_encoder' + prefix_len = len(para_prefix) + 1 + for k, v in checkpoint.items(): + state_dict.pop(k) + if para_prefix in k: + state_dict[k[prefix_len:]] = v + + if 'pos_embed' in state_dict.keys(): + if self.pos_embed.shape != state_dict['pos_embed'].shape: + print_log(msg=f'Resize the pos_embed shape from ' + f'{state_dict["pos_embed"].shape} to ' + f'{self.pos_embed.shape}') + h, w = self.img_size + pos_size = int( + math.sqrt(state_dict['pos_embed'].shape[1] - 1)) + state_dict['pos_embed'] = self.resize_pos_embed( + state_dict['pos_embed'], + (h // self.patch_size, w // self.patch_size), + (pos_size, pos_size), self.interpolate_mode) + + load_state_dict(self, state_dict, strict=False, logger=None) + elif self.init_cfg is not None: + super().init_weights() + else: + # We only implement the 'jax_impl' initialization implemented at + # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 + trunc_normal_(self.pos_embed, std=.02) + trunc_normal_(self.cls_token, std=.02) + for n, m in self.named_modules(): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + if 'ffn' in n: + nn.init.normal_(m.bias, mean=0., std=1e-6) + else: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Conv2d): + kaiming_init(m, mode='fan_in', bias=0.) + elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): + constant_init(m, val=1.0, bias=0.) + + def _freeze(self): + if 'all' in self.frozen_exclude: + return + for name, param in self.named_parameters(): + if not any([exclude in name for exclude in self.frozen_exclude]): + param.requires_grad = False + + def _pos_embeding(self, patched_img, hw_shape, pos_embed): + """Positioning embeding method. + + Resize the pos_embed, if the input image size doesn't match + the training size. + Args: + patched_img (torch.Tensor): The patched image, it should be + shape of [B, L1, C]. + hw_shape (tuple): The downsampled image resolution. 
+ pos_embed (torch.Tensor): The pos_embed weighs, it should be + shape of [B, L2, c]. + Return: + torch.Tensor: The pos encoded image feature. + """ + assert patched_img.ndim == 3 and pos_embed.ndim == 3, \ + 'the shapes of patched_img and pos_embed must be [B, L, C]' + x_len, pos_len = patched_img.shape[1], pos_embed.shape[1] + if x_len != pos_len: + if pos_len == (self.img_size[0] // self.patch_size) * ( + self.img_size[1] // self.patch_size) + 1: + pos_h = self.img_size[0] // self.patch_size + pos_w = self.img_size[1] // self.patch_size + else: + raise ValueError( + 'Unexpected shape of pos_embed, got {}.'.format( + pos_embed.shape)) + pos_embed = self.resize_pos_embed(pos_embed, hw_shape, + (pos_h, pos_w), + self.interpolate_mode) + return self.drop_after_pos(patched_img + pos_embed) + + @staticmethod + def resize_pos_embed(pos_embed, input_shpae, pos_shape, mode): + """Resize pos_embed weights. + + Resize pos_embed using bicubic interpolate method. + Args: + pos_embed (torch.Tensor): Position embedding weights. + input_shpae (tuple): Tuple for (downsampled input image height, + downsampled input image width). + pos_shape (tuple): The resolution of downsampled origin training + image. + mode (str): Algorithm used for upsampling: + ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` | + ``'trilinear'``. Default: ``'nearest'`` + Return: + torch.Tensor: The resized pos_embed of shape [B, L_new, C] + """ + assert pos_embed.ndim == 3, 'shape of pos_embed must be [B, L, C]' + pos_h, pos_w = pos_shape + cls_token_weight = pos_embed[:, 0] + pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w):] + pos_embed_weight = pos_embed_weight.reshape( + 1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2) + pos_embed_weight = resize( + pos_embed_weight, size=input_shpae, align_corners=False, mode=mode) + cls_token_weight = cls_token_weight.unsqueeze(1) + pos_embed_weight = torch.flatten(pos_embed_weight, 2).transpose(1, 2) + pos_embed = torch.cat((cls_token_weight, pos_embed_weight), dim=1) + return pos_embed + + def forward(self, inputs): + B = inputs.shape[0] + + x, hw_shape = self.patch_embed(inputs) + + # stole cls_tokens impl from Phil Wang, thanks + cls_tokens = self.cls_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, x), dim=1) + x = self._pos_embeding(x, hw_shape, self.pos_embed) + + if not self.with_cls_token: + # Remove class token for transformer encoder input + x = x[:, 1:] + + if self.pre_norm: + x = self.pre_ln(x) + + outs = [] + if self.out_origin: + if self.with_cls_token: + # Remove class token and reshape token for decoder head + out = x[:, 1:] + else: + out = x + B, _, C = out.shape + out = out.reshape(B, hw_shape[0], hw_shape[1], + C).permute(0, 3, 1, 2).contiguous() + if self.output_cls_token: + out = [out, x[:, 0]] + outs.append(out) + + for i, layer in enumerate(self.layers): + x = layer(x) + if i == len(self.layers) - 1: + if self.final_norm: + x = self.norm1(x) + if i in self.out_indices: + if self.with_cls_token: + # Remove class token and reshape token for decoder head + out = x[:, 1:] + else: + out = x + B, _, C = out.shape + out = out.reshape(B, hw_shape[0], hw_shape[1], + C).permute(0, 3, 1, 2).contiguous() + if self.output_cls_token: + out = [out, x[:, 0]] + outs.append(out) + + return tuple(outs) + + def train(self, mode=True): + super().train(mode) + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, nn.LayerNorm): + m.eval() diff --git a/mmseg/models/backbones/vpd.py b/mmseg/models/backbones/vpd.py new file mode 100644 index 
0000000000000000000000000000000000000000..e0536d31c64f82fb66117d9ebd2161d5f2df57bd --- /dev/null +++ b/mmseg/models/backbones/vpd.py @@ -0,0 +1,395 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# ------------------------------------------------------------------------------ +# Adapted from https://github.com/wl-zhao/VPD/blob/main/vpd/models.py +# Original licence: MIT License +# ------------------------------------------------------------------------------ + +import math +from typing import List, Optional, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmengine.model import BaseModule +from mmengine.runner import CheckpointLoader, load_checkpoint + +from mmseg.registry import MODELS +from mmseg.utils import ConfigType, OptConfigType + +try: + from ldm.modules.diffusionmodules.util import timestep_embedding + from ldm.util import instantiate_from_config + has_ldm = True +except ImportError: + has_ldm = False + + +def register_attention_control(model, controller): + """Registers a control function to manage attention within a model. + + Args: + model: The model to which attention is to be registered. + controller: The control function responsible for managing attention. + """ + + def ca_forward(self, place_in_unet): + """Custom forward method for attention. + + Args: + self: Reference to the current object. + place_in_unet: The location in UNet (down/mid/up). + + Returns: + The modified forward method. + """ + + def forward(x, context=None, mask=None): + h = self.heads + is_cross = context is not None + context = context or x # if context is None, use x + + q, k, v = self.to_q(x), self.to_k(context), self.to_v(context) + q, k, v = ( + tensor.view(tensor.shape[0] * h, tensor.shape[1], + tensor.shape[2] // h) for tensor in [q, k, v]) + + sim = torch.matmul(q, k.transpose(-2, -1)) * self.scale + + if mask is not None: + mask = mask.flatten(1).unsqueeze(1).repeat(h, 1, 1) + max_neg_value = -torch.finfo(sim.dtype).max + sim.masked_fill_(~mask, max_neg_value) + + attn = sim.softmax(dim=-1) + attn_mean = attn.view(h, attn.shape[0] // h, + *attn.shape[1:]).mean(0) + controller(attn_mean, is_cross, place_in_unet) + + out = torch.matmul(attn, v) + out = out.view(out.shape[0] // h, out.shape[1], out.shape[2] * h) + return self.to_out(out) + + return forward + + def register_recr(net_, count, place_in_unet): + """Recursive function to register the custom forward method to all + CrossAttention layers. + + Args: + net_: The network layer currently being processed. + count: The current count of layers processed. + place_in_unet: The location in UNet (down/mid/up). + + Returns: + The updated count of layers processed. + """ + if net_.__class__.__name__ == 'CrossAttention': + net_.forward = ca_forward(net_, place_in_unet) + return count + 1 + if hasattr(net_, 'children'): + return sum( + register_recr(child, 0, place_in_unet) + for child in net_.children()) + return count + + cross_att_count = sum( + register_recr(net[1], 0, place) for net, place in [ + (child, 'down') if 'input_blocks' in name else ( + child, 'up') if 'output_blocks' in name else + (child, + 'mid') if 'middle_block' in name else (None, None) # Default case + for name, child in model.diffusion_model.named_children() + ] if net is not None) + + controller.num_att_layers = cross_att_count + + +class AttentionStore: + """A class for storing attention information in the UNet model. + + Attributes: + base_size (int): Base size for storing attention information. 
+ max_size (int): Maximum size for storing attention information. + """ + + def __init__(self, base_size=64, max_size=None): + """Initialize AttentionStore with default or custom sizes.""" + self.reset() + self.base_size = base_size + self.max_size = max_size or (base_size // 2) + self.num_att_layers = -1 + + @staticmethod + def get_empty_store(): + """Returns an empty store for holding attention values.""" + return { + key: [] + for key in [ + 'down_cross', 'mid_cross', 'up_cross', 'down_self', 'mid_self', + 'up_self' + ] + } + + def reset(self): + """Resets the step and attention stores to their initial states.""" + self.cur_step = 0 + self.cur_att_layer = 0 + self.step_store = self.get_empty_store() + self.attention_store = {} + + def forward(self, attn, is_cross: bool, place_in_unet: str): + """Processes a single forward step, storing the attention. + + Args: + attn: The attention tensor. + is_cross (bool): Whether it's cross attention. + place_in_unet (str): The location in UNet (down/mid/up). + + Returns: + The unmodified attention tensor. + """ + key = f"{place_in_unet}_{'cross' if is_cross else 'self'}" + if attn.shape[1] <= (self.max_size)**2: + self.step_store[key].append(attn) + return attn + + def between_steps(self): + """Processes and stores attention information between steps.""" + if not self.attention_store: + self.attention_store = self.step_store + else: + for key in self.attention_store: + self.attention_store[key] = [ + stored + step for stored, step in zip( + self.attention_store[key], self.step_store[key]) + ] + self.step_store = self.get_empty_store() + + def get_average_attention(self): + """Calculates and returns the average attention across all steps.""" + return { + key: [item for item in self.step_store[key]] + for key in self.step_store + } + + def __call__(self, attn, is_cross: bool, place_in_unet: str): + """Allows the class instance to be callable.""" + return self.forward(attn, is_cross, place_in_unet) + + @property + def num_uncond_att_layers(self): + """Returns the number of unconditional attention layers (default is + 0).""" + return 0 + + def step_callback(self, x_t): + """A placeholder for a step callback. + + Returns the input unchanged. + """ + return x_t + + +class UNetWrapper(nn.Module): + """A wrapper for UNet with optional attention mechanisms. + + Args: + unet (nn.Module): The UNet model to wrap + use_attn (bool): Whether to use attention. Defaults to True + base_size (int): Base size for the attention store. Defaults to 512 + max_attn_size (int, optional): Maximum size for the attention store. + Defaults to None + attn_selector (str): The types of attention to use. + Defaults to 'up_cross+down_cross' + """ + + def __init__(self, + unet, + use_attn=True, + base_size=512, + max_attn_size=None, + attn_selector='up_cross+down_cross'): + super().__init__() + + assert has_ldm, 'To use UNetWrapper, please install required ' \ + 'packages via `pip install -r requirements/optional.txt`.' 
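+        # With the default base_size=512, the store below is created with
+        # base_size=64 (512 // 8) and init_sizes() yields size16=16,
+        # size32=32 and size64=64, i.e. cross-/self-attention maps are
+        # grouped by the spatial resolution of the UNet stage they come
+        # from before being averaged and appended to the features.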
+ + self.unet = unet + self.attention_store = AttentionStore( + base_size=base_size // 8, max_size=max_attn_size) + self.attn_selector = attn_selector.split('+') + self.use_attn = use_attn + self.init_sizes(base_size) + if self.use_attn: + register_attention_control(unet, self.attention_store) + + def init_sizes(self, base_size): + """Initialize sizes based on the base size.""" + self.size16 = base_size // 32 + self.size32 = base_size // 16 + self.size64 = base_size // 8 + + def forward(self, x, timesteps=None, context=None, y=None, **kwargs): + """Forward pass through the model.""" + diffusion_model = self.unet.diffusion_model + if self.use_attn: + self.attention_store.reset() + hs, emb, out_list = self._unet_forward(x, timesteps, context, y, + diffusion_model) + if self.use_attn: + self._append_attn_to_output(out_list) + return out_list[::-1] + + def _unet_forward(self, x, timesteps, context, y, diffusion_model): + hs = [] + t_emb = timestep_embedding( + timesteps, diffusion_model.model_channels, repeat_only=False) + emb = diffusion_model.time_embed(t_emb) + h = x.type(diffusion_model.dtype) + for module in diffusion_model.input_blocks: + h = module(h, emb, context) + hs.append(h) + h = diffusion_model.middle_block(h, emb, context) + out_list = [] + for i_out, module in enumerate(diffusion_model.output_blocks): + h = torch.cat([h, hs.pop()], dim=1) + h = module(h, emb, context) + if i_out in [1, 4, 7]: + out_list.append(h) + h = h.type(x.dtype) + out_list.append(h) + return hs, emb, out_list + + def _append_attn_to_output(self, out_list): + avg_attn = self.attention_store.get_average_attention() + attns = {self.size16: [], self.size32: [], self.size64: []} + for k in self.attn_selector: + for up_attn in avg_attn[k]: + size = int(math.sqrt(up_attn.shape[1])) + up_attn = up_attn.transpose(-1, -2).reshape( + *up_attn.shape[:2], size, -1) + attns[size].append(up_attn) + attn16 = torch.stack(attns[self.size16]).mean(0) + attn32 = torch.stack(attns[self.size32]).mean(0) + attn64 = torch.stack(attns[self.size64]).mean(0) if len( + attns[self.size64]) > 0 else None + out_list[1] = torch.cat([out_list[1], attn16], dim=1) + out_list[2] = torch.cat([out_list[2], attn32], dim=1) + if attn64 is not None: + out_list[3] = torch.cat([out_list[3], attn64], dim=1) + + +class TextAdapter(nn.Module): + """A PyTorch Module that serves as a text adapter. + + This module takes text embeddings and adjusts them based on a scaling + factor gamma. + """ + + def __init__(self, text_dim=768): + super().__init__() + self.fc = nn.Sequential( + nn.Linear(text_dim, text_dim), nn.GELU(), + nn.Linear(text_dim, text_dim)) + + def forward(self, texts, gamma): + texts_after = self.fc(texts) + texts = texts + gamma * texts_after + return texts + + +@MODELS.register_module() +class VPD(BaseModule): + """VPD (Visual Perception Diffusion) model. + + .. _`VPD`: https://arxiv.org/abs/2303.02153 + + Args: + diffusion_cfg (dict): Configuration for diffusion model. + class_embed_path (str): Path for class embeddings. + unet_cfg (dict, optional): Configuration for U-Net. + gamma (float, optional): Gamma for text adaptation. Defaults to 1e-4. + class_embed_select (bool, optional): If True, enables class embedding + selection. Defaults to False. + pad_shape (Optional[Union[int, List[int]]], optional): Padding shape. + Defaults to None. + pad_val (Union[int, List[int]], optional): Padding value. + Defaults to 0. + init_cfg (dict, optional): Configuration for network initialization. 
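+
+    Example (illustrative sketch; the config and path are placeholders,
+    not values shipped with the original code):
+        A typical instantiation passes an LDM config plus pre-computed
+        class embeddings, e.g.
+        ``VPD(diffusion_cfg=dict(...), class_embed_path='emb.pth',
+        unet_cfg=dict(base_size=512))``, and ``forward`` returns the
+        multi-scale UNet features for a ``(B, 3, H, W)`` input.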
+ """ + + def __init__(self, + diffusion_cfg: ConfigType, + class_embed_path: str, + unet_cfg: OptConfigType = dict(), + gamma: float = 1e-4, + class_embed_select=False, + pad_shape: Optional[Union[int, List[int]]] = None, + pad_val: Union[int, List[int]] = 0, + init_cfg: OptConfigType = None): + + super().__init__(init_cfg=init_cfg) + + assert has_ldm, 'To use VPD model, please install required packages' \ + ' via `pip install -r requirements/optional.txt`.' + + if pad_shape is not None: + if not isinstance(pad_shape, (list, tuple)): + pad_shape = (pad_shape, pad_shape) + + self.pad_shape = pad_shape + self.pad_val = pad_val + + # diffusion model + diffusion_checkpoint = diffusion_cfg.pop('checkpoint', None) + sd_model = instantiate_from_config(diffusion_cfg) + if diffusion_checkpoint is not None: + load_checkpoint(sd_model, diffusion_checkpoint, strict=False) + + self.encoder_vq = sd_model.first_stage_model + self.unet = UNetWrapper(sd_model.model, **unet_cfg) + + # class embeddings & text adapter + class_embeddings = CheckpointLoader.load_checkpoint(class_embed_path) + text_dim = class_embeddings.size(-1) + self.text_adapter = TextAdapter(text_dim=text_dim) + self.class_embed_select = class_embed_select + if class_embed_select: + class_embeddings = torch.cat( + (class_embeddings, class_embeddings.mean(dim=0, + keepdims=True)), + dim=0) + self.register_buffer('class_embeddings', class_embeddings) + self.gamma = nn.Parameter(torch.ones(text_dim) * gamma) + + def forward(self, x): + """Extract features from images.""" + + # calculate cross-attn map + if self.class_embed_select: + if isinstance(x, (tuple, list)): + x, class_ids = x[:2] + class_ids = class_ids.tolist() + else: + class_ids = [-1] * x.size(0) + class_embeddings = self.class_embeddings[class_ids] + c_crossattn = self.text_adapter(class_embeddings, self.gamma) + c_crossattn = c_crossattn.unsqueeze(1) + else: + class_embeddings = self.class_embeddings + c_crossattn = self.text_adapter(class_embeddings, self.gamma) + c_crossattn = c_crossattn.unsqueeze(0).repeat(x.size(0), 1, 1) + + # pad to required input shape for pretrained diffusion model + if self.pad_shape is not None: + pad_width = max(0, self.pad_shape[1] - x.shape[-1]) + pad_height = max(0, self.pad_shape[0] - x.shape[-2]) + x = F.pad(x, (0, pad_width, 0, pad_height), value=self.pad_val) + + # forward the denoising model + with torch.no_grad(): + latents = self.encoder_vq.encode(x).mode().detach() + t = torch.ones((x.shape[0], ), device=x.device).long() + outs = self.unet(latents, t, context=c_crossattn) + + return outs diff --git a/mmseg/models/builder.py b/mmseg/models/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..081c646b49b8ff1ea6c42d1ea4e24e63cdf6b43a --- /dev/null +++ b/mmseg/models/builder.py @@ -0,0 +1,52 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import warnings + +from mmseg.registry import MODELS + +BACKBONES = MODELS +NECKS = MODELS +HEADS = MODELS +LOSSES = MODELS +SEGMENTORS = MODELS + + +def build_backbone(cfg): + """Build backbone.""" + warnings.warn('``build_backbone`` would be deprecated soon, please use ' + '``mmseg.registry.MODELS.build()`` ') + return BACKBONES.build(cfg) + + +def build_neck(cfg): + """Build neck.""" + warnings.warn('``build_neck`` would be deprecated soon, please use ' + '``mmseg.registry.MODELS.build()`` ') + return NECKS.build(cfg) + + +def build_head(cfg): + """Build head.""" + warnings.warn('``build_head`` would be deprecated soon, please use ' + '``mmseg.registry.MODELS.build()`` ') + return HEADS.build(cfg) + + +def build_loss(cfg): + """Build loss.""" + warnings.warn('``build_loss`` would be deprecated soon, please use ' + '``mmseg.registry.MODELS.build()`` ') + return LOSSES.build(cfg) + + +def build_segmentor(cfg, train_cfg=None, test_cfg=None): + """Build segmentor.""" + if train_cfg is not None or test_cfg is not None: + warnings.warn( + 'train_cfg and test_cfg is deprecated, ' + 'please specify them in model', UserWarning) + assert cfg.get('train_cfg') is None or train_cfg is None, \ + 'train_cfg specified in both outer field and model field ' + assert cfg.get('test_cfg') is None or test_cfg is None, \ + 'test_cfg specified in both outer field and model field ' + return SEGMENTORS.build( + cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) diff --git a/mmseg/models/data_preprocessor.py b/mmseg/models/data_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..8d32bc647b7d48183590408e36ec42ea36aea91c --- /dev/null +++ b/mmseg/models/data_preprocessor.py @@ -0,0 +1,151 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from numbers import Number +from typing import Any, Dict, List, Optional, Sequence + +import torch +from mmengine.model import BaseDataPreprocessor + +from mmseg.registry import MODELS +from mmseg.utils import stack_batch + + +@MODELS.register_module() +class SegDataPreProcessor(BaseDataPreprocessor): + """Image pre-processor for segmentation tasks. + + Comparing with the :class:`mmengine.ImgDataPreprocessor`, + + 1. It won't do normalization if ``mean`` is not specified. + 2. It does normalization and color space conversion after stacking batch. + 3. It supports batch augmentations like mixup and cutmix. + + + It provides the data pre-processing as follows + + - Collate and move data to the target device. + - Pad inputs to the input size with defined ``pad_val``, and pad seg map + with defined ``seg_pad_val``. + - Stack inputs to batch_inputs. + - Convert inputs from bgr to rgb if the shape of input is (3, H, W). + - Normalize image with defined std and mean. + - Do batch augmentations like Mixup and Cutmix during training. + + Args: + mean (Sequence[Number], optional): The pixel mean of R, G, B channels. + Defaults to None. + std (Sequence[Number], optional): The pixel standard deviation of + R, G, B channels. Defaults to None. + size (tuple, optional): Fixed padding size. + size_divisor (int, optional): The divisor of padded size. + pad_val (float, optional): Padding value. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + padding_mode (str): Type of padding. Default: constant. + - constant: pads with a constant value, this value is specified + with pad_val. + bgr_to_rgb (bool): whether to convert image from BGR to RGB. + Defaults to False. 
+        rgb_to_bgr (bool): whether to convert image from RGB to BGR.
+            Defaults to False.
+        batch_augments (list[dict], optional): Batch-level augmentations.
+            Defaults to None.
+        test_cfg (dict, optional): The padding size config in testing. If not
+            specified, the `size` and `size_divisor` params are used as
+            defaults. Defaults to None; only the keys `size` and
+            `size_divisor` are supported.
+    """
+
+    def __init__(
+        self,
+        mean: Sequence[Number] = None,
+        std: Sequence[Number] = None,
+        size: Optional[tuple] = None,
+        size_divisor: Optional[int] = None,
+        pad_val: Number = 0,
+        seg_pad_val: Number = 255,
+        bgr_to_rgb: bool = False,
+        rgb_to_bgr: bool = False,
+        batch_augments: Optional[List[dict]] = None,
+        test_cfg: dict = None,
+    ):
+        super().__init__()
+        self.size = size
+        self.size_divisor = size_divisor
+        self.pad_val = pad_val
+        self.seg_pad_val = seg_pad_val
+
+        assert not (bgr_to_rgb and rgb_to_bgr), (
+            '`bgr_to_rgb` and `rgb_to_bgr` cannot be set to True '
+            'at the same time')
+        self.channel_conversion = rgb_to_bgr or bgr_to_rgb
+
+        if mean is not None:
+            assert std is not None, 'To enable the normalization in ' \
+                                    'preprocessing, please specify both ' \
+                                    '`mean` and `std`.'
+            # Enable the normalization in preprocessing.
+            self._enable_normalize = True
+            self.register_buffer('mean',
+                                 torch.tensor(mean).view(-1, 1, 1), False)
+            self.register_buffer('std',
+                                 torch.tensor(std).view(-1, 1, 1), False)
+        else:
+            self._enable_normalize = False
+
+        # TODO: support batch augmentations.
+        self.batch_augments = batch_augments
+
+        # Support different padding methods in testing.
+        self.test_cfg = test_cfg
+
+    def forward(self, data: dict, training: bool = False) -> Dict[str, Any]:
+        """Perform normalization, padding and bgr2rgb conversion based on
+        ``BaseDataPreprocessor``.
+
+        Args:
+            data (dict): Data sampled from the dataloader.
+            training (bool): Whether to enable training-time augmentation.
+
+        Returns:
+            Dict: Data in the same format as the model input.
+        """
+        data = self.cast_data(data)  # type: ignore
+        inputs = data['inputs']
+        data_samples = data.get('data_samples', None)
+        # TODO: whether normalize should be after stack_batch
+        if self.channel_conversion and inputs[0].size(0) == 3:
+            inputs = [_input[[2, 1, 0], ...] for _input in inputs]
+
+        inputs = [_input.float() for _input in inputs]
+        if self._enable_normalize:
+            inputs = [(_input - self.mean) / self.std for _input in inputs]
+
+        if training:
+            assert data_samples is not None, \
+                'During training, `data_samples` must be defined.'
+            inputs, data_samples = stack_batch(
+                inputs=inputs,
+                data_samples=data_samples,
+                size=self.size,
+                size_divisor=self.size_divisor,
+                pad_val=self.pad_val,
+                seg_pad_val=self.seg_pad_val)
+
+            if self.batch_augments is not None:
+                inputs, data_samples = self.batch_augments(
+                    inputs, data_samples)
+        else:
+            img_size = inputs[0].shape[1:]
+            assert all(input_.shape[1:] == img_size for input_ in inputs), \
+                'The image size in a batch should be the same.'
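+            # An illustrative note on the test-time branch below (values are
+            # hypothetical): with test_cfg=dict(size_divisor=32), a batch of
+            # 3x500x375 images is padded to 3x512x384, and the pad info is
+            # written into each sample's metainfo so predictions can later
+            # be cropped back to the original shape.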
+ # pad images when testing + if self.test_cfg: + inputs, padded_samples = stack_batch( + inputs=inputs, + size=self.test_cfg.get('size', None), + size_divisor=self.test_cfg.get('size_divisor', None), + pad_val=self.pad_val, + seg_pad_val=self.seg_pad_val) + for data_sample, pad_info in zip(data_samples, padded_samples): + data_sample.set_metainfo({**pad_info}) + else: + inputs = torch.stack(inputs, dim=0) + + return dict(inputs=inputs, data_samples=data_samples) diff --git a/mmseg/models/decode_heads/__init__.py b/mmseg/models/decode_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4229763816e4100ab6718e4698a21ce92199371b --- /dev/null +++ b/mmseg/models/decode_heads/__init__.py @@ -0,0 +1,48 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .ann_head import ANNHead +from .apc_head import APCHead +from .aspp_head import ASPPHead +from .cc_head import CCHead +from .da_head import DAHead +from .ddr_head import DDRHead +from .dm_head import DMHead +from .dnl_head import DNLHead +from .dpt_head import DPTHead +from .ema_head import EMAHead +from .enc_head import EncHead +from .fcn_head import FCNHead +from .fpn_head import FPNHead +from .gc_head import GCHead +from .ham_head import LightHamHead +from .isa_head import ISAHead +from .knet_head import IterativeDecodeHead, KernelUpdateHead, KernelUpdator +from .lraspp_head import LRASPPHead +from .mask2former_head import Mask2FormerHead +from .maskformer_head import MaskFormerHead +from .nl_head import NLHead +from .ocr_head import OCRHead +from .pid_head import PIDHead +from .point_head import PointHead +from .psa_head import PSAHead +from .psp_head import PSPHead +from .san_head import SideAdapterCLIPHead +from .segformer_head import SegformerHead +from .segmenter_mask_head import SegmenterMaskTransformerHead +from .sep_aspp_head import DepthwiseSeparableASPPHead +from .sep_fcn_head import DepthwiseSeparableFCNHead +from .setr_mla_head import SETRMLAHead +from .setr_up_head import SETRUPHead +from .stdc_head import STDCHead +from .uper_head import UPerHead +from .vpd_depth_head import VPDDepthHead + +__all__ = [ + 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', + 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', + 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead', + 'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead', + 'SETRMLAHead', 'DPTHead', 'SETRMLAHead', 'SegmenterMaskTransformerHead', + 'SegformerHead', 'ISAHead', 'STDCHead', 'IterativeDecodeHead', + 'KernelUpdateHead', 'KernelUpdator', 'MaskFormerHead', 'Mask2FormerHead', + 'LightHamHead', 'PIDHead', 'DDRHead', 'VPDDepthHead', 'SideAdapterCLIPHead' +] diff --git a/mmseg/models/decode_heads/__pycache__/__init__.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7cf7091478083396b6876161fcd7ea568d461cb4 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/ann_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/ann_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2145e68d82eccc95c36a833bc6640bfc21a7bec0 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/ann_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/apc_head.cpython-39.pyc 
b/mmseg/models/decode_heads/__pycache__/apc_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82854773e7b8138e06eec594572146c342b7410c Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/apc_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/aspp_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/aspp_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8227eb93f5a68c9aa813f109d1233e850d7a685 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/aspp_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/cascade_decode_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/cascade_decode_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c87b149687d0f2e44ee49aad586b8197a804fce Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/cascade_decode_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/cc_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/cc_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d4e2b5788892494268f20802a7fc5a30d19526d Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/cc_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/da_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/da_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2874153058664690c1d8fa410a1098464565806 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/da_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/ddr_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/ddr_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21eeb95134dafe0b077beeb76fa9320edbe9fa2a Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/ddr_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/decode_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/decode_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..044f17fba501ec7b5a670477a2a5f795de98103b Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/decode_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/dm_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/dm_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3cd616d11ef5b4855b1221f42b2b2bef4b2039e Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/dm_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/dnl_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/dnl_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..547dabcc0e4295b1427056bf1c610cb8e71788be Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/dnl_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/dpt_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/dpt_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21f7fca56becc43dda73996c30cbe86037cbfc68 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/dpt_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/ema_head.cpython-39.pyc 
b/mmseg/models/decode_heads/__pycache__/ema_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f438fa37b6353eedbae94260a1fc88a8217efe13 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/ema_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/enc_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/enc_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..098a80b70ff8f2ba93ec29c6fdec4fad2e1d94e5 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/enc_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/fcn_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/fcn_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6595b4c281cdebfd9f42b41e206a8df446534e5a Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/fcn_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/fpn_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/fpn_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c0927e4d67a9c0742d02aac40beea637cf25505 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/fpn_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/gc_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/gc_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dad80536c52b760a1959b4d9314fc99a225e9747 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/gc_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/ham_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/ham_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2af326df40352440ecdd1129e56a94026dafcced Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/ham_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/isa_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/isa_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..675a67786203a8950af3337be5d8884e22d81c04 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/isa_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/knet_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/knet_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d0bb82ff059f27f016cfff4c92f246ba265c3499 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/knet_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/lraspp_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/lraspp_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e50d5b01ba16fc1ddb882ce78ae0de25cdda212 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/lraspp_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/mask2former_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/mask2former_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8a42bfc862cba9505822b7442ff36d98e009585 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/mask2former_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/maskformer_head.cpython-39.pyc 
b/mmseg/models/decode_heads/__pycache__/maskformer_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2a69e7fb73f37fd4d286969039e48442d0f9e17 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/maskformer_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/nl_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/nl_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1c61c5b9dff907bc89e6b8f29fd584b86e37a71 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/nl_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/ocr_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/ocr_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53791e78dce2b58d4bceea5925822cb81e996957 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/ocr_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/pid_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/pid_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58171b7d42d18d3d82d79c0c4cbb01e7c3e37ce8 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/pid_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/point_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/point_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17ea706a80528182dcca4365083ce1f21eccc3e3 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/point_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/psa_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/psa_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d26eae8ecd03bbf99ca84c91ecbcf84f4133c175 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/psa_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/psp_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/psp_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8c001930095dc4e7ef369c6426b72d71ad8e010 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/psp_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/san_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/san_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..576a011f0d815ce21921f720a5bde00a6546fc14 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/san_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/segformer_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/segformer_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ffdf7cbfc75b4e7fe10806af32906ed4c232729 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/segformer_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/segmenter_mask_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/segmenter_mask_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b0f1d79bd2a6b4642f997fda01ed8ff7ea84c77 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/segmenter_mask_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/sep_aspp_head.cpython-39.pyc 
b/mmseg/models/decode_heads/__pycache__/sep_aspp_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fbd22f4013b1de969b1d8c7bd57014ef4c4aa36a Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/sep_aspp_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/sep_fcn_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/sep_fcn_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..500646487cccb97675642d8c8e551faf71084aed Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/sep_fcn_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/setr_mla_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/setr_mla_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1940ffc34a1b933313588d9f9a0e2402351373bd Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/setr_mla_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/setr_up_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/setr_up_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e177f1987f1c77144b2d32aeca4b2866f6d919c4 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/setr_up_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/stdc_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/stdc_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2971d0d617d44a37eb1fb701e85528a6cef04d17 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/stdc_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/uper_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/uper_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fae6abddb43f89542e252f9ff0dbb16eb16e068 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/uper_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/__pycache__/vpd_depth_head.cpython-39.pyc b/mmseg/models/decode_heads/__pycache__/vpd_depth_head.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f90ae3bfd2545a77d9272ec18332005d339aad58 Binary files /dev/null and b/mmseg/models/decode_heads/__pycache__/vpd_depth_head.cpython-39.pyc differ diff --git a/mmseg/models/decode_heads/ann_head.py b/mmseg/models/decode_heads/ann_head.py new file mode 100644 index 0000000000000000000000000000000000000000..2b40ef5aa1da0bc2473597fedca5b3f33973beb0 --- /dev/null +++ b/mmseg/models/decode_heads/ann_head.py @@ -0,0 +1,245 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PPMConcat(nn.ModuleList): + """Pyramid Pooling Module that only concat the features of each layer. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. 
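+
+    Example:
+        >>> # Each scale is adaptively pooled and flattened, so a
+        >>> # (2, 512, 32, 32) input yields (2, 512, 1 + 9 + 36 + 64):
+        >>> ppm = PPMConcat(pool_scales=(1, 3, 6, 8))
+        >>> out = ppm(torch.randn(2, 512, 32, 32))
+        >>> out.shape
+        torch.Size([2, 512, 110])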
+ """ + + def __init__(self, pool_scales=(1, 3, 6, 8)): + super().__init__( + [nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales]) + + def forward(self, feats): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(feats) + ppm_outs.append(ppm_out.view(*feats.shape[:2], -1)) + concat_outs = torch.cat(ppm_outs, dim=2) + return concat_outs + + +class SelfAttentionBlock(_SelfAttentionBlock): + """Make a ANN used SelfAttentionBlock. + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_scale (int): The scale of query feature map. + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, share_key_query, query_scale, key_pool_scales, + conv_cfg, norm_cfg, act_cfg): + key_psp = PPMConcat(key_pool_scales) + if query_scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=query_scale) + else: + query_downsample = None + super().__init__( + key_in_channels=low_in_channels, + query_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=share_key_query, + query_downsample=query_downsample, + key_downsample=key_psp, + key_query_num_convs=1, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + +class AFNB(nn.Module): + """Asymmetric Fusion Non-local Block(AFNB) + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + and query projection. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. 
+ """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, query_scales, key_pool_scales, conv_cfg, + norm_cfg, act_cfg): + super().__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=False, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + out_channels + high_in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, low_feats, high_feats): + """Forward function.""" + priors = [stage(high_feats, low_feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, high_feats], 1)) + return output + + +class APNB(nn.Module): + """Asymmetric Pyramid Non-local Block (APNB) + + Args: + in_channels (int): Input channels of key/query feature, + which is the key feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, in_channels, channels, out_channels, query_scales, + key_pool_scales, conv_cfg, norm_cfg, act_cfg): + super().__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=in_channels, + high_in_channels=in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=True, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + 2 * in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, feats): + """Forward function.""" + priors = [stage(feats, feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, feats], 1)) + return output + + +@MODELS.register_module() +class ANNHead(BaseDecodeHead): + """Asymmetric Non-local Neural Networks for Semantic Segmentation. + + This head is the implementation of `ANNNet + `_. + + Args: + project_channels (int): Projection channels for Nonlocal. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): The pooling scales of key feature map. + Default: (1, 3, 6, 8). 
+ """ + + def __init__(self, + project_channels, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + **kwargs): + super().__init__(input_transform='multiple_select', **kwargs) + assert len(self.in_channels) == 2 + low_in_channels, high_in_channels = self.in_channels + self.project_channels = project_channels + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + high_in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.context = APNB( + in_channels=self.channels, + out_channels=self.channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + low_feats, high_feats = self._transform_inputs(inputs) + output = self.fusion(low_feats, high_feats) + output = self.dropout(output) + output = self.bottleneck(output) + output = self.context(output) + output = self.cls_seg(output) + + return output diff --git a/mmseg/models/decode_heads/apc_head.py b/mmseg/models/decode_heads/apc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..728f39659c63680944306fddc9e33b7c9172c1ba --- /dev/null +++ b/mmseg/models/decode_heads/apc_head.py @@ -0,0 +1,159 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import resize +from .decode_head import BaseDecodeHead + + +class ACM(nn.Module): + """Adaptive Context Module used in APCNet. + + Args: + pool_scale (int): Pooling scale used in Adaptive Context + Module to extract region features. + fusion (bool): Add one conv to fuse residual feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. 
+ """ + + def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super().__init__() + self.pool_scale = pool_scale + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.pooled_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.global_info = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.gla = nn.Conv2d(self.channels, self.pool_scale**2, 1, 1, 0) + + self.residual_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + pooled_x = F.adaptive_avg_pool2d(x, self.pool_scale) + # [batch_size, channels, h, w] + x = self.input_redu_conv(x) + # [batch_size, channels, pool_scale, pool_scale] + pooled_x = self.pooled_redu_conv(pooled_x) + batch_size = x.size(0) + # [batch_size, pool_scale * pool_scale, channels] + pooled_x = pooled_x.view(batch_size, self.channels, + -1).permute(0, 2, 1).contiguous() + # [batch_size, h * w, pool_scale * pool_scale] + affinity_matrix = self.gla(x + resize( + self.global_info(F.adaptive_avg_pool2d(x, 1)), size=x.shape[2:]) + ).permute(0, 2, 3, 1).reshape( + batch_size, -1, self.pool_scale**2) + affinity_matrix = F.sigmoid(affinity_matrix) + # [batch_size, h * w, channels] + z_out = torch.matmul(affinity_matrix, pooled_x) + # [batch_size, channels, h * w] + z_out = z_out.permute(0, 2, 1).contiguous() + # [batch_size, channels, h, w] + z_out = z_out.view(batch_size, self.channels, x.size(2), x.size(3)) + z_out = self.residual_conv(z_out) + z_out = F.relu(z_out + x) + if self.fusion: + z_out = self.fusion_conv(z_out) + + return z_out + + +@MODELS.register_module() +class APCHead(BaseDecodeHead): + """Adaptive Pyramid Context Network for Semantic Segmentation. + + This head is the implementation of + `APCNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Adaptive Context + Module. Default: (1, 2, 3, 6). + fusion (bool): Add one conv to fuse residual feature. 
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs): + super().__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.fusion = fusion + acm_modules = [] + for pool_scale in self.pool_scales: + acm_modules.append( + ACM(pool_scale, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.acm_modules = nn.ModuleList(acm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + acm_outs = [x] + for acm_module in self.acm_modules: + acm_outs.append(acm_module(x)) + acm_outs = torch.cat(acm_outs, dim=1) + output = self.bottleneck(acm_outs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/aspp_head.py b/mmseg/models/decode_heads/aspp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..6d7185d7de58d35ef17e5d54e0e75b045e8724c4 --- /dev/null +++ b/mmseg/models/decode_heads/aspp_head.py @@ -0,0 +1,122 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import resize +from .decode_head import BaseDecodeHead + + +class ASPPModule(nn.ModuleList): + """Atrous Spatial Pyramid Pooling (ASPP) Module. + + Args: + dilations (tuple[int]): Dilation rate of each layer. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, + act_cfg): + super().__init__() + self.dilations = dilations + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for dilation in dilations: + self.append( + ConvModule( + self.in_channels, + self.channels, + 1 if dilation == 1 else 3, + dilation=dilation, + padding=0 if dilation == 1 else dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + def forward(self, x): + """Forward function.""" + aspp_outs = [] + for aspp_module in self: + aspp_outs.append(aspp_module(x)) + + return aspp_outs + + +@MODELS.register_module() +class ASPPHead(BaseDecodeHead): + """Rethinking Atrous Convolution for Semantic Image Segmentation. + + This head is the implementation of `DeepLabV3 + `_. + + Args: + dilations (tuple[int]): Dilation rates for ASPP module. + Default: (1, 6, 12, 18). 
+ """ + + def __init__(self, dilations=(1, 6, 12, 18), **kwargs): + super().__init__(**kwargs) + assert isinstance(dilations, (list, tuple)) + self.dilations = dilations + self.image_pool = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.aspp_modules = ASPPModule( + dilations, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + (len(dilations) + 1) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _forward_feature(self, inputs): + """Forward function for feature maps before classifying each pixel with + ``self.cls_seg`` fc. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + feats (Tensor): A tensor of shape (batch_size, self.channels, + H, W) which is feature map for last layer of decoder head. + """ + x = self._transform_inputs(inputs) + aspp_outs = [ + resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + feats = self.bottleneck(aspp_outs) + return feats + + def forward(self, inputs): + """Forward function.""" + output = self._forward_feature(inputs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/cascade_decode_head.py b/mmseg/models/decode_heads/cascade_decode_head.py new file mode 100644 index 0000000000000000000000000000000000000000..fe2bcb9302235e3881696dff6657e3e7fb12609b --- /dev/null +++ b/mmseg/models/decode_heads/cascade_decode_head.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta, abstractmethod +from typing import List + +from torch import Tensor + +from mmseg.utils import ConfigType +from .decode_head import BaseDecodeHead + + +class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): + """Base class for cascade decode head used in + :class:`CascadeEncoderDecoder.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @abstractmethod + def forward(self, inputs, prev_output): + """Placeholder of forward function.""" + pass + + def loss(self, inputs: List[Tensor], prev_output: Tensor, + batch_data_samples: List[dict], train_cfg: ConfigType) -> Tensor: + """Forward function for training. + + Args: + inputs (List[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + batch_data_samples (List[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `metainfo` and `gt_sem_seg`. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs, prev_output) + losses = self.loss_by_feat(seg_logits, batch_data_samples) + + return losses + + def predict(self, inputs: List[Tensor], prev_output: Tensor, + batch_img_metas: List[dict], tese_cfg: ConfigType): + """Forward function for testing. + + Args: + inputs (List[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + batch_img_metas (dict): List Image info where each dict may also + contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. 
+ For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + seg_logits = self.forward(inputs, prev_output) + + return self.predict_by_feat(seg_logits, batch_img_metas) diff --git a/mmseg/models/decode_heads/cc_head.py b/mmseg/models/decode_heads/cc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..e9075a2648d77f6bca6bb29f3e7db52a329f7afb --- /dev/null +++ b/mmseg/models/decode_heads/cc_head.py @@ -0,0 +1,43 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.registry import MODELS +from .fcn_head import FCNHead + +try: + from mmcv.ops import CrissCrossAttention +except ModuleNotFoundError: + CrissCrossAttention = None + + +@MODELS.register_module() +class CCHead(FCNHead): + """CCNet: Criss-Cross Attention for Semantic Segmentation. + + This head is the implementation of `CCNet + `_. + + Args: + recurrence (int): Number of recurrence of Criss Cross Attention + module. Default: 2. + """ + + def __init__(self, recurrence=2, **kwargs): + if CrissCrossAttention is None: + raise RuntimeError('Please install mmcv-full for ' + 'CrissCrossAttention ops') + super().__init__(num_convs=2, **kwargs) + self.recurrence = recurrence + self.cca = CrissCrossAttention(self.channels) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + for _ in range(self.recurrence): + output = self.cca(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/da_head.py b/mmseg/models/decode_heads/da_head.py new file mode 100644 index 0000000000000000000000000000000000000000..d87214365d2f8695b60ccab0c1850669ff8dd295 --- /dev/null +++ b/mmseg/models/decode_heads/da_head.py @@ -0,0 +1,184 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Tuple + +import torch +import torch.nn.functional as F +from mmcv.cnn import ConvModule, Scale +from torch import Tensor, nn + +from mmseg.registry import MODELS +from mmseg.utils import SampleList, add_prefix +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PAM(_SelfAttentionBlock): + """Position Attention Module (PAM) + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. 
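+
+    Example:
+        >>> # Illustrative cascade step (a sketch): a refinement head takes
+        >>> # both the backbone features and the previous head's logits:
+        >>> # seg_logits = cascade_head.forward(inputs, prev_output)
+        >>> # losses = cascade_head.loss(inputs, prev_output,
+        >>> #                            batch_data_samples, train_cfg)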
+ """ + + def __init__(self, in_channels, channels): + super().__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=None, + key_downsample=None, + key_query_num_convs=1, + key_query_norm=False, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=False, + with_out=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None) + + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + out = super().forward(x, x) + + out = self.gamma(out) + x + return out + + +class CAM(nn.Module): + """Channel Attention Module (CAM)""" + + def __init__(self): + super().__init__() + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + batch_size, channels, height, width = x.size() + proj_query = x.view(batch_size, channels, -1) + proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) + energy = torch.bmm(proj_query, proj_key) + energy_new = torch.max( + energy, -1, keepdim=True)[0].expand_as(energy) - energy + attention = F.softmax(energy_new, dim=-1) + proj_value = x.view(batch_size, channels, -1) + + out = torch.bmm(attention, proj_value) + out = out.view(batch_size, channels, height, width) + + out = self.gamma(out) + x + return out + + +@MODELS.register_module() +class DAHead(BaseDecodeHead): + """Dual Attention Network for Scene Segmentation. + + This head is the implementation of `DANet + `_. + + Args: + pam_channels (int): The channels of Position Attention Module(PAM). + """ + + def __init__(self, pam_channels, **kwargs): + super().__init__(**kwargs) + self.pam_channels = pam_channels + self.pam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam = PAM(self.channels, pam_channels) + self.pam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + self.cam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam = CAM() + self.cam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + def pam_cls_seg(self, feat): + """PAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.pam_conv_seg(feat) + return output + + def cam_cls_seg(self, feat): + """CAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.cam_conv_seg(feat) + return output + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + pam_feat = self.pam_in_conv(x) + pam_feat = self.pam(pam_feat) + pam_feat = self.pam_out_conv(pam_feat) + pam_out = self.pam_cls_seg(pam_feat) + + cam_feat = self.cam_in_conv(x) + cam_feat = self.cam(cam_feat) + cam_feat = self.cam_out_conv(cam_feat) + cam_out = self.cam_cls_seg(cam_feat) + + feat_sum = pam_feat + cam_feat + pam_cam_out = self.cls_seg(feat_sum) + + return pam_cam_out, pam_out, cam_out + + def predict(self, inputs, batch_img_metas: List[dict], test_cfg, + **kwargs) -> List[Tensor]: + """Forward function for testing, only 
``pam_cam`` is used.""" + seg_logits = self.forward(inputs)[0] + return self.predict_by_feat(seg_logits, batch_img_metas, **kwargs) + + def loss_by_feat(self, seg_logit: Tuple[Tensor], + batch_data_samples: SampleList, **kwargs) -> dict: + """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" + pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit + loss = dict() + loss.update( + add_prefix( + super().loss_by_feat(pam_cam_seg_logit, batch_data_samples), + 'pam_cam')) + loss.update( + add_prefix(super().loss_by_feat(pam_seg_logit, batch_data_samples), + 'pam')) + loss.update( + add_prefix(super().loss_by_feat(cam_seg_logit, batch_data_samples), + 'cam')) + return loss diff --git a/mmseg/models/decode_heads/ddr_head.py b/mmseg/models/decode_heads/ddr_head.py new file mode 100644 index 0000000000000000000000000000000000000000..ba26d6503c09d7efb3ca6664c7baf59c9e6e3ce9 --- /dev/null +++ b/mmseg/models/decode_heads/ddr_head.py @@ -0,0 +1,116 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple, Union + +import torch.nn as nn +from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer +from torch import Tensor + +from mmseg.models.decode_heads.decode_head import BaseDecodeHead +from mmseg.models.losses import accuracy +from mmseg.models.utils import resize +from mmseg.registry import MODELS +from mmseg.utils import OptConfigType, SampleList + + +@MODELS.register_module() +class DDRHead(BaseDecodeHead): + """Decode head for DDRNet. + + Args: + in_channels (int): Number of input channels. + channels (int): Number of output channels. + num_classes (int): Number of classes. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). 
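+
+    Example:
+        >>> # Spatial self-attention with a zero-initialised residual scale
+        >>> # (``Scale(0)``), so the block starts as an identity mapping;
+        >>> # shapes are illustrative:
+        >>> pam = PAM(in_channels=64, channels=16)
+        >>> out = pam(torch.randn(2, 64, 32, 32))
+        >>> out.shape
+        torch.Size([2, 64, 32, 32])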
+ """ + + def __init__(self, + in_channels: int, + channels: int, + num_classes: int, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + **kwargs): + super().__init__( + in_channels, + channels, + num_classes=num_classes, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs) + + self.head = self._make_base_head(self.in_channels, self.channels) + self.aux_head = self._make_base_head(self.in_channels // 2, + self.channels) + self.aux_cls_seg = nn.Conv2d( + self.channels, self.out_channels, kernel_size=1) + + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def forward( + self, + inputs: Union[Tensor, + Tuple[Tensor]]) -> Union[Tensor, Tuple[Tensor]]: + if self.training: + c3_feat, c5_feat = inputs + x_c = self.head(c5_feat) + x_c = self.cls_seg(x_c) + x_s = self.aux_head(c3_feat) + x_s = self.aux_cls_seg(x_s) + + return x_c, x_s + else: + x_c = self.head(inputs) + x_c = self.cls_seg(x_c) + return x_c + + def _make_base_head(self, in_channels: int, + channels: int) -> nn.Sequential: + layers = [ + ConvModule( + in_channels, + channels, + kernel_size=3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + order=('norm', 'act', 'conv')), + build_norm_layer(self.norm_cfg, channels)[1], + build_activation_layer(self.act_cfg), + ] + + return nn.Sequential(*layers) + + def loss_by_feat(self, seg_logits: Tuple[Tensor], + batch_data_samples: SampleList) -> dict: + loss = dict() + context_logit, spatial_logit = seg_logits + seg_label = self._stack_batch_gt(batch_data_samples) + + context_logit = resize( + context_logit, + size=seg_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + spatial_logit = resize( + spatial_logit, + size=seg_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + seg_label = seg_label.squeeze(1) + + loss['loss_context'] = self.loss_decode[0](context_logit, seg_label) + loss['loss_spatial'] = self.loss_decode[1](spatial_logit, seg_label) + loss['acc_seg'] = accuracy( + context_logit, seg_label, ignore_index=self.ignore_index) + + return loss diff --git a/mmseg/models/decode_heads/decode_head.py b/mmseg/models/decode_heads/decode_head.py new file mode 100644 index 0000000000000000000000000000000000000000..179d871fd18d1af3e06a62e1e731572fb85683e2 --- /dev/null +++ b/mmseg/models/decode_heads/decode_head.py @@ -0,0 +1,366 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from abc import ABCMeta, abstractmethod +from typing import List, Tuple + +import torch +import torch.nn as nn +from mmengine.model import BaseModule +from torch import Tensor + +from mmseg.structures import build_pixel_sampler +from mmseg.utils import ConfigType, SampleList +from ..builder import build_loss +from ..losses import accuracy +from ..utils import resize + + +class BaseDecodeHead(BaseModule, metaclass=ABCMeta): + """Base class for BaseDecodeHead. + + 1. The ``init_weights`` method is used to initialize decode_head's + model parameters. After segmentor initialization, ``init_weights`` + is triggered when ``segmentor.init_weights()`` is called externally. + + 2. 
The ``loss`` method is used to calculate the loss of decode_head, + which includes two steps: (1) the decode_head model performs forward + propagation to obtain the feature maps (2) The ``loss_by_feat`` method + is called based on the feature maps to calculate the loss. + + .. code:: text + + loss(): forward() -> loss_by_feat() + + 3. The ``predict`` method is used to predict segmentation results, + which includes two steps: (1) the decode_head model performs forward + propagation to obtain the feature maps (2) The ``predict_by_feat`` method + is called based on the feature maps to predict segmentation results + including post-processing. + + .. code:: text + + predict(): forward() -> predict_by_feat() + + Args: + in_channels (int|Sequence[int]): Input channels. + channels (int): Channels after modules, before conv_seg. + num_classes (int): Number of classes. + out_channels (int): Output channels of conv_seg. Default: None. + threshold (float): Threshold for binary segmentation in the case of + `num_classes==1`. Default: None. + dropout_ratio (float): Ratio of dropout layer. Default: 0.1. + conv_cfg (dict|None): Config of conv layers. Default: None. + norm_cfg (dict|None): Config of norm layers. Default: None. + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU') + in_index (int|Sequence[int]): Input feature index. Default: -1 + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + Default: None. + loss_decode (dict | Sequence[dict]): Config of decode loss. + The `loss_name` is property of corresponding loss function which + could be shown in training log. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_ce'. + e.g. dict(type='CrossEntropyLoss'), + [dict(type='CrossEntropyLoss', loss_name='loss_ce'), + dict(type='DiceLoss', loss_name='loss_dice')] + Default: dict(type='CrossEntropyLoss'). + ignore_index (int | None): The label index to be ignored. When using + masked BCE loss, ignore_index should be set to None. Default: 255. + sampler (dict|None): The config of segmentation map sampler. + Default: None. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + init_cfg (dict or list[dict], optional): Initialization config dict. 
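+
+    Example:
+        >>> # Minimal subclass sketch (hypothetical head, assumes
+        >>> # ``torch.nn as nn`` is imported): one 3x3 conv followed by the
+        >>> # built-in per-pixel classifier ``cls_seg``:
+        >>> class TinyHead(BaseDecodeHead):
+        ...     def __init__(self, **kwargs):
+        ...         super().__init__(**kwargs)
+        ...         self.conv = nn.Conv2d(
+        ...             self.in_channels, self.channels, 3, padding=1)
+        ...     def forward(self, inputs):
+        ...         x = self._transform_inputs(inputs)
+        ...         return self.cls_seg(self.conv(x))
+        >>> head = TinyHead(in_channels=64, channels=32, num_classes=19)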
+ """ + + def __init__(self, + in_channels, + channels, + *, + num_classes, + out_channels=None, + threshold=None, + dropout_ratio=0.1, + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + in_index=-1, + input_transform=None, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + ignore_index=255, + sampler=None, + align_corners=False, + init_cfg=dict( + type='Normal', std=0.01, override=dict(name='conv_seg'))): + super().__init__(init_cfg) + self._init_inputs(in_channels, in_index, input_transform) + self.channels = channels + self.dropout_ratio = dropout_ratio + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.in_index = in_index + + self.ignore_index = ignore_index + self.align_corners = align_corners + + if out_channels is None: + if num_classes == 2: + warnings.warn('For binary segmentation, we suggest using' + '`out_channels = 1` to define the output' + 'channels of segmentor, and use `threshold`' + 'to convert `seg_logits` into a prediction' + 'applying a threshold') + out_channels = num_classes + + if out_channels != num_classes and out_channels != 1: + raise ValueError( + 'out_channels should be equal to num_classes,' + 'except binary segmentation set out_channels == 1 and' + f'num_classes == 2, but got out_channels={out_channels}' + f'and num_classes={num_classes}') + + if out_channels == 1 and threshold is None: + threshold = 0.3 + warnings.warn('threshold is not defined for binary, and defaults' + 'to 0.3') + self.num_classes = num_classes + self.out_channels = out_channels + self.threshold = threshold + + if isinstance(loss_decode, dict): + self.loss_decode = build_loss(loss_decode) + elif isinstance(loss_decode, (list, tuple)): + self.loss_decode = nn.ModuleList() + for loss in loss_decode: + self.loss_decode.append(build_loss(loss)) + else: + raise TypeError(f'loss_decode must be a dict or sequence of dict,\ + but got {type(loss_decode)}') + + if sampler is not None: + self.sampler = build_pixel_sampler(sampler, context=self) + else: + self.sampler = None + + self.conv_seg = nn.Conv2d(channels, self.out_channels, kernel_size=1) + if dropout_ratio > 0: + self.dropout = nn.Dropout2d(dropout_ratio) + else: + self.dropout = None + + def extra_repr(self): + """Extra repr.""" + s = f'input_transform={self.input_transform}, ' \ + f'ignore_index={self.ignore_index}, ' \ + f'align_corners={self.align_corners}' + return s + + def _init_inputs(self, in_channels, in_index, input_transform): + """Check and initialize input transforms. + + The in_channels, in_index and input_transform must match. + Specifically, when input_transform is None, only single feature map + will be selected. So in_channels and in_index must be of type int. + When input_transform + + Args: + in_channels (int|Sequence[int]): Input channels. + in_index (int|Sequence[int]): Input feature index. + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. 
+ """ + + if input_transform is not None: + assert input_transform in ['resize_concat', 'multiple_select'] + self.input_transform = input_transform + self.in_index = in_index + if input_transform is not None: + assert isinstance(in_channels, (list, tuple)) + assert isinstance(in_index, (list, tuple)) + assert len(in_channels) == len(in_index) + if input_transform == 'resize_concat': + self.in_channels = sum(in_channels) + else: + self.in_channels = in_channels + else: + assert isinstance(in_channels, int) + assert isinstance(in_index, int) + self.in_channels = in_channels + + def _transform_inputs(self, inputs): + """Transform inputs for decoder. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + Tensor: The transformed inputs + """ + + if self.input_transform == 'resize_concat': + inputs = [inputs[i] for i in self.in_index] + upsampled_inputs = [ + resize( + input=x, + size=inputs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) for x in inputs + ] + inputs = torch.cat(upsampled_inputs, dim=1) + elif self.input_transform == 'multiple_select': + inputs = [inputs[i] for i in self.in_index] + else: + inputs = inputs[self.in_index] + + return inputs + + @abstractmethod + def forward(self, inputs): + """Placeholder of forward function.""" + pass + + def cls_seg(self, feat): + """Classify each pixel.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.conv_seg(feat) + return output + + def loss(self, inputs: Tuple[Tensor], batch_data_samples: SampleList, + train_cfg: ConfigType) -> dict: + """Forward function for training. + + Args: + inputs (Tuple[Tensor]): List of multi-level img features. + batch_data_samples (list[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `img_metas` or `gt_semantic_seg`. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs) + losses = self.loss_by_feat(seg_logits, batch_data_samples) + return losses + + def predict(self, inputs: Tuple[Tensor], batch_img_metas: List[dict], + test_cfg: ConfigType) -> Tensor: + """Forward function for prediction. + + Args: + inputs (Tuple[Tensor]): List of multi-level img features. + batch_img_metas (dict): List Image info where each dict may also + contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Outputs segmentation logits map. + """ + seg_logits = self.forward(inputs) + + return self.predict_by_feat(seg_logits, batch_img_metas) + + def _stack_batch_gt(self, batch_data_samples: SampleList) -> Tensor: + gt_semantic_segs = [ + data_sample.gt_sem_seg.data for data_sample in batch_data_samples + ] + return torch.stack(gt_semantic_segs, dim=0) + + def loss_by_feat(self, seg_logits: Tensor, + batch_data_samples: SampleList) -> dict: + """Compute segmentation loss. + + Args: + seg_logits (Tensor): The output from decode head forward function. + batch_data_samples (List[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `metainfo` and `gt_sem_seg`. 
+ + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + + seg_label = self._stack_batch_gt(batch_data_samples) + loss = dict() + seg_logits = resize( + input=seg_logits, + size=seg_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + if self.sampler is not None: + seg_weight = self.sampler.sample(seg_logits, seg_label) + else: + seg_weight = None + seg_label = seg_label.squeeze(1) + + if not isinstance(self.loss_decode, nn.ModuleList): + losses_decode = [self.loss_decode] + else: + losses_decode = self.loss_decode + for loss_decode in losses_decode: + if loss_decode.loss_name not in loss: + loss[loss_decode.loss_name] = loss_decode( + seg_logits, + seg_label, + weight=seg_weight, + ignore_index=self.ignore_index) + else: + loss[loss_decode.loss_name] += loss_decode( + seg_logits, + seg_label, + weight=seg_weight, + ignore_index=self.ignore_index) + + loss['acc_seg'] = accuracy( + seg_logits, seg_label, ignore_index=self.ignore_index) + return loss + + def predict_by_feat(self, seg_logits: Tensor, + batch_img_metas: List[dict]) -> Tensor: + """Transform a batch of output seg_logits to the input shape. + + Args: + seg_logits (Tensor): The output from decode head forward function. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + + Returns: + Tensor: Outputs segmentation logits map. + """ + + if isinstance(batch_img_metas[0]['img_shape'], torch.Size): + # slide inference + size = batch_img_metas[0]['img_shape'] + elif 'pad_shape' in batch_img_metas[0]: + size = batch_img_metas[0]['pad_shape'][:2] + else: + size = batch_img_metas[0]['img_shape'] + + seg_logits = resize( + input=seg_logits, + size=size, + mode='bilinear', + align_corners=self.align_corners) + return seg_logits diff --git a/mmseg/models/decode_heads/dm_head.py b/mmseg/models/decode_heads/dm_head.py new file mode 100644 index 0000000000000000000000000000000000000000..7694abd8ac3a470d543c580bd97adceb5b647f7c --- /dev/null +++ b/mmseg/models/decode_heads/dm_head.py @@ -0,0 +1,141 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer + +from mmseg.registry import MODELS +from .decode_head import BaseDecodeHead + + +class DCM(nn.Module): + """Dynamic Convolutional Module used in DMNet. + + Args: + filter_size (int): The filter size of generated convolution kernel + used in Dynamic Convolutional Module. + fusion (bool): Add one conv to fuse DCM output feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. 
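+
+    Example:
+        An illustrative sketch (toy sizes; ``DCM`` is the helper module
+        defined in this file, not a registered component):
+
+        >>> import torch
+        >>> dcm = DCM(filter_size=3, fusion=False, in_channels=64,
+        ...           channels=32, conv_cfg=None,
+        ...           norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'))
+        >>> dcm(torch.rand(2, 64, 16, 16)).shape  # per-sample dynamic filters
+        torch.Size([2, 32, 16, 16])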
+ """ + + def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super().__init__() + self.filter_size = filter_size + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.filter_gen_conv = nn.Conv2d(self.in_channels, self.channels, 1, 1, + 0) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.norm_cfg is not None: + self.norm = build_norm_layer(self.norm_cfg, self.channels)[1] + else: + self.norm = None + self.activate = build_activation_layer(self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + generated_filter = self.filter_gen_conv( + F.adaptive_avg_pool2d(x, self.filter_size)) + x = self.input_redu_conv(x) + b, c, h, w = x.shape + # [1, b * c, h, w], c = self.channels + x = x.view(1, b * c, h, w) + # [b * c, 1, filter_size, filter_size] + generated_filter = generated_filter.view(b * c, 1, self.filter_size, + self.filter_size) + pad = (self.filter_size - 1) // 2 + if (self.filter_size - 1) % 2 == 0: + p2d = (pad, pad, pad, pad) + else: + p2d = (pad + 1, pad, pad + 1, pad) + x = F.pad(input=x, pad=p2d, mode='constant', value=0) + # [1, b * c, h, w] + output = F.conv2d(input=x, weight=generated_filter, groups=b * c) + # [b, c, h, w] + output = output.view(b, c, h, w) + if self.norm is not None: + output = self.norm(output) + output = self.activate(output) + + if self.fusion: + output = self.fusion_conv(output) + + return output + + +@MODELS.register_module() +class DMHead(BaseDecodeHead): + """Dynamic Multi-scale Filters for Semantic Segmentation. + + This head is the implementation of + `DMNet `_. + + Args: + filter_sizes (tuple[int]): The size of generated convolutional filters + used in Dynamic Convolutional Module. Default: (1, 3, 5, 7). + fusion (bool): Add one conv to fuse DCM output feature. + """ + + def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs): + super().__init__(**kwargs) + assert isinstance(filter_sizes, (list, tuple)) + self.filter_sizes = filter_sizes + self.fusion = fusion + dcm_modules = [] + for filter_size in self.filter_sizes: + dcm_modules.append( + DCM(filter_size, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.dcm_modules = nn.ModuleList(dcm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(filter_sizes) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + dcm_outs = [x] + for dcm_module in self.dcm_modules: + dcm_outs.append(dcm_module(x)) + dcm_outs = torch.cat(dcm_outs, dim=1) + output = self.bottleneck(dcm_outs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/dnl_head.py b/mmseg/models/decode_heads/dnl_head.py new file mode 100644 index 0000000000000000000000000000000000000000..248c11814108d02e88fa7e0cada061b3366e33ff --- /dev/null +++ b/mmseg/models/decode_heads/dnl_head.py @@ -0,0 +1,137 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch +from mmcv.cnn import NonLocal2d +from torch import nn + +from mmseg.registry import MODELS +from .fcn_head import FCNHead + + +class DisentangledNonLocal2d(NonLocal2d): + """Disentangled Non-Local Blocks. + + Args: + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, *arg, temperature, **kwargs): + super().__init__(*arg, **kwargs) + self.temperature = temperature + self.conv_mask = nn.Conv2d(self.in_channels, 1, kernel_size=1) + + def embedded_gaussian(self, theta_x, phi_x): + """Embedded gaussian with temperature.""" + + # NonLocal2d pairwise_weight: [N, HxW, HxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= torch.tensor( + theta_x.shape[-1], + dtype=torch.float, + device=pairwise_weight.device)**torch.tensor( + 0.5, device=pairwise_weight.device) + pairwise_weight /= torch.tensor( + self.temperature, device=pairwise_weight.device) + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def forward(self, x): + # x: [N, C, H, W] + n = x.size(0) + + # g_x: [N, HxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # theta_x: [N, HxW, C], phi_x: [N, C, HxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + # subtract mean + theta_x -= theta_x.mean(dim=-2, keepdim=True) + phi_x -= phi_x.mean(dim=-1, keepdim=True) + + pairwise_func = getattr(self, self.mode) + # pairwise_weight: [N, HxW, HxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # y: [N, HxW, C] + y = torch.matmul(pairwise_weight, g_x) + # y: [N, C, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, + *x.size()[2:]) + + # unary_mask: [N, 1, HxW] + unary_mask = self.conv_mask(x) + unary_mask = unary_mask.view(n, 1, -1) + unary_mask = unary_mask.softmax(dim=-1) + # unary_x: [N, 1, C] + unary_x = torch.matmul(unary_mask, g_x) + # unary_x: [N, C, 1, 1] + unary_x = unary_x.permute(0, 2, 1).contiguous().reshape( + n, self.inter_channels, 1, 1) + + output = x + self.conv_out(y + unary_x) + + return output + + +@MODELS.register_module() +class DNLHead(FCNHead): + """Disentangled Non-Local Neural Networks. + + This head is the implementation of `DNLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: False. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. + temperature (float): Temperature to adjust attention. 
Default: 0.05 + """ + + def __init__(self, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + temperature=0.05, + **kwargs): + super().__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.temperature = temperature + self.dnl_block = DisentangledNonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode, + temperature=self.temperature) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.dnl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/dpt_head.py b/mmseg/models/decode_heads/dpt_head.py new file mode 100644 index 0000000000000000000000000000000000000000..d2cfd89daa4df48601e930cfd158dcf3c9a6a837 --- /dev/null +++ b/mmseg/models/decode_heads/dpt_head.py @@ -0,0 +1,294 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, Linear, build_activation_layer +from mmengine.model import BaseModule + +from mmseg.registry import MODELS +from ..utils import resize +from .decode_head import BaseDecodeHead + + +class ReassembleBlocks(BaseModule): + """ViTPostProcessBlock, process cls_token in ViT backbone output and + rearrange the feature vector to feature map. + + Args: + in_channels (int): ViT feature channels. Default: 768. + out_channels (List): output channels of each stage. + Default: [96, 192, 384, 768]. + readout_type (str): Type of readout operation. Default: 'ignore'. + patch_size (int): The patch size. Default: 16. + init_cfg (dict, optional): Initialization config dict. Default: None. 
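+
+    Example:
+        An illustrative sketch; each input is a ``[feature, cls_token]``
+        pair from a ViT stage (toy batch of 2, 32x32 token grid):
+
+        >>> import torch
+        >>> block = ReassembleBlocks()
+        >>> inputs = [[torch.rand(2, 768, 32, 32), torch.rand(2, 768)]
+        ...           for _ in range(4)]
+        >>> [out.shape[-1] for out in block(inputs)]   # 4x, 2x, 1x, 0.5x
+        [128, 64, 32, 16]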
+ """ + + def __init__(self, + in_channels=768, + out_channels=[96, 192, 384, 768], + readout_type='ignore', + patch_size=16, + init_cfg=None): + super().__init__(init_cfg) + + assert readout_type in ['ignore', 'add', 'project'] + self.readout_type = readout_type + self.patch_size = patch_size + + self.projects = nn.ModuleList([ + ConvModule( + in_channels=in_channels, + out_channels=out_channel, + kernel_size=1, + act_cfg=None, + ) for out_channel in out_channels + ]) + + self.resize_layers = nn.ModuleList([ + nn.ConvTranspose2d( + in_channels=out_channels[0], + out_channels=out_channels[0], + kernel_size=4, + stride=4, + padding=0), + nn.ConvTranspose2d( + in_channels=out_channels[1], + out_channels=out_channels[1], + kernel_size=2, + stride=2, + padding=0), + nn.Identity(), + nn.Conv2d( + in_channels=out_channels[3], + out_channels=out_channels[3], + kernel_size=3, + stride=2, + padding=1) + ]) + if self.readout_type == 'project': + self.readout_projects = nn.ModuleList() + for _ in range(len(self.projects)): + self.readout_projects.append( + nn.Sequential( + Linear(2 * in_channels, in_channels), + build_activation_layer(dict(type='GELU')))) + + def forward(self, inputs): + assert isinstance(inputs, list) + out = [] + for i, x in enumerate(inputs): + assert len(x) == 2 + x, cls_token = x[0], x[1] + feature_shape = x.shape + if self.readout_type == 'project': + x = x.flatten(2).permute((0, 2, 1)) + readout = cls_token.unsqueeze(1).expand_as(x) + x = self.readout_projects[i](torch.cat((x, readout), -1)) + x = x.permute(0, 2, 1).reshape(feature_shape) + elif self.readout_type == 'add': + x = x.flatten(2) + cls_token.unsqueeze(-1) + x = x.reshape(feature_shape) + else: + pass + x = self.projects[i](x) + x = self.resize_layers[i](x) + out.append(x) + return out + + +class PreActResidualConvUnit(BaseModule): + """ResidualConvUnit, pre-activate residual unit. + + Args: + in_channels (int): number of channels in the input feature map. + act_cfg (dict): dictionary to construct and config activation layer. + norm_cfg (dict): dictionary to construct and config norm layer. + stride (int): stride of the first block. Default: 1 + dilation (int): dilation rate for convs layers. Default: 1. + init_cfg (dict, optional): Initialization config dict. Default: None. + """ + + def __init__(self, + in_channels, + act_cfg, + norm_cfg, + stride=1, + dilation=1, + init_cfg=None): + super().__init__(init_cfg) + + self.conv1 = ConvModule( + in_channels, + in_channels, + 3, + stride=stride, + padding=dilation, + dilation=dilation, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + bias=False, + order=('act', 'conv', 'norm')) + + self.conv2 = ConvModule( + in_channels, + in_channels, + 3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + bias=False, + order=('act', 'conv', 'norm')) + + def forward(self, inputs): + inputs_ = inputs.clone() + x = self.conv1(inputs) + x = self.conv2(x) + return x + inputs_ + + +class FeatureFusionBlock(BaseModule): + """FeatureFusionBlock, merge feature map from different stages. + + Args: + in_channels (int): Input channels. + act_cfg (dict): The activation config for ResidualConvUnit. + norm_cfg (dict): Config dict for normalization layer. + expand (bool): Whether expand the channels in post process block. + Default: False. + align_corners (bool): align_corner setting for bilinear upsample. + Default: True. + init_cfg (dict, optional): Initialization config dict. Default: None. 
+ """ + + def __init__(self, + in_channels, + act_cfg, + norm_cfg, + expand=False, + align_corners=True, + init_cfg=None): + super().__init__(init_cfg) + + self.in_channels = in_channels + self.expand = expand + self.align_corners = align_corners + + self.out_channels = in_channels + if self.expand: + self.out_channels = in_channels // 2 + + self.project = ConvModule( + self.in_channels, + self.out_channels, + kernel_size=1, + act_cfg=None, + bias=True) + + self.res_conv_unit1 = PreActResidualConvUnit( + in_channels=self.in_channels, act_cfg=act_cfg, norm_cfg=norm_cfg) + self.res_conv_unit2 = PreActResidualConvUnit( + in_channels=self.in_channels, act_cfg=act_cfg, norm_cfg=norm_cfg) + + def forward(self, *inputs): + x = inputs[0] + if len(inputs) == 2: + if x.shape != inputs[1].shape: + res = resize( + inputs[1], + size=(x.shape[2], x.shape[3]), + mode='bilinear', + align_corners=False) + else: + res = inputs[1] + x = x + self.res_conv_unit1(res) + x = self.res_conv_unit2(x) + x = resize( + x, + scale_factor=2, + mode='bilinear', + align_corners=self.align_corners) + x = self.project(x) + return x + + +@MODELS.register_module() +class DPTHead(BaseDecodeHead): + """Vision Transformers for Dense Prediction. + + This head is implemented of `DPT `_. + + Args: + embed_dims (int): The embed dimension of the ViT backbone. + Default: 768. + post_process_channels (List): Out channels of post process conv + layers. Default: [96, 192, 384, 768]. + readout_type (str): Type of readout operation. Default: 'ignore'. + patch_size (int): The patch size. Default: 16. + expand_channels (bool): Whether expand the channels in post process + block. Default: False. + act_cfg (dict): The activation config for residual conv unit. + Default dict(type='ReLU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). 
+ """ + + def __init__(self, + embed_dims=768, + post_process_channels=[96, 192, 384, 768], + readout_type='ignore', + patch_size=16, + expand_channels=False, + act_cfg=dict(type='ReLU'), + norm_cfg=dict(type='BN'), + **kwargs): + super().__init__(**kwargs) + + self.in_channels = self.in_channels + self.expand_channels = expand_channels + self.reassemble_blocks = ReassembleBlocks(embed_dims, + post_process_channels, + readout_type, patch_size) + + self.post_process_channels = [ + channel * math.pow(2, i) if expand_channels else channel + for i, channel in enumerate(post_process_channels) + ] + self.convs = nn.ModuleList() + for channel in self.post_process_channels: + self.convs.append( + ConvModule( + channel, + self.channels, + kernel_size=3, + padding=1, + act_cfg=None, + bias=False)) + self.fusion_blocks = nn.ModuleList() + for _ in range(len(self.convs)): + self.fusion_blocks.append( + FeatureFusionBlock(self.channels, act_cfg, norm_cfg)) + self.fusion_blocks[0].res_conv_unit1 = None + self.project = ConvModule( + self.channels, + self.channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg) + self.num_fusion_blocks = len(self.fusion_blocks) + self.num_reassemble_blocks = len(self.reassemble_blocks.resize_layers) + self.num_post_process_channels = len(self.post_process_channels) + assert self.num_fusion_blocks == self.num_reassemble_blocks + assert self.num_reassemble_blocks == self.num_post_process_channels + + def forward(self, inputs): + assert len(inputs) == self.num_reassemble_blocks + x = self._transform_inputs(inputs) + x = self.reassemble_blocks(x) + x = [self.convs[i](feature) for i, feature in enumerate(x)] + out = self.fusion_blocks[0](x[-1]) + for i in range(1, len(self.fusion_blocks)): + out = self.fusion_blocks[i](out, x[-(i + 1)]) + out = self.project(out) + out = self.cls_seg(out) + return out diff --git a/mmseg/models/decode_heads/ema_head.py b/mmseg/models/decode_heads/ema_head.py new file mode 100644 index 0000000000000000000000000000000000000000..ab8dbb0c29b9b533dad962e48d71ae055f20aa07 --- /dev/null +++ b/mmseg/models/decode_heads/ema_head.py @@ -0,0 +1,169 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from .decode_head import BaseDecodeHead + + +def reduce_mean(tensor): + """Reduce mean when distributed training.""" + if not (dist.is_available() and dist.is_initialized()): + return tensor + tensor = tensor.clone() + dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) + return tensor + + +class EMAModule(nn.Module): + """Expectation Maximization Attention Module used in EMANet. + + Args: + channels (int): Channels of the whole module. + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + """ + + def __init__(self, channels, num_bases, num_stages, momentum): + super().__init__() + assert num_stages >= 1, 'num_stages must be at least 1!' + self.num_bases = num_bases + self.num_stages = num_stages + self.momentum = momentum + + bases = torch.zeros(1, channels, self.num_bases) + bases.normal_(0, math.sqrt(2. 
/ self.num_bases)) + # [1, channels, num_bases] + bases = F.normalize(bases, dim=1, p=2) + self.register_buffer('bases', bases) + + def forward(self, feats): + """Forward function.""" + batch_size, channels, height, width = feats.size() + # [batch_size, channels, height*width] + feats = feats.view(batch_size, channels, height * width) + # [batch_size, channels, num_bases] + bases = self.bases.repeat(batch_size, 1, 1) + + with torch.no_grad(): + for i in range(self.num_stages): + # [batch_size, height*width, num_bases] + attention = torch.einsum('bcn,bck->bnk', feats, bases) + attention = F.softmax(attention, dim=2) + # l1 norm + attention_normed = F.normalize(attention, dim=1, p=1) + # [batch_size, channels, num_bases] + bases = torch.einsum('bcn,bnk->bck', feats, attention_normed) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + + feats_recon = torch.einsum('bck,bnk->bcn', bases, attention) + feats_recon = feats_recon.view(batch_size, channels, height, width) + + if self.training: + bases = bases.mean(dim=0, keepdim=True) + bases = reduce_mean(bases) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + self.bases = (1 - + self.momentum) * self.bases + self.momentum * bases + + return feats_recon + + +@MODELS.register_module() +class EMAHead(BaseDecodeHead): + """Expectation Maximization Attention Networks for Semantic Segmentation. + + This head is the implementation of `EMANet + `_. + + Args: + ema_channels (int): EMA module channels + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + concat_input (bool): Whether concat the input and output of convs + before classification layer. Default: True + momentum (float): Momentum to update the base. Default: 0.1. + """ + + def __init__(self, + ema_channels, + num_bases, + num_stages, + concat_input=True, + momentum=0.1, + **kwargs): + super().__init__(**kwargs) + self.ema_channels = ema_channels + self.num_bases = num_bases + self.num_stages = num_stages + self.concat_input = concat_input + self.momentum = momentum + self.ema_module = EMAModule(self.ema_channels, self.num_bases, + self.num_stages, self.momentum) + + self.ema_in_conv = ConvModule( + self.in_channels, + self.ema_channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + # project (0, inf) -> (-inf, inf) + self.ema_mid_conv = ConvModule( + self.ema_channels, + self.ema_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=None, + act_cfg=None) + for param in self.ema_mid_conv.parameters(): + param.requires_grad = False + + self.ema_out_conv = ConvModule( + self.ema_channels, + self.ema_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.bottleneck = ConvModule( + self.ema_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.ema_in_conv(x) + identity = feats + feats = self.ema_mid_conv(feats) + recon = self.ema_module(feats) + recon = F.relu(recon, inplace=True) + recon = self.ema_out_conv(recon) + output = F.relu(identity + recon, inplace=True) + output = self.bottleneck(output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = 
self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/enc_head.py b/mmseg/models/decode_heads/enc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..ef48fb6995365ba374b29ea265608087500f27dc --- /dev/null +++ b/mmseg/models/decode_heads/enc_head.py @@ -0,0 +1,197 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule, build_norm_layer +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.utils import ConfigType, SampleList +from ..builder import build_loss +from ..utils import Encoding, resize +from .decode_head import BaseDecodeHead + + +class EncModule(nn.Module): + """Encoding Module used in EncNet. + + Args: + in_channels (int): Input channels. + num_codes (int): Number of code words. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg): + super().__init__() + self.encoding_project = ConvModule( + in_channels, + in_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + # TODO: resolve this hack + # change to 1d + if norm_cfg is not None: + encoding_norm_cfg = norm_cfg.copy() + if encoding_norm_cfg['type'] in ['BN', 'IN']: + encoding_norm_cfg['type'] += '1d' + else: + encoding_norm_cfg['type'] = encoding_norm_cfg['type'].replace( + '2d', '1d') + else: + # fallback to BN1d + encoding_norm_cfg = dict(type='BN1d') + self.encoding = nn.Sequential( + Encoding(channels=in_channels, num_codes=num_codes), + build_norm_layer(encoding_norm_cfg, num_codes)[1], + nn.ReLU(inplace=True)) + self.fc = nn.Sequential( + nn.Linear(in_channels, in_channels), nn.Sigmoid()) + + def forward(self, x): + """Forward function.""" + encoding_projection = self.encoding_project(x) + encoding_feat = self.encoding(encoding_projection).mean(dim=1) + batch_size, channels, _, _ = x.size() + gamma = self.fc(encoding_feat) + y = gamma.view(batch_size, channels, 1, 1) + output = F.relu_(x + x * y) + return encoding_feat, output + + +@MODELS.register_module() +class EncHead(BaseDecodeHead): + """Context Encoding for Semantic Segmentation. + + This head is the implementation of `EncNet + `_. + + Args: + num_codes (int): Number of code words. Default: 32. + use_se_loss (bool): Whether use Semantic Encoding Loss (SE-loss) to + regularize the training. Default: True. + add_lateral (bool): Whether use lateral connection to fuse features. + Default: False. + loss_se_decode (dict): Config of decode loss. + Default: dict(type='CrossEntropyLoss', use_sigmoid=True). 
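+
+    Example:
+        An illustrative sketch (two input levels, toy widths):
+
+        >>> import torch
+        >>> head = EncHead(in_channels=[32, 64], in_index=[0, 1],
+        ...                channels=16, num_classes=4, num_codes=8,
+        ...                norm_cfg=dict(type='BN'))
+        >>> feats = [torch.rand(2, 32, 16, 16), torch.rand(2, 64, 8, 8)]
+        >>> seg_logits, se_logits = head(feats)  # SE branch: class presence
+        >>> seg_logits.shape, se_logits.shape
+        (torch.Size([2, 4, 8, 8]), torch.Size([2, 4]))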
+ """ + + def __init__(self, + num_codes=32, + use_se_loss=True, + add_lateral=False, + loss_se_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=0.2), + **kwargs): + super().__init__(input_transform='multiple_select', **kwargs) + self.use_se_loss = use_se_loss + self.add_lateral = add_lateral + self.num_codes = num_codes + self.bottleneck = ConvModule( + self.in_channels[-1], + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if add_lateral: + self.lateral_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the last one + self.lateral_convs.append( + ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.fusion = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.enc_module = EncModule( + self.channels, + num_codes=num_codes, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if self.use_se_loss: + self.loss_se_decode = build_loss(loss_se_decode) + self.se_layer = nn.Linear(self.channels, self.num_classes) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + feat = self.bottleneck(inputs[-1]) + if self.add_lateral: + laterals = [ + resize( + lateral_conv(inputs[i]), + size=feat.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + feat = self.fusion(torch.cat([feat, *laterals], 1)) + encode_feat, output = self.enc_module(feat) + output = self.cls_seg(output) + if self.use_se_loss: + se_output = self.se_layer(encode_feat) + return output, se_output + else: + return output + + def predict(self, inputs: Tuple[Tensor], batch_img_metas: List[dict], + test_cfg: ConfigType): + """Forward function for testing, ignore se_loss.""" + if self.use_se_loss: + seg_logits = self.forward(inputs)[0] + else: + seg_logits = self.forward(inputs) + return self.predict_by_feat(seg_logits, batch_img_metas) + + @staticmethod + def _convert_to_onehot_labels(seg_label, num_classes): + """Convert segmentation label to onehot. + + Args: + seg_label (Tensor): Segmentation label of shape (N, H, W). + num_classes (int): Number of classes. + + Returns: + Tensor: Onehot labels of shape (N, num_classes). + """ + + batch_size = seg_label.size(0) + onehot_labels = seg_label.new_zeros((batch_size, num_classes)) + for i in range(batch_size): + hist = seg_label[i].float().histc( + bins=num_classes, min=0, max=num_classes - 1) + onehot_labels[i] = hist > 0 + return onehot_labels + + def loss_by_feat(self, seg_logit: Tuple[Tensor], + batch_data_samples: SampleList, **kwargs) -> dict: + """Compute segmentation and semantic encoding loss.""" + seg_logit, se_seg_logit = seg_logit + loss = dict() + loss.update(super().loss_by_feat(seg_logit, batch_data_samples)) + + seg_label = self._stack_batch_gt(batch_data_samples) + se_loss = self.loss_se_decode( + se_seg_logit, + self._convert_to_onehot_labels(seg_label, self.num_classes)) + loss['loss_se'] = se_loss + return loss diff --git a/mmseg/models/decode_heads/fcn_head.py b/mmseg/models/decode_heads/fcn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..341801888368d307da6b926a2c89f72b6b06476d --- /dev/null +++ b/mmseg/models/decode_heads/fcn_head.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from .decode_head import BaseDecodeHead + + +@MODELS.register_module() +class FCNHead(BaseDecodeHead): + """Fully Convolution Networks for Semantic Segmentation. + + This head is implemented of `FCNNet `_. + + Args: + num_convs (int): Number of convs in the head. Default: 2. + kernel_size (int): The kernel size for convs in the head. Default: 3. + concat_input (bool): Whether concat the input and output of convs + before classification layer. + dilation (int): The dilation rate for convs in the head. Default: 1. + """ + + def __init__(self, + num_convs=2, + kernel_size=3, + concat_input=True, + dilation=1, + **kwargs): + assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) + self.num_convs = num_convs + self.concat_input = concat_input + self.kernel_size = kernel_size + super().__init__(**kwargs) + if num_convs == 0: + assert self.in_channels == self.channels + + conv_padding = (kernel_size // 2) * dilation + convs = [] + convs.append( + ConvModule( + self.in_channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + for i in range(num_convs - 1): + convs.append( + ConvModule( + self.channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + if num_convs == 0: + self.convs = nn.Identity() + else: + self.convs = nn.Sequential(*convs) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=kernel_size, + padding=kernel_size // 2, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _forward_feature(self, inputs): + """Forward function for feature maps before classifying each pixel with + ``self.cls_seg`` fc. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + feats (Tensor): A tensor of shape (batch_size, self.channels, + H, W) which is feature map for last layer of decoder head. + """ + x = self._transform_inputs(inputs) + feats = self.convs(x) + if self.concat_input: + feats = self.conv_cat(torch.cat([x, feats], dim=1)) + return feats + + def forward(self, inputs): + """Forward function.""" + output = self._forward_feature(inputs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/fpn_head.py b/mmseg/models/decode_heads/fpn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..25f481fe81c5f4f0aa37903aaf135dc63c930bf8 --- /dev/null +++ b/mmseg/models/decode_heads/fpn_head.py @@ -0,0 +1,68 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import Upsample, resize +from .decode_head import BaseDecodeHead + + +@MODELS.register_module() +class FPNHead(BaseDecodeHead): + """Panoptic Feature Pyramid Networks. + + This head is the implementation of `Semantic FPN + `_. + + Args: + feature_strides (tuple[int]): The strides for input feature maps. + stack_lateral. All strides suppose to be power of 2. The first + one is of largest resolution. 
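+
+    Example:
+        An illustrative sketch (strides 4-32 for a 64x64 input, toy widths):
+
+        >>> import torch
+        >>> head = FPNHead(feature_strides=(4, 8, 16, 32),
+        ...                in_channels=[32, 32, 32, 32],
+        ...                in_index=[0, 1, 2, 3],
+        ...                channels=16, num_classes=4)
+        >>> feats = [torch.rand(2, 32, 64 // s, 64 // s)
+        ...          for s in (4, 8, 16, 32)]
+        >>> head(feats).shape   # logits at the stride-4 resolution
+        torch.Size([2, 4, 16, 16])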
+ """ + + def __init__(self, feature_strides, **kwargs): + super().__init__(input_transform='multiple_select', **kwargs) + assert len(feature_strides) == len(self.in_channels) + assert min(feature_strides) == feature_strides[0] + self.feature_strides = feature_strides + + self.scale_heads = nn.ModuleList() + for i in range(len(feature_strides)): + head_length = max( + 1, + int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) + scale_head = [] + for k in range(head_length): + scale_head.append( + ConvModule( + self.in_channels[i] if k == 0 else self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + if feature_strides[i] != feature_strides[0]: + scale_head.append( + Upsample( + scale_factor=2, + mode='bilinear', + align_corners=self.align_corners)) + self.scale_heads.append(nn.Sequential(*scale_head)) + + def forward(self, inputs): + + x = self._transform_inputs(inputs) + + output = self.scale_heads[0](x[0]) + for i in range(1, len(self.feature_strides)): + # non inplace + output = output + resize( + self.scale_heads[i](x[i]), + size=output.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/gc_head.py b/mmseg/models/decode_heads/gc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..14f0ef021c1143d493e17f347f1f4da1145470b8 --- /dev/null +++ b/mmseg/models/decode_heads/gc_head.py @@ -0,0 +1,48 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmcv.cnn import ContextBlock + +from mmseg.registry import MODELS +from .fcn_head import FCNHead + + +@MODELS.register_module() +class GCHead(FCNHead): + """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. + + This head is the implementation of `GCNet + `_. + + Args: + ratio (float): Multiplier of channels ratio. Default: 1/4. + pooling_type (str): The pooling type of context aggregation. + Options are 'att', 'avg'. Default: 'avg'. + fusion_types (tuple[str]): The fusion type for feature fusion. + Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) + """ + + def __init__(self, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + **kwargs): + super().__init__(num_convs=2, **kwargs) + self.ratio = ratio + self.pooling_type = pooling_type + self.fusion_types = fusion_types + self.gc_block = ContextBlock( + in_channels=self.channels, + ratio=self.ratio, + pooling_type=self.pooling_type, + fusion_types=self.fusion_types) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.gc_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/ham_head.py b/mmseg/models/decode_heads/ham_head.py new file mode 100644 index 0000000000000000000000000000000000000000..073d8011b05dc8c5e8d48cc8b77484a27f7b2100 --- /dev/null +++ b/mmseg/models/decode_heads/ham_head.py @@ -0,0 +1,255 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+# Originally from https://github.com/visual-attention-network/segnext +# Licensed under the Apache License, Version 2.0 (the "License") +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmengine.device import get_device + +from mmseg.registry import MODELS +from ..utils import resize +from .decode_head import BaseDecodeHead + + +class Matrix_Decomposition_2D_Base(nn.Module): + """Base class of 2D Matrix Decomposition. + + Args: + MD_S (int): The number of spatial coefficient in + Matrix Decomposition, it may be used for calculation + of the number of latent dimension D in Matrix + Decomposition. Defaults: 1. + MD_R (int): The number of latent dimension R in + Matrix Decomposition. Defaults: 64. + train_steps (int): The number of iteration steps in + Multiplicative Update (MU) rule to solve Non-negative + Matrix Factorization (NMF) in training. Defaults: 6. + eval_steps (int): The number of iteration steps in + Multiplicative Update (MU) rule to solve Non-negative + Matrix Factorization (NMF) in evaluation. Defaults: 7. + inv_t (int): Inverted multiple number to make coefficient + smaller in softmax. Defaults: 100. + rand_init (bool): Whether to initialize randomly. + Defaults: True. + """ + + def __init__(self, + MD_S=1, + MD_R=64, + train_steps=6, + eval_steps=7, + inv_t=100, + rand_init=True): + super().__init__() + + self.S = MD_S + self.R = MD_R + + self.train_steps = train_steps + self.eval_steps = eval_steps + + self.inv_t = inv_t + + self.rand_init = rand_init + + def _build_bases(self, B, S, D, R, device=None): + raise NotImplementedError + + def local_step(self, x, bases, coef): + raise NotImplementedError + + def local_inference(self, x, bases): + # (B * S, D, N)^T @ (B * S, D, R) -> (B * S, N, R) + coef = torch.bmm(x.transpose(1, 2), bases) + coef = F.softmax(self.inv_t * coef, dim=-1) + + steps = self.train_steps if self.training else self.eval_steps + for _ in range(steps): + bases, coef = self.local_step(x, bases, coef) + + return bases, coef + + def compute_coef(self, x, bases, coef): + raise NotImplementedError + + def forward(self, x, return_bases=False): + """Forward Function.""" + B, C, H, W = x.shape + + # (B, C, H, W) -> (B * S, D, N) + D = C // self.S + N = H * W + x = x.view(B * self.S, D, N) + if not self.rand_init and not hasattr(self, 'bases'): + bases = self._build_bases(1, self.S, D, self.R, device=x.device) + self.register_buffer('bases', bases) + + # (S, D, R) -> (B * S, D, R) + if self.rand_init: + bases = self._build_bases(B, self.S, D, self.R, device=x.device) + else: + bases = self.bases.repeat(B, 1, 1) + + bases, coef = self.local_inference(x, bases) + + # (B * S, N, R) + coef = self.compute_coef(x, bases, coef) + + # (B * S, D, R) @ (B * S, N, R)^T -> (B * S, D, N) + x = torch.bmm(bases, coef.transpose(1, 2)) + + # (B * S, D, N) -> (B, C, H, W) + x = x.view(B, C, H, W) + + return x + + +class NMF2D(Matrix_Decomposition_2D_Base): + """Non-negative Matrix Factorization (NMF) module. + + It is inherited from ``Matrix_Decomposition_2D_Base`` module. 
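+
+    Example:
+        An illustrative sketch (toy sizes; ``args`` feeds the base-class
+        constructor):
+
+        >>> import torch
+        >>> nmf = NMF2D(dict(MD_R=16))
+        >>> nmf(torch.rand(2, 64, 32, 32)).shape  # reconstruction keeps shape
+        torch.Size([2, 64, 32, 32])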
+ """ + + def __init__(self, args=dict()): + super().__init__(**args) + + self.inv_t = 1 + + def _build_bases(self, B, S, D, R, device=None): + """Build bases in initialization.""" + if device is None: + device = get_device() + bases = torch.rand((B * S, D, R)).to(device) + bases = F.normalize(bases, dim=1) + + return bases + + def local_step(self, x, bases, coef): + """Local step in iteration to renew bases and coefficient.""" + # (B * S, D, N)^T @ (B * S, D, R) -> (B * S, N, R) + numerator = torch.bmm(x.transpose(1, 2), bases) + # (B * S, N, R) @ [(B * S, D, R)^T @ (B * S, D, R)] -> (B * S, N, R) + denominator = coef.bmm(bases.transpose(1, 2).bmm(bases)) + # Multiplicative Update + coef = coef * numerator / (denominator + 1e-6) + + # (B * S, D, N) @ (B * S, N, R) -> (B * S, D, R) + numerator = torch.bmm(x, coef) + # (B * S, D, R) @ [(B * S, N, R)^T @ (B * S, N, R)] -> (B * S, D, R) + denominator = bases.bmm(coef.transpose(1, 2).bmm(coef)) + # Multiplicative Update + bases = bases * numerator / (denominator + 1e-6) + + return bases, coef + + def compute_coef(self, x, bases, coef): + """Compute coefficient.""" + # (B * S, D, N)^T @ (B * S, D, R) -> (B * S, N, R) + numerator = torch.bmm(x.transpose(1, 2), bases) + # (B * S, N, R) @ (B * S, D, R)^T @ (B * S, D, R) -> (B * S, N, R) + denominator = coef.bmm(bases.transpose(1, 2).bmm(bases)) + # multiplication update + coef = coef * numerator / (denominator + 1e-6) + + return coef + + +class Hamburger(nn.Module): + """Hamburger Module. It consists of one slice of "ham" (matrix + decomposition) and two slices of "bread" (linear transformation). + + Args: + ham_channels (int): Input and output channels of feature. + ham_kwargs (dict): Config of matrix decomposition module. + norm_cfg (dict | None): Config of norm layers. + """ + + def __init__(self, + ham_channels=512, + ham_kwargs=dict(), + norm_cfg=None, + **kwargs): + super().__init__() + + self.ham_in = ConvModule( + ham_channels, ham_channels, 1, norm_cfg=None, act_cfg=None) + + self.ham = NMF2D(ham_kwargs) + + self.ham_out = ConvModule( + ham_channels, ham_channels, 1, norm_cfg=norm_cfg, act_cfg=None) + + def forward(self, x): + enjoy = self.ham_in(x) + enjoy = F.relu(enjoy, inplace=True) + enjoy = self.ham(enjoy) + enjoy = self.ham_out(enjoy) + ham = F.relu(x + enjoy, inplace=True) + + return ham + + +@MODELS.register_module() +class LightHamHead(BaseDecodeHead): + """SegNeXt decode head. + + This decode head is the implementation of `SegNeXt: Rethinking + Convolutional Attention Design for Semantic + Segmentation `_. + Inspiration from https://github.com/visual-attention-network/segnext. + + Specifically, LightHamHead is inspired by HamNet from + `Is Attention Better Than Matrix Decomposition? + `. + + Args: + ham_channels (int): input channels for Hamburger. + Defaults: 512. + ham_kwargs (int): kwagrs for Ham. Defaults: dict(). 
+ """ + + def __init__(self, ham_channels=512, ham_kwargs=dict(), **kwargs): + super().__init__(input_transform='multiple_select', **kwargs) + self.ham_channels = ham_channels + + self.squeeze = ConvModule( + sum(self.in_channels), + self.ham_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.hamburger = Hamburger(ham_channels, ham_kwargs, **kwargs) + + self.align = ConvModule( + self.ham_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + + inputs = [ + resize( + level, + size=inputs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) for level in inputs + ] + + inputs = torch.cat(inputs, dim=1) + # apply a conv block to squeeze feature map + x = self.squeeze(inputs) + # apply hamburger module + x = self.hamburger(x) + + # apply a conv block to align feature map + output = self.align(x) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/isa_head.py b/mmseg/models/decode_heads/isa_head.py new file mode 100644 index 0000000000000000000000000000000000000000..355f215f39007d0153c2fdb3b22a40e7f11a01e3 --- /dev/null +++ b/mmseg/models/decode_heads/isa_head.py @@ -0,0 +1,143 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class SelfAttentionBlock(_SelfAttentionBlock): + """Self-Attention Module. + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict | None): Config of activation layers. + """ + + def __init__(self, in_channels, channels, conv_cfg, norm_cfg, act_cfg): + super().__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=None, + key_downsample=None, + key_query_num_convs=2, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=True, + with_out=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + self.output_project = self.build_project( + in_channels, + in_channels, + num_convs=1, + use_conv_module=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, x): + """Forward function.""" + context = super().forward(x, x) + return self.output_project(context) + + +@MODELS.register_module() +class ISAHead(BaseDecodeHead): + """Interlaced Sparse Self-Attention for Semantic Segmentation. + + This head is the implementation of `ISA + `_. + + Args: + isa_channels (int): The channels of ISA Module. + down_factor (tuple[int]): The local group size of ISA. 
+ """ + + def __init__(self, isa_channels, down_factor=(8, 8), **kwargs): + super().__init__(**kwargs) + self.down_factor = down_factor + + self.in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.global_relation = SelfAttentionBlock( + self.channels, + isa_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.local_relation = SelfAttentionBlock( + self.channels, + isa_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.out_conv = ConvModule( + self.channels * 2, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x_ = self._transform_inputs(inputs) + x = self.in_conv(x_) + residual = x + + n, c, h, w = x.size() + loc_h, loc_w = self.down_factor # size of local group in H- and W-axes + glb_h, glb_w = math.ceil(h / loc_h), math.ceil(w / loc_w) + pad_h, pad_w = glb_h * loc_h - h, glb_w * loc_w - w + if pad_h > 0 or pad_w > 0: # pad if the size is not divisible + padding = (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, + pad_h - pad_h // 2) + x = F.pad(x, padding) + + # global relation + x = x.view(n, c, glb_h, loc_h, glb_w, loc_w) + # do permutation to gather global group + x = x.permute(0, 3, 5, 1, 2, 4) # (n, loc_h, loc_w, c, glb_h, glb_w) + x = x.reshape(-1, c, glb_h, glb_w) + # apply attention within each global group + x = self.global_relation(x) # (n * loc_h * loc_w, c, glb_h, glb_w) + + # local relation + x = x.view(n, loc_h, loc_w, c, glb_h, glb_w) + # do permutation to gather local group + x = x.permute(0, 4, 5, 3, 1, 2) # (n, glb_h, glb_w, c, loc_h, loc_w) + x = x.reshape(-1, c, loc_h, loc_w) + # apply attention within each local group + x = self.local_relation(x) # (n * glb_h * glb_w, c, loc_h, loc_w) + + # permute each pixel back to its original position + x = x.view(n, glb_h, glb_w, c, loc_h, loc_w) + x = x.permute(0, 3, 1, 4, 2, 5) # (n, c, glb_h, loc_h, glb_w, loc_w) + x = x.reshape(n, c, glb_h * loc_h, glb_w * loc_w) + if pad_h > 0 or pad_w > 0: # remove padding + x = x[:, :, pad_h // 2:pad_h // 2 + h, pad_w // 2:pad_w // 2 + w] + + x = self.out_conv(torch.cat([x, residual], dim=1)) + out = self.cls_seg(x) + + return out diff --git a/mmseg/models/decode_heads/knet_head.py b/mmseg/models/decode_heads/knet_head.py new file mode 100644 index 0000000000000000000000000000000000000000..82d3a2807685cdc896c881095f46fd50a450018e --- /dev/null +++ b/mmseg/models/decode_heads/knet_head.py @@ -0,0 +1,461 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer +from mmcv.cnn.bricks.transformer import (FFN, MultiheadAttention, + build_transformer_layer) +from mmengine.logging import print_log +from torch import Tensor + +from mmseg.models.decode_heads.decode_head import BaseDecodeHead +from mmseg.registry import MODELS +from mmseg.utils import SampleList + + +@MODELS.register_module() +class KernelUpdator(nn.Module): + """Dynamic Kernel Updator in Kernel Update Head. + + Args: + in_channels (int): The number of channels of input feature map. + Default: 256. + feat_channels (int): The number of middle-stage channels in + the kernel updator. Default: 64. + out_channels (int): The number of output channels. 
+ gate_sigmoid (bool): Whether use sigmoid function in gate + mechanism. Default: True. + gate_norm_act (bool): Whether add normalization and activation + layer in gate mechanism. Default: False. + activate_out: Whether add activation after gate mechanism. + Default: False. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='LN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + """ + + def __init__( + self, + in_channels=256, + feat_channels=64, + out_channels=None, + gate_sigmoid=True, + gate_norm_act=False, + activate_out=False, + norm_cfg=dict(type='LN'), + act_cfg=dict(type='ReLU', inplace=True), + ): + super().__init__() + self.in_channels = in_channels + self.feat_channels = feat_channels + self.out_channels_raw = out_channels + self.gate_sigmoid = gate_sigmoid + self.gate_norm_act = gate_norm_act + self.activate_out = activate_out + self.act_cfg = act_cfg + self.norm_cfg = norm_cfg + self.out_channels = out_channels if out_channels else in_channels + + self.num_params_in = self.feat_channels + self.num_params_out = self.feat_channels + self.dynamic_layer = nn.Linear( + self.in_channels, self.num_params_in + self.num_params_out) + self.input_layer = nn.Linear(self.in_channels, + self.num_params_in + self.num_params_out, + 1) + self.input_gate = nn.Linear(self.in_channels, self.feat_channels, 1) + self.update_gate = nn.Linear(self.in_channels, self.feat_channels, 1) + if self.gate_norm_act: + self.gate_norm = build_norm_layer(norm_cfg, self.feat_channels)[1] + + self.norm_in = build_norm_layer(norm_cfg, self.feat_channels)[1] + self.norm_out = build_norm_layer(norm_cfg, self.feat_channels)[1] + self.input_norm_in = build_norm_layer(norm_cfg, self.feat_channels)[1] + self.input_norm_out = build_norm_layer(norm_cfg, self.feat_channels)[1] + + self.activation = build_activation_layer(act_cfg) + + self.fc_layer = nn.Linear(self.feat_channels, self.out_channels, 1) + self.fc_norm = build_norm_layer(norm_cfg, self.out_channels)[1] + + def forward(self, update_feature, input_feature): + """Forward function of KernelUpdator. + + Args: + update_feature (torch.Tensor): Feature map assembled from + each group. It would be reshaped with last dimension + shape: `self.in_channels`. + input_feature (torch.Tensor): Intermediate feature + with shape: (N, num_classes, conv_kernel_size**2, channels). + Returns: + Tensor: The output tensor of shape (N*C1/C2, K*K, C2), where N is + the number of classes, C1 and C2 are the feature map channels of + KernelUpdateHead and KernelUpdator, respectively. 
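+
+        Example:
+            An illustrative sketch; ``feat_channels`` is set equal to
+            ``in_channels`` so the toy shapes line up (K*K == 1 here):
+
+            >>> import torch
+            >>> updator = KernelUpdator(in_channels=32, feat_channels=32,
+            ...                         out_channels=32)
+            >>> group_feats = torch.rand(2, 10, 32)   # assembled features
+            >>> kernels = torch.rand(2, 10, 1, 32)    # proposal kernels
+            >>> updator(group_feats, kernels).shape
+            torch.Size([20, 1, 32])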
+ """ + + update_feature = update_feature.reshape(-1, self.in_channels) + num_proposals = update_feature.size(0) + # dynamic_layer works for + # phi_1 and psi_3 in Eq.(4) and (5) of K-Net paper + parameters = self.dynamic_layer(update_feature) + param_in = parameters[:, :self.num_params_in].view( + -1, self.feat_channels) + param_out = parameters[:, -self.num_params_out:].view( + -1, self.feat_channels) + + # input_layer works for + # phi_2 and psi_4 in Eq.(4) and (5) of K-Net paper + input_feats = self.input_layer( + input_feature.reshape(num_proposals, -1, self.feat_channels)) + input_in = input_feats[..., :self.num_params_in] + input_out = input_feats[..., -self.num_params_out:] + + # `gate_feats` is F^G in K-Net paper + gate_feats = input_in * param_in.unsqueeze(-2) + if self.gate_norm_act: + gate_feats = self.activation(self.gate_norm(gate_feats)) + + input_gate = self.input_norm_in(self.input_gate(gate_feats)) + update_gate = self.norm_in(self.update_gate(gate_feats)) + if self.gate_sigmoid: + input_gate = input_gate.sigmoid() + update_gate = update_gate.sigmoid() + param_out = self.norm_out(param_out) + input_out = self.input_norm_out(input_out) + + if self.activate_out: + param_out = self.activation(param_out) + input_out = self.activation(input_out) + + # Gate mechanism. Eq.(5) in original paper. + # param_out has shape (batch_size, feat_channels, out_channels) + features = update_gate * param_out.unsqueeze( + -2) + input_gate * input_out + + features = self.fc_layer(features) + features = self.fc_norm(features) + features = self.activation(features) + + return features + + +@MODELS.register_module() +class KernelUpdateHead(nn.Module): + """Kernel Update Head in K-Net. + + Args: + num_classes (int): Number of classes. Default: 150. + num_ffn_fcs (int): The number of fully-connected layers in + FFNs. Default: 2. + num_heads (int): The number of parallel attention heads. + Default: 8. + num_mask_fcs (int): The number of fully connected layers for + mask prediction. Default: 3. + feedforward_channels (int): The hidden dimension of FFNs. + Defaults: 2048. + in_channels (int): The number of channels of input feature map. + Default: 256. + out_channels (int): The number of output channels. + Default: 256. + dropout (float): The Probability of an element to be + zeroed in MultiheadAttention and FFN. Default 0.0. + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + ffn_act_cfg (dict): Config of activation layers in FFN. + Default: dict(type='ReLU'). + conv_kernel_size (int): The kernel size of convolution in + Kernel Update Head for dynamic kernel updation. + Default: 1. + feat_transform_cfg (dict | None): Config of feature transform. + Default: None. + kernel_init (bool): Whether initiate mask kernel in mask head. + Default: False. + with_ffn (bool): Whether add FFN in kernel update head. + Default: True. + feat_gather_stride (int): Stride of convolution in feature transform. + Default: 1. + mask_transform_stride (int): Stride of mask transform. + Default: 1. + kernel_updator_cfg (dict): Config of kernel updator. + Default: dict( + type='DynamicConv', + in_channels=256, + feat_channels=64, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN')). 
+ """ + + def __init__(self, + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=3, + feedforward_channels=2048, + in_channels=256, + out_channels=256, + dropout=0.0, + act_cfg=dict(type='ReLU', inplace=True), + ffn_act_cfg=dict(type='ReLU', inplace=True), + conv_kernel_size=1, + feat_transform_cfg=None, + kernel_init=False, + with_ffn=True, + feat_gather_stride=1, + mask_transform_stride=1, + kernel_updator_cfg=dict( + type='DynamicConv', + in_channels=256, + feat_channels=64, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))): + super().__init__() + self.num_classes = num_classes + self.in_channels = in_channels + self.out_channels = out_channels + self.fp16_enabled = False + self.dropout = dropout + self.num_heads = num_heads + self.kernel_init = kernel_init + self.with_ffn = with_ffn + self.conv_kernel_size = conv_kernel_size + self.feat_gather_stride = feat_gather_stride + self.mask_transform_stride = mask_transform_stride + + self.attention = MultiheadAttention(in_channels * conv_kernel_size**2, + num_heads, dropout) + self.attention_norm = build_norm_layer( + dict(type='LN'), in_channels * conv_kernel_size**2)[1] + self.kernel_update_conv = build_transformer_layer(kernel_updator_cfg) + + if feat_transform_cfg is not None: + kernel_size = feat_transform_cfg.pop('kernel_size', 1) + transform_channels = in_channels + self.feat_transform = ConvModule( + transform_channels, + in_channels, + kernel_size, + stride=feat_gather_stride, + padding=int(feat_gather_stride // 2), + **feat_transform_cfg) + else: + self.feat_transform = None + + if self.with_ffn: + self.ffn = FFN( + in_channels, + feedforward_channels, + num_ffn_fcs, + act_cfg=ffn_act_cfg, + dropout=dropout) + self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1] + + self.mask_fcs = nn.ModuleList() + for _ in range(num_mask_fcs): + self.mask_fcs.append( + nn.Linear(in_channels, in_channels, bias=False)) + self.mask_fcs.append( + build_norm_layer(dict(type='LN'), in_channels)[1]) + self.mask_fcs.append(build_activation_layer(act_cfg)) + + self.fc_mask = nn.Linear(in_channels, out_channels) + + def init_weights(self): + """Use xavier initialization for all weight parameter and set + classification head bias as a specific value when use focal loss.""" + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + else: + # adopt the default initialization for + # the weight and bias of the layer norm + pass + if self.kernel_init: + print_log( + 'mask kernel in mask head is normal initialized by std 0.01') + nn.init.normal_(self.fc_mask.weight, mean=0, std=0.01) + + def forward(self, x, proposal_feat, mask_preds, mask_shape=None): + """Forward function of Dynamic Instance Interactive Head. + + Args: + x (Tensor): Feature map from FPN with shape + (batch_size, feature_dimensions, H , W). + proposal_feat (Tensor): Intermediate feature get from + diihead in last stage, has shape + (batch_size, num_proposals, feature_dimensions) + mask_preds (Tensor): mask prediction from the former stage in shape + (batch_size, num_proposals, H, W). + + Returns: + Tuple: The first tensor is predicted mask with shape + (N, num_classes, H, W), the second tensor is dynamic kernel + with shape (N, num_classes, channels, K, K). 
+ """ + N, num_proposals = proposal_feat.shape[:2] + if self.feat_transform is not None: + x = self.feat_transform(x) + + C, H, W = x.shape[-3:] + + mask_h, mask_w = mask_preds.shape[-2:] + if mask_h != H or mask_w != W: + gather_mask = F.interpolate( + mask_preds, (H, W), align_corners=False, mode='bilinear') + else: + gather_mask = mask_preds + + sigmoid_masks = gather_mask.softmax(dim=1) + + # Group Feature Assembling. Eq.(3) in original paper. + # einsum is faster than bmm by 30% + x_feat = torch.einsum('bnhw,bchw->bnc', sigmoid_masks, x) + + # obj_feat in shape [B, N, C, K, K] -> [B, N, C, K*K] -> [B, N, K*K, C] + proposal_feat = proposal_feat.reshape(N, num_proposals, + self.in_channels, + -1).permute(0, 1, 3, 2) + obj_feat = self.kernel_update_conv(x_feat, proposal_feat) + + # [B, N, K*K, C] -> [B, N, K*K*C] -> [N, B, K*K*C] + obj_feat = obj_feat.reshape(N, num_proposals, -1).permute(1, 0, 2) + obj_feat = self.attention_norm(self.attention(obj_feat)) + # [N, B, K*K*C] -> [B, N, K*K*C] + obj_feat = obj_feat.permute(1, 0, 2) + + # obj_feat in shape [B, N, K*K*C] -> [B, N, K*K, C] + obj_feat = obj_feat.reshape(N, num_proposals, -1, self.in_channels) + + # FFN + if self.with_ffn: + obj_feat = self.ffn_norm(self.ffn(obj_feat)) + + mask_feat = obj_feat + + for reg_layer in self.mask_fcs: + mask_feat = reg_layer(mask_feat) + + # [B, N, K*K, C] -> [B, N, C, K*K] + mask_feat = self.fc_mask(mask_feat).permute(0, 1, 3, 2) + + if (self.mask_transform_stride == 2 and self.feat_gather_stride == 1): + mask_x = F.interpolate( + x, scale_factor=0.5, mode='bilinear', align_corners=False) + H, W = mask_x.shape[-2:] + else: + mask_x = x + # group conv is 5x faster than unfold and uses about 1/5 memory + # Group conv vs. unfold vs. concat batch, 2.9ms :13.5ms :3.8ms + # Group conv vs. unfold vs. concat batch, 278 : 1420 : 369 + # but in real training group conv is slower than concat batch + # so we keep using concat batch. + # fold_x = F.unfold( + # mask_x, + # self.conv_kernel_size, + # padding=int(self.conv_kernel_size // 2)) + # mask_feat = mask_feat.reshape(N, num_proposals, -1) + # new_mask_preds = torch.einsum('bnc,bcl->bnl', mask_feat, fold_x) + # [B, N, C, K*K] -> [B*N, C, K, K] + mask_feat = mask_feat.reshape(N, num_proposals, C, + self.conv_kernel_size, + self.conv_kernel_size) + # [B, C, H, W] -> [1, B*C, H, W] + new_mask_preds = [] + for i in range(N): + new_mask_preds.append( + F.conv2d( + mask_x[i:i + 1], + mask_feat[i], + padding=int(self.conv_kernel_size // 2))) + + new_mask_preds = torch.cat(new_mask_preds, dim=0) + new_mask_preds = new_mask_preds.reshape(N, num_proposals, H, W) + if self.mask_transform_stride == 2: + new_mask_preds = F.interpolate( + new_mask_preds, + scale_factor=2, + mode='bilinear', + align_corners=False) + + if mask_shape is not None and mask_shape[0] != H: + new_mask_preds = F.interpolate( + new_mask_preds, + mask_shape, + align_corners=False, + mode='bilinear') + + return new_mask_preds, obj_feat.permute(0, 1, 3, 2).reshape( + N, num_proposals, self.in_channels, self.conv_kernel_size, + self.conv_kernel_size) + + +@MODELS.register_module() +class IterativeDecodeHead(BaseDecodeHead): + """K-Net: Towards Unified Image Segmentation. + + This head is the implementation of + `K-Net: `_. + + Args: + num_stages (int): The number of stages (kernel update heads) + in IterativeDecodeHead. Default: 3. + kernel_generate_head:(dict): Config of kernel generate head which + generate mask predictions, dynamic kernels and class predictions + for next kernel update heads. 
+ kernel_update_head (dict): Config of kernel update head which refine + dynamic kernels and class predictions iteratively. + + """ + + def __init__(self, num_stages, kernel_generate_head, kernel_update_head, + **kwargs): + # ``IterativeDecodeHead`` would skip initialization of + # ``BaseDecodeHead`` which would be called when building + # ``self.kernel_generate_head``. + super(BaseDecodeHead, self).__init__(**kwargs) + assert num_stages == len(kernel_update_head) + self.num_stages = num_stages + self.kernel_generate_head = MODELS.build(kernel_generate_head) + self.kernel_update_head = nn.ModuleList() + self.align_corners = self.kernel_generate_head.align_corners + self.num_classes = self.kernel_generate_head.num_classes + self.input_transform = self.kernel_generate_head.input_transform + self.ignore_index = self.kernel_generate_head.ignore_index + self.out_channels = self.num_classes + + for head_cfg in kernel_update_head: + self.kernel_update_head.append(MODELS.build(head_cfg)) + + def forward(self, inputs): + """Forward function.""" + feats = self.kernel_generate_head._forward_feature(inputs) + sem_seg = self.kernel_generate_head.cls_seg(feats) + seg_kernels = self.kernel_generate_head.conv_seg.weight.clone() + seg_kernels = seg_kernels[None].expand( + feats.size(0), *seg_kernels.size()) + + stage_segs = [sem_seg] + for i in range(self.num_stages): + sem_seg, seg_kernels = self.kernel_update_head[i](feats, + seg_kernels, + sem_seg) + stage_segs.append(sem_seg) + if self.training: + return stage_segs + # only return the prediction of the last stage during testing + return stage_segs[-1] + + def loss_by_feat(self, seg_logits: List[Tensor], + batch_data_samples: SampleList, **kwargs) -> dict: + losses = dict() + for i, logit in enumerate(seg_logits): + loss = self.kernel_generate_head.loss_by_feat( + logit, batch_data_samples) + for k, v in loss.items(): + losses[f'{k}.s{i}'] = v + + return losses diff --git a/mmseg/models/decode_heads/lraspp_head.py b/mmseg/models/decode_heads/lraspp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..ba2465f27522e6ff106fcdf94a46aab42881260a --- /dev/null +++ b/mmseg/models/decode_heads/lraspp_head.py @@ -0,0 +1,91 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmengine.utils import is_tuple_of + +from mmseg.registry import MODELS +from ..utils import resize +from .decode_head import BaseDecodeHead + + +@MODELS.register_module() +class LRASPPHead(BaseDecodeHead): + """Lite R-ASPP (LRASPP) head is proposed in Searching for MobileNetV3. + + This head is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + branch_channels (tuple[int]): The number of output channels in every + each branch. Default: (32, 64). + """ + + def __init__(self, branch_channels=(32, 64), **kwargs): + super().__init__(**kwargs) + if self.input_transform != 'multiple_select': + raise ValueError('in Lite R-ASPP (LRASPP) head, input_transform ' + f'must be \'multiple_select\'. 
But received ' + f'\'{self.input_transform}\'') + assert is_tuple_of(branch_channels, int) + assert len(branch_channels) == len(self.in_channels) - 1 + self.branch_channels = branch_channels + + self.convs = nn.Sequential() + self.conv_ups = nn.Sequential() + for i in range(len(branch_channels)): + self.convs.add_module( + f'conv{i}', + nn.Conv2d( + self.in_channels[i], branch_channels[i], 1, bias=False)) + self.conv_ups.add_module( + f'conv_up{i}', + ConvModule( + self.channels + branch_channels[i], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False)) + + self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) + + self.aspp_conv = ConvModule( + self.in_channels[-1], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False) + self.image_pool = nn.Sequential( + nn.AvgPool2d(kernel_size=49, stride=(16, 20)), + ConvModule( + self.in_channels[2], + self.channels, + 1, + act_cfg=dict(type='Sigmoid'), + bias=False)) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + + x = inputs[-1] + + x = self.aspp_conv(x) * resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = self.conv_up_input(x) + + for i in range(len(self.branch_channels) - 1, -1, -1): + x = resize( + x, + size=inputs[i].size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = torch.cat([x, self.convs[i](inputs[i])], 1) + x = self.conv_ups[i](x) + + return self.cls_seg(x) diff --git a/mmseg/models/decode_heads/mask2former_head.py b/mmseg/models/decode_heads/mask2former_head.py new file mode 100644 index 0000000000000000000000000000000000000000..0135af0645830f5cf98595318c4bb20220e64b0b --- /dev/null +++ b/mmseg/models/decode_heads/mask2former_head.py @@ -0,0 +1,163 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmengine.model import BaseModule + +try: + from mmdet.models.dense_heads import \ + Mask2FormerHead as MMDET_Mask2FormerHead +except ModuleNotFoundError: + MMDET_Mask2FormerHead = BaseModule + +from mmengine.structures import InstanceData +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.structures.seg_data_sample import SegDataSample +from mmseg.utils import ConfigType, SampleList + + +@MODELS.register_module() +class Mask2FormerHead(MMDET_Mask2FormerHead): + """Implements the Mask2Former head. + + See `Mask2Former: Masked-attention Mask Transformer for Universal Image + Segmentation `_ for details. + + Args: + num_classes (int): Number of classes. Default: 150. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + ignore_index (int): The label index to be ignored. Default: 255. + """ + + def __init__(self, + num_classes, + align_corners=False, + ignore_index=255, + **kwargs): + super().__init__(**kwargs) + + self.num_classes = num_classes + self.align_corners = align_corners + self.out_channels = num_classes + self.ignore_index = ignore_index + + feat_channels = kwargs['feat_channels'] + self.cls_embed = nn.Linear(feat_channels, self.num_classes + 1) + + def _seg_data_to_instance_data(self, batch_data_samples: SampleList): + """Perform forward propagation to convert paradigm from MMSegmentation + to MMDetection to ensure ``MMDET_Mask2FormerHead`` could be called + normally. Specifically, ``batch_gt_instances`` would be added. 
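+
+        Example (pure-torch sketch of the per-sample label-to-instance
+        conversion done below; labels are hypothetical)::
+
+            >>> import torch
+            >>> gt_sem_seg = torch.tensor([[0, 0], [1, 255]])
+            >>> classes = torch.unique(gt_sem_seg)
+            >>> gt_labels = classes[classes != 255]
+            >>> gt_masks = torch.stack(
+            ...     [gt_sem_seg == c for c in gt_labels]).long()
+            >>> gt_labels.tolist(), tuple(gt_masks.shape)
+            ([0, 1], (2, 2, 2))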
+ + Args: + batch_data_samples (List[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as + `gt_sem_seg`. + + Returns: + tuple[Tensor]: A tuple contains two lists. + + - batch_gt_instances (list[:obj:`InstanceData`]): Batch of + gt_instance. It usually includes ``labels``, each is + unique ground truth label id of images, with + shape (num_gt, ) and ``masks``, each is ground truth + masks of each instances of a image, shape (num_gt, h, w). + - batch_img_metas (list[dict]): List of image meta information. + """ + batch_img_metas = [] + batch_gt_instances = [] + + for data_sample in batch_data_samples: + batch_img_metas.append(data_sample.metainfo) + gt_sem_seg = data_sample.gt_sem_seg.data + classes = torch.unique( + gt_sem_seg, + sorted=False, + return_inverse=False, + return_counts=False) + + # remove ignored region + gt_labels = classes[classes != self.ignore_index] + + masks = [] + for class_id in gt_labels: + masks.append(gt_sem_seg == class_id) + + if len(masks) == 0: + gt_masks = torch.zeros( + (0, gt_sem_seg.shape[-2], + gt_sem_seg.shape[-1])).to(gt_sem_seg).long() + else: + gt_masks = torch.stack(masks).squeeze(1).long() + + instance_data = InstanceData(labels=gt_labels, masks=gt_masks) + batch_gt_instances.append(instance_data) + return batch_gt_instances, batch_img_metas + + def loss(self, x: Tuple[Tensor], batch_data_samples: SampleList, + train_cfg: ConfigType) -> dict: + """Perform forward propagation and loss calculation of the decoder head + on the features of the upstream network. + + Args: + x (tuple[Tensor]): Multi-level features from the upstream + network, each is a 4D-tensor. + batch_data_samples (List[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as + `gt_sem_seg`. + train_cfg (ConfigType): Training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components. + """ + # batch SegDataSample to InstanceDataSample + batch_gt_instances, batch_img_metas = self._seg_data_to_instance_data( + batch_data_samples) + + # forward + all_cls_scores, all_mask_preds = self(x, batch_data_samples) + + # loss + losses = self.loss_by_feat(all_cls_scores, all_mask_preds, + batch_gt_instances, batch_img_metas) + + return losses + + def predict(self, x: Tuple[Tensor], batch_img_metas: List[dict], + test_cfg: ConfigType) -> Tuple[Tensor]: + """Test without augmentaton. + + Args: + x (tuple[Tensor]): Multi-level features from the + upstream network, each is a 4D-tensor. + batch_img_metas (List[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as + `gt_sem_seg`. + test_cfg (ConfigType): Test config. + + Returns: + Tensor: A tensor of segmentation mask. 
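+
+        Example (shape sketch of the semantic-inference step below;
+        hypothetical sizes)::
+
+            >>> import torch
+            >>> cls_score = torch.rand(1, 100, 150)     # (B, Q, classes)
+            >>> mask_pred = torch.rand(1, 100, 64, 64)  # (B, Q, H, W)
+            >>> seg = torch.einsum('bqc,bqhw->bchw', cls_score, mask_pred)
+            >>> tuple(seg.shape)
+            (1, 150, 64, 64)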
+ """ + batch_data_samples = [ + SegDataSample(metainfo=metainfo) for metainfo in batch_img_metas + ] + + all_cls_scores, all_mask_preds = self(x, batch_data_samples) + mask_cls_results = all_cls_scores[-1] + mask_pred_results = all_mask_preds[-1] + if 'pad_shape' in batch_img_metas[0]: + size = batch_img_metas[0]['pad_shape'] + else: + size = batch_img_metas[0]['img_shape'] + # upsample mask + mask_pred_results = F.interpolate( + mask_pred_results, size=size, mode='bilinear', align_corners=False) + cls_score = F.softmax(mask_cls_results, dim=-1)[..., :-1] + mask_pred = mask_pred_results.sigmoid() + seg_logits = torch.einsum('bqc, bqhw->bchw', cls_score, mask_pred) + return seg_logits diff --git a/mmseg/models/decode_heads/maskformer_head.py b/mmseg/models/decode_heads/maskformer_head.py new file mode 100644 index 0000000000000000000000000000000000000000..6e61a7f63a33a508955a866e57c139ce8c40e0f6 --- /dev/null +++ b/mmseg/models/decode_heads/maskformer_head.py @@ -0,0 +1,174 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmengine.model import BaseModule + +try: + from mmdet.models.dense_heads import MaskFormerHead as MMDET_MaskFormerHead +except ModuleNotFoundError: + MMDET_MaskFormerHead = BaseModule + +from mmengine.structures import InstanceData +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.structures.seg_data_sample import SegDataSample +from mmseg.utils import ConfigType, SampleList + + +@MODELS.register_module() +class MaskFormerHead(MMDET_MaskFormerHead): + """Implements the MaskFormer head. + + See `Per-Pixel Classification is Not All You Need for Semantic Segmentation + `_ for details. + + Args: + num_classes (int): Number of classes. Default: 150. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + ignore_index (int): The label index to be ignored. Default: 255. + """ + + def __init__(self, + num_classes: int = 150, + align_corners: bool = False, + ignore_index: int = 255, + **kwargs) -> None: + super().__init__(**kwargs) + + self.out_channels = kwargs['out_channels'] + self.align_corners = True + self.num_classes = num_classes + self.align_corners = align_corners + self.out_channels = num_classes + self.ignore_index = ignore_index + + feat_channels = kwargs['feat_channels'] + self.cls_embed = nn.Linear(feat_channels, self.num_classes + 1) + + def _seg_data_to_instance_data(self, batch_data_samples: SampleList): + """Perform forward propagation to convert paradigm from MMSegmentation + to MMDetection to ensure ``MMDET_MaskFormerHead`` could be called + normally. Specifically, ``batch_gt_instances`` would be added. + + Args: + batch_data_samples (List[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as + `gt_sem_seg`. + + Returns: + tuple[Tensor]: A tuple contains two lists. + + - batch_gt_instances (list[:obj:`InstanceData`]): Batch of + gt_instance. It usually includes ``labels``, each is + unique ground truth label id of images, with + shape (num_gt, ) and ``masks``, each is ground truth + masks of each instances of a image, shape (num_gt, h, w). + - batch_img_metas (list[dict]): List of image meta information. + """ + batch_img_metas = [] + batch_gt_instances = [] + for data_sample in batch_data_samples: + # Add `batch_input_shape` in metainfo of data_sample, which would + # be used in MaskFormerHead of MMDetection. 
+ metainfo = data_sample.metainfo + metainfo['batch_input_shape'] = metainfo['img_shape'] + data_sample.set_metainfo(metainfo) + batch_img_metas.append(data_sample.metainfo) + gt_sem_seg = data_sample.gt_sem_seg.data + classes = torch.unique( + gt_sem_seg, + sorted=False, + return_inverse=False, + return_counts=False) + + # remove ignored region + gt_labels = classes[classes != self.ignore_index] + + masks = [] + for class_id in gt_labels: + masks.append(gt_sem_seg == class_id) + + if len(masks) == 0: + gt_masks = torch.zeros((0, gt_sem_seg.shape[-2], + gt_sem_seg.shape[-1])).to(gt_sem_seg) + else: + gt_masks = torch.stack(masks).squeeze(1) + + instance_data = InstanceData( + labels=gt_labels, masks=gt_masks.long()) + batch_gt_instances.append(instance_data) + return batch_gt_instances, batch_img_metas + + def loss(self, x: Tuple[Tensor], batch_data_samples: SampleList, + train_cfg: ConfigType) -> dict: + """Perform forward propagation and loss calculation of the decoder head + on the features of the upstream network. + + Args: + x (tuple[Tensor]): Multi-level features from the upstream + network, each is a 4D-tensor. + batch_data_samples (List[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as + `gt_sem_seg`. + train_cfg (ConfigType): Training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components. + """ + # batch SegDataSample to InstanceDataSample + batch_gt_instances, batch_img_metas = self._seg_data_to_instance_data( + batch_data_samples) + + # forward + all_cls_scores, all_mask_preds = self(x, batch_data_samples) + + # loss + losses = self.loss_by_feat(all_cls_scores, all_mask_preds, + batch_gt_instances, batch_img_metas) + + return losses + + def predict(self, x: Tuple[Tensor], batch_img_metas: List[dict], + test_cfg: ConfigType) -> Tuple[Tensor]: + """Test without augmentaton. + + Args: + x (tuple[Tensor]): Multi-level features from the + upstream network, each is a 4D-tensor. + batch_img_metas (List[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as + `gt_sem_seg`. + test_cfg (ConfigType): Test config. + + Returns: + Tensor: A tensor of segmentation mask. + """ + + batch_data_samples = [] + for metainfo in batch_img_metas: + metainfo['batch_input_shape'] = metainfo['img_shape'] + batch_data_samples.append(SegDataSample(metainfo=metainfo)) + # Forward function of MaskFormerHead from MMDetection needs + # 'batch_data_samples' as inputs, which is image shape actually. + all_cls_scores, all_mask_preds = self(x, batch_data_samples) + mask_cls_results = all_cls_scores[-1] + mask_pred_results = all_mask_preds[-1] + + # upsample masks + img_shape = batch_img_metas[0]['batch_input_shape'] + mask_pred_results = F.interpolate( + mask_pred_results, + size=img_shape, + mode='bilinear', + align_corners=False) + + # semantic inference + cls_score = F.softmax(mask_cls_results, dim=-1)[..., :-1] + mask_pred = mask_pred_results.sigmoid() + seg_logits = torch.einsum('bqc,bqhw->bchw', cls_score, mask_pred) + return seg_logits diff --git a/mmseg/models/decode_heads/nl_head.py b/mmseg/models/decode_heads/nl_head.py new file mode 100644 index 0000000000000000000000000000000000000000..0ffcc2a2f081127f109deb0ad5bd1be0d6f50493 --- /dev/null +++ b/mmseg/models/decode_heads/nl_head.py @@ -0,0 +1,50 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch
+from mmcv.cnn import NonLocal2d
+
+from mmseg.registry import MODELS
+from .fcn_head import FCNHead
+
+
+@MODELS.register_module()
+class NLHead(FCNHead):
+    """Non-local Neural Networks.
+
+    This head is the implementation of `NLNet
+    <https://arxiv.org/abs/1711.07971>`_.
+
+    Args:
+        reduction (int): Reduction factor of projection transform. Default: 2.
+        use_scale (bool): Whether to scale pairwise_weight by
+            sqrt(1/inter_channels). Default: True.
+        mode (str): The nonlocal mode. Options are 'embedded_gaussian',
+            'dot_product'. Default: 'embedded_gaussian'.
+    """
+
+    def __init__(self,
+                 reduction=2,
+                 use_scale=True,
+                 mode='embedded_gaussian',
+                 **kwargs):
+        super().__init__(num_convs=2, **kwargs)
+        self.reduction = reduction
+        self.use_scale = use_scale
+        self.mode = mode
+        self.nl_block = NonLocal2d(
+            in_channels=self.channels,
+            reduction=self.reduction,
+            use_scale=self.use_scale,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            mode=self.mode)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        output = self.convs[0](x)
+        output = self.nl_block(output)
+        output = self.convs[1](output)
+        if self.concat_input:
+            output = self.conv_cat(torch.cat([x, output], dim=1))
+        output = self.cls_seg(output)
+        return output
diff --git a/mmseg/models/decode_heads/ocr_head.py b/mmseg/models/decode_heads/ocr_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..9afe37bebd6c16ff184dc482ae358eb7ae9a093a
--- /dev/null
+++ b/mmseg/models/decode_heads/ocr_head.py
@@ -0,0 +1,127 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+
+from mmseg.registry import MODELS
+from ..utils import SelfAttentionBlock as _SelfAttentionBlock
+from ..utils import resize
+from .cascade_decode_head import BaseCascadeDecodeHead
+
+
+class SpatialGatherModule(nn.Module):
+    """Aggregate the context features according to the initial predicted
+    probability distribution.
+
+    Employ the soft-weighted method to aggregate the context.
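+
+    Example (shape sketch of the gathering; hypothetical sizes)::
+
+        >>> import torch
+        >>> import torch.nn.functional as F
+        >>> feats = torch.rand(2, 512, 16)                # (B, C, H*W)
+        >>> probs = F.softmax(torch.rand(2, 19, 16), dim=2)
+        >>> context = torch.matmul(probs, feats.permute(0, 2, 1))
+        >>> tuple(context.shape)                          # one vector per class
+        (2, 19, 512)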
+ """ + + def __init__(self, scale): + super().__init__() + self.scale = scale + + def forward(self, feats, probs): + """Forward function.""" + batch_size, num_classes, height, width = probs.size() + channels = feats.size(1) + probs = probs.view(batch_size, num_classes, -1) + feats = feats.view(batch_size, channels, -1) + # [batch_size, height*width, num_classes] + feats = feats.permute(0, 2, 1) + # [batch_size, channels, height*width] + probs = F.softmax(self.scale * probs, dim=2) + # [batch_size, channels, num_classes] + ocr_context = torch.matmul(probs, feats) + ocr_context = ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3) + return ocr_context + + +class ObjectAttentionBlock(_SelfAttentionBlock): + """Make a OCR used SelfAttentionBlock.""" + + def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, + act_cfg): + if scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=scale) + else: + query_downsample = None + super().__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=query_downsample, + key_downsample=None, + key_query_num_convs=2, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=True, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.bottleneck = ConvModule( + in_channels * 2, + in_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, query_feats, key_feats): + """Forward function.""" + context = super().forward(query_feats, key_feats) + output = self.bottleneck(torch.cat([context, query_feats], dim=1)) + if self.query_downsample is not None: + output = resize(query_feats) + + return output + + +@MODELS.register_module() +class OCRHead(BaseCascadeDecodeHead): + """Object-Contextual Representations for Semantic Segmentation. + + This head is the implementation of `OCRNet + `_. + + Args: + ocr_channels (int): The intermediate channels of OCR block. + scale (int): The scale of probability map in SpatialGatherModule in + Default: 1. + """ + + def __init__(self, ocr_channels, scale=1, **kwargs): + super().__init__(**kwargs) + self.ocr_channels = ocr_channels + self.scale = scale + self.object_context_block = ObjectAttentionBlock( + self.channels, + self.ocr_channels, + self.scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.spatial_gather_module = SpatialGatherModule(self.scale) + + self.bottleneck = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs, prev_output): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.bottleneck(x) + context = self.spatial_gather_module(feats, prev_output) + object_context = self.object_context_block(feats, context) + output = self.cls_seg(object_context) + + return output diff --git a/mmseg/models/decode_heads/pid_head.py b/mmseg/models/decode_heads/pid_head.py new file mode 100644 index 0000000000000000000000000000000000000000..c092cb32d07c279c1d6a45d2e02baccb8e5ffa33 --- /dev/null +++ b/mmseg/models/decode_heads/pid_head.py @@ -0,0 +1,183 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Optional, Tuple, Union + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer +from mmengine.model import BaseModule +from torch import Tensor + +from mmseg.models.decode_heads.decode_head import BaseDecodeHead +from mmseg.models.losses import accuracy +from mmseg.models.utils import resize +from mmseg.registry import MODELS +from mmseg.utils import OptConfigType, SampleList + + +class BasePIDHead(BaseModule): + """Base class for PID head. + + Args: + in_channels (int): Number of input channels. + channels (int): Number of output channels. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). + init_cfg (dict or list[dict], optional): Init config dict. + Default: None. + """ + + def __init__(self, + in_channels: int, + channels: int, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv = ConvModule( + in_channels, + channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + order=('norm', 'act', 'conv')) + _, self.norm = build_norm_layer(norm_cfg, num_features=channels) + self.act = build_activation_layer(act_cfg) + + def forward(self, x: Tensor, cls_seg: Optional[nn.Module]) -> Tensor: + """Forward function. + Args: + x (Tensor): Input tensor. + cls_seg (nn.Module, optional): The classification head. + + Returns: + Tensor: Output tensor. + """ + x = self.conv(x) + x = self.norm(x) + x = self.act(x) + if cls_seg is not None: + x = cls_seg(x) + return x + + +@MODELS.register_module() +class PIDHead(BaseDecodeHead): + """Decode head for PIDNet. + + Args: + in_channels (int): Number of input channels. + channels (int): Number of output channels. + num_classes (int): Number of classes. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). + """ + + def __init__(self, + in_channels: int, + channels: int, + num_classes: int, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + **kwargs): + super().__init__( + in_channels, + channels, + num_classes=num_classes, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs) + self.i_head = BasePIDHead(in_channels, channels, norm_cfg, act_cfg) + self.p_head = BasePIDHead(in_channels // 2, channels, norm_cfg, + act_cfg) + self.d_head = BasePIDHead( + in_channels // 2, + in_channels // 4, + norm_cfg, + ) + self.p_cls_seg = nn.Conv2d(channels, self.out_channels, kernel_size=1) + self.d_cls_seg = nn.Conv2d(in_channels // 4, 1, kernel_size=1) + + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def forward( + self, + inputs: Union[Tensor, + Tuple[Tensor]]) -> Union[Tensor, Tuple[Tensor]]: + """Forward function. + Args: + inputs (Tensor | tuple[Tensor]): Input tensor or tuple of + Tensor. When training, the input is a tuple of three tensors, + (p_feat, i_feat, d_feat), and the output is a tuple of three + tensors, (p_seg_logit, i_seg_logit, d_seg_logit). 
+ When inference, only the head of integral branch is used, and + input is a tensor of integral feature map, and the output is + the segmentation logit. + + Returns: + Tensor | tuple[Tensor]: Output tensor or tuple of tensors. + """ + if self.training: + x_p, x_i, x_d = inputs + x_p = self.p_head(x_p, self.p_cls_seg) + x_i = self.i_head(x_i, self.cls_seg) + x_d = self.d_head(x_d, self.d_cls_seg) + return x_p, x_i, x_d + else: + return self.i_head(inputs, self.cls_seg) + + def _stack_batch_gt(self, batch_data_samples: SampleList) -> Tuple[Tensor]: + gt_semantic_segs = [ + data_sample.gt_sem_seg.data for data_sample in batch_data_samples + ] + gt_edge_segs = [ + data_sample.gt_edge_map.data for data_sample in batch_data_samples + ] + gt_sem_segs = torch.stack(gt_semantic_segs, dim=0) + gt_edge_segs = torch.stack(gt_edge_segs, dim=0) + return gt_sem_segs, gt_edge_segs + + def loss_by_feat(self, seg_logits: Tuple[Tensor], + batch_data_samples: SampleList) -> dict: + loss = dict() + p_logit, i_logit, d_logit = seg_logits + sem_label, bd_label = self._stack_batch_gt(batch_data_samples) + p_logit = resize( + input=p_logit, + size=sem_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + i_logit = resize( + input=i_logit, + size=sem_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + d_logit = resize( + input=d_logit, + size=bd_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + sem_label = sem_label.squeeze(1) + bd_label = bd_label.squeeze(1) + loss['loss_sem_p'] = self.loss_decode[0]( + p_logit, sem_label, ignore_index=self.ignore_index) + loss['loss_sem_i'] = self.loss_decode[1](i_logit, sem_label) + loss['loss_bd'] = self.loss_decode[2](d_logit, bd_label) + filler = torch.ones_like(sem_label) * self.ignore_index + sem_bd_label = torch.where( + torch.sigmoid(d_logit[:, 0, :, :]) > 0.8, sem_label, filler) + loss['loss_sem_bd'] = self.loss_decode[3](i_logit, sem_bd_label) + loss['acc_seg'] = accuracy( + i_logit, sem_label, ignore_index=self.ignore_index) + return loss diff --git a/mmseg/models/decode_heads/point_head.py b/mmseg/models/decode_heads/point_head.py new file mode 100644 index 0000000000000000000000000000000000000000..e8e433d66249a4690cea3e33e95ec54d58ee3a07 --- /dev/null +++ b/mmseg/models/decode_heads/point_head.py @@ -0,0 +1,367 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend/point_head/point_head.py # noqa + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +try: + from mmcv.ops import point_sample +except ModuleNotFoundError: + point_sample = None + +from typing import List + +from mmseg.registry import MODELS +from mmseg.utils import SampleList +from ..losses import accuracy +from ..utils import resize +from .cascade_decode_head import BaseCascadeDecodeHead + + +def calculate_uncertainty(seg_logits): + """Estimate uncertainty based on seg logits. + + For each location of the prediction ``seg_logits`` we estimate + uncertainty as the difference between top first and top second + predicted logits. + + Args: + seg_logits (Tensor): Semantic segmentation logits, + shape (batch_size, num_classes, height, width). 
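+
+    Example (illustrative: the score is the negated top-1/top-2 margin)::
+
+        >>> import torch
+        >>> logits = torch.tensor([[[[2.0]], [[1.5]], [[-1.0]]]])
+        >>> calculate_uncertainty(logits)
+        tensor([[[[-0.5000]]]])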
+ + Returns: + scores (Tensor): T uncertainty scores with the most uncertain + locations having the highest uncertainty score, shape ( + batch_size, 1, height, width) + """ + top2_scores = torch.topk(seg_logits, k=2, dim=1)[0] + return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1) + + +@MODELS.register_module() +class PointHead(BaseCascadeDecodeHead): + """A mask point head use in PointRend. + + This head is implemented of `PointRend: Image Segmentation as + Rendering `_. + ``PointHead`` use shared multi-layer perceptron (equivalent to + nn.Conv1d) to predict the logit of input points. The fine-grained feature + and coarse feature will be concatenate together for predication. + + Args: + num_fcs (int): Number of fc layers in the head. Default: 3. + in_channels (int): Number of input channels. Default: 256. + fc_channels (int): Number of fc channels. Default: 256. + num_classes (int): Number of classes for logits. Default: 80. + class_agnostic (bool): Whether use class agnostic classification. + If so, the output channels of logits will be 1. Default: False. + coarse_pred_each_layer (bool): Whether concatenate coarse feature with + the output of each fc layer. Default: True. + conv_cfg (dict|None): Dictionary to construct and config conv layer. + Default: dict(type='Conv1d')) + norm_cfg (dict|None): Dictionary to construct and config norm layer. + Default: None. + loss_point (dict): Dictionary to construct and config loss layer of + point head. Default: dict(type='CrossEntropyLoss', use_mask=True, + loss_weight=1.0). + """ + + def __init__(self, + num_fcs=3, + coarse_pred_each_layer=True, + conv_cfg=dict(type='Conv1d'), + norm_cfg=None, + act_cfg=dict(type='ReLU', inplace=False), + **kwargs): + super().__init__( + input_transform='multiple_select', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + init_cfg=dict( + type='Normal', std=0.01, override=dict(name='fc_seg')), + **kwargs) + if point_sample is None: + raise RuntimeError('Please install mmcv-full for ' + 'point_sample ops') + + self.num_fcs = num_fcs + self.coarse_pred_each_layer = coarse_pred_each_layer + + fc_in_channels = sum(self.in_channels) + self.num_classes + fc_channels = self.channels + self.fcs = nn.ModuleList() + for k in range(num_fcs): + fc = ConvModule( + fc_in_channels, + fc_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.fcs.append(fc) + fc_in_channels = fc_channels + fc_in_channels += self.num_classes if self.coarse_pred_each_layer \ + else 0 + self.fc_seg = nn.Conv1d( + fc_in_channels, + self.num_classes, + kernel_size=1, + stride=1, + padding=0) + if self.dropout_ratio > 0: + self.dropout = nn.Dropout(self.dropout_ratio) + delattr(self, 'conv_seg') + + def cls_seg(self, feat): + """Classify each pixel with fc.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.fc_seg(feat) + return output + + def forward(self, fine_grained_point_feats, coarse_point_feats): + x = torch.cat([fine_grained_point_feats, coarse_point_feats], dim=1) + for fc in self.fcs: + x = fc(x) + if self.coarse_pred_each_layer: + x = torch.cat((x, coarse_point_feats), dim=1) + return self.cls_seg(x) + + def _get_fine_grained_point_feats(self, x, points): + """Sample from fine grained features. + + Args: + x (list[Tensor]): Feature pyramid from by neck or backbone. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). 
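+
+        Example (shape sketch of sampling one feature level with
+        ``mmcv.ops.point_sample``; hypothetical sizes)::
+
+            >>> import torch
+            >>> from mmcv.ops import point_sample
+            >>> feat = torch.rand(1, 256, 32, 32)
+            >>> points = torch.rand(1, 48, 2)  # normalized (x, y) in [0, 1]
+            >>> tuple(point_sample(feat, points).shape)
+            (1, 256, 48)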
+ + Returns: + fine_grained_feats (Tensor): Sampled fine grained feature, + shape (batch_size, sum(channels of x), num_points). + """ + + fine_grained_feats_list = [ + point_sample(_, points, align_corners=self.align_corners) + for _ in x + ] + if len(fine_grained_feats_list) > 1: + fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1) + else: + fine_grained_feats = fine_grained_feats_list[0] + + return fine_grained_feats + + def _get_coarse_point_feats(self, prev_output, points): + """Sample from fine grained features. + + Args: + prev_output (list[Tensor]): Prediction of previous decode head. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + coarse_feats (Tensor): Sampled coarse feature, shape (batch_size, + num_classes, num_points). + """ + + coarse_feats = point_sample( + prev_output, points, align_corners=self.align_corners) + + return coarse_feats + + def loss(self, inputs, prev_output, batch_data_samples: SampleList, + train_cfg, **kwargs): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + batch_data_samples (list[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `img_metas` or `gt_semantic_seg`. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + x = self._transform_inputs(inputs) + with torch.no_grad(): + points = self.get_points_train( + prev_output, calculate_uncertainty, cfg=train_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats(prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + + losses = self.loss_by_feat(point_logits, points, batch_data_samples) + + return losses + + def predict(self, inputs, prev_output, batch_img_metas: List[dict], + test_cfg, **kwargs): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. 
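+
+        Example (pure-torch sketch of how one subdivision step writes the
+        refined point logits back into the upsampled map; hypothetical
+        sizes)::
+
+            >>> import torch
+            >>> logits = torch.rand(1, 19, 4096)           # (B, C, H*W)
+            >>> idx = torch.randint(0, 4096, (1, 19, 8))   # expanded indices
+            >>> point_logits = torch.rand(1, 19, 8)
+            >>> tuple(logits.scatter_(2, idx, point_logits).shape)
+            (1, 19, 4096)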
+ """ + + x = self._transform_inputs(inputs) + refined_seg_logits = prev_output.clone() + for _ in range(test_cfg.subdivision_steps): + refined_seg_logits = resize( + refined_seg_logits, + scale_factor=test_cfg.scale_factor, + mode='bilinear', + align_corners=self.align_corners) + batch_size, channels, height, width = refined_seg_logits.shape + point_indices, points = self.get_points_test( + refined_seg_logits, calculate_uncertainty, cfg=test_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats( + prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + + point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1) + refined_seg_logits = refined_seg_logits.reshape( + batch_size, channels, height * width) + refined_seg_logits = refined_seg_logits.scatter_( + 2, point_indices, point_logits) + refined_seg_logits = refined_seg_logits.view( + batch_size, channels, height, width) + + return self.predict_by_feat(refined_seg_logits, batch_img_metas, + **kwargs) + + def loss_by_feat(self, point_logits, points, batch_data_samples, **kwargs): + """Compute segmentation loss.""" + gt_semantic_seg = self._stack_batch_gt(batch_data_samples) + point_label = point_sample( + gt_semantic_seg.float(), + points, + mode='nearest', + align_corners=self.align_corners) + point_label = point_label.squeeze(1).long() + + loss = dict() + if not isinstance(self.loss_decode, nn.ModuleList): + losses_decode = [self.loss_decode] + else: + losses_decode = self.loss_decode + for loss_module in losses_decode: + loss['point' + loss_module.loss_name] = loss_module( + point_logits, point_label, ignore_index=self.ignore_index) + + loss['acc_point'] = accuracy( + point_logits, point_label, ignore_index=self.ignore_index) + return loss + + def get_points_train(self, seg_logits, uncertainty_func, cfg): + """Sample points for training. + + Sample points in [0, 1] x [0, 1] coordinate space based on their + uncertainty. The uncertainties are calculated for each point using + 'uncertainty_func' function that takes point's logit prediction as + input. + + Args: + seg_logits (Tensor): Semantic segmentation logits, shape ( + batch_size, num_classes, height, width). + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Training config of point head. + + Returns: + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains the coordinates of ``num_points`` sampled + points. + """ + num_points = cfg.num_points + oversample_ratio = cfg.oversample_ratio + importance_sample_ratio = cfg.importance_sample_ratio + assert oversample_ratio >= 1 + assert 0 <= importance_sample_ratio <= 1 + batch_size = seg_logits.shape[0] + num_sampled = int(num_points * oversample_ratio) + point_coords = torch.rand( + batch_size, num_sampled, 2, device=seg_logits.device) + point_logits = point_sample(seg_logits, point_coords) + # It is crucial to calculate uncertainty based on the sampled + # prediction value for the points. Calculating uncertainties of the + # coarse predictions first and sampling them for points leads to + # incorrect results. To illustrate this: assume uncertainty func( + # logits)=-abs(logits), a sampled point between two coarse + # predictions with -1 and 1 logits has 0 logits, and therefore 0 + # uncertainty value. However, if we calculate uncertainties for the + # coarse predictions first, both will have -1 uncertainty, + # and sampled point will get -1 uncertainty. 
+ point_uncertainties = uncertainty_func(point_logits) + num_uncertain_points = int(importance_sample_ratio * num_points) + num_random_points = num_points - num_uncertain_points + idx = torch.topk( + point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] + shift = num_sampled * torch.arange( + batch_size, dtype=torch.long, device=seg_logits.device) + idx += shift[:, None] + point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( + batch_size, num_uncertain_points, 2) + if num_random_points > 0: + rand_point_coords = torch.rand( + batch_size, num_random_points, 2, device=seg_logits.device) + point_coords = torch.cat((point_coords, rand_point_coords), dim=1) + return point_coords + + def get_points_test(self, seg_logits, uncertainty_func, cfg): + """Sample points for testing. + + Find ``num_points`` most uncertain points from ``uncertainty_map``. + + Args: + seg_logits (Tensor): A tensor of shape (batch_size, num_classes, + height, width) for class-specific or class-agnostic prediction. + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Testing config of point head. + + Returns: + point_indices (Tensor): A tensor of shape (batch_size, num_points) + that contains indices from [0, height x width) of the most + uncertain points. + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains [0, 1] x [0, 1] normalized coordinates of the + most uncertain points from the ``height x width`` grid . + """ + + num_points = cfg.subdivision_num_points + uncertainty_map = uncertainty_func(seg_logits) + batch_size, _, height, width = uncertainty_map.shape + h_step = 1.0 / height + w_step = 1.0 / width + + uncertainty_map = uncertainty_map.view(batch_size, height * width) + num_points = min(height * width, num_points) + point_indices = uncertainty_map.topk(num_points, dim=1)[1] + point_coords = torch.zeros( + batch_size, + num_points, + 2, + dtype=torch.float, + device=seg_logits.device) + point_coords[:, :, 0] = w_step / 2.0 + (point_indices % + width).float() * w_step + point_coords[:, :, 1] = h_step / 2.0 + (point_indices // + width).float() * h_step + return point_indices, point_coords diff --git a/mmseg/models/decode_heads/psa_head.py b/mmseg/models/decode_heads/psa_head.py new file mode 100644 index 0000000000000000000000000000000000000000..13ee5c58a569bb46612625b85685cd61b7e9df3e --- /dev/null +++ b/mmseg/models/decode_heads/psa_head.py @@ -0,0 +1,197 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import resize +from .decode_head import BaseDecodeHead + +try: + from mmcv.ops import PSAMask +except ModuleNotFoundError: + PSAMask = None + + +@MODELS.register_module() +class PSAHead(BaseDecodeHead): + """Point-wise Spatial Attention Network for Scene Parsing. + + This head is the implementation of `PSANet + `_. + + Args: + mask_size (tuple[int]): The PSA mask size. It usually equals input + size. + psa_type (str): The type of psa module. Options are 'collect', + 'distribute', 'bi-direction'. Default: 'bi-direction' + compact (bool): Whether use compact map for 'collect' mode. + Default: True. + shrink_factor (int): The downsample factors of psa mask. Default: 2. + normalization_factor (float): The normalize factor of attention. + psa_softmax (bool): Whether use softmax for attention. 
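+
+    Example (pure-torch sketch of the attention aggregation used in
+    ``forward``; hypothetical sizes)::
+
+        >>> import torch
+        >>> out = torch.rand(1, 512, 6, 6)            # (n, c, h, w)
+        >>> y = torch.rand(1, 36, 36).softmax(dim=1)  # (n, h*w, h*w)
+        >>> agg = torch.bmm(out.view(1, 512, 36), y).view(1, 512, 6, 6)
+        >>> tuple(agg.shape)
+        (1, 512, 6, 6)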
+ """ + + def __init__(self, + mask_size, + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + **kwargs): + if PSAMask is None: + raise RuntimeError('Please install mmcv-full for PSAMask ops') + super().__init__(**kwargs) + assert psa_type in ['collect', 'distribute', 'bi-direction'] + self.psa_type = psa_type + self.compact = compact + self.shrink_factor = shrink_factor + self.mask_size = mask_size + mask_h, mask_w = mask_size + self.psa_softmax = psa_softmax + if normalization_factor is None: + normalization_factor = mask_h * mask_w + self.normalization_factor = normalization_factor + + self.reduce = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + if psa_type == 'bi-direction': + self.reduce_p = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention_p = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + self.psamask_collect = PSAMask('collect', mask_size) + self.psamask_distribute = PSAMask('distribute', mask_size) + else: + self.psamask = PSAMask(psa_type, mask_size) + self.proj = ConvModule( + self.channels * (2 if psa_type == 'bi-direction' else 1), + self.in_channels, + kernel_size=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + self.in_channels * 2, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + identity = x + align_corners = self.align_corners + if self.psa_type in ['collect', 'distribute']: + out = self.reduce(x) + n, c, h, w = out.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + out = resize( + out, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + y = self.attention(out) + if self.compact: + if self.psa_type == 'collect': + y = y.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y = self.psamask(y) + if self.psa_softmax: + y = F.softmax(y, dim=1) + out = torch.bmm( + out.view(n, c, h * w), y.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + else: + x_col = self.reduce(x) + x_dis = self.reduce_p(x) + n, c, h, w = x_col.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + x_col = resize( + x_col, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + x_dis = resize( + x_dis, + size=(h, w), + 
mode='bilinear', + align_corners=align_corners) + y_col = self.attention(x_col) + y_dis = self.attention_p(x_dis) + if self.compact: + y_dis = y_dis.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y_col = self.psamask_collect(y_col) + y_dis = self.psamask_distribute(y_dis) + if self.psa_softmax: + y_col = F.softmax(y_col, dim=1) + y_dis = F.softmax(y_dis, dim=1) + x_col = torch.bmm( + x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + x_dis = torch.bmm( + x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + out = torch.cat([x_col, x_dis], 1) + out = self.proj(out) + out = resize( + out, + size=identity.shape[2:], + mode='bilinear', + align_corners=align_corners) + out = self.bottleneck(torch.cat((identity, out), dim=1)) + out = self.cls_seg(out) + return out diff --git a/mmseg/models/decode_heads/psp_head.py b/mmseg/models/decode_heads/psp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..a40ec41dec281e53815e9753ee2ba1a5da76bd05 --- /dev/null +++ b/mmseg/models/decode_heads/psp_head.py @@ -0,0 +1,117 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import resize +from .decode_head import BaseDecodeHead + + +class PPM(nn.ModuleList): + """Pooling Pyramid Module used in PSPNet. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + align_corners (bool): align_corners argument of F.interpolate. + """ + + def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, + act_cfg, align_corners, **kwargs): + super().__init__() + self.pool_scales = pool_scales + self.align_corners = align_corners + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for pool_scale in pool_scales: + self.append( + nn.Sequential( + nn.AdaptiveAvgPool2d(pool_scale), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + **kwargs))) + + def forward(self, x): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(x) + upsampled_ppm_out = resize( + ppm_out, + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ppm_outs.append(upsampled_ppm_out) + return ppm_outs + + +@MODELS.register_module() +class PSPHead(BaseDecodeHead): + """Pyramid Scene Parsing Network. + + This head is the implementation of + `PSPNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. Default: (1, 2, 3, 6). 
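+
+    Example (illustrative PPM shapes; ``norm_cfg=None`` keeps the sketch
+    free of BN so it runs stand-alone)::
+
+        >>> import torch
+        >>> ppm = PPM((1, 2), 64, 16, conv_cfg=None, norm_cfg=None,
+        ...           act_cfg=dict(type='ReLU'), align_corners=False)
+        >>> outs = ppm(torch.rand(1, 64, 32, 32))
+        >>> [tuple(o.shape) for o in outs]
+        [(1, 16, 32, 32), (1, 16, 32, 32)]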
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super().__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.psp_modules = PPM( + self.pool_scales, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _forward_feature(self, inputs): + """Forward function for feature maps before classifying each pixel with + ``self.cls_seg`` fc. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + feats (Tensor): A tensor of shape (batch_size, self.channels, + H, W) which is feature map for last layer of decoder head. + """ + x = self._transform_inputs(inputs) + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + feats = self.bottleneck(psp_outs) + return feats + + def forward(self, inputs): + """Forward function.""" + output = self._forward_feature(inputs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/san_head.py b/mmseg/models/decode_heads/san_head.py new file mode 100644 index 0000000000000000000000000000000000000000..d20da801924080efeee30a246331af2e2e5df352 --- /dev/null +++ b/mmseg/models/decode_heads/san_head.py @@ -0,0 +1,736 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from functools import partial +from typing import Dict, List, Tuple + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, build_norm_layer +from mmcv.cnn.bricks.transformer import BaseTransformerLayer +from mmcv.ops import point_sample +from mmengine.dist import all_reduce +from mmengine.model.weight_init import (caffe2_xavier_init, normal_init, + trunc_normal_) +from mmengine.runner.checkpoint import CheckpointLoader, load_state_dict +from mmengine.structures import InstanceData +from torch import Tensor +from torch.nn import functional as F + +from mmseg.models.backbones.vit import TransformerEncoderLayer +from mmseg.registry import MODELS +from mmseg.utils import (ConfigType, MatchMasks, SampleList, + seg_data_to_instance_data) +from ..utils import (MLP, LayerNorm2d, PatchEmbed, cross_attn_layer, + get_uncertain_point_coords_with_randomness, resize) +from .decode_head import BaseDecodeHead + + +class MLPMaskDecoder(nn.Module): + """Module for decoding query and visual features with MLP layers to + generate the attention biases and the mask proposals.""" + + def __init__( + self, + *, + in_channels: int, + total_heads: int = 1, + total_layers: int = 1, + embed_channels: int = 256, + mlp_channels: int = 256, + mlp_num_layers: int = 3, + rescale_attn_bias: bool = False, + ): + super().__init__() + self.total_heads = total_heads + self.total_layers = total_layers + + dense_affine_func = partial(nn.Conv2d, kernel_size=1) + # Query Branch + self.query_mlp = MLP(in_channels, mlp_channels, embed_channels, + mlp_num_layers) + # Pixel Branch + self.pix_mlp = MLP( + in_channels, + mlp_channels, + embed_channels, + mlp_num_layers, + affine_func=dense_affine_func, + ) + # Attention Bias Branch + self.attn_mlp = MLP( + in_channels, + mlp_channels, + embed_channels * self.total_heads * self.total_layers, + mlp_num_layers, + affine_func=dense_affine_func, + ) + if rescale_attn_bias: + self.bias_scaling = nn.Linear(1, 1) + else: + 
self.bias_scaling = nn.Identity() + + def forward(self, query: torch.Tensor, + x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]: + """Forward function. + Args: + query (Tensor): Query Tokens [B,N,C]. + x (Tensor): Visual features [B,C,H,W] + + Return: + mask_preds (Tensor): Mask proposals. + attn_bias (List[Tensor]): List of attention bias. + """ + query = self.query_mlp(query) + pix = self.pix_mlp(x) + b, c, h, w = pix.shape + # preidict mask + mask_preds = torch.einsum('bqc,bchw->bqhw', query, pix) + # generate attn bias + attn = self.attn_mlp(x) + attn = attn.reshape(b, self.total_layers, self.total_heads, c, h, w) + attn_bias = torch.einsum('bqc,blnchw->blnqhw', query, attn) + attn_bias = self.bias_scaling(attn_bias[..., None]).squeeze(-1) + attn_bias = attn_bias.chunk(self.total_layers, dim=1) + attn_bias = [attn.squeeze(1) for attn in attn_bias] + return mask_preds, attn_bias + + +class SideAdapterNetwork(nn.Module): + """Side Adapter Network for predicting mask proposals and attention bias. + + Args: + in_channels (int): Number of input channels. Default: 3. + clip_channels (int): Number of channels of visual features. + Default: 768. + embed_dims (int): embedding dimension. Default: 240. + patch_size (int): The patch size. Default: 16. + patch_bias (bool): Whether use bias in patch embedding. + Default: True. + num_queries (int): Number of queries for mask proposals. + Default: 100. + fusion_index (List[int]): The layer number of the encode + transformer to fuse with the CLIP feature. + Default: [0, 1, 2, 3]. + cfg_encoder (ConfigType): Configs for the encode layers. + cfg_decoder (ConfigType): Configs for the decode layers. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + """ + + def __init__( + self, + in_channels: int = 3, + clip_channels: int = 768, + embed_dims: int = 240, + patch_size: int = 16, + patch_bias: bool = True, + num_queries: int = 100, + fusion_index: list = [0, 1, 2, 3], + cfg_encoder: ConfigType = ..., + cfg_decoder: ConfigType = ..., + norm_cfg: dict = dict(type='LN'), + ): + super().__init__() + + self.patch_embed = PatchEmbed( + in_channels=in_channels, + embed_dims=embed_dims, + conv_type='Conv2d', + kernel_size=patch_size, + stride=patch_size, + padding=0, + input_size=(640, 640), + bias=patch_bias, + norm_cfg=None, + init_cfg=None, + ) + ori_h, ori_w = self.patch_embed.init_out_size + num_patches = ori_h * ori_w + self.pos_embed = nn.Parameter( + torch.randn(1, num_patches, embed_dims) * .02) + self.query_pos_embed = nn.Parameter( + torch.zeros(1, num_queries, embed_dims)) + self.query_embed = nn.Parameter( + torch.zeros(1, num_queries, embed_dims)) + encode_layers = [] + for i in range(cfg_encoder.num_encode_layer): + encode_layers.append( + TransformerEncoderLayer( + embed_dims=embed_dims, + num_heads=cfg_encoder.num_heads, + feedforward_channels=cfg_encoder.mlp_ratio * embed_dims, + norm_cfg=norm_cfg)) + self.encode_layers = nn.ModuleList(encode_layers) + conv_clips = [] + for i in range(len(fusion_index)): + conv_clips.append( + nn.Sequential( + LayerNorm2d(clip_channels), + ConvModule( + clip_channels, + embed_dims, + kernel_size=1, + norm_cfg=None, + act_cfg=None))) + self.conv_clips = nn.ModuleList(conv_clips) + self.fusion_index = fusion_index + self.mask_decoder = MLPMaskDecoder( + in_channels=embed_dims, + total_heads=cfg_decoder.num_heads, + total_layers=cfg_decoder.num_layers, + embed_channels=cfg_decoder.embed_channels, + mlp_channels=cfg_decoder.mlp_channels, + 
mlp_num_layers=cfg_decoder.num_mlp, + rescale_attn_bias=cfg_decoder.rescale) + + def init_weights(self): + trunc_normal_(self.pos_embed, std=0.02) + nn.init.normal_(self.query_embed, std=0.02) + nn.init.normal_(self.query_pos_embed, std=0.02) + for i in range(len(self.conv_clips)): + caffe2_xavier_init(self.conv_clips[i][1].conv) + + def fuse_clip(self, fused_index: int, x: torch.Tensor, + clip_feature: torch.Tensor, hwshape: Tuple[int, + int], L: int): + """Fuse CLIP feature and visual tokens.""" + fused_clip = (resize( + self.conv_clips[fused_index](clip_feature.contiguous()), + size=hwshape, + mode='bilinear', + align_corners=False)).permute(0, 2, 3, 1).reshape(x[:, -L:, + ...].shape) + x = torch.cat([x[:, :-L, ...], x[:, -L:, ...] + fused_clip], dim=1) + return x + + def encode_feature(self, image: torch.Tensor, + clip_features: List[torch.Tensor], + deep_supervision_idxs: List[int]) -> List[List]: + """Encode images by a lightweight vision transformer.""" + assert len(self.fusion_index) == len(clip_features) + x, hwshape = self.patch_embed(image) + ori_h, ori_w = self.patch_embed.init_out_size + pos_embed = self.pos_embed + if self.pos_embed.shape[1] != x.shape[1]: + # resize the position embedding + pos_embed = ( + resize( + self.pos_embed.reshape(1, ori_h, ori_w, + -1).permute(0, 3, 1, 2), + size=hwshape, + mode='bicubic', + align_corners=False, + ).flatten(2).permute(0, 2, 1)) + pos_embed = torch.cat([ + self.query_pos_embed.expand(pos_embed.shape[0], -1, -1), pos_embed + ], + dim=1) + x = torch.cat([self.query_embed.expand(x.shape[0], -1, -1), x], dim=1) + x = x + pos_embed + L = hwshape[0] * hwshape[1] + fused_index = 0 + if self.fusion_index[fused_index] == 0: + x = self.fuse_clip(fused_index, x, clip_features[0][0], hwshape, L) + fused_index += 1 + outs = [] + for index, block in enumerate(self.encode_layers, start=1): + x = block(x) + if index < len(self.fusion_index + ) and index == self.fusion_index[fused_index]: + x = self.fuse_clip(fused_index, x, + clip_features[fused_index][0], hwshape, L) + fused_index += 1 + x_query = x[:, :-L, ...] + x_feat = x[:, -L:, ...].permute(0, 2, 1)\ + .reshape(x.shape[0], x.shape[-1], hwshape[0], hwshape[1]) + + if index in deep_supervision_idxs or index == len( + self.encode_layers): + outs.append({'query': x_query, 'x': x_feat}) + + if index < len(self.encode_layers): + x = x + pos_embed + return outs + + def decode_feature(self, features): + mask_embeds = [] + attn_biases = [] + for feature in features: + mask_embed, attn_bias = self.mask_decoder(**feature) + mask_embeds.append(mask_embed) + attn_biases.append(attn_bias) + return mask_embeds, attn_biases + + def forward( + self, image: torch.Tensor, clip_features: List[torch.Tensor], + deep_supervision_idxs: List[int] + ) -> Tuple[List[torch.Tensor], List[List[torch.Tensor]]]: + """Forward function.""" + features = self.encode_feature(image, clip_features, + deep_supervision_idxs) + mask_embeds, attn_biases = self.decode_feature(features) + return mask_embeds, attn_biases + + +class RecWithAttnbias(nn.Module): + """Mask recognition module by applying the attention biases to rest deeper + CLIP layers. + + Args: + sos_token_format (str): The format of sos token. It should be + chosen from ["cls_token", "learnable_token", "pos_embedding"]. + Default: 'cls_token'. + sos_token_num (int): Number of sos token. It should be equal to + the number of quries. Default: 100. + num_layers (int): Number of rest CLIP layers for mask recognition. + Default: 3. 
+        cross_attn (bool): Whether to use cross attention to update sos token.
+            Default: False.
+        embed_dims (int): The feature dimension of CLIP layers.
+            Default: 768.
+        num_heads (int): Parallel attention heads of CLIP layers.
+            Default: 12.
+        mlp_ratio (int): Ratio of mlp hidden dim to embedding dim.
+            Default: 4.
+        num_fcs (int): The number of fully-connected layers for FFNs.
+            Default: 2.
+        qkv_bias (bool): Whether to use bias in multihead-attention.
+            Default: True.
+        out_dims (int): Number of channels of the output mask proposals.
+            It should be equal to the out_dims of text_encoder.
+            Default: 512.
+        final_norm (bool): Whether to use a norm layer for sos token.
+            Default: True.
+        act_cfg (dict): The activation config for FFNs.
+            Default: dict(type='GELU').
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='LN').
+        frozen_exclude (List): List of parameters that are not to be frozen.
+    """
+
+    def __init__(self,
+                 sos_token_format: str = 'cls_token',
+                 sos_token_num: int = 100,
+                 num_layers: int = 3,
+                 cross_attn: bool = False,
+                 embed_dims: int = 768,
+                 num_heads: int = 12,
+                 mlp_ratio: int = 4,
+                 num_fcs: int = 2,
+                 qkv_bias: bool = True,
+                 out_dims: int = 512,
+                 final_norm: bool = True,
+                 act_cfg: dict = dict(type='GELU'),
+                 norm_cfg: dict = dict(type='LN'),
+                 frozen_exclude: List = []):
+        super().__init__()
+
+        assert sos_token_format in [
+            'cls_token', 'learnable_token', 'pos_embedding'
+        ]
+        self.sos_token_format = sos_token_format
+        self.sos_token_num = sos_token_num
+        self.frozen_exclude = frozen_exclude
+        self.cross_attn = cross_attn
+        self.num_layers = num_layers
+        self.num_heads = num_heads
+        if sos_token_format in ['learnable_token', 'pos_embedding']:
+            # The sos tokens share the transformer width (`embed_dims`) and
+            # must stay trainable even when other parameters are frozen.
+            self.sos_token = nn.Parameter(
+                torch.randn(sos_token_num, 1, embed_dims))
+            self.frozen_exclude.append('sos_token')
+
+        layers = []
+        for i in range(num_layers):
+            layers.append(
+                BaseTransformerLayer(
+                    attn_cfgs=dict(
+                        type='MultiheadAttention',
+                        embed_dims=embed_dims,
+                        num_heads=num_heads,
+                        batch_first=False,
+                        bias=qkv_bias),
+                    ffn_cfgs=dict(
+                        type='FFN',
+                        embed_dims=embed_dims,
+                        feedforward_channels=mlp_ratio * embed_dims,
+                        act_cfg=act_cfg),
+                    operation_order=('norm', 'self_attn', 'norm', 'ffn')))
+        self.layers = nn.ModuleList(layers)
+
+        self.ln_post = build_norm_layer(norm_cfg, embed_dims)[1]
+        self.proj = nn.Linear(embed_dims, out_dims, bias=False)
+
+        self.final_norm = final_norm
+        self._freeze()
+
+    def init_weights(self, rec_state_dict):
+        if hasattr(self, 'sos_token'):
+            normal_init(self.sos_token, std=0.02)
+        if rec_state_dict is not None:
+            load_state_dict(self, rec_state_dict, strict=False, logger=None)
+        else:
+            super().init_weights()
+
+    def _freeze(self):
+        if 'all' in self.frozen_exclude:
+            return
+        for name, param in self.named_parameters():
+            if not any([exclude in name for exclude in self.frozen_exclude]):
+                param.requires_grad = False
+
+    def _build_attn_biases(self, attn_biases, target_shape):
+        formatted_attn_biases = []
+        for attn_bias in attn_biases:
+            # convert it to proper format: N*num_head,L,L
+            # attn_bias: [N, num_head/1, num_sos,H,W]
+            n, num_head, num_sos, h, w = attn_bias.shape
+            # reshape and downsample
+            attn_bias = F.adaptive_max_pool2d(
+                attn_bias.reshape(n, num_head * num_sos, h, w),
+                output_size=target_shape)
+            attn_bias = attn_bias.reshape(n, num_head, num_sos, *target_shape)
+
+            true_num_head = self.num_heads
+            assert (num_head == 1 or num_head
+                    == true_num_head), f'num_head={num_head} is not supported.'
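+            # (Editor's note, illustrative) e.g. with n=2, true_num_head=12,
+            # num_sos=100 and target_shape=(32, 32), a [2, 1, 100, 32, 32]
+            # bias is broadcast over all 12 heads below and flattened to
+            # [24, 100, 1024] so it can serve as an attention mask.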
+ if num_head == 1: + attn_bias = attn_bias.repeat(1, true_num_head, 1, 1, 1) + attn_bias = attn_bias.reshape(n * true_num_head, num_sos, -1) + L = attn_bias.shape[-1] + if self.cross_attn: + # [n*num_head, num_sos, L] + formatted_attn_biases.append(attn_bias) + else: + # [n*num_head, num_sos+1+L, num_sos+1+L] + new_attn_bias = attn_bias.new_zeros(num_sos + 1 + L, + num_sos + 1 + L) + new_attn_bias[:, :num_sos] = -100 + new_attn_bias[torch.arange(num_sos), torch.arange(num_sos)] = 0 + new_attn_bias[:num_sos, num_sos] = -100 + new_attn_bias = ( + new_attn_bias[None, ...].expand(n * true_num_head, -1, + -1).clone()) + new_attn_bias[..., :num_sos, -L:] = attn_bias + formatted_attn_biases.append(new_attn_bias) + + if len(formatted_attn_biases) == 1: + formatted_attn_biases = [ + formatted_attn_biases[0] for _ in range(self.num_layers) + ] + return formatted_attn_biases + + def forward(self, bias: List[Tensor], feature: List[Tensor]): + """Forward function to recognize the category of masks + Args: + bias (List[Tensor]): Attention bias for transformer layers + feature (List[Tensor]): Output of the image encoder, + including cls_token and img_feature. + """ + cls_token = feature[1].unsqueeze(0) + img_feature = feature[0] + b, c, h, w = img_feature.shape + # construct clip shadow features + x = torch.cat( + [cls_token, + img_feature.reshape(b, c, -1).permute(2, 0, 1)]) + + # construct sos token + if self.sos_token_format == 'cls_token': + sos_token = cls_token.repeat(self.sos_token_num, 1, 1) + elif self.sos_token_format == 'learnable_token': + sos_token = self.sos_token.expand(-1, b, -1) + elif self.sos_token_format == 'pos_embedding': + sos_token = self.sos_token.expand(-1, b, -1) + cls_token + + # construct attn bias + attn_biases = self._build_attn_biases(bias, target_shape=(h, w)) + + if self.cross_attn: + for i, block in enumerate(self.layers): + if self.cross_attn: + sos_token = cross_attn_layer( + block, + sos_token, + x[1:, ], + attn_biases[i], + ) + if i < len(self.layers) - 1: + x = block(x) + else: + x = torch.cat([sos_token, x], dim=0) + for i, block in enumerate(self.layers): + x = block(x, attn_masks=[attn_biases[i]]) + sos_token = x[:self.sos_token_num] + + sos_token = sos_token.permute(1, 0, 2) # LND -> NLD + sos_token = self.ln_post(sos_token) + sos_token = self.proj(sos_token) + if self.final_norm: + sos_token = F.normalize(sos_token, dim=-1) + return sos_token + + +@MODELS.register_module() +class SideAdapterCLIPHead(BaseDecodeHead): + """Side Adapter Network (SAN) for open-vocabulary semantic segmentation + with pre-trained vision-language model. + + This decode head is the implementation of `Side Adapter Network + for Open-Vocabulary Semantic Segmentation` + . + Modified from https://github.com/MendelXu/SAN/blob/main/san/model/side_adapter/side_adapter.py # noqa:E501 + Copyright (c) 2023 MendelXu. + Licensed under the MIT License + + Args: + num_classes (int): the number of classes. 
+ san_cfg (ConfigType): Configs for SideAdapterNetwork module + maskgen_cfg (ConfigType): Configs for RecWithAttnbias module + """ + + def __init__(self, num_classes: int, san_cfg: ConfigType, + maskgen_cfg: ConfigType, deep_supervision_idxs: List[int], + train_cfg: ConfigType, **kwargs): + super().__init__( + in_channels=san_cfg.in_channels, + channels=san_cfg.embed_dims, + num_classes=num_classes, + **kwargs) + assert san_cfg.num_queries == maskgen_cfg.sos_token_num, \ + 'num_queries in san_cfg should be equal to sos_token_num ' \ + 'in maskgen_cfg' + del self.conv_seg + self.side_adapter_network = SideAdapterNetwork(**san_cfg) + self.rec_with_attnbias = RecWithAttnbias(**maskgen_cfg) + self.deep_supervision_idxs = deep_supervision_idxs + self.train_cfg = train_cfg + if train_cfg: + self.match_masks = MatchMasks( + num_points=train_cfg.num_points, + num_queries=san_cfg.num_queries, + num_classes=num_classes, + assigner=train_cfg.assigner) + + def init_weights(self): + + rec_state_dict = None + if isinstance(self.init_cfg, dict) and \ + self.init_cfg.get('type') == 'Pretrained_Part': + checkpoint = CheckpointLoader.load_checkpoint( + self.init_cfg['checkpoint'], logger=None, map_location='cpu') + + rec_state_dict = checkpoint.copy() + para_prefix = 'decode_head.rec_with_attnbias' + prefix_len = len(para_prefix) + 1 + for k, v in checkpoint.items(): + rec_state_dict.pop(k) + if para_prefix in k: + rec_state_dict[k[prefix_len:]] = v + + self.side_adapter_network.init_weights() + self.rec_with_attnbias.init_weights(rec_state_dict) + + def forward(self, inputs: Tuple[Tensor], + deep_supervision_idxs) -> Tuple[List]: + """Forward function. + + Args: + inputs (Tuple[Tensor]): A triplet including images, + list of multi-level visual features from image encoder and + class embeddings from text_encoder. + + Returns: + mask_props (List[Tensor]): Mask proposals predicted by SAN. + mask_logits (List[Tensor]): Class logits of mask proposals. + """ + imgs, clip_feature, class_embeds = inputs + # predict mask proposals and attention bias + mask_props, attn_biases = self.side_adapter_network( + imgs, clip_feature, deep_supervision_idxs) + + # mask recognition with attention bias + mask_embeds = [ + self.rec_with_attnbias(att_bias, clip_feature[-1]) + for att_bias in attn_biases + ] + # Obtain class prediction of masks by comparing the similarity + # between the image token and the text embedding of class names. + mask_logits = [ + torch.einsum('bqc,nc->bqn', mask_embed, class_embeds) + for mask_embed in mask_embeds + ] + return mask_props, mask_logits + + def predict(self, inputs: Tuple[Tensor], batch_img_metas: List[dict], + test_cfg: ConfigType) -> Tensor: + """Forward function for prediction. + + Args: + inputs (Tuple[Tensor]): Images, visual features from image encoder + and class embedding from text encoder. + batch_img_metas (dict): List Image info where each dict may also + contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Outputs segmentation logits map. + """ + mask_props, mask_logits = self.forward(inputs, []) + + return self.predict_by_feat([mask_props[-1], mask_logits[-1]], + batch_img_metas) + + def predict_by_feat(self, seg_logits: List[Tensor], + batch_img_metas: List[dict]) -> Tensor: + """1. Transform a batch of mask proposals to the input shape. + 2. 
Generate segmentation map with mask proposals and class logits. + """ + mask_pred = seg_logits[0] + cls_score = seg_logits[1] + if isinstance(batch_img_metas[0]['img_shape'], torch.Size): + # slide inference + size = batch_img_metas[0]['img_shape'] + elif 'pad_shape' in batch_img_metas[0]: + size = batch_img_metas[0]['pad_shape'][:2] + else: + size = batch_img_metas[0]['img_shape'] + # upsample mask + mask_pred = F.interpolate( + mask_pred, size=size, mode='bilinear', align_corners=False) + + mask_cls = F.softmax(cls_score, dim=-1)[..., :-1] + mask_pred = mask_pred.sigmoid() + seg_logits = torch.einsum('bqc,bqhw->bchw', mask_cls, mask_pred) + return seg_logits + + def loss(self, x: Tuple[Tensor], batch_data_samples: SampleList, + train_cfg: ConfigType) -> dict: + """Perform forward propagation and loss calculation of the decoder head + on the features of the upstream network. + + Args: + x (tuple[Tensor]): Multi-level features from the upstream + network, each is a 4D-tensor. + batch_data_samples (List[:obj:`SegDataSample`]): The Data + Samples. It usually includes information such as + `gt_sem_seg`. + train_cfg (ConfigType): Training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components. + """ + # batch SegDataSample to InstanceDataSample + batch_gt_instances = seg_data_to_instance_data(self.ignore_index, + batch_data_samples) + + # forward + all_mask_props, all_mask_logits = self.forward( + x, self.deep_supervision_idxs) + + # loss + losses = self.loss_by_feat(all_mask_logits, all_mask_props, + batch_gt_instances) + + return losses + + def loss_by_feat( + self, all_cls_scores: Tensor, all_mask_preds: Tensor, + batch_gt_instances: List[InstanceData]) -> Dict[str, Tensor]: + """Loss function. + + Args: + all_cls_scores (Tensor): Classification scores for all decoder + layers with shape (num_decoder, batch_size, num_queries, + cls_out_channels). Note `cls_out_channels` should includes + background. + all_mask_preds (Tensor): Mask scores for all decoder layers with + shape (num_decoder, batch_size, num_queries, h, w). + batch_gt_instances (list[obj:`InstanceData`]): each contains + ``labels`` and ``masks``. + + Returns: + dict[str, Tensor]: A dictionary of loss components. 
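+
+        Note (editor's addition): besides the loss of the last decoder
+        layer, the returned dict also carries the losses of intermediate
+        layers, with their keys prefixed by the deep supervision index,
+        e.g. ``d3.loss_cls_ce`` (the index ``3`` is hypothetical).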
+ """ + num_dec_layers = len(all_cls_scores) + batch_gt_instances_list = [ + batch_gt_instances for _ in range(num_dec_layers) + ] + + losses = [] + for i in range(num_dec_layers): + cls_scores = all_cls_scores[i] + mask_preds = all_mask_preds[i] + # matching N mask predictions to K category labels + (labels, mask_targets, mask_weights, + avg_factor) = self.match_masks.get_targets( + cls_scores, mask_preds, batch_gt_instances_list[i]) + cls_scores = cls_scores.flatten(0, 1) + labels = labels.flatten(0, 1) + num_total_masks = cls_scores.new_tensor([avg_factor], + dtype=torch.float) + all_reduce(num_total_masks, op='mean') + num_total_masks = max(num_total_masks, 1) + + # extract positive ones + # shape (batch_size, num_queries, h, w) -> (num_total_gts, h, w) + mask_preds = mask_preds[mask_weights > 0] + + if mask_targets.shape[0] != 0: + with torch.no_grad(): + points_coords = get_uncertain_point_coords_with_randomness( + mask_preds.unsqueeze(1), None, + self.train_cfg.num_points, + self.train_cfg.oversample_ratio, + self.train_cfg.importance_sample_ratio) + # shape (num_total_gts, h, w) + # -> (num_total_gts, num_points) + mask_point_targets = point_sample( + mask_targets.unsqueeze(1).float(), + points_coords).squeeze(1) + # shape (num_queries, h, w) -> (num_queries, num_points) + mask_point_preds = point_sample( + mask_preds.unsqueeze(1), points_coords).squeeze(1) + + if not isinstance(self.loss_decode, nn.ModuleList): + losses_decode = [self.loss_decode] + else: + losses_decode = self.loss_decode + loss = dict() + for loss_decode in losses_decode: + if 'loss_cls' in loss_decode.loss_name: + if loss_decode.loss_name == 'loss_cls_ce': + loss[loss_decode.loss_name] = loss_decode( + cls_scores, labels) + else: + assert False, "Only support 'CrossEntropyLoss' in" \ + ' classification loss' + + elif 'loss_mask' in loss_decode.loss_name: + if mask_targets.shape[0] == 0: + loss[loss_decode.loss_name] = mask_preds.sum() + elif loss_decode.loss_name == 'loss_mask_ce': + loss[loss_decode.loss_name] = loss_decode( + mask_point_preds, + mask_point_targets, + avg_factor=num_total_masks * + self.train_cfg.num_points) + elif loss_decode.loss_name == 'loss_mask_dice': + loss[loss_decode.loss_name] = loss_decode( + mask_point_preds, + mask_point_targets, + avg_factor=num_total_masks) + else: + assert False, "Only support 'CrossEntropyLoss' and" \ + " 'DiceLoss' in mask loss" + else: + assert False, "Only support for 'loss_cls' and 'loss_mask'" + + losses.append(loss) + + loss_dict = dict() + # loss from the last decoder layer + loss_dict.update(losses[-1]) + # loss from other decoder layers + for i, loss in enumerate(losses[:-1]): + for k, v in loss.items(): + loss_dict[f'd{self.deep_supervision_idxs[i]}.{k}'] = v + return loss_dict diff --git a/mmseg/models/decode_heads/segformer_head.py b/mmseg/models/decode_heads/segformer_head.py new file mode 100644 index 0000000000000000000000000000000000000000..f9eb0b320b4e7b892e0540cea5ba5ea7054f8008 --- /dev/null +++ b/mmseg/models/decode_heads/segformer_head.py @@ -0,0 +1,66 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.models.decode_heads.decode_head import BaseDecodeHead +from mmseg.registry import MODELS +from ..utils import resize + + +@MODELS.register_module() +class SegformerHead(BaseDecodeHead): + """The all mlp Head of segformer. + + This head is the implementation of + `Segformer ` _. 
+ + Args: + interpolate_mode: The interpolate mode of MLP head upsample operation. + Default: 'bilinear'. + """ + + def __init__(self, interpolate_mode='bilinear', **kwargs): + super().__init__(input_transform='multiple_select', **kwargs) + + self.interpolate_mode = interpolate_mode + num_inputs = len(self.in_channels) + + assert num_inputs == len(self.in_index) + + self.convs = nn.ModuleList() + for i in range(num_inputs): + self.convs.append( + ConvModule( + in_channels=self.in_channels[i], + out_channels=self.channels, + kernel_size=1, + stride=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + self.fusion_conv = ConvModule( + in_channels=self.channels * num_inputs, + out_channels=self.channels, + kernel_size=1, + norm_cfg=self.norm_cfg) + + def forward(self, inputs): + # Receive 4 stage backbone feature map: 1/4, 1/8, 1/16, 1/32 + inputs = self._transform_inputs(inputs) + outs = [] + for idx in range(len(inputs)): + x = inputs[idx] + conv = self.convs[idx] + outs.append( + resize( + input=conv(x), + size=inputs[0].shape[2:], + mode=self.interpolate_mode, + align_corners=self.align_corners)) + + out = self.fusion_conv(torch.cat(outs, dim=1)) + + out = self.cls_seg(out) + + return out diff --git a/mmseg/models/decode_heads/segmenter_mask_head.py b/mmseg/models/decode_heads/segmenter_mask_head.py new file mode 100644 index 0000000000000000000000000000000000000000..85d27735ba8015772324177716b5e8d5f357295c --- /dev/null +++ b/mmseg/models/decode_heads/segmenter_mask_head.py @@ -0,0 +1,132 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import build_norm_layer +from mmengine.model import ModuleList +from mmengine.model.weight_init import (constant_init, trunc_normal_, + trunc_normal_init) + +from mmseg.models.backbones.vit import TransformerEncoderLayer +from mmseg.registry import MODELS +from .decode_head import BaseDecodeHead + + +@MODELS.register_module() +class SegmenterMaskTransformerHead(BaseDecodeHead): + """Segmenter: Transformer for Semantic Segmentation. + + This head is the implementation of + `Segmenter: `_. + + Args: + backbone_cfg:(dict): Config of backbone of + Context Path. + in_channels (int): The number of channels of input image. + num_layers (int): The depth of transformer. + num_heads (int): The number of attention heads. + embed_dims (int): The number of embedding dimension. + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + drop_path_rate (float): stochastic depth rate. Default 0.1. + drop_rate (float): Probability of an element to be zeroed. + Default 0.0 + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + qkv_bias (bool): Enable bias for qkv if True. Default: True. + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + init_std (float): The value of std in weight initialization. + Default: 0.02. 
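+
+    Example (editor's illustrative sketch; all sizes are assumed):
+        >>> import torch
+        >>> head = SegmenterMaskTransformerHead(
+        ...     in_channels=192, channels=192, num_classes=150,
+        ...     num_layers=2, num_heads=3, embed_dims=192)
+        >>> head([torch.rand(1, 192, 32, 32)]).shape
+        torch.Size([1, 150, 32, 32])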
+ """ + + def __init__( + self, + in_channels, + num_layers, + num_heads, + embed_dims, + mlp_ratio=4, + drop_path_rate=0.1, + drop_rate=0.0, + attn_drop_rate=0.0, + num_fcs=2, + qkv_bias=True, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + init_std=0.02, + **kwargs, + ): + super().__init__(in_channels=in_channels, **kwargs) + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)] + self.layers = ModuleList() + for i in range(num_layers): + self.layers.append( + TransformerEncoderLayer( + embed_dims=embed_dims, + num_heads=num_heads, + feedforward_channels=mlp_ratio * embed_dims, + attn_drop_rate=attn_drop_rate, + drop_rate=drop_rate, + drop_path_rate=dpr[i], + num_fcs=num_fcs, + qkv_bias=qkv_bias, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + batch_first=True, + )) + + self.dec_proj = nn.Linear(in_channels, embed_dims) + + self.cls_emb = nn.Parameter( + torch.randn(1, self.num_classes, embed_dims)) + self.patch_proj = nn.Linear(embed_dims, embed_dims, bias=False) + self.classes_proj = nn.Linear(embed_dims, embed_dims, bias=False) + + self.decoder_norm = build_norm_layer( + norm_cfg, embed_dims, postfix=1)[1] + self.mask_norm = build_norm_layer( + norm_cfg, self.num_classes, postfix=2)[1] + + self.init_std = init_std + + delattr(self, 'conv_seg') + + def init_weights(self): + trunc_normal_(self.cls_emb, std=self.init_std) + trunc_normal_init(self.patch_proj, std=self.init_std) + trunc_normal_init(self.classes_proj, std=self.init_std) + for n, m in self.named_modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=self.init_std, bias=0) + elif isinstance(m, nn.LayerNorm): + constant_init(m, val=1.0, bias=0.0) + + def forward(self, inputs): + x = self._transform_inputs(inputs) + b, c, h, w = x.shape + x = x.permute(0, 2, 3, 1).contiguous().view(b, -1, c) + + x = self.dec_proj(x) + cls_emb = self.cls_emb.expand(x.size(0), -1, -1) + x = torch.cat((x, cls_emb), 1) + for layer in self.layers: + x = layer(x) + x = self.decoder_norm(x) + + patches = self.patch_proj(x[:, :-self.num_classes]) + cls_seg_feat = self.classes_proj(x[:, -self.num_classes:]) + + patches = F.normalize(patches, dim=2, p=2) + cls_seg_feat = F.normalize(cls_seg_feat, dim=2, p=2) + + masks = patches @ cls_seg_feat.transpose(1, 2) + masks = self.mask_norm(masks) + masks = masks.permute(0, 2, 1).contiguous().view(b, -1, h, w) + + return masks diff --git a/mmseg/models/decode_heads/sep_aspp_head.py b/mmseg/models/decode_heads/sep_aspp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..9dba68c9ecc6909e47da4f2da6169d529910355d --- /dev/null +++ b/mmseg/models/decode_heads/sep_aspp_head.py @@ -0,0 +1,102 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule + +from mmseg.registry import MODELS +from ..utils import resize +from .aspp_head import ASPPHead, ASPPModule + + +class DepthwiseSeparableASPPModule(ASPPModule): + """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable + conv.""" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + for i, dilation in enumerate(self.dilations): + if dilation > 1: + self[i] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + 3, + dilation=dilation, + padding=dilation, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + +@MODELS.register_module() +class DepthwiseSeparableASPPHead(ASPPHead): + """Encoder-Decoder with Atrous Separable Convolution for Semantic Image + Segmentation. 
+
+    This head is the implementation of `DeepLabV3+
+    <https://arxiv.org/abs/1802.02611>`_.
+
+    Args:
+        c1_in_channels (int): The input channels of c1 decoder.
+            If it is 0, no c1 decoder will be used.
+        c1_channels (int): The intermediate channels of c1 decoder.
+    """
+
+    def __init__(self, c1_in_channels, c1_channels, **kwargs):
+        super().__init__(**kwargs)
+        assert c1_in_channels >= 0
+        self.aspp_modules = DepthwiseSeparableASPPModule(
+            dilations=self.dilations,
+            in_channels=self.in_channels,
+            channels=self.channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        if c1_in_channels > 0:
+            self.c1_bottleneck = ConvModule(
+                c1_in_channels,
+                c1_channels,
+                1,
+                conv_cfg=self.conv_cfg,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg)
+        else:
+            self.c1_bottleneck = None
+        self.sep_bottleneck = nn.Sequential(
+            DepthwiseSeparableConvModule(
+                self.channels + c1_channels,
+                self.channels,
+                3,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg),
+            DepthwiseSeparableConvModule(
+                self.channels,
+                self.channels,
+                3,
+                padding=1,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg))
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        aspp_outs = [
+            resize(
+                self.image_pool(x),
+                size=x.size()[2:],
+                mode='bilinear',
+                align_corners=self.align_corners)
+        ]
+        aspp_outs.extend(self.aspp_modules(x))
+        aspp_outs = torch.cat(aspp_outs, dim=1)
+        output = self.bottleneck(aspp_outs)
+        if self.c1_bottleneck is not None:
+            c1_output = self.c1_bottleneck(inputs[0])
+            output = resize(
+                input=output,
+                size=c1_output.shape[2:],
+                mode='bilinear',
+                align_corners=self.align_corners)
+            output = torch.cat([output, c1_output], dim=1)
+        output = self.sep_bottleneck(output)
+        output = self.cls_seg(output)
+        return output
diff --git a/mmseg/models/decode_heads/sep_fcn_head.py b/mmseg/models/decode_heads/sep_fcn_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b15983bceaeff48534bbceedfdf1c434a8d1d1f
--- /dev/null
+++ b/mmseg/models/decode_heads/sep_fcn_head.py
@@ -0,0 +1,60 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.cnn import DepthwiseSeparableConvModule
+
+from mmseg.registry import MODELS
+from .fcn_head import FCNHead
+
+
+@MODELS.register_module()
+class DepthwiseSeparableFCNHead(FCNHead):
+    """Depthwise-Separable Fully Convolutional Network for Semantic
+    Segmentation.
+
+    This head is implemented according to `Fast-SCNN: Fast Semantic
+    Segmentation Network <https://arxiv.org/abs/1902.04502>`_.
+
+    Args:
+        in_channels (int): Number of output channels of FFM.
+        channels (int): Number of middle-stage channels in the decode head.
+        concat_input (bool): Whether to concatenate original decode input
+            into the result of several consecutive convolution layers.
+            Default: True.
+        num_classes (int): Used to determine the dimension of the
+            final prediction tensor.
+        in_index (int): Corresponds with 'out_indices' in FastSCNN backbone.
+        norm_cfg (dict | None): Config of norm layers.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        loss_decode (dict): Config of loss type and some
+            relevant additional options.
+        dw_act_cfg (dict): Activation config of depthwise ConvModule. If it
+            is 'default', it will be the same as `act_cfg`. Default: None.
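+
+    Example (editor's illustrative sketch; channel sizes are assumed):
+        >>> import torch
+        >>> head = DepthwiseSeparableFCNHead(
+        ...     in_channels=128, channels=128, num_classes=19)
+        >>> head([torch.rand(1, 128, 32, 64)]).shape
+        torch.Size([1, 19, 32, 64])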
+ """ + + def __init__(self, dw_act_cfg=None, **kwargs): + super().__init__(**kwargs) + self.convs[0] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg, + dw_act_cfg=dw_act_cfg) + + for i in range(1, self.num_convs): + self.convs[i] = DepthwiseSeparableConvModule( + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg, + dw_act_cfg=dw_act_cfg) + + if self.concat_input: + self.conv_cat = DepthwiseSeparableConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg, + dw_act_cfg=dw_act_cfg) diff --git a/mmseg/models/decode_heads/setr_mla_head.py b/mmseg/models/decode_heads/setr_mla_head.py new file mode 100644 index 0000000000000000000000000000000000000000..1975991a60cc720650b880060efe10753f213131 --- /dev/null +++ b/mmseg/models/decode_heads/setr_mla_head.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import Upsample +from .decode_head import BaseDecodeHead + + +@MODELS.register_module() +class SETRMLAHead(BaseDecodeHead): + """Multi level feature aggretation head of SETR. + + MLA head of `SETR `_. + + Args: + mlahead_channels (int): Channels of conv-conv-4x of multi-level feature + aggregation. Default: 128. + up_scale (int): The scale factor of interpolate. Default:4. + """ + + def __init__(self, mla_channels=128, up_scale=4, **kwargs): + super().__init__(input_transform='multiple_select', **kwargs) + self.mla_channels = mla_channels + + num_inputs = len(self.in_channels) + + # Refer to self.cls_seg settings of BaseDecodeHead + assert self.channels == num_inputs * mla_channels + + self.up_convs = nn.ModuleList() + for i in range(num_inputs): + self.up_convs.append( + nn.Sequential( + ConvModule( + in_channels=self.in_channels[i], + out_channels=mla_channels, + kernel_size=3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + in_channels=mla_channels, + out_channels=mla_channels, + kernel_size=3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + Upsample( + scale_factor=up_scale, + mode='bilinear', + align_corners=self.align_corners))) + + def forward(self, inputs): + inputs = self._transform_inputs(inputs) + outs = [] + for x, up_conv in zip(inputs, self.up_convs): + outs.append(up_conv(x)) + out = torch.cat(outs, dim=1) + out = self.cls_seg(out) + return out diff --git a/mmseg/models/decode_heads/setr_up_head.py b/mmseg/models/decode_heads/setr_up_head.py new file mode 100644 index 0000000000000000000000000000000000000000..9c796d8161088c2d7effe17f5ba71e43ff62e50c --- /dev/null +++ b/mmseg/models/decode_heads/setr_up_head.py @@ -0,0 +1,81 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from mmcv.cnn import ConvModule, build_norm_layer + +from mmseg.registry import MODELS +from ..utils import Upsample +from .decode_head import BaseDecodeHead + + +@MODELS.register_module() +class SETRUPHead(BaseDecodeHead): + """Naive upsampling head and Progressive upsampling head of SETR. + + Naive or PUP head of `SETR `_. + + Args: + norm_layer (dict): Config dict for input normalization. + Default: norm_layer=dict(type='LN', eps=1e-6, requires_grad=True). + num_convs (int): Number of decoder convolutions. Default: 1. 
+ up_scale (int): The scale factor of interpolate. Default:4. + kernel_size (int): The kernel size of convolution when decoding + feature information from backbone. Default: 3. + init_cfg (dict | list[dict] | None): Initialization config dict. + Default: dict( + type='Constant', val=1.0, bias=0, layer='LayerNorm'). + """ + + def __init__(self, + norm_layer=dict(type='LN', eps=1e-6, requires_grad=True), + num_convs=1, + up_scale=4, + kernel_size=3, + init_cfg=[ + dict(type='Constant', val=1.0, bias=0, layer='LayerNorm'), + dict( + type='Normal', + std=0.01, + override=dict(name='conv_seg')) + ], + **kwargs): + + assert kernel_size in [1, 3], 'kernel_size must be 1 or 3.' + + super().__init__(init_cfg=init_cfg, **kwargs) + + assert isinstance(self.in_channels, int) + + _, self.norm = build_norm_layer(norm_layer, self.in_channels) + + self.up_convs = nn.ModuleList() + in_channels = self.in_channels + out_channels = self.channels + for _ in range(num_convs): + self.up_convs.append( + nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=1, + padding=int(kernel_size - 1) // 2, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + Upsample( + scale_factor=up_scale, + mode='bilinear', + align_corners=self.align_corners))) + in_channels = out_channels + + def forward(self, x): + x = self._transform_inputs(x) + + n, c, h, w = x.shape + x = x.reshape(n, c, h * w).transpose(2, 1).contiguous() + x = self.norm(x) + x = x.transpose(1, 2).reshape(n, c, h, w).contiguous() + + for up_conv in self.up_convs: + x = up_conv(x) + out = self.cls_seg(x) + return out diff --git a/mmseg/models/decode_heads/stdc_head.py b/mmseg/models/decode_heads/stdc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..1c1c21e3083fcb5098d2458e44538c0cf5b8f0e4 --- /dev/null +++ b/mmseg/models/decode_heads/stdc_head.py @@ -0,0 +1,97 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn.functional as F +from mmengine.structures import PixelData +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.structures import SegDataSample +from mmseg.utils import SampleList +from .fcn_head import FCNHead + + +@MODELS.register_module() +class STDCHead(FCNHead): + """This head is the implementation of `Rethinking BiSeNet For Real-time + Semantic Segmentation `_. + + Args: + boundary_threshold (float): The threshold of calculating boundary. + Default: 0.1. + """ + + def __init__(self, boundary_threshold=0.1, **kwargs): + super().__init__(**kwargs) + self.boundary_threshold = boundary_threshold + # Using register buffer to make laplacian kernel on the same + # device of `seg_label`. + self.register_buffer( + 'laplacian_kernel', + torch.tensor([-1, -1, -1, -1, 8, -1, -1, -1, -1], + dtype=torch.float32, + requires_grad=False).reshape((1, 1, 3, 3))) + self.fusion_kernel = torch.nn.Parameter( + torch.tensor([[6. / 10], [3. / 10], [1. / 10]], + dtype=torch.float32).reshape(1, 3, 1, 1), + requires_grad=False) + + def loss_by_feat(self, seg_logits: Tensor, + batch_data_samples: SampleList) -> dict: + """Compute Detail Aggregation Loss.""" + # Note: The paper claims `fusion_kernel` is a trainable 1x1 conv + # parameters. However, it is a constant in original repo and other + # codebase because it would not be added into computation graph + # after threshold operation. 
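+        # (Editor's summary of the steps below) The Laplacian kernel first
+        # extracts edges of `seg_label` at strides 1, 2 and 4; each edge map
+        # is binarized with `boundary_threshold`, the strided maps are
+        # upsampled back to full resolution, and the fixed `fusion_kernel`
+        # (weights 0.6 / 0.3 / 0.1) collapses the three maps into a single
+        # binary boundary target supervised via the parent FCNHead loss.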
+ seg_label = self._stack_batch_gt(batch_data_samples).to( + self.laplacian_kernel) + boundary_targets = F.conv2d( + seg_label, self.laplacian_kernel, padding=1) + boundary_targets = boundary_targets.clamp(min=0) + boundary_targets[boundary_targets > self.boundary_threshold] = 1 + boundary_targets[boundary_targets <= self.boundary_threshold] = 0 + + boundary_targets_x2 = F.conv2d( + seg_label, self.laplacian_kernel, stride=2, padding=1) + boundary_targets_x2 = boundary_targets_x2.clamp(min=0) + + boundary_targets_x4 = F.conv2d( + seg_label, self.laplacian_kernel, stride=4, padding=1) + boundary_targets_x4 = boundary_targets_x4.clamp(min=0) + + boundary_targets_x4_up = F.interpolate( + boundary_targets_x4, boundary_targets.shape[2:], mode='nearest') + boundary_targets_x2_up = F.interpolate( + boundary_targets_x2, boundary_targets.shape[2:], mode='nearest') + + boundary_targets_x2_up[ + boundary_targets_x2_up > self.boundary_threshold] = 1 + boundary_targets_x2_up[ + boundary_targets_x2_up <= self.boundary_threshold] = 0 + + boundary_targets_x4_up[ + boundary_targets_x4_up > self.boundary_threshold] = 1 + boundary_targets_x4_up[ + boundary_targets_x4_up <= self.boundary_threshold] = 0 + + boundary_targets_pyramids = torch.stack( + (boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), + dim=1) + + boundary_targets_pyramids = boundary_targets_pyramids.squeeze(2) + boudary_targets_pyramid = F.conv2d(boundary_targets_pyramids, + self.fusion_kernel) + + boudary_targets_pyramid[ + boudary_targets_pyramid > self.boundary_threshold] = 1 + boudary_targets_pyramid[ + boudary_targets_pyramid <= self.boundary_threshold] = 0 + + seg_labels = boudary_targets_pyramid.long() + batch_sample_list = [] + for label in seg_labels: + seg_data_sample = SegDataSample() + seg_data_sample.gt_sem_seg = PixelData(data=label) + batch_sample_list.append(seg_data_sample) + + loss = super().loss_by_feat(seg_logits, batch_sample_list) + return loss diff --git a/mmseg/models/decode_heads/uper_head.py b/mmseg/models/decode_heads/uper_head.py new file mode 100644 index 0000000000000000000000000000000000000000..b1ccc3173c0f1193e89ad48861aa7b5ee3b329cc --- /dev/null +++ b/mmseg/models/decode_heads/uper_head.py @@ -0,0 +1,139 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import resize +from .decode_head import BaseDecodeHead +from .psp_head import PPM + + +@MODELS.register_module() +class UPerHead(BaseDecodeHead): + """Unified Perceptual Parsing for Scene Understanding. + + This head is the implementation of `UPerNet + `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module applied on the last feature. Default: (1, 2, 3, 6). 
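+
+    Example (editor's illustrative sketch; a ResNet-style pyramid with the
+    channel counts below is assumed):
+        >>> import torch
+        >>> head = UPerHead(in_channels=[256, 512, 1024, 2048],
+        ...                 in_index=[0, 1, 2, 3], channels=512,
+        ...                 num_classes=19)
+        >>> feats = [torch.rand(1, c, 64 // 2**i, 64 // 2**i)
+        ...          for i, c in enumerate([256, 512, 1024, 2048])]
+        >>> head(feats).shape
+        torch.Size([1, 19, 64, 64])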
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super().__init__(input_transform='multiple_select', **kwargs) + # PSP Module + self.psp_modules = PPM( + pool_scales, + self.in_channels[-1], + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels[-1] + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + # FPN Module + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the top layer + l_conv = ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + fpn_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + self.fpn_bottleneck = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def psp_forward(self, inputs): + """Forward function of PSP module.""" + x = inputs[-1] + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + + return output + + def _forward_feature(self, inputs): + """Forward function for feature maps before classifying each pixel with + ``self.cls_seg`` fc. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + feats (Tensor): A tensor of shape (batch_size, self.channels, + H, W) which is feature map for last layer of decoder head. + """ + inputs = self._transform_inputs(inputs) + + # build laterals + laterals = [ + lateral_conv(inputs[i]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + laterals.append(self.psp_forward(inputs)) + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] = laterals[i - 1] + resize( + laterals[i], + size=prev_shape, + mode='bilinear', + align_corners=self.align_corners) + + # build outputs + fpn_outs = [ + self.fpn_convs[i](laterals[i]) + for i in range(used_backbone_levels - 1) + ] + # append psp feature + fpn_outs.append(laterals[-1]) + + for i in range(used_backbone_levels - 1, 0, -1): + fpn_outs[i] = resize( + fpn_outs[i], + size=fpn_outs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) + fpn_outs = torch.cat(fpn_outs, dim=1) + feats = self.fpn_bottleneck(fpn_outs) + return feats + + def forward(self, inputs): + """Forward function.""" + output = self._forward_feature(inputs) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/vpd_depth_head.py b/mmseg/models/decode_heads/vpd_depth_head.py new file mode 100644 index 0000000000000000000000000000000000000000..0c54c2da1b1e62b213f794a7d4e49cd3d753ca36 --- /dev/null +++ b/mmseg/models/decode_heads/vpd_depth_head.py @@ -0,0 +1,254 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Dict, List, Optional, Sequence, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer +from mmengine.model import BaseModule +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.utils import SampleList +from ..builder import build_loss +from ..utils import resize +from .decode_head import BaseDecodeHead + + +class VPDDepthDecoder(BaseModule): + """VPD Depth Decoder class. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + num_deconv_layers (int): Number of deconvolution layers. + num_deconv_filters (List[int]): List of output channels for + deconvolution layers. + init_cfg (Optional[Union[Dict, List[Dict]]], optional): Configuration + for weight initialization. Defaults to Normal for Conv2d and + ConvTranspose2d layers. + """ + + def __init__(self, + in_channels: int, + out_channels: int, + num_deconv_layers: int, + num_deconv_filters: List[int], + init_cfg: Optional[Union[Dict, List[Dict]]] = dict( + type='Normal', + std=0.001, + layer=['Conv2d', 'ConvTranspose2d'])): + super().__init__(init_cfg=init_cfg) + self.in_channels = in_channels + + self.deconv_layers = self._make_deconv_layer( + num_deconv_layers, + num_deconv_filters, + ) + + conv_layers = [] + conv_layers.append( + build_conv_layer( + dict(type='Conv2d'), + in_channels=num_deconv_filters[-1], + out_channels=out_channels, + kernel_size=3, + stride=1, + padding=1)) + conv_layers.append(build_norm_layer(dict(type='BN'), out_channels)[1]) + conv_layers.append(nn.ReLU(inplace=True)) + self.conv_layers = nn.Sequential(*conv_layers) + + self.up_sample = nn.Upsample( + scale_factor=2, mode='bilinear', align_corners=False) + + def forward(self, x): + """Forward pass through the decoder network.""" + out = self.deconv_layers(x) + out = self.conv_layers(out) + + out = self.up_sample(out) + out = self.up_sample(out) + + return out + + def _make_deconv_layer(self, num_layers, num_deconv_filters): + """Make deconv layers.""" + + layers = [] + in_channels = self.in_channels + for i in range(num_layers): + + num_channels = num_deconv_filters[i] + layers.append( + build_upsample_layer( + dict(type='deconv'), + in_channels=in_channels, + out_channels=num_channels, + kernel_size=2, + stride=2, + padding=0, + output_padding=0, + bias=False)) + layers.append(nn.BatchNorm2d(num_channels)) + layers.append(nn.ReLU(inplace=True)) + in_channels = num_channels + + return nn.Sequential(*layers) + + +@MODELS.register_module() +class VPDDepthHead(BaseDecodeHead): + """Depth Prediction Head for VPD. + + .. _`VPD`: https://arxiv.org/abs/2303.02153 + + Args: + max_depth (float): Maximum depth value. Defaults to 10.0. + in_channels (Sequence[int]): Number of input channels for each + convolutional layer. + embed_dim (int): Dimension of embedding. Defaults to 192. + feature_dim (int): Dimension of aggregated feature. Defaults to 1536. + num_deconv_layers (int): Number of deconvolution layers in the + decoder. Defaults to 3. + num_deconv_filters (Sequence[int]): Number of filters for each deconv + layer. Defaults to (32, 32, 32). + fmap_border (Union[int, Sequence[int]]): Feature map border for + cropping. Defaults to 0. + align_corners (bool): Flag for align_corners in interpolation. + Defaults to False. + loss_decode (dict): Configurations for the loss function. Defaults to + dict(type='SiLogLoss'). + init_cfg (dict): Initialization configurations. 
Defaults to + dict(type='TruncNormal', std=0.02, layer=['Conv2d', 'Linear']). + """ + + num_classes = 1 + out_channels = 1 + input_transform = None + + def __init__( + self, + max_depth: float = 10.0, + in_channels: Sequence[int] = [320, 640, 1280, 1280], + embed_dim: int = 192, + feature_dim: int = 1536, + num_deconv_layers: int = 3, + num_deconv_filters: Sequence[int] = (32, 32, 32), + fmap_border: Union[int, Sequence[int]] = 0, + align_corners: bool = False, + loss_decode: dict = dict(type='SiLogLoss'), + init_cfg=dict( + type='TruncNormal', std=0.02, layer=['Conv2d', 'Linear']), + ): + + super(BaseDecodeHead, self).__init__(init_cfg=init_cfg) + + # initialize parameters + self.in_channels = in_channels + self.max_depth = max_depth + self.align_corners = align_corners + + # feature map border + if isinstance(fmap_border, int): + fmap_border = (fmap_border, fmap_border) + self.fmap_border = fmap_border + + # define network layers + self.conv1 = nn.Sequential( + nn.Conv2d(in_channels[0], in_channels[0], 3, stride=2, padding=1), + nn.GroupNorm(16, in_channels[0]), + nn.ReLU(), + nn.Conv2d(in_channels[0], in_channels[0], 3, stride=2, padding=1), + ) + self.conv2 = nn.Conv2d( + in_channels[1], in_channels[1], 3, stride=2, padding=1) + + self.conv_aggregation = nn.Sequential( + nn.Conv2d(sum(in_channels), feature_dim, 1), + nn.GroupNorm(16, feature_dim), + nn.ReLU(), + ) + + self.decoder = VPDDepthDecoder( + in_channels=embed_dim * 8, + out_channels=embed_dim, + num_deconv_layers=num_deconv_layers, + num_deconv_filters=num_deconv_filters) + + self.depth_pred_layer = nn.Sequential( + nn.Conv2d( + embed_dim, embed_dim, kernel_size=3, stride=1, padding=1), + nn.ReLU(inplace=False), + nn.Conv2d(embed_dim, 1, kernel_size=3, stride=1, padding=1)) + + # build loss + if isinstance(loss_decode, dict): + self.loss_decode = build_loss(loss_decode) + elif isinstance(loss_decode, (list, tuple)): + self.loss_decode = nn.ModuleList() + for loss in loss_decode: + self.loss_decode.append(build_loss(loss)) + else: + raise TypeError(f'loss_decode must be a dict or sequence of dict,\ + but got {type(loss_decode)}') + + def _stack_batch_gt(self, batch_data_samples: SampleList) -> Tensor: + gt_depth_maps = [ + data_sample.gt_depth_map.data for data_sample in batch_data_samples + ] + return torch.stack(gt_depth_maps, dim=0) + + def forward(self, x): + x = [ + x[0], x[1], + torch.cat([x[2], F.interpolate(x[3], scale_factor=2)], dim=1) + ] + x = torch.cat([self.conv1(x[0]), self.conv2(x[1]), x[2]], dim=1) + x = self.conv_aggregation(x) + + x = x[:, :, :x.size(2) - self.fmap_border[0], :x.size(3) - + self.fmap_border[1]].contiguous() + x = self.decoder(x) + out = self.depth_pred_layer(x) + + depth = torch.sigmoid(out) * self.max_depth + + return depth + + def loss_by_feat(self, pred_depth_map: Tensor, + batch_data_samples: SampleList) -> dict: + """Compute depth estimation loss. + + Args: + pred_depth_map (Tensor): The output from decode head forward + function. + batch_data_samples (List[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `metainfo` and `gt_dpeth_map`. 
+ + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + + gt_depth_map = self._stack_batch_gt(batch_data_samples) + loss = dict() + pred_depth_map = resize( + input=pred_depth_map, + size=gt_depth_map.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + + if not isinstance(self.loss_decode, nn.ModuleList): + losses_decode = [self.loss_decode] + else: + losses_decode = self.loss_decode + for loss_decode in losses_decode: + if loss_decode.loss_name not in loss: + loss[loss_decode.loss_name] = loss_decode( + pred_depth_map, gt_depth_map) + else: + loss[loss_decode.loss_name] += loss_decode( + pred_depth_map, gt_depth_map) + + return loss diff --git a/mmseg/models/losses/__init__.py b/mmseg/models/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0467cb3ad89b8c0c57f7f8eb58cbc2e23f50cdb4 --- /dev/null +++ b/mmseg/models/losses/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .accuracy import Accuracy, accuracy +from .boundary_loss import BoundaryLoss +from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, + cross_entropy, mask_cross_entropy) +from .dice_loss import DiceLoss +from .focal_loss import FocalLoss +from .huasdorff_distance_loss import HuasdorffDisstanceLoss +from .lovasz_loss import LovaszLoss +from .ohem_cross_entropy_loss import OhemCrossEntropy +from .silog_loss import SiLogLoss +from .tversky_loss import TverskyLoss +from .utils import reduce_loss, weight_reduce_loss, weighted_loss + +__all__ = [ + 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', + 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', + 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss', + 'FocalLoss', 'TverskyLoss', 'OhemCrossEntropy', 'BoundaryLoss', + 'HuasdorffDisstanceLoss', 'SiLogLoss' +] diff --git a/mmseg/models/losses/__pycache__/__init__.cpython-39.pyc b/mmseg/models/losses/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21e51e0321b875905d26e14feda6c60653aaec8a Binary files /dev/null and b/mmseg/models/losses/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/accuracy.cpython-39.pyc b/mmseg/models/losses/__pycache__/accuracy.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b8511f21a1df9c8654b2834ab540f350d8eb45f Binary files /dev/null and b/mmseg/models/losses/__pycache__/accuracy.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/boundary_loss.cpython-39.pyc b/mmseg/models/losses/__pycache__/boundary_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba9fe2fb0f940b9914650d07c595662ba00f1bec Binary files /dev/null and b/mmseg/models/losses/__pycache__/boundary_loss.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/cross_entropy_loss.cpython-39.pyc b/mmseg/models/losses/__pycache__/cross_entropy_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9cf7c5f07e2eae8e020974bb1c0f7f40920a8f67 Binary files /dev/null and b/mmseg/models/losses/__pycache__/cross_entropy_loss.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/dice_loss.cpython-39.pyc b/mmseg/models/losses/__pycache__/dice_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92e9c6b402a069c9f7a72c64bb57431d6d2c129a Binary files /dev/null and b/mmseg/models/losses/__pycache__/dice_loss.cpython-39.pyc differ diff 
--git a/mmseg/models/losses/__pycache__/focal_loss.cpython-39.pyc b/mmseg/models/losses/__pycache__/focal_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01893a0ef8efc04756d0a50b2794755e6febbf38 Binary files /dev/null and b/mmseg/models/losses/__pycache__/focal_loss.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/huasdorff_distance_loss.cpython-39.pyc b/mmseg/models/losses/__pycache__/huasdorff_distance_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75eb912451af8bde0d808b3213fbc6151d646338 Binary files /dev/null and b/mmseg/models/losses/__pycache__/huasdorff_distance_loss.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/lovasz_loss.cpython-39.pyc b/mmseg/models/losses/__pycache__/lovasz_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ee9e6303abe27ab377978c20e4b3b558be0822a Binary files /dev/null and b/mmseg/models/losses/__pycache__/lovasz_loss.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/ohem_cross_entropy_loss.cpython-39.pyc b/mmseg/models/losses/__pycache__/ohem_cross_entropy_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d53445fde81f0985b6f7bdbcd6897b63ed2b9cb Binary files /dev/null and b/mmseg/models/losses/__pycache__/ohem_cross_entropy_loss.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/silog_loss.cpython-39.pyc b/mmseg/models/losses/__pycache__/silog_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..67d67aebfae0d89b7936eb53a50da15d9dce8d79 Binary files /dev/null and b/mmseg/models/losses/__pycache__/silog_loss.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/tversky_loss.cpython-39.pyc b/mmseg/models/losses/__pycache__/tversky_loss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc50925b76bc73717447b053f70ae23808a211b6 Binary files /dev/null and b/mmseg/models/losses/__pycache__/tversky_loss.cpython-39.pyc differ diff --git a/mmseg/models/losses/__pycache__/utils.cpython-39.pyc b/mmseg/models/losses/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..818eb0cc8d745eeaab55efa3066697b933509da7 Binary files /dev/null and b/mmseg/models/losses/__pycache__/utils.cpython-39.pyc differ diff --git a/mmseg/models/losses/accuracy.py b/mmseg/models/losses/accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..1d9e2d7701088adadd5b6bb71c718c986b87a066 --- /dev/null +++ b/mmseg/models/losses/accuracy.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +def accuracy(pred, target, topk=1, thresh=None, ignore_index=None): + """Calculate accuracy according to the prediction and target. + + Args: + pred (torch.Tensor): The model prediction, shape (N, num_class, ...) + target (torch.Tensor): The target of each prediction, shape (N, , ...) + ignore_index (int | None): The label index to be ignored. Default: None + topk (int | tuple[int], optional): If the predictions in ``topk`` + matches the target, the predictions will be regarded as + correct ones. Defaults to 1. + thresh (float, optional): If not None, predictions with scores under + this threshold are considered incorrect. Default to None. + + Returns: + float | tuple[float]: If the input ``topk`` is a single integer, + the function will return a single float as accuracy. 
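+
+    Example (editor's illustrative sketch):
+        >>> import torch
+        >>> pred = torch.rand(2, 19, 8, 8)            # (N, num_class, H, W)
+        >>> target = torch.randint(0, 19, (2, 8, 8))  # (N, H, W)
+        >>> top1 = accuracy(pred, target, topk=1)     # single float tensor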
If + ``topk`` is a tuple containing multiple integers, the + function will return a tuple containing accuracies of + each ``topk`` number. + """ + assert isinstance(topk, (int, tuple)) + if isinstance(topk, int): + topk = (topk, ) + return_single = True + else: + return_single = False + + maxk = max(topk) + if pred.size(0) == 0: + accu = [pred.new_tensor(0.) for i in range(len(topk))] + return accu[0] if return_single else accu + assert pred.ndim == target.ndim + 1 + assert pred.size(0) == target.size(0) + assert maxk <= pred.size(1), \ + f'maxk {maxk} exceeds pred dimension {pred.size(1)}' + pred_value, pred_label = pred.topk(maxk, dim=1) + # transpose to shape (maxk, N, ...) + pred_label = pred_label.transpose(0, 1) + correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) + if thresh is not None: + # Only prediction values larger than thresh are counted as correct + correct = correct & (pred_value > thresh).t() + if ignore_index is not None: + correct = correct[:, target != ignore_index] + res = [] + eps = torch.finfo(torch.float32).eps + for k in topk: + # Avoid causing ZeroDivisionError when all pixels + # of an image are ignored + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + eps + if ignore_index is not None: + total_num = target[target != ignore_index].numel() + eps + else: + total_num = target.numel() + eps + res.append(correct_k.mul_(100.0 / total_num)) + return res[0] if return_single else res + + +class Accuracy(nn.Module): + """Accuracy calculation module.""" + + def __init__(self, topk=(1, ), thresh=None, ignore_index=None): + """Module to calculate the accuracy. + + Args: + topk (tuple, optional): The criterion used to calculate the + accuracy. Defaults to (1,). + thresh (float, optional): If not None, predictions with scores + under this threshold are considered incorrect. Default to None. + """ + super().__init__() + self.topk = topk + self.thresh = thresh + self.ignore_index = ignore_index + + def forward(self, pred, target): + """Forward function to calculate accuracy. + + Args: + pred (torch.Tensor): Prediction of models. + target (torch.Tensor): Target for each prediction. + + Returns: + tuple[float]: The accuracies under different topk criterions. + """ + return accuracy(pred, target, self.topk, self.thresh, + self.ignore_index) diff --git a/mmseg/models/losses/boundary_loss.py b/mmseg/models/losses/boundary_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..e86b850d87e1d26be8cbb700758dae8dead82c58 --- /dev/null +++ b/mmseg/models/losses/boundary_loss.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from mmseg.registry import MODELS + + +@MODELS.register_module() +class BoundaryLoss(nn.Module): + """Boundary loss. + + This function is modified from + `PIDNet `_. # noqa + Licensed under the MIT License. + + + Args: + loss_weight (float): Weight of the loss. Defaults to 1.0. + loss_name (str): Name of the loss item. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_boundary'. + """ + + def __init__(self, + loss_weight: float = 1.0, + loss_name: str = 'loss_boundary'): + super().__init__() + self.loss_weight = loss_weight + self.loss_name_ = loss_name + + def forward(self, bd_pre: Tensor, bd_gt: Tensor) -> Tensor: + """Forward function. + Args: + bd_pre (Tensor): Predictions of the boundary head. 
+ bd_gt (Tensor): Ground truth of the boundary. + + Returns: + Tensor: Loss tensor. + """ + log_p = bd_pre.permute(0, 2, 3, 1).contiguous().view(1, -1) + target_t = bd_gt.view(1, -1).float() + + pos_index = (target_t == 1) + neg_index = (target_t == 0) + + weight = torch.zeros_like(log_p) + pos_num = pos_index.sum() + neg_num = neg_index.sum() + sum_num = pos_num + neg_num + weight[pos_index] = neg_num * 1.0 / sum_num + weight[neg_index] = pos_num * 1.0 / sum_num + + loss = F.binary_cross_entropy_with_logits( + log_p, target_t, weight, reduction='mean') + + return self.loss_weight * loss + + @property + def loss_name(self): + return self.loss_name_ diff --git a/mmseg/models/losses/cross_entropy_loss.py b/mmseg/models/losses/cross_entropy_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..988fb789c11eca9d002b2c02f227450d704aeaef --- /dev/null +++ b/mmseg/models/losses/cross_entropy_loss.py @@ -0,0 +1,311 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from mmseg.registry import MODELS +from .utils import get_class_weight, weight_reduce_loss + + +def cross_entropy(pred, + label, + weight=None, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=-100, + avg_non_ignore=False): + """cross_entropy. The wrapper function for :func:`F.cross_entropy` + + Args: + pred (torch.Tensor): The prediction with shape (N, 1). + label (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): Sample-wise loss weight. + Default: None. + class_weight (list[float], optional): The weight for each class. + Default: None. + reduction (str, optional): The method used to reduce the loss. + Options are 'none', 'mean' and 'sum'. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. Default: None. + ignore_index (int): Specifies a target value that is ignored and + does not contribute to the input gradients. When + ``avg_non_ignore `` is ``True``, and the ``reduction`` is + ``''mean''``, the loss is averaged over non-ignored targets. + Defaults: -100. + avg_non_ignore (bool): The flag decides to whether the loss is + only averaged over non-ignored targets. Default: False. + `New in version 0.23.0.` + """ + + # class_weight is a manual rescaling weight given to each class. 
+ # If given, has to be a Tensor of size C element-wise losses + loss = F.cross_entropy( + pred, + label, + weight=class_weight, + reduction='none', + ignore_index=ignore_index) + + # apply weights and do the reduction + # average loss over non-ignored elements + # pytorch's official cross_entropy average loss over non-ignored elements + # refer to https://github.com/pytorch/pytorch/blob/56b43f4fec1f76953f15a627694d4bba34588969/torch/nn/functional.py#L2660 # noqa + if (avg_factor is None) and reduction == 'mean': + if class_weight is None: + if avg_non_ignore: + avg_factor = label.numel() - (label + == ignore_index).sum().item() + else: + avg_factor = label.numel() + + else: + # the average factor should take the class weights into account + label_weights = torch.stack([class_weight[cls] for cls in label + ]).to(device=class_weight.device) + + if avg_non_ignore: + label_weights[label == ignore_index] = 0 + avg_factor = label_weights.sum() + + if weight is not None: + weight = weight.float() + loss = weight_reduce_loss( + loss, weight=weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def _expand_onehot_labels(labels, label_weights, target_shape, ignore_index): + """Expand onehot labels to match the size of prediction.""" + bin_labels = labels.new_zeros(target_shape) + valid_mask = (labels >= 0) & (labels != ignore_index) + inds = torch.nonzero(valid_mask, as_tuple=True) + + if inds[0].numel() > 0: + if labels.dim() == 3: + bin_labels[inds[0], labels[valid_mask], inds[1], inds[2]] = 1 + else: + bin_labels[inds[0], labels[valid_mask]] = 1 + + valid_mask = valid_mask.unsqueeze(1).expand(target_shape).float() + + if label_weights is None: + bin_label_weights = valid_mask + else: + bin_label_weights = label_weights.unsqueeze(1).expand(target_shape) + bin_label_weights = bin_label_weights * valid_mask + + return bin_labels, bin_label_weights, valid_mask + + +def binary_cross_entropy(pred, + label, + weight=None, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=-100, + avg_non_ignore=False, + **kwargs): + """Calculate the binary CrossEntropy loss. + + Args: + pred (torch.Tensor): The prediction with shape (N, 1). + label (torch.Tensor): The learning label of the prediction. + Note: In bce loss, label < 0 is invalid. + weight (torch.Tensor, optional): Sample-wise loss weight. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (int): The label index to be ignored. Default: -100. + avg_non_ignore (bool): The flag decides to whether the loss is + only averaged over non-ignored targets. Default: False. + `New in version 0.23.0.` + + Returns: + torch.Tensor: The calculated loss + """ + if pred.size(1) == 1: + # For binary class segmentation, the shape of pred is + # [N, 1, H, W] and that of label is [N, H, W]. 
+ # As the ignore_index often set as 255, so the + # binary class label check should mask out + # ignore_index + assert label[label != ignore_index].max() <= 1, \ + 'For pred with shape [N, 1, H, W], its label must have at ' \ + 'most 2 classes' + pred = pred.squeeze(1) + if pred.dim() != label.dim(): + assert (pred.dim() == 2 and label.dim() == 1) or ( + pred.dim() == 4 and label.dim() == 3), \ + 'Only pred shape [N, C], label shape [N] or pred shape [N, C, ' \ + 'H, W], label shape [N, H, W] are supported' + # `weight` returned from `_expand_onehot_labels` + # has been treated for valid (non-ignore) pixels + label, weight, valid_mask = _expand_onehot_labels( + label, weight, pred.shape, ignore_index) + else: + # should mask out the ignored elements + valid_mask = ((label >= 0) & (label != ignore_index)).float() + if weight is not None: + weight = weight * valid_mask + else: + weight = valid_mask + # average loss over non-ignored and valid elements + if reduction == 'mean' and avg_factor is None and avg_non_ignore: + avg_factor = valid_mask.sum().item() + + loss = F.binary_cross_entropy_with_logits( + pred, label.float(), pos_weight=class_weight, reduction='none') + # do the reduction for the weighted loss + loss = weight_reduce_loss( + loss, weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def mask_cross_entropy(pred, + target, + label, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=None, + **kwargs): + """Calculate the CrossEntropy loss for masks. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the number + of classes. + target (torch.Tensor): The learning label of the prediction. + label (torch.Tensor): ``label`` indicates the class label of the mask' + corresponding object. This will be used to select the mask in the + of the class which the object belongs to when the mask prediction + if not class-agnostic. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (None): Placeholder, to be consistent with other loss. + Default: None. + + Returns: + torch.Tensor: The calculated loss + """ + assert ignore_index is None, 'BCE loss does not support ignore_index' + # TODO: handle these two reserved arguments + assert reduction == 'mean' and avg_factor is None + num_rois = pred.size()[0] + inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) + pred_slice = pred[inds, label].squeeze(1) + return F.binary_cross_entropy_with_logits( + pred_slice, target, weight=class_weight, reduction='mean')[None] + + +@MODELS.register_module() +class CrossEntropyLoss(nn.Module): + """CrossEntropyLoss. + + Args: + use_sigmoid (bool, optional): Whether the prediction uses sigmoid + of softmax. Defaults to False. + use_mask (bool, optional): Whether to use mask cross entropy loss. + Defaults to False. + reduction (str, optional): . Defaults to 'mean'. + Options are "none", "mean" and "sum". + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. + loss_name (str, optional): Name of the loss item. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_ce'. 
+ avg_non_ignore (bool): The flag decides to whether the loss is + only averaged over non-ignored targets. Default: False. + `New in version 0.23.0.` + """ + + def __init__(self, + use_sigmoid=False, + use_mask=False, + reduction='mean', + class_weight=None, + loss_weight=1.0, + loss_name='loss_ce', + avg_non_ignore=False): + super().__init__() + assert (use_sigmoid is False) or (use_mask is False) + self.use_sigmoid = use_sigmoid + self.use_mask = use_mask + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + self.avg_non_ignore = avg_non_ignore + if not self.avg_non_ignore and self.reduction == 'mean': + warnings.warn( + 'Default ``avg_non_ignore`` is False, if you would like to ' + 'ignore the certain label and average loss over non-ignore ' + 'labels, which is the same with PyTorch official ' + 'cross_entropy, set ``avg_non_ignore=True``.') + + if self.use_sigmoid: + self.cls_criterion = binary_cross_entropy + elif self.use_mask: + self.cls_criterion = mask_cross_entropy + else: + self.cls_criterion = cross_entropy + self._loss_name = loss_name + + def extra_repr(self): + """Extra repr.""" + s = f'avg_non_ignore={self.avg_non_ignore}' + return s + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + ignore_index=-100, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + # Note: for BCE loss, label < 0 is invalid. + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + weight, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + avg_non_ignore=self.avg_non_ignore, + ignore_index=ignore_index, + **kwargs) + return loss_cls + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/mmseg/models/losses/dice_loss.py b/mmseg/models/losses/dice_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..fb2ffdba8daf867032b6d7b4e0d70a9b7a0c50fe --- /dev/null +++ b/mmseg/models/losses/dice_loss.py @@ -0,0 +1,202 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Union + +import torch +import torch.nn as nn + +from mmseg.registry import MODELS +from .utils import weight_reduce_loss + + +def _expand_onehot_labels_dice(pred: torch.Tensor, + target: torch.Tensor) -> torch.Tensor: + """Expand onehot labels to match the size of prediction. + + Args: + pred (torch.Tensor): The prediction, has a shape (N, num_class, H, W). + target (torch.Tensor): The learning label of the prediction, + has a shape (N, H, W). + + Returns: + torch.Tensor: The target after one-hot encoding, + has a shape (N, num_class, H, W). 
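+
+ Note: labels equal to or greater than ``num_classes`` (e.g. an
+ ignore index of 255) are clamped to ``num_classes`` and then dropped
+ by the ``[..., :num_classes]`` slice, so ignored pixels end up as
+ all-zero rows in the one-hot target.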
+ """ + num_classes = pred.shape[1] + one_hot_target = torch.clamp(target, min=0, max=num_classes) + one_hot_target = torch.nn.functional.one_hot(one_hot_target, + num_classes + 1) + one_hot_target = one_hot_target[..., :num_classes].permute(0, 3, 1, 2) + return one_hot_target + + +def dice_loss(pred: torch.Tensor, + target: torch.Tensor, + weight: Union[torch.Tensor, None], + eps: float = 1e-3, + reduction: Union[str, None] = 'mean', + naive_dice: Union[bool, None] = False, + avg_factor: Union[int, None] = None, + ignore_index: Union[int, None] = 255) -> float: + """Calculate dice loss, there are two forms of dice loss is supported: + + - the one proposed in `V-Net: Fully Convolutional Neural + Networks for Volumetric Medical Image Segmentation + `_. + - the dice loss in which the power of the number in the + denominator is the first power instead of the second + power. + + Args: + pred (torch.Tensor): The prediction, has a shape (n, *) + target (torch.Tensor): The learning label of the prediction, + shape (n, *), same shape of pred. + weight (torch.Tensor, optional): The weight of loss for each + prediction, has a shape (n,). Defaults to None. + eps (float): Avoid dividing by zero. Default: 1e-3. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. + Options are "none", "mean" and "sum". + naive_dice (bool, optional): If false, use the dice + loss defined in the V-Net paper, otherwise, use the + naive dice loss in which the power of the number in the + denominator is the first power instead of the second + power.Defaults to False. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + ignore_index (int, optional): The label index to be ignored. + Defaults to 255. + """ + if ignore_index is not None: + num_classes = pred.shape[1] + pred = pred[:, torch.arange(num_classes) != ignore_index, :, :] + target = target[:, torch.arange(num_classes) != ignore_index, :, :] + assert pred.shape[1] != 0 # if the ignored index is the only class + input = pred.flatten(1) + target = target.flatten(1).float() + a = torch.sum(input * target, 1) + if naive_dice: + b = torch.sum(input, 1) + c = torch.sum(target, 1) + d = (2 * a + eps) / (b + c + eps) + else: + b = torch.sum(input * input, 1) + eps + c = torch.sum(target * target, 1) + eps + d = (2 * a) / (b + c) + + loss = 1 - d + if weight is not None: + assert weight.ndim == loss.ndim + assert len(weight) == len(pred) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + +@MODELS.register_module() +class DiceLoss(nn.Module): + + def __init__(self, + use_sigmoid=True, + activate=True, + reduction='mean', + naive_dice=False, + loss_weight=1.0, + ignore_index=255, + eps=1e-3, + loss_name='loss_dice'): + """Compute dice loss. + + Args: + use_sigmoid (bool, optional): Whether to the prediction is + used for sigmoid or softmax. Defaults to True. + activate (bool): Whether to activate the predictions inside, + this will disable the inside sigmoid operation. + Defaults to True. + reduction (str, optional): The method used + to reduce the loss. Options are "none", + "mean" and "sum". Defaults to 'mean'. + naive_dice (bool, optional): If false, use the dice + loss defined in the V-Net paper, otherwise, use the + naive dice loss in which the power of the number in the + denominator is the first power instead of the second + power. Defaults to False. + loss_weight (float, optional): Weight of loss. Defaults to 1.0. 
+ ignore_index (int, optional): The label index to be ignored. + Default: 255. + eps (float): Avoid dividing by zero. Defaults to 1e-3. + loss_name (str, optional): Name of the loss item. If you want this + loss item to be included into the backward graph, `loss_` must + be the prefix of the name. Defaults to 'loss_dice'. + """ + + super().__init__() + self.use_sigmoid = use_sigmoid + self.reduction = reduction + self.naive_dice = naive_dice + self.loss_weight = loss_weight + self.eps = eps + self.activate = activate + self.ignore_index = ignore_index + self._loss_name = loss_name + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None, + ignore_index=255, + **kwargs): + """Forward function. + + Args: + pred (torch.Tensor): The prediction, has a shape (n, *). + target (torch.Tensor): The label of the prediction, + shape (n, *), same shape of pred. + weight (torch.Tensor, optional): The weight of loss for each + prediction, has a shape (n,). Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Options are "none", "mean" and "sum". + + Returns: + torch.Tensor: The calculated loss + """ + one_hot_target = target + if (pred.shape != target.shape): + one_hot_target = _expand_onehot_labels_dice(pred, target) + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.activate: + if self.use_sigmoid: + pred = pred.sigmoid() + elif pred.shape[1] != 1: + # softmax does not work when there is only 1 class + pred = pred.softmax(dim=1) + loss = self.loss_weight * dice_loss( + pred, + one_hot_target, + weight, + eps=self.eps, + reduction=reduction, + naive_dice=self.naive_dice, + avg_factor=avg_factor, + ignore_index=self.ignore_index) + + return loss + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/mmseg/models/losses/focal_loss.py b/mmseg/models/losses/focal_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..6507ed7a9112993733ac25bc095da0b571e14363 --- /dev/null +++ b/mmseg/models/losses/focal_loss.py @@ -0,0 +1,337 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Modified from https://github.com/open-mmlab/mmdetection +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.ops import sigmoid_focal_loss as _sigmoid_focal_loss + +from mmseg.registry import MODELS +from .utils import weight_reduce_loss + + +# This method is used when cuda is not available +def py_sigmoid_focal_loss(pred, + target, + one_hot_target=None, + weight=None, + gamma=2.0, + alpha=0.5, + class_weight=None, + valid_mask=None, + reduction='mean', + avg_factor=None): + """PyTorch version of `Focal Loss `_. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the + number of classes + target (torch.Tensor): The learning label of the prediction with + shape (N, C) + one_hot_target (None): Placeholder. It should be None. 
+ weight (torch.Tensor, optional): Sample-wise loss weight. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float | list[float], optional): A balanced form for Focal Loss. + Defaults to 0.5. + class_weight (list[float], optional): Weight of each class. + Defaults to None. + valid_mask (torch.Tensor, optional): A mask uses 1 to mark the valid + samples and uses 0 to mark the ignored samples. Default: None. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + """ + if isinstance(alpha, list): + alpha = pred.new_tensor(alpha) + pred_sigmoid = pred.sigmoid() + target = target.type_as(pred) + one_minus_pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) + focal_weight = (alpha * target + (1 - alpha) * + (1 - target)) * one_minus_pt.pow(gamma) + + loss = F.binary_cross_entropy_with_logits( + pred, target, reduction='none') * focal_weight + final_weight = torch.ones(1, pred.size(1)).type_as(loss) + if weight is not None: + if weight.shape != loss.shape and weight.size(0) == loss.size(0): + # For most cases, weight is of shape (N, ), + # which means it does not have the second axis num_class + weight = weight.view(-1, 1) + assert weight.dim() == loss.dim() + final_weight = final_weight * weight + if class_weight is not None: + final_weight = final_weight * pred.new_tensor(class_weight) + if valid_mask is not None: + final_weight = final_weight * valid_mask + loss = weight_reduce_loss(loss, final_weight, reduction, avg_factor) + return loss + + +def sigmoid_focal_loss(pred, + target, + one_hot_target, + weight=None, + gamma=2.0, + alpha=0.5, + class_weight=None, + valid_mask=None, + reduction='mean', + avg_factor=None): + r"""A wrapper of cuda version `Focal Loss + `_. + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the number + of classes. + target (torch.Tensor): The learning label of the prediction. It's shape + should be (N, ) + one_hot_target (torch.Tensor): The learning label with shape (N, C) + weight (torch.Tensor, optional): Sample-wise loss weight. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float | list[float], optional): A balanced form for Focal Loss. + Defaults to 0.5. + class_weight (list[float], optional): Weight of each class. + Defaults to None. + valid_mask (torch.Tensor, optional): A mask uses 1 to mark the valid + samples and uses 0 to mark the ignored samples. Default: None. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + """ + # Function.apply does not accept keyword arguments, so the decorator + # "weighted_loss" is not applicable + final_weight = torch.ones(1, pred.size(1)).type_as(pred) + if isinstance(alpha, list): + # _sigmoid_focal_loss doesn't accept alpha of list type. Therefore, if + # a list is given, we set the input alpha as 0.5. This means setting + # equal weight for foreground class and background class. By + # multiplying the loss by 2, the effect of setting alpha as 0.5 is + # undone. The alpha of type list is used to regulate the loss in the + # post-processing process. 
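+ # Worked example of this rescaling (illustrative values): with
+ # alpha = [0.25, 0.75] and a pixel whose one-hot target for class 1
+ # is 1, the op below applies a factor of 0.5 to that pixel;
+ # multiplying the loss by 2 cancels it, and final_weight then
+ # contributes 0.75 * 1 + 0.25 * 0 = 0.75 for that class entry.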
+ loss = _sigmoid_focal_loss(pred.contiguous(), target.contiguous(), + gamma, 0.5, None, 'none') * 2 + alpha = pred.new_tensor(alpha) + final_weight = final_weight * ( + alpha * one_hot_target + (1 - alpha) * (1 - one_hot_target)) + else: + loss = _sigmoid_focal_loss(pred.contiguous(), target.contiguous(), + gamma, alpha, None, 'none') + if weight is not None: + if weight.shape != loss.shape and weight.size(0) == loss.size(0): + # For most cases, weight is of shape (N, ), + # which means it does not have the second axis num_class + weight = weight.view(-1, 1) + assert weight.dim() == loss.dim() + final_weight = final_weight * weight + if class_weight is not None: + final_weight = final_weight * pred.new_tensor(class_weight) + if valid_mask is not None: + final_weight = final_weight * valid_mask + loss = weight_reduce_loss(loss, final_weight, reduction, avg_factor) + return loss + + +@MODELS.register_module() +class FocalLoss(nn.Module): + + def __init__(self, + use_sigmoid=True, + gamma=2.0, + alpha=0.5, + reduction='mean', + class_weight=None, + loss_weight=1.0, + loss_name='loss_focal'): + """`Focal Loss `_ + Args: + use_sigmoid (bool, optional): Whether to the prediction is + used for sigmoid or softmax. Defaults to True. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float | list[float], optional): A balanced form for Focal + Loss. Defaults to 0.5. When a list is provided, the length + of the list should be equal to the number of classes. + Please be careful that this parameter is not the + class-wise weight but the weight of a binary classification + problem. This binary classification problem regards the + pixels which belong to one class as the foreground + and the other pixels as the background, each element in + the list is the weight of the corresponding foreground class. + The value of alpha or each element of alpha should be a float + in the interval [0, 1]. If you want to specify the class-wise + weight, please use `class_weight` parameter. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and + "sum". + class_weight (list[float], optional): Weight of each class. + Defaults to None. + loss_weight (float, optional): Weight of loss. Defaults to 1.0. + loss_name (str, optional): Name of the loss item. If you want this + loss item to be included into the backward graph, `loss_` must + be the prefix of the name. Defaults to 'loss_focal'. + """ + super().__init__() + assert use_sigmoid is True, \ + 'AssertionError: Only sigmoid focal loss supported now.' 
+ assert reduction in ('none', 'mean', 'sum'), \ + "AssertionError: reduction should be 'none', 'mean' or " \ + "'sum'" + assert isinstance(alpha, (float, list)), \ + 'AssertionError: alpha should be of type float' + assert isinstance(gamma, float), \ + 'AssertionError: gamma should be of type float' + assert isinstance(loss_weight, float), \ + 'AssertionError: loss_weight should be of type float' + assert isinstance(loss_name, str), \ + 'AssertionError: loss_name should be of type str' + assert isinstance(class_weight, list) or class_weight is None, \ + 'AssertionError: class_weight must be None or of type list' + self.use_sigmoid = use_sigmoid + self.gamma = gamma + self.alpha = alpha + self.reduction = reduction + self.class_weight = class_weight + self.loss_weight = loss_weight + self._loss_name = loss_name + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None, + ignore_index=255, + **kwargs): + """Forward function. + + Args: + pred (torch.Tensor): The prediction with shape + (N, C) where C = number of classes, or + (N, C, d_1, d_2, ..., d_K) with K≥1 in the + case of K-dimensional loss. + target (torch.Tensor): The ground truth. If containing class + indices, shape (N) where each value is 0≤targets[i]≤C−1, + or (N, d_1, d_2, ..., d_K) with K≥1 in the case of + K-dimensional loss. If containing class probabilities, + same shape as the input. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to + average the loss. Defaults to None. + reduction_override (str, optional): The reduction method used + to override the original reduction method of the loss. + Options are "none", "mean" and "sum". + ignore_index (int, optional): The label index to be ignored. + Default: 255 + Returns: + torch.Tensor: The calculated loss + """ + assert isinstance(ignore_index, int), \ + 'ignore_index must be of type int' + assert reduction_override in (None, 'none', 'mean', 'sum'), \ + "AssertionError: reduction should be 'none', 'mean' or " \ + "'sum'" + assert pred.shape == target.shape or \ + (pred.size(0) == target.size(0) and + pred.shape[2:] == target.shape[1:]), \ + "The shape of pred doesn't match the shape of target" + + original_shape = pred.shape + + # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k] + pred = pred.transpose(0, 1) + # [C, B, d_1, d_2, ..., d_k] -> [C, N] + pred = pred.reshape(pred.size(0), -1) + # [C, N] -> [N, C] + pred = pred.transpose(0, 1).contiguous() + + if original_shape == target.shape: + # target with shape [B, C, d_1, d_2, ...] + # transform it's shape into [N, C] + # [B, C, d_1, d_2, ...] -> [C, B, d_1, d_2, ..., d_k] + target = target.transpose(0, 1) + # [C, B, d_1, d_2, ..., d_k] -> [C, N] + target = target.reshape(target.size(0), -1) + # [C, N] -> [N, C] + target = target.transpose(0, 1).contiguous() + else: + # target with shape [B, d_1, d_2, ...] 
+ # transform its shape into [N, ] + target = target.view(-1).contiguous() + valid_mask = (target != ignore_index).view(-1, 1) + # avoid raising error when using F.one_hot() + target = torch.where(target == ignore_index, target.new_tensor(0), + target) + + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.use_sigmoid: + num_classes = pred.size(1) + if torch.cuda.is_available() and pred.is_cuda: + if target.dim() == 1: + one_hot_target = F.one_hot( + target, num_classes=num_classes + 1) + if num_classes == 1: + one_hot_target = one_hot_target[:, 1] + target = 1 - target + else: + one_hot_target = one_hot_target[:, :num_classes] + else: + one_hot_target = target + target = target.argmax(dim=1) + valid_mask = (target != ignore_index).view(-1, 1) + calculate_loss_func = sigmoid_focal_loss + else: + one_hot_target = None + if target.dim() == 1: + target = F.one_hot(target, num_classes=num_classes + 1) + if num_classes == 1: + target = target[:, 1] + else: + # keep all real class columns, mirroring the branch above + target = target[:, :num_classes] + else: + valid_mask = (target.argmax(dim=1) != ignore_index).view( + -1, 1) + calculate_loss_func = py_sigmoid_focal_loss + + loss_cls = self.loss_weight * calculate_loss_func( + pred, + target, + one_hot_target, + weight, + gamma=self.gamma, + alpha=self.alpha, + class_weight=self.class_weight, + valid_mask=valid_mask, + reduction=reduction, + avg_factor=avg_factor) + + if reduction == 'none': + # [N, C] -> [C, N] + loss_cls = loss_cls.transpose(0, 1) + # [C, N] -> [C, B, d1, d2, ...] + # original_shape: [B, C, d1, d2, ...] + loss_cls = loss_cls.reshape(original_shape[1], + original_shape[0], + *original_shape[2:]) + # [C, B, d1, d2, ...] -> [B, C, d1, d2, ...] + loss_cls = loss_cls.transpose(0, 1).contiguous() + else: + raise NotImplementedError + return loss_cls + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/mmseg/models/losses/huasdorff_distance_loss.py b/mmseg/models/losses/huasdorff_distance_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d950ba728f8d419ea2b291e2159b926aca44038c --- /dev/null +++ b/mmseg/models/losses/huasdorff_distance_loss.py @@ -0,0 +1,160 @@ +# Copyright (c) OpenMMLab. All rights reserved.
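+# Note: for a binary mask, scipy's distance_transform_edt assigns each
+# foreground (nonzero) pixel its Euclidean distance to the nearest
+# background (zero) pixel, e.g.
+#     distance_transform_edt([[0, 1, 1, 1, 0]]) -> [[0., 1., 2., 1., 0.]]
+# compute_dtm below builds one such map per foreground class.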
+"""Modified from https://github.com/JunMa11/SegWithDistMap/blob/ +master/code/train_LA_HD.py (Apache-2.0 License)""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from scipy.ndimage import distance_transform_edt as distance +from torch import Tensor + +from mmseg.registry import MODELS +from .utils import get_class_weight, weighted_loss + + +def compute_dtm(img_gt: Tensor, pred: Tensor) -> Tensor: + """ + compute the distance transform map of foreground in mask + Args: + img_gt: Ground truth of the image, (b, h, w) + pred: Predictions of the segmentation head after softmax, (b, c, h, w) + + Returns: + output: the foreground Distance Map (SDM) + dtm(x) = 0; x in segmentation boundary + inf|x-y|; x in segmentation + """ + + fg_dtm = torch.zeros_like(pred) + out_shape = pred.shape + for b in range(out_shape[0]): # batch size + for c in range(1, out_shape[1]): # default 0 channel is background + posmask = img_gt[b].byte() + if posmask.any(): + posdis = distance(posmask) + fg_dtm[b][c] = torch.from_numpy(posdis) + + return fg_dtm + + +@weighted_loss +def hd_loss(seg_soft: Tensor, + gt: Tensor, + seg_dtm: Tensor, + gt_dtm: Tensor, + class_weight=None, + ignore_index=255) -> Tensor: + """ + compute huasdorff distance loss for segmentation + Args: + seg_soft: softmax results, shape=(b,c,x,y) + gt: ground truth, shape=(b,x,y) + seg_dtm: segmentation distance transform map, shape=(b,c,x,y) + gt_dtm: ground truth distance transform map, shape=(b,c,x,y) + + Returns: + output: hd_loss + """ + assert seg_soft.shape[0] == gt.shape[0] + total_loss = 0 + num_class = seg_soft.shape[1] + if class_weight is not None: + assert class_weight.ndim == num_class + for i in range(1, num_class): + if i != ignore_index: + delta_s = (seg_soft[:, i, ...] - gt.float())**2 + s_dtm = seg_dtm[:, i, ...]**2 + g_dtm = gt_dtm[:, i, ...]**2 + dtm = s_dtm + g_dtm + multiplied = torch.einsum('bxy, bxy->bxy', delta_s, dtm) + hd_loss = multiplied.mean() + if class_weight is not None: + hd_loss *= class_weight[i] + total_loss += hd_loss + + return total_loss / num_class + + +@MODELS.register_module() +class HuasdorffDisstanceLoss(nn.Module): + """HuasdorffDisstanceLoss. This loss is proposed in `How Distance Transform + Maps Boost Segmentation CNNs: An Empirical Study. + + `_. + Args: + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float): Weight of the loss. Defaults to 1.0. + ignore_index (int | None): The label index to be ignored. Default: 255. + loss_name (str): Name of the loss item. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_boundary'. + """ + + def __init__(self, + reduction='mean', + class_weight=None, + loss_weight=1.0, + ignore_index=255, + loss_name='loss_huasdorff_disstance', + **kwargs): + super().__init__() + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + self._loss_name = loss_name + self.ignore_index = ignore_index + + def forward(self, + pred: Tensor, + target: Tensor, + avg_factor=None, + reduction_override=None, + **kwargs) -> Tensor: + """Forward function. + + Args: + pred (Tensor): Predictions of the segmentation head. (B, C, H, W) + target (Tensor): Ground truth of the image. 
(B, H, W) + avg_factor (int, optional): Average factor that is used to + average the loss. Defaults to None. + reduction_override (str, optional): The reduction method used + to override the original reduction method of the loss. + Options are "none", "mean" and "sum". + Returns: + Tensor: Loss tensor. + """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = pred.new_tensor(self.class_weight) + else: + class_weight = None + + pred_soft = F.softmax(pred, dim=1) + valid_mask = (target != self.ignore_index).long() + target = target * valid_mask + + with torch.no_grad(): + gt_dtm = compute_dtm(target.cpu(), pred_soft) + gt_dtm = gt_dtm.float() + seg_dtm2 = compute_dtm( + pred_soft.argmax(dim=1, keepdim=False).cpu(), pred_soft) + seg_dtm2 = seg_dtm2.float() + + loss_hd = self.loss_weight * hd_loss( + pred_soft, + target, + seg_dtm=seg_dtm2, + gt_dtm=gt_dtm, + reduction=reduction, + avg_factor=avg_factor, + class_weight=class_weight, + ignore_index=self.ignore_index) + return loss_hd + + @property + def loss_name(self): + return self._loss_name diff --git a/mmseg/models/losses/kldiv_loss.py b/mmseg/models/losses/kldiv_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..496ef9713f085a36d46837ac0b51d4cb9f956fce --- /dev/null +++ b/mmseg/models/losses/kldiv_loss.py @@ -0,0 +1,99 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from mmseg.registry import MODELS + + +@MODELS.register_module() +class KLDivLoss(nn.Module): + + def __init__(self, + temperature: float = 1.0, + reduction: str = 'mean', + loss_name: str = 'loss_kld'): + """Kullback-Leibler divergence Loss. + + + + Args: + temperature (float, optional): Temperature param + reduction (str, optional): The method to reduce the loss into a + scalar. Default is "mean". Options are "none", "sum", + and "mean" + """ + + assert isinstance(temperature, (float, int)), \ + 'Expected temperature to be' \ + f'float or int, but got {temperature.__class__.__name__} instead' + assert temperature != 0., 'Temperature must not be zero' + + assert reduction in ['mean', 'none', 'sum'], \ + 'Reduction must be one of the options ("mean", ' \ + f'"sum", "none"), but got {reduction}' + + super().__init__() + self.temperature = temperature + self.reduction = reduction + self._loss_name = loss_name + + def forward(self, input: torch.Tensor, target: torch.Tensor): + """Forward function. Calculate KL divergence Loss. + + Args: + input (Tensor): Logit tensor, + the data type is float32 or float64. + The shape is (N, C) where N is batchsize and C is number of + channels. + If there more than 2 dimensions, shape is (N, C, D1, D2, ... + Dk), k>= 1 + target (Tensor): Logit tensor, + the data type is float32 or float64. + input and target must be with the same shape. + + Returns: + (Tensor): Reduced loss. 
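+
+ Example (illustrative values):
+
+ >>> import torch
+ >>> loss = KLDivLoss(temperature=2.0, reduction='mean')
+ >>> x = torch.randn(2, 19, 4, 4)
+ >>> y = torch.randn(2, 19, 4, 4)
+ >>> loss(x, y).shape
+ torch.Size([2])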
+ """ + assert isinstance(input, torch.Tensor), 'Expected input to' \ + f'be Tensor, but got {input.__class__.__name__} instead' + assert isinstance(target, torch.Tensor), 'Expected target to' \ + f'be Tensor, but got {target.__class__.__name__} instead' + + assert input.shape == target.shape, 'Input and target ' \ + 'must have same shape,' \ + f'but got shapes {input.shape} and {target.shape}' + + input = F.softmax(input / self.temperature, dim=1) + target = F.softmax(target / self.temperature, dim=1) + + loss = F.kl_div(input, target, reduction='none', log_target=False) + loss = loss * self.temperature**2 + + batch_size = input.shape[0] + + if self.reduction == 'sum': + # Change view to calculate instance-wise sum + loss = loss.view(batch_size, -1) + return torch.sum(loss, dim=1) + + elif self.reduction == 'mean': + # Change view to calculate instance-wise mean + loss = loss.view(batch_size, -1) + return torch.mean(loss, dim=1) + + return loss + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/mmseg/models/losses/lovasz_loss.py b/mmseg/models/losses/lovasz_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..b47f9d8a15330a45d0d2d25f3c18d9386e2b335e --- /dev/null +++ b/mmseg/models/losses/lovasz_loss.py @@ -0,0 +1,323 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Modified from https://github.com/bermanmaxim/LovaszSoftmax/blob/master/pytor +ch/lovasz_losses.py Lovasz-Softmax and Jaccard hinge loss in PyTorch Maxim +Berman 2018 ESAT-PSI KU Leuven (MIT License)""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmengine.utils import is_list_of + +from mmseg.registry import MODELS +from .utils import get_class_weight, weight_reduce_loss + + +def lovasz_grad(gt_sorted): + """Computes gradient of the Lovasz extension w.r.t sorted errors. + + See Alg. 1 in paper. + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1. - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def flatten_binary_logits(logits, labels, ignore_index=None): + """Flattens predictions in the batch (binary case) Remove labels equal to + 'ignore_index'.""" + logits = logits.view(-1) + labels = labels.view(-1) + if ignore_index is None: + return logits, labels + valid = (labels != ignore_index) + vlogits = logits[valid] + vlabels = labels[valid] + return vlogits, vlabels + + +def flatten_probs(probs, labels, ignore_index=None): + """Flattens predictions in the batch.""" + if probs.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probs.size() + probs = probs.view(B, 1, H, W) + B, C, H, W = probs.size() + probs = probs.permute(0, 2, 3, 1).contiguous().view(-1, C) # B*H*W, C=P,C + labels = labels.view(-1) + if ignore_index is None: + return probs, labels + valid = (labels != ignore_index) + vprobs = probs[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobs, vlabels + + +def lovasz_hinge_flat(logits, labels): + """Binary Lovasz hinge loss. 
+ + Args: + logits (torch.Tensor): [P], logits at each prediction + (between -infty and +infty). + labels (torch.Tensor): [P], binary ground truth labels (0 or 1). + + Returns: + torch.Tensor: The calculated loss. + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0. + signs = 2. * labels.float() - 1. + errors = (1. - logits * signs) + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), grad) + return loss + + +def lovasz_hinge(logits, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [B, H, W], logits at each pixel + (between -infty and +infty). + labels (torch.Tensor): [B, H, W], binary ground truth masks (0 or 1). + classes (str | list[int], optional): Placeholder, to be consistent with + other loss. Default: None. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): Placeholder, to be consistent + with other loss. Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + if per_image: + loss = [ + lovasz_hinge_flat(*flatten_binary_logits( + logit.unsqueeze(0), label.unsqueeze(0), ignore_index)) + for logit, label in zip(logits, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_hinge_flat( + *flatten_binary_logits(logits, labels, ignore_index)) + return loss + + +def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [P, C], class probabilities at each prediction + (between 0 and 1). + labels (torch.Tensor): [P], ground truth labels (between 0 and C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + class_weight (list[float], optional): The weight for each class. + Default: None. + + Returns: + torch.Tensor: The calculated loss. + """ + if probs.numel() == 0: + # only void pixels, the gradients should be 0 + return probs * 0. 
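+ # Shape notes: probs is [P, C] (P valid pixels) and labels is [P].
+ # For each class c, the absolute errors |fg - probs[:, c]| are sorted
+ # in descending order and dotted with the Lovasz gradient of the
+ # sorted ground truth, a convex surrogate of the per-class Jaccard
+ # (IoU) loss; the per-class losses are then averaged.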
+ C = probs.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if (classes == 'present' and fg.sum() == 0): + continue + if C == 1: + if len(classes) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probs[:, 0] + else: + class_pred = probs[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + loss = torch.dot(errors_sorted, lovasz_grad(fg_sorted)) + if class_weight is not None: + loss *= class_weight[c] + losses.append(loss) + return torch.stack(losses).mean() + + +def lovasz_softmax(probs, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [B, C, H, W], class probabilities at each + prediction (between 0 and 1). + labels (torch.Tensor): [B, H, W], ground truth labels (between 0 and + C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): The weight for each class. + Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + + if per_image: + loss = [ + lovasz_softmax_flat( + *flatten_probs( + prob.unsqueeze(0), label.unsqueeze(0), ignore_index), + classes=classes, + class_weight=class_weight) + for prob, label in zip(probs, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_softmax_flat( + *flatten_probs(probs, labels, ignore_index), + classes=classes, + class_weight=class_weight) + return loss + + +@MODELS.register_module() +class LovaszLoss(nn.Module): + """LovaszLoss. + + This loss is proposed in `The Lovasz-Softmax loss: A tractable surrogate + for the optimization of the intersection-over-union measure in neural + networks `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. + loss_name (str, optional): Name of the loss item. 
If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_lovasz'. + """ + + def __init__(self, + loss_type='multi_class', + classes='present', + per_image=False, + reduction='mean', + class_weight=None, + loss_weight=1.0, + loss_name='loss_lovasz'): + super().__init__() + assert loss_type in ('binary', 'multi_class'), "loss_type should be \ + 'binary' or 'multi_class'." + + if loss_type == 'binary': + self.cls_criterion = lovasz_hinge + else: + self.cls_criterion = lovasz_softmax + assert classes in ('all', 'present') or is_list_of(classes, int) + if not per_image: + assert reduction == 'none', "reduction should be 'none' when \ + per_image is False." + + self.classes = classes + self.per_image = per_image + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + self._loss_name = loss_name + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + + # if multi-class loss, transform logits to probs + if self.cls_criterion == lovasz_softmax: + cls_score = F.softmax(cls_score, dim=1) + + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + self.classes, + self.per_image, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss_cls + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/mmseg/models/losses/ohem_cross_entropy_loss.py b/mmseg/models/losses/ohem_cross_entropy_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..a519b4d84e1dbf86ebc7ad07372ddbdfb0ff3d13 --- /dev/null +++ b/mmseg/models/losses/ohem_cross_entropy_loss.py @@ -0,0 +1,94 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Optional, Union + +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from mmseg.registry import MODELS + + +@MODELS.register_module() +class OhemCrossEntropy(nn.Module): + """OhemCrossEntropy loss. + + This func is modified from + `PIDNet `_. # noqa + + Licensed under the MIT License. + + Args: + ignore_label (int): Labels to ignore when computing the loss. + Default: 255 + thresh (float, optional): The threshold for hard example selection. + Below which, are prediction with low confidence. If not + specified, the hard examples will be pixels of top ``min_kept`` + loss. Default: 0.7. + min_kept (int, optional): The minimum number of predictions to keep. + Default: 100000. + loss_weight (float): Weight of the loss. Defaults to 1.0. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_name (str): Name of the loss item. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. 
Defaults to 'loss_boundary'. + """ + + def __init__(self, + ignore_label: int = 255, + thres: float = 0.7, + min_kept: int = 100000, + loss_weight: float = 1.0, + class_weight: Optional[Union[List[float], str]] = None, + loss_name: str = 'loss_ohem'): + super().__init__() + self.thresh = thres + self.min_kept = max(1, min_kept) + self.ignore_label = ignore_label + self.loss_weight = loss_weight + self.loss_name_ = loss_name + self.class_weight = class_weight + + def forward(self, score: Tensor, target: Tensor) -> Tensor: + """Forward function. + Args: + score (Tensor): Predictions of the segmentation head. + target (Tensor): Ground truth of the image. + + Returns: + Tensor: Loss tensor. + """ + # score: (N, C, H, W) + pred = F.softmax(score, dim=1) + if self.class_weight is not None: + class_weight = score.new_tensor(self.class_weight) + else: + class_weight = None + + pixel_losses = F.cross_entropy( + score, + target, + weight=class_weight, + ignore_index=self.ignore_label, + reduction='none').contiguous().view(-1) # (N*H*W) + mask = target.contiguous().view(-1) != self.ignore_label # (N*H*W) + + tmp_target = target.clone() # (N, H, W) + tmp_target[tmp_target == self.ignore_label] = 0 + # pred: (N, C, H, W) -> (N*H*W, C) + pred = pred.gather(1, tmp_target.unsqueeze(1)) + # pred: (N*H*W, C) -> (N*H*W), ind: (N*H*W) + pred, ind = pred.contiguous().view(-1, )[mask].contiguous().sort() + if pred.numel() > 0: + min_value = pred[min(self.min_kept, pred.numel() - 1)] + else: + return score.new_tensor(0.0) + threshold = max(min_value, self.thresh) + + pixel_losses = pixel_losses[mask][ind] + pixel_losses = pixel_losses[pred < threshold] + return self.loss_weight * pixel_losses.mean() + + @property + def loss_name(self): + return self.loss_name_ diff --git a/mmseg/models/losses/silog_loss.py b/mmseg/models/losses/silog_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..ecc07aac424a9308bce33e00c621369ac555f4ba --- /dev/null +++ b/mmseg/models/losses/silog_loss.py @@ -0,0 +1,122 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional, Union + +import torch +import torch.nn as nn +from torch import Tensor + +from mmseg.registry import MODELS +from .utils import weight_reduce_loss + + +def silog_loss(pred: Tensor, + target: Tensor, + weight: Optional[Tensor] = None, + eps: float = 1e-4, + reduction: Union[str, None] = 'mean', + avg_factor: Optional[int] = None) -> Tensor: + """Computes the Scale-Invariant Logarithmic (SI-Log) loss between + prediction and target. + + Args: + pred (Tensor): Predicted output. + target (Tensor): Ground truth. + weight (Optional[Tensor]): Optional weight to apply on the loss. + eps (float): Epsilon value to avoid division and log(0). + reduction (Union[str, None]): Specifies the reduction to apply to the + output: 'mean', 'sum' or None. + avg_factor (Optional[int]): Optional average factor for the loss. + + Returns: + Tensor: The calculated SI-Log loss. 
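+
+ Note: per sample the result reduces to
+ sqrt(mean(d ** 2) - 0.5 * mean(d) ** 2) over valid pixels, where
+ d = log(target) - log(pred); pixels with target <= eps or a NaN
+ log-difference are masked out before the means are taken.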
+ """ + pred, target = pred.flatten(1), target.flatten(1) + valid_mask = (target > eps).detach().float() + + diff_log = torch.log(target.clamp(min=eps)) - torch.log( + pred.clamp(min=eps)) + + valid_mask = (target > eps).detach() & (~torch.isnan(diff_log)) + diff_log[~valid_mask] = 0.0 + valid_mask = valid_mask.float() + + diff_log_sq_mean = (diff_log.pow(2) * valid_mask).sum( + dim=1) / valid_mask.sum(dim=1).clamp(min=eps) + diff_log_mean = (diff_log * valid_mask).sum(dim=1) / valid_mask.sum( + dim=1).clamp(min=eps) + + loss = torch.sqrt(diff_log_sq_mean - 0.5 * diff_log_mean.pow(2)) + + if weight is not None: + weight = weight.float() + + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + +@MODELS.register_module() +class SiLogLoss(nn.Module): + """Compute SiLog loss. + + Args: + reduction (str, optional): The method used + to reduce the loss. Options are "none", + "mean" and "sum". Defaults to 'mean'. + loss_weight (float, optional): Weight of loss. Defaults to 1.0. + eps (float): Avoid dividing by zero. Defaults to 1e-3. + loss_name (str, optional): Name of the loss item. If you want this + loss item to be included into the backward graph, `loss_` must + be the prefix of the name. Defaults to 'loss_silog'. + """ + + def __init__(self, + reduction='mean', + loss_weight=1.0, + eps=1e-6, + loss_name='loss_silog'): + super().__init__() + self.reduction = reduction + self.loss_weight = loss_weight + self.eps = eps + self._loss_name = loss_name + + def forward( + self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None, + ): + + assert pred.shape == target.shape, 'the shapes of pred ' \ + f'({pred.shape}) and target ({target.shape}) are mismatch' + + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + + loss = self.loss_weight * silog_loss( + pred, + target, + weight, + eps=self.eps, + reduction=reduction, + avg_factor=avg_factor, + ) + + return loss + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/mmseg/models/losses/tversky_loss.py b/mmseg/models/losses/tversky_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..bfca1af6669e3ac328492da11758a084999ef906 --- /dev/null +++ b/mmseg/models/losses/tversky_loss.py @@ -0,0 +1,137 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+"""Modified from +https://github.com/JunMa11/SegLoss/blob/master/losses_pytorch/dice_loss.py#L333 +(Apache-2.0 License)""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weighted_loss + + +@weighted_loss +def tversky_loss(pred, + target, + valid_mask, + alpha=0.3, + beta=0.7, + smooth=1, + class_weight=None, + ignore_index=255): + assert pred.shape[0] == target.shape[0] + total_loss = 0 + num_classes = pred.shape[1] + for i in range(num_classes): + if i != ignore_index: + tversky_loss = binary_tversky_loss( + pred[:, i], + target[..., i], + valid_mask=valid_mask, + alpha=alpha, + beta=beta, + smooth=smooth) + if class_weight is not None: + tversky_loss *= class_weight[i] + total_loss += tversky_loss + return total_loss / num_classes + + +@weighted_loss +def binary_tversky_loss(pred, + target, + valid_mask, + alpha=0.3, + beta=0.7, + smooth=1): + assert pred.shape[0] == target.shape[0] + pred = pred.reshape(pred.shape[0], -1) + target = target.reshape(target.shape[0], -1) + valid_mask = valid_mask.reshape(valid_mask.shape[0], -1) + + TP = torch.sum(torch.mul(pred, target) * valid_mask, dim=1) + FP = torch.sum(torch.mul(pred, 1 - target) * valid_mask, dim=1) + FN = torch.sum(torch.mul(1 - pred, target) * valid_mask, dim=1) + tversky = (TP + smooth) / (TP + alpha * FP + beta * FN + smooth) + + return 1 - tversky + + +@LOSSES.register_module() +class TverskyLoss(nn.Module): + """TverskyLoss. This loss is proposed in `Tversky loss function for image + segmentation using 3D fully convolutional deep networks. + + `_. + Args: + smooth (float): A float number to smooth loss, and avoid NaN error. + Default: 1. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Default to 1.0. + ignore_index (int | None): The label index to be ignored. Default: 255. + alpha(float, in [0, 1]): + The coefficient of false positives. Default: 0.3. + beta (float, in [0, 1]): + The coefficient of false negatives. Default: 0.7. + Note: alpha + beta = 1. + loss_name (str, optional): Name of the loss item. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_tversky'. + """ + + def __init__(self, + smooth=1, + class_weight=None, + loss_weight=1.0, + ignore_index=255, + alpha=0.3, + beta=0.7, + loss_name='loss_tversky'): + super().__init__() + self.smooth = smooth + self.class_weight = get_class_weight(class_weight) + self.loss_weight = loss_weight + self.ignore_index = ignore_index + assert (alpha + beta == 1.0), 'Sum of alpha and beta but be 1.0!' + self.alpha = alpha + self.beta = beta + self._loss_name = loss_name + + def forward(self, pred, target, **kwargs): + if self.class_weight is not None: + class_weight = pred.new_tensor(self.class_weight) + else: + class_weight = None + + pred = F.softmax(pred, dim=1) + num_classes = pred.shape[1] + one_hot_target = F.one_hot( + torch.clamp(target.long(), 0, num_classes - 1), + num_classes=num_classes) + valid_mask = (target != self.ignore_index).long() + + loss = self.loss_weight * tversky_loss( + pred, + one_hot_target, + valid_mask=valid_mask, + alpha=self.alpha, + beta=self.beta, + smooth=self.smooth, + class_weight=class_weight, + ignore_index=self.ignore_index) + return loss + + @property + def loss_name(self): + """Loss Name. 
+ + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/mmseg/models/losses/utils.py b/mmseg/models/losses/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..047803473316ff5fc58de2b8e35ef0087bc3b624 --- /dev/null +++ b/mmseg/models/losses/utils.py @@ -0,0 +1,129 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools + +import numpy as np +import torch +import torch.nn.functional as F +from mmengine.fileio import load + + +def get_class_weight(class_weight): + """Get class weight for loss function. + + Args: + class_weight (list[float] | str | None): If class_weight is a str, + take it as a file name and read from it. + """ + if isinstance(class_weight, str): + # take it as a file path + if class_weight.endswith('.npy'): + class_weight = np.load(class_weight) + else: + # pkl, json or yaml + class_weight = load(class_weight) + + return class_weight + + +def reduce_loss(loss, reduction) -> torch.Tensor: + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, + weight=None, + reduction='mean', + avg_factor=None) -> torch.Tensor: + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Average factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. + """ + # if weight is specified, apply element-wise weight + if weight is not None: + assert weight.dim() == loss.dim() + if weight.dim() > 1: + assert weight.size(1) == 1 or weight.size(1) == loss.size(1) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + # Avoid causing ZeroDivisionError when avg_factor is 0.0, + # i.e., all labels of an image belong to ignore index. + eps = torch.finfo(torch.float32).eps + loss = loss.sum() / (avg_factor + eps) + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. 
+ + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, + target, + weight=None, + reduction='mean', + avg_factor=None, + **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper diff --git a/mmseg/models/necks/__init__.py b/mmseg/models/necks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ff03186a92b78f942e79cff9eec9f5e2784c359a --- /dev/null +++ b/mmseg/models/necks/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .featurepyramid import Feature2Pyramid +from .fpn import FPN +from .ic_neck import ICNeck +from .jpu import JPU +from .mla_neck import MLANeck +from .multilevel_neck import MultiLevelNeck + +__all__ = [ + 'FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck', 'JPU', 'Feature2Pyramid' +] diff --git a/mmseg/models/necks/__pycache__/__init__.cpython-39.pyc b/mmseg/models/necks/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c9340b51bf9bb2c52b2cb6e42888ccc9ed3a647 Binary files /dev/null and b/mmseg/models/necks/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/models/necks/__pycache__/featurepyramid.cpython-39.pyc b/mmseg/models/necks/__pycache__/featurepyramid.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e34655da408831cfaf1c6da7a9ca14f64ef8e47 Binary files /dev/null and b/mmseg/models/necks/__pycache__/featurepyramid.cpython-39.pyc differ diff --git a/mmseg/models/necks/__pycache__/fpn.cpython-39.pyc b/mmseg/models/necks/__pycache__/fpn.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c613fc0ba8acf08f121a2d9c6c35a6c91614b6ae Binary files /dev/null and b/mmseg/models/necks/__pycache__/fpn.cpython-39.pyc differ diff --git a/mmseg/models/necks/__pycache__/ic_neck.cpython-39.pyc b/mmseg/models/necks/__pycache__/ic_neck.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..376efd7bd195335828974ba2f0dc2c6da197d636 Binary files /dev/null and b/mmseg/models/necks/__pycache__/ic_neck.cpython-39.pyc differ diff --git a/mmseg/models/necks/__pycache__/jpu.cpython-39.pyc b/mmseg/models/necks/__pycache__/jpu.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6025efc4aa8b7d4e0ef5fccffdfe9e1abaef0c4f Binary files /dev/null and b/mmseg/models/necks/__pycache__/jpu.cpython-39.pyc differ diff --git a/mmseg/models/necks/__pycache__/mla_neck.cpython-39.pyc b/mmseg/models/necks/__pycache__/mla_neck.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ff5c19546da3f5a35ba579fc0325069b22e3302 Binary files /dev/null and b/mmseg/models/necks/__pycache__/mla_neck.cpython-39.pyc differ diff --git a/mmseg/models/necks/__pycache__/multilevel_neck.cpython-39.pyc b/mmseg/models/necks/__pycache__/multilevel_neck.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab2981b9f0dab98ccd498bf2a84257ca9a9015b3 
Binary files /dev/null and b/mmseg/models/necks/__pycache__/multilevel_neck.cpython-39.pyc differ diff --git a/mmseg/models/necks/featurepyramid.py b/mmseg/models/necks/featurepyramid.py new file mode 100644 index 0000000000000000000000000000000000000000..dc1250d39dafcf78880aa282bcba4215520ad94e --- /dev/null +++ b/mmseg/models/necks/featurepyramid.py @@ -0,0 +1,67 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from mmcv.cnn import build_norm_layer + +from mmseg.registry import MODELS + + +@MODELS.register_module() +class Feature2Pyramid(nn.Module): + """Feature2Pyramid. + + A neck structure connect ViT backbone and decoder_heads. + + Args: + embed_dims (int): Embedding dimension. + rescales (list[float]): Different sampling multiples were + used to obtain pyramid features. Default: [4, 2, 1, 0.5]. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='SyncBN', requires_grad=True). + """ + + def __init__(self, + embed_dim, + rescales=[4, 2, 1, 0.5], + norm_cfg=dict(type='SyncBN', requires_grad=True)): + super().__init__() + self.rescales = rescales + self.upsample_4x = None + for k in self.rescales: + if k == 4: + self.upsample_4x = nn.Sequential( + nn.ConvTranspose2d( + embed_dim, embed_dim, kernel_size=2, stride=2), + build_norm_layer(norm_cfg, embed_dim)[1], + nn.GELU(), + nn.ConvTranspose2d( + embed_dim, embed_dim, kernel_size=2, stride=2), + ) + elif k == 2: + self.upsample_2x = nn.Sequential( + nn.ConvTranspose2d( + embed_dim, embed_dim, kernel_size=2, stride=2)) + elif k == 1: + self.identity = nn.Identity() + elif k == 0.5: + self.downsample_2x = nn.MaxPool2d(kernel_size=2, stride=2) + elif k == 0.25: + self.downsample_4x = nn.MaxPool2d(kernel_size=4, stride=4) + else: + raise KeyError(f'invalid {k} for feature2pyramid') + + def forward(self, inputs): + assert len(inputs) == len(self.rescales) + outputs = [] + if self.upsample_4x is not None: + ops = [ + self.upsample_4x, self.upsample_2x, self.identity, + self.downsample_2x + ] + else: + ops = [ + self.upsample_2x, self.identity, self.downsample_2x, + self.downsample_4x + ] + for i in range(len(inputs)): + outputs.append(ops[i](inputs[i])) + return tuple(outputs) diff --git a/mmseg/models/necks/fpn.py b/mmseg/models/necks/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..ddab74c00a262a89031fda44824c5de0e2e9a362 --- /dev/null +++ b/mmseg/models/necks/fpn.py @@ -0,0 +1,212 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule + +from mmseg.registry import MODELS +from ..utils import resize + + +@MODELS.register_module() +class FPN(BaseModule): + """Feature Pyramid Network. + + This neck is the implementation of `Feature Pyramid Networks for Object + Detection `_. + + Args: + in_channels (list[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale). + num_outs (int): Number of output scales. + start_level (int): Index of the start input backbone level used to + build the feature pyramid. Default: 0. + end_level (int): Index of the end input backbone level (exclusive) to + build the feature pyramid. Default: -1, which means the last level. + add_extra_convs (bool | str): If bool, it decides whether to add conv + layers on top of the original feature maps. Default to False. + If True, its actual mode is specified by `extra_convs_on_inputs`. 
+ If str, it specifies the source feature map of the extra convs. + Only the following options are allowed + + - 'on_input': Last feat map of neck inputs (i.e. backbone feature). + - 'on_lateral': Last feature map after lateral convs. + - 'on_output': The last output feature map after fpn convs. + extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs + on the original feature from the backbone. If True, + it is equivalent to `add_extra_convs='on_input'`. If False, it is + equivalent to set `add_extra_convs='on_output'`. Default to True. + relu_before_extra_convs (bool): Whether to apply relu before the extra + conv. Default: False. + no_norm_on_lateral (bool): Whether to apply norm on lateral. + Default: False. + conv_cfg (dict): Config dict for convolution layer. Default: None. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: None. + upsample_cfg (dict): Config dict for interpolate layer. + Default: dict(mode='nearest'). + init_cfg (dict or list[dict], optional): Initialization config dict. + + Example: + >>> import torch + >>> in_channels = [2, 3, 5, 7] + >>> scales = [340, 170, 84, 43] + >>> inputs = [torch.rand(1, c, s, s) + ... for c, s in zip(in_channels, scales)] + >>> self = FPN(in_channels, 11, len(in_channels)).eval() + >>> outputs = self.forward(inputs) + >>> for i in range(len(outputs)): + ... print(f'outputs[{i}].shape = {outputs[i].shape}') + outputs[0].shape = torch.Size([1, 11, 340, 340]) + outputs[1].shape = torch.Size([1, 11, 170, 170]) + outputs[2].shape = torch.Size([1, 11, 84, 84]) + outputs[3].shape = torch.Size([1, 11, 43, 43]) + """ + + def __init__(self, + in_channels, + out_channels, + num_outs, + start_level=0, + end_level=-1, + add_extra_convs=False, + extra_convs_on_inputs=False, + relu_before_extra_convs=False, + no_norm_on_lateral=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + upsample_cfg=dict(mode='nearest'), + init_cfg=dict( + type='Xavier', layer='Conv2d', distribution='uniform')): + super().__init__(init_cfg) + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.num_outs = num_outs + self.relu_before_extra_convs = relu_before_extra_convs + self.no_norm_on_lateral = no_norm_on_lateral + self.fp16_enabled = False + self.upsample_cfg = upsample_cfg.copy() + + if end_level == -1: + self.backbone_end_level = self.num_ins + assert num_outs >= self.num_ins - start_level + else: + # if end_level < inputs, no extra level is allowed + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + assert num_outs == end_level - start_level + self.start_level = start_level + self.end_level = end_level + self.add_extra_convs = add_extra_convs + assert isinstance(add_extra_convs, (str, bool)) + if isinstance(add_extra_convs, str): + # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' + assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') + elif add_extra_convs: # True + if extra_convs_on_inputs: + # For compatibility with previous release + # TODO: deprecate `extra_convs_on_inputs` + self.add_extra_convs = 'on_input' + else: + self.add_extra_convs = 'on_output' + + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + + for i in range(self.start_level, self.backbone_end_level): + l_conv = ConvModule( + in_channels[i], + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg if not 
self.no_norm_on_lateral else None, + act_cfg=act_cfg, + inplace=False) + fpn_conv = ConvModule( + out_channels, + out_channels, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False) + + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + # add extra conv layers (e.g., RetinaNet) + extra_levels = num_outs - self.backbone_end_level + self.start_level + if self.add_extra_convs and extra_levels >= 1: + for i in range(extra_levels): + if i == 0 and self.add_extra_convs == 'on_input': + in_channels = self.in_channels[self.backbone_end_level - 1] + else: + in_channels = out_channels + extra_fpn_conv = ConvModule( + in_channels, + out_channels, + 3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False) + self.fpn_convs.append(extra_fpn_conv) + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # build laterals + laterals = [ + lateral_conv(inputs[i + self.start_level]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + # In some cases, fixing `scale factor` (e.g. 2) is preferred, but + # it cannot co-exist with `size` in `F.interpolate`. + if 'scale_factor' in self.upsample_cfg: + laterals[i - 1] = laterals[i - 1] + resize( + laterals[i], **self.upsample_cfg) + else: + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] = laterals[i - 1] + resize( + laterals[i], size=prev_shape, **self.upsample_cfg) + + # build outputs + # part 1: from original levels + outs = [ + self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) + ] + # part 2: add extra levels + if self.num_outs > len(outs): + # use max pool to get more levels on top of outputs + # (e.g., Faster R-CNN, Mask R-CNN) + if not self.add_extra_convs: + for i in range(self.num_outs - used_backbone_levels): + outs.append(F.max_pool2d(outs[-1], 1, stride=2)) + # add conv layers on top of original feature maps (RetinaNet) + else: + if self.add_extra_convs == 'on_input': + extra_source = inputs[self.backbone_end_level - 1] + elif self.add_extra_convs == 'on_lateral': + extra_source = laterals[-1] + elif self.add_extra_convs == 'on_output': + extra_source = outs[-1] + else: + raise NotImplementedError + outs.append(self.fpn_convs[used_backbone_levels](extra_source)) + for i in range(used_backbone_levels + 1, self.num_outs): + if self.relu_before_extra_convs: + outs.append(self.fpn_convs[i](F.relu(outs[-1]))) + else: + outs.append(self.fpn_convs[i](outs[-1])) + return tuple(outs) diff --git a/mmseg/models/necks/ic_neck.py b/mmseg/models/necks/ic_neck.py new file mode 100644 index 0000000000000000000000000000000000000000..9763541e0980cb0ec53a342b656e64c99d87ed7e --- /dev/null +++ b/mmseg/models/necks/ic_neck.py @@ -0,0 +1,148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule + +from mmseg.registry import MODELS +from ..utils import resize + + +class CascadeFeatureFusion(BaseModule): + """Cascade Feature Fusion Unit in ICNet. + + Args: + low_channels (int): The number of input channels for + low resolution feature map. + high_channels (int): The number of input channels for + high resolution feature map. + out_channels (int): The number of output channels. + conv_cfg (dict): Dictionary to construct and config conv layer. + Default: None. 
+ norm_cfg (dict): Dictionary to construct and config norm layer. + Default: dict(type='BN'). + act_cfg (dict): Dictionary to construct and config act layer. + Default: dict(type='ReLU'). + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + + Returns: + x (Tensor): The output tensor of shape (N, out_channels, H, W). + x_low (Tensor): The output tensor of shape (N, out_channels, H, W) + for Cascade Label Guidance in auxiliary heads. + """ + + def __init__(self, + low_channels, + high_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.align_corners = align_corners + self.conv_low = ConvModule( + low_channels, + out_channels, + 3, + padding=2, + dilation=2, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv_high = ConvModule( + high_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, x_low, x_high): + x_low = resize( + x_low, + size=x_high.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + # Note: Different from original paper, `x_low` is underwent + # `self.conv_low` rather than another 1x1 conv classifier + # before being used for auxiliary head. + x_low = self.conv_low(x_low) + x_high = self.conv_high(x_high) + x = x_low + x_high + x = F.relu(x, inplace=True) + return x, x_low + + +@MODELS.register_module() +class ICNeck(BaseModule): + """ICNet for Real-Time Semantic Segmentation on High-Resolution Images. + + This head is the implementation of `ICHead + `_. + + Args: + in_channels (int): The number of input image channels. Default: 3. + out_channels (int): The numbers of output feature channels. + Default: 128. + conv_cfg (dict): Dictionary to construct and config conv layer. + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Default: dict(type='BN'). + act_cfg (dict): Dictionary to construct and config act layer. + Default: dict(type='ReLU'). + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels=(64, 256, 256), + out_channels=128, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + assert len(in_channels) == 3, 'Length of input channels \ + must be 3!' + + self.in_channels = in_channels + self.out_channels = out_channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.cff_24 = CascadeFeatureFusion( + self.in_channels[2], + self.in_channels[1], + self.out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + + self.cff_12 = CascadeFeatureFusion( + self.out_channels, + self.in_channels[0], + self.out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + + def forward(self, inputs): + assert len(inputs) == 3, 'Length of input feature \ + maps must be 3!' 
+ + x_sub1, x_sub2, x_sub4 = inputs + x_cff_24, x_24 = self.cff_24(x_sub4, x_sub2) + x_cff_12, x_12 = self.cff_12(x_cff_24, x_sub1) + # Note: `x_cff_12` is used for decode_head, + # `x_24` and `x_12` are used for auxiliary head. + return x_24, x_12, x_cff_12 diff --git a/mmseg/models/necks/jpu.py b/mmseg/models/necks/jpu.py new file mode 100644 index 0000000000000000000000000000000000000000..3ea0fe2183377d3e3c1a87ca8a0df909b123cdfa --- /dev/null +++ b/mmseg/models/necks/jpu.py @@ -0,0 +1,131 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule +from mmengine.model import BaseModule + +from mmseg.registry import MODELS +from ..utils import resize + + +@MODELS.register_module() +class JPU(BaseModule): + """FastFCN: Rethinking Dilated Convolution in the Backbone + for Semantic Segmentation. + + This Joint Pyramid Upsampling (JPU) neck is the implementation of + `FastFCN `_. + + Args: + in_channels (Tuple[int], optional): The number of input channels + for each convolution operations before upsampling. + Default: (512, 1024, 2048). + mid_channels (int): The number of output channels of JPU. + Default: 512. + start_level (int): Index of the start input backbone level used to + build the feature pyramid. Default: 0. + end_level (int): Index of the end input backbone level (exclusive) to + build the feature pyramid. Default: -1, which means the last level. + dilations (tuple[int]): Dilation rate of each Depthwise + Separable ConvModule. Default: (1, 2, 4, 8). + align_corners (bool, optional): The align_corners argument of + resize operation. Default: False. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
+ """ + + def __init__(self, + in_channels=(512, 1024, 2048), + mid_channels=512, + start_level=0, + end_level=-1, + dilations=(1, 2, 4, 8), + align_corners=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + assert isinstance(in_channels, tuple) + assert isinstance(dilations, tuple) + self.in_channels = in_channels + self.mid_channels = mid_channels + self.start_level = start_level + self.num_ins = len(in_channels) + if end_level == -1: + self.backbone_end_level = self.num_ins + else: + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + + self.dilations = dilations + self.align_corners = align_corners + + self.conv_layers = nn.ModuleList() + self.dilation_layers = nn.ModuleList() + for i in range(self.start_level, self.backbone_end_level): + conv_layer = nn.Sequential( + ConvModule( + self.in_channels[i], + self.mid_channels, + kernel_size=3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.conv_layers.append(conv_layer) + for i in range(len(dilations)): + dilation_layer = nn.Sequential( + DepthwiseSeparableConvModule( + in_channels=(self.backbone_end_level - self.start_level) * + self.mid_channels, + out_channels=self.mid_channels, + kernel_size=3, + stride=1, + padding=dilations[i], + dilation=dilations[i], + dw_norm_cfg=norm_cfg, + dw_act_cfg=None, + pw_norm_cfg=norm_cfg, + pw_act_cfg=act_cfg)) + self.dilation_layers.append(dilation_layer) + + def forward(self, inputs): + """Forward function.""" + assert len(inputs) == len(self.in_channels), 'Length of inputs must \ + be the same with self.in_channels!' + + feats = [ + self.conv_layers[i - self.start_level](inputs[i]) + for i in range(self.start_level, self.backbone_end_level) + ] + + h, w = feats[0].shape[2:] + for i in range(1, len(feats)): + feats[i] = resize( + feats[i], + size=(h, w), + mode='bilinear', + align_corners=self.align_corners) + + feat = torch.cat(feats, dim=1) + concat_feat = torch.cat([ + self.dilation_layers[i](feat) for i in range(len(self.dilations)) + ], + dim=1) + + outs = [] + + # Default: outs[2] is the output of JPU for decoder head, outs[1] is + # the feature map from backbone for auxiliary head. Additionally, + # outs[0] can also be used for auxiliary head. + for i in range(self.start_level, self.backbone_end_level - 1): + outs.append(inputs[i]) + outs.append(concat_feat) + return tuple(outs) diff --git a/mmseg/models/necks/mla_neck.py b/mmseg/models/necks/mla_neck.py new file mode 100644 index 0000000000000000000000000000000000000000..db250aefbfa45beaa98855be79ddc7f5e7276cca --- /dev/null +++ b/mmseg/models/necks/mla_neck.py @@ -0,0 +1,118 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch.nn as nn +from mmcv.cnn import ConvModule, build_norm_layer + +from mmseg.registry import MODELS + + +class MLAModule(nn.Module): + + def __init__(self, + in_channels=[1024, 1024, 1024, 1024], + out_channels=256, + norm_cfg=None, + act_cfg=None): + super().__init__() + self.channel_proj = nn.ModuleList() + for i in range(len(in_channels)): + self.channel_proj.append( + ConvModule( + in_channels=in_channels[i], + out_channels=out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.feat_extract = nn.ModuleList() + for i in range(len(in_channels)): + self.feat_extract.append( + ConvModule( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, inputs): + + # feat_list -> [p2, p3, p4, p5] + feat_list = [] + for x, conv in zip(inputs, self.channel_proj): + feat_list.append(conv(x)) + + # feat_list -> [p5, p4, p3, p2] + # mid_list -> [m5, m4, m3, m2] + feat_list = feat_list[::-1] + mid_list = [] + for feat in feat_list: + if len(mid_list) == 0: + mid_list.append(feat) + else: + mid_list.append(mid_list[-1] + feat) + + # mid_list -> [m5, m4, m3, m2] + # out_list -> [o2, o3, o4, o5] + out_list = [] + for mid, conv in zip(mid_list, self.feat_extract): + out_list.append(conv(mid)) + + return tuple(out_list) + + +@MODELS.register_module() +class MLANeck(nn.Module): + """Multi-level Feature Aggregation. + + This neck is `The Multi-level Feature Aggregation construction of + SETR `_. + + + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale). + norm_layer (dict): Config dict for input normalization. + Default: norm_layer=dict(type='LN', eps=1e-6, requires_grad=True). + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + norm_layer=dict(type='LN', eps=1e-6, requires_grad=True), + norm_cfg=None, + act_cfg=None): + super().__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + + # In order to build general vision transformer backbone, we have to + # move MLA to neck. + self.norm = nn.ModuleList([ + build_norm_layer(norm_layer, in_channels[i])[1] + for i in range(len(in_channels)) + ]) + + self.mla = MLAModule( + in_channels=in_channels, + out_channels=out_channels, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # Convert from nchw to nlc + outs = [] + for i in range(len(inputs)): + x = inputs[i] + n, c, h, w = x.shape + x = x.reshape(n, c, h * w).transpose(2, 1).contiguous() + x = self.norm[i](x) + x = x.transpose(1, 2).reshape(n, c, h, w).contiguous() + outs.append(x) + + outs = self.mla(outs) + return tuple(outs) diff --git a/mmseg/models/necks/multilevel_neck.py b/mmseg/models/necks/multilevel_neck.py new file mode 100644 index 0000000000000000000000000000000000000000..c997125f24791b1c01248c60a27fa37a986c6c82 --- /dev/null +++ b/mmseg/models/necks/multilevel_neck.py @@ -0,0 +1,79 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmengine.model.weight_init import xavier_init + +from mmseg.registry import MODELS +from ..utils import resize + + +@MODELS.register_module() +class MultiLevelNeck(nn.Module): + """MultiLevelNeck. 
+ + A neck structure connect vit backbone and decoder_heads. + + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale). + scales (List[float]): Scale factors for each input feature map. + Default: [0.5, 1, 2, 4] + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + scales=[0.5, 1, 2, 4], + norm_cfg=None, + act_cfg=None): + super().__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.scales = scales + self.num_outs = len(scales) + self.lateral_convs = nn.ModuleList() + self.convs = nn.ModuleList() + for in_channel in in_channels: + self.lateral_convs.append( + ConvModule( + in_channel, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + for _ in range(self.num_outs): + self.convs.append( + ConvModule( + out_channels, + out_channels, + kernel_size=3, + padding=1, + stride=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + inputs = [ + lateral_conv(inputs[i]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + # for len(inputs) not equal to self.num_outs + if len(inputs) == 1: + inputs = [inputs[0] for _ in range(self.num_outs)] + outs = [] + for i in range(self.num_outs): + x_resize = resize( + inputs[i], scale_factor=self.scales[i], mode='bilinear') + outs.append(self.convs[i](x_resize)) + return tuple(outs) diff --git a/mmseg/models/segmentors/__init__.py b/mmseg/models/segmentors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..59b012f41725d26d099b8f890630d1dc04019ba5 --- /dev/null +++ b/mmseg/models/segmentors/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
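A quick shape check for MultiLevelNeck: with the default scales, a single ViT feature map is projected to `out_channels` and resampled by 0.5x/1x/2x/4x (the numbers here are assumptions for demonstration):

```python
import torch

from mmseg.models.necks import MultiLevelNeck

neck = MultiLevelNeck(in_channels=[768], out_channels=256)
outs = neck([torch.rand(1, 768, 32, 32)])
for out in outs:
    print(out.shape)
# (1, 256, 16, 16), (1, 256, 32, 32), (1, 256, 64, 64), (1, 256, 128, 128)
```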
+from .base import BaseSegmentor +from .cascade_encoder_decoder import CascadeEncoderDecoder +from .depth_estimator import DepthEstimator +from .encoder_decoder import EncoderDecoder +from .multimodal_encoder_decoder import MultimodalEncoderDecoder +from .seg_tta import SegTTAModel + +__all__ = [ + 'BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder', 'SegTTAModel', + 'MultimodalEncoderDecoder', 'DepthEstimator' +] diff --git a/mmseg/models/segmentors/__pycache__/__init__.cpython-39.pyc b/mmseg/models/segmentors/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80f8c877235a622b2fc96809491e277e4e7eed7e Binary files /dev/null and b/mmseg/models/segmentors/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/models/segmentors/__pycache__/base.cpython-39.pyc b/mmseg/models/segmentors/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e38c0562f3d28aaebaf4b258d4d51823bd59dbf Binary files /dev/null and b/mmseg/models/segmentors/__pycache__/base.cpython-39.pyc differ diff --git a/mmseg/models/segmentors/__pycache__/cascade_encoder_decoder.cpython-39.pyc b/mmseg/models/segmentors/__pycache__/cascade_encoder_decoder.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1a4d8404e4bc2d08241d83a937240d6c94012d2 Binary files /dev/null and b/mmseg/models/segmentors/__pycache__/cascade_encoder_decoder.cpython-39.pyc differ diff --git a/mmseg/models/segmentors/__pycache__/depth_estimator.cpython-39.pyc b/mmseg/models/segmentors/__pycache__/depth_estimator.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dfeaa0698f0a9975755e5839e411dd15e07b0fd2 Binary files /dev/null and b/mmseg/models/segmentors/__pycache__/depth_estimator.cpython-39.pyc differ diff --git a/mmseg/models/segmentors/__pycache__/encoder_decoder.cpython-39.pyc b/mmseg/models/segmentors/__pycache__/encoder_decoder.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a7dd58363f545551038bc76d9366b9c1f6bb5ed Binary files /dev/null and b/mmseg/models/segmentors/__pycache__/encoder_decoder.cpython-39.pyc differ diff --git a/mmseg/models/segmentors/__pycache__/multimodal_encoder_decoder.cpython-39.pyc b/mmseg/models/segmentors/__pycache__/multimodal_encoder_decoder.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb4d62e05f35a8ce20c545988b42df8afb4d673e Binary files /dev/null and b/mmseg/models/segmentors/__pycache__/multimodal_encoder_decoder.cpython-39.pyc differ diff --git a/mmseg/models/segmentors/__pycache__/seg_tta.cpython-39.pyc b/mmseg/models/segmentors/__pycache__/seg_tta.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac7b73570d09bfa03894b798ec510fa555837afe Binary files /dev/null and b/mmseg/models/segmentors/__pycache__/seg_tta.cpython-39.pyc differ diff --git a/mmseg/models/segmentors/base.py b/mmseg/models/segmentors/base.py new file mode 100644 index 0000000000000000000000000000000000000000..17a0bb2b33e57684bccaaf892af69bcba69dd773 --- /dev/null +++ b/mmseg/models/segmentors/base.py @@ -0,0 +1,200 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
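The segmentors exported here are all registered in MODELS and are normally built from a config dict rather than instantiated directly. A hedged, minimal sketch; the backbone/head choices and all numbers are assumptions for illustration, not taken from this diff:

```python
from mmengine.config import ConfigDict

from mmseg.registry import MODELS

# Hypothetical minimal EncoderDecoder config (standard mmseg components).
cfg = ConfigDict(
    type='EncoderDecoder',
    backbone=dict(type='ResNetV1c', depth=50),
    decode_head=dict(
        type='FCNHead',
        in_channels=2048,
        channels=512,
        num_classes=19,
        loss_decode=dict(type='CrossEntropyLoss', loss_weight=1.0)),
    test_cfg=dict(mode='whole'))
model = MODELS.build(cfg)
```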
+from abc import ABCMeta, abstractmethod
+from typing import List, Tuple
+
+from mmengine.model import BaseModel
+from mmengine.structures import PixelData
+from torch import Tensor
+
+from mmseg.structures import SegDataSample
+from mmseg.utils import (ForwardResults, OptConfigType, OptMultiConfig,
+                         OptSampleList, SampleList)
+from ..utils import resize
+
+
+class BaseSegmentor(BaseModel, metaclass=ABCMeta):
+    """Base class for segmentors.
+
+    Args:
+        data_preprocessor (dict, optional): Model preprocessing config
+            for processing the input data. It usually includes
+            ``to_rgb``, ``pad_size_divisor``, ``pad_val``,
+            ``mean`` and ``std``. Default to None.
+        init_cfg (dict, optional): The config to control the
+            initialization. Default to None.
+    """
+
+    def __init__(self,
+                 data_preprocessor: OptConfigType = None,
+                 init_cfg: OptMultiConfig = None):
+        super().__init__(
+            data_preprocessor=data_preprocessor, init_cfg=init_cfg)
+
+    @property
+    def with_neck(self) -> bool:
+        """bool: whether the segmentor has neck"""
+        return hasattr(self, 'neck') and self.neck is not None
+
+    @property
+    def with_auxiliary_head(self) -> bool:
+        """bool: whether the segmentor has auxiliary head"""
+        return hasattr(self,
+                       'auxiliary_head') and self.auxiliary_head is not None
+
+    @property
+    def with_decode_head(self) -> bool:
+        """bool: whether the segmentor has decode head"""
+        return hasattr(self, 'decode_head') and self.decode_head is not None
+
+    @abstractmethod
+    def extract_feat(self, inputs: Tensor) -> Tensor:
+        """Placeholder to extract features from images."""
+        pass
+
+    @abstractmethod
+    def encode_decode(self, inputs: Tensor, batch_data_samples: SampleList):
+        """Placeholder to encode images with backbone and decode into a
+        semantic segmentation map of the same size as input."""
+        pass
+
+    def forward(self,
+                inputs: Tensor,
+                data_samples: OptSampleList = None,
+                mode: str = 'tensor') -> ForwardResults:
+        """The unified entry for a forward process in both training and test.
+
+        The method should accept three modes: "tensor", "predict" and "loss":
+
+        - "tensor": Forward the whole network and return tensor or tuple of
+          tensor without any post-processing, same as a common nn.Module.
+        - "predict": Forward and return the predictions, which are fully
+          processed to a list of :obj:`SegDataSample`.
+        - "loss": Forward and return a dict of losses according to the given
+          inputs and data samples.
+
+        Note that this method handles neither back propagation nor
+        optimizer updating; these are done in :meth:`train_step`.
+
+        Args:
+            inputs (torch.Tensor): The input tensor with shape (N, C, ...) in
+                general.
+            data_samples (list[:obj:`SegDataSample`]): The seg data samples.
+                It usually includes information such as `metainfo` and
+                `gt_sem_seg`. Default to None.
+            mode (str): Return what kind of value. Defaults to 'tensor'.
+
+        Returns:
+            The return type depends on ``mode``.
+
+            - If ``mode="tensor"``, return a tensor or a tuple of tensor.
+            - If ``mode="predict"``, return a list of :obj:`SegDataSample`.
+            - If ``mode="loss"``, return a dict of tensor.
+        """
+        if mode == 'loss':
+            return self.loss(inputs, data_samples)
+        elif mode == 'predict':
+            return self.predict(inputs, data_samples)
+        elif mode == 'tensor':
+            return self._forward(inputs, data_samples)
+        else:
+            raise RuntimeError(f'Invalid mode "{mode}". 
' + 'Only supports loss, predict and tensor mode') + + @abstractmethod + def loss(self, inputs: Tensor, data_samples: SampleList) -> dict: + """Calculate losses from a batch of inputs and data samples.""" + pass + + @abstractmethod + def predict(self, + inputs: Tensor, + data_samples: OptSampleList = None) -> SampleList: + """Predict results from a batch of inputs and data samples with post- + processing.""" + pass + + @abstractmethod + def _forward(self, + inputs: Tensor, + data_samples: OptSampleList = None) -> Tuple[List[Tensor]]: + """Network forward process. + + Usually includes backbone, neck and head forward without any post- + processing. + """ + pass + + def postprocess_result(self, + seg_logits: Tensor, + data_samples: OptSampleList = None) -> SampleList: + """ Convert results list to `SegDataSample`. + Args: + seg_logits (Tensor): The segmentation results, seg_logits from + model of each input image. + data_samples (list[:obj:`SegDataSample`]): The seg data samples. + It usually includes information such as `metainfo` and + `gt_sem_seg`. Default to None. + Returns: + list[:obj:`SegDataSample`]: Segmentation results of the + input images. Each SegDataSample usually contain: + + - ``pred_sem_seg``(PixelData): Prediction of semantic segmentation. + - ``seg_logits``(PixelData): Predicted logits of semantic + segmentation before normalization. + """ + batch_size, C, H, W = seg_logits.shape + + if data_samples is None: + data_samples = [SegDataSample() for _ in range(batch_size)] + only_prediction = True + else: + only_prediction = False + + for i in range(batch_size): + if not only_prediction: + img_meta = data_samples[i].metainfo + # remove padding area + if 'img_padding_size' not in img_meta: + padding_size = img_meta.get('padding_size', [0] * 4) + else: + padding_size = img_meta['img_padding_size'] + padding_left, padding_right, padding_top, padding_bottom =\ + padding_size + # i_seg_logits shape is 1, C, H, W after remove padding + i_seg_logits = seg_logits[i:i + 1, :, + padding_top:H - padding_bottom, + padding_left:W - padding_right] + + flip = img_meta.get('flip', None) + if flip: + flip_direction = img_meta.get('flip_direction', None) + assert flip_direction in ['horizontal', 'vertical'] + if flip_direction == 'horizontal': + i_seg_logits = i_seg_logits.flip(dims=(3, )) + else: + i_seg_logits = i_seg_logits.flip(dims=(2, )) + + # resize as original shape + i_seg_logits = resize( + i_seg_logits, + size=img_meta['ori_shape'], + mode='bilinear', + align_corners=self.align_corners, + warning=False).squeeze(0) + else: + i_seg_logits = seg_logits[i] + + if C > 1: + i_seg_pred = i_seg_logits.argmax(dim=0, keepdim=True) + else: + i_seg_logits = i_seg_logits.sigmoid() + i_seg_pred = (i_seg_logits > + self.decode_head.threshold).to(i_seg_logits) + data_samples[i].set_data({ + 'seg_logits': + PixelData(**{'data': i_seg_logits}), + 'pred_sem_seg': + PixelData(**{'data': i_seg_pred}) + }) + + return data_samples diff --git a/mmseg/models/segmentors/cascade_encoder_decoder.py b/mmseg/models/segmentors/cascade_encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..0184a3533a18cbe96a28bbb645c3e73bbffcdeee --- /dev/null +++ b/mmseg/models/segmentors/cascade_encoder_decoder.py @@ -0,0 +1,138 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
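With a model built as in the earlier sketch, the three forward modes of BaseSegmentor dispatch to loss/predict/_forward, and postprocess_result fills in pred_sem_seg. A hedged end-to-end example; the shapes, label range, and metainfo values are assumptions:

```python
import torch
from mmengine.structures import PixelData

from mmseg.structures import SegDataSample

inputs = torch.rand(2, 3, 64, 64)
data_samples = []
for _ in range(2):
    ds = SegDataSample()
    ds.gt_sem_seg = PixelData(data=torch.randint(0, 19, (1, 64, 64)))
    ds.set_metainfo(dict(ori_shape=(64, 64), img_shape=(64, 64),
                         padding_size=[0, 0, 0, 0]))
    data_samples.append(ds)

logits = model(inputs, mode='tensor')                # raw decode-head output
losses = model(inputs, data_samples, mode='loss')    # dict of loss tensors
preds = model(inputs, data_samples, mode='predict')  # list[SegDataSample]
print(preds[0].pred_sem_seg.data.shape)              # torch.Size([1, 64, 64])
```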
+from typing import List, Optional
+
+from torch import Tensor, nn
+
+from mmseg.registry import MODELS
+from mmseg.utils import (ConfigType, OptConfigType, OptMultiConfig,
+                         OptSampleList, SampleList, add_prefix)
+from .encoder_decoder import EncoderDecoder
+
+
+@MODELS.register_module()
+class CascadeEncoderDecoder(EncoderDecoder):
+    """Cascade Encoder Decoder segmentors.
+
+    CascadeEncoderDecoder is almost the same as EncoderDecoder, except that
+    its decode heads are cascaded: the output of the previous decode_head
+    is the input of the next decode_head.
+
+    Args:
+
+        num_stages (int): How many stages will be cascaded.
+        backbone (ConfigType): The config for the backbone of segmentor.
+        decode_head (ConfigType): The config for the decode head of segmentor.
+        neck (OptConfigType): The config for the neck of segmentor.
+            Defaults to None.
+        auxiliary_head (OptConfigType): The config for the auxiliary head of
+            segmentor. Defaults to None.
+        train_cfg (OptConfigType): The config for training. Defaults to None.
+        test_cfg (OptConfigType): The config for testing. Defaults to None.
+        data_preprocessor (dict, optional): The pre-process config of
+            :class:`BaseDataPreprocessor`.
+        pretrained (str, optional): The path for pretrained model.
+            Defaults to None.
+        init_cfg (dict, optional): The weight initialized config for
+            :class:`BaseModule`.
+    """
+
+    def __init__(self,
+                 num_stages: int,
+                 backbone: ConfigType,
+                 decode_head: ConfigType,
+                 neck: OptConfigType = None,
+                 auxiliary_head: OptConfigType = None,
+                 train_cfg: OptConfigType = None,
+                 test_cfg: OptConfigType = None,
+                 data_preprocessor: OptConfigType = None,
+                 pretrained: Optional[str] = None,
+                 init_cfg: OptMultiConfig = None):
+        self.num_stages = num_stages
+        super().__init__(
+            backbone=backbone,
+            decode_head=decode_head,
+            neck=neck,
+            auxiliary_head=auxiliary_head,
+            train_cfg=train_cfg,
+            test_cfg=test_cfg,
+            data_preprocessor=data_preprocessor,
+            pretrained=pretrained,
+            init_cfg=init_cfg)
+
+    def _init_decode_head(self, decode_head: ConfigType) -> None:
+        """Initialize ``decode_head``"""
+        assert isinstance(decode_head, list)
+        assert len(decode_head) == self.num_stages
+        self.decode_head = nn.ModuleList()
+        for i in range(self.num_stages):
+            self.decode_head.append(MODELS.build(decode_head[i]))
+        self.align_corners = self.decode_head[-1].align_corners
+        self.num_classes = self.decode_head[-1].num_classes
+        self.out_channels = self.decode_head[-1].out_channels
+
+    def encode_decode(self, inputs: Tensor,
+                      batch_img_metas: List[dict]) -> Tensor:
+        """Encode images with backbone and decode into a semantic segmentation
+        map of the same size as input."""
+        x = self.extract_feat(inputs)
+        out = self.decode_head[0].forward(x)
+        for i in range(1, self.num_stages - 1):
+            out = self.decode_head[i].forward(x, out)
+        seg_logits_list = self.decode_head[-1].predict(x, out, batch_img_metas,
+                                                       self.test_cfg)
+
+        return seg_logits_list
+
+    def _decode_head_forward_train(self, inputs: Tensor,
+                                   data_samples: SampleList) -> dict:
+        """Run forward function and calculate loss for decode head in
+        training."""
+        losses = dict()
+
+        loss_decode = self.decode_head[0].loss(inputs, data_samples,
+                                               self.train_cfg)
+
+        losses.update(add_prefix(loss_decode, 'decode_0'))
+        # get batch_img_metas
+        batch_size = len(data_samples)
+        batch_img_metas = []
+        for batch_index in range(batch_size):
+            metainfo = data_samples[batch_index].metainfo
+            batch_img_metas.append(metainfo)
+
+        for i in range(1, self.num_stages):
+            # forward test again, maybe unnecessary for most methods.
+            if i == 1:
+                prev_outputs = self.decode_head[0].forward(inputs)
+            else:
+                prev_outputs = self.decode_head[i - 1].forward(
+                    inputs, prev_outputs)
+            loss_decode = self.decode_head[i].loss(inputs, prev_outputs,
+                                                   data_samples,
+                                                   self.train_cfg)
+            losses.update(add_prefix(loss_decode, f'decode_{i}'))
+
+        return losses
+
+    def _forward(self,
+                 inputs: Tensor,
+                 data_samples: OptSampleList = None) -> Tensor:
+        """Network forward process.
+
+        Args:
+            inputs (Tensor): Inputs with shape (N, C, H, W).
+            data_samples (List[:obj:`SegDataSample`]): The seg data samples.
+                It usually includes information such as `metainfo` and
+                `gt_semantic_seg`.
+
+        Returns:
+            Tensor: Forward output of model without any post-processes.
+        """
+        x = self.extract_feat(inputs)
+
+        out = self.decode_head[0].forward(x)
+        for i in range(1, self.num_stages):
+            # TODO support PointRend tensor mode
+            out = self.decode_head[i].forward(x, out)
+
+        return out
diff --git a/mmseg/models/segmentors/depth_estimator.py b/mmseg/models/segmentors/depth_estimator.py
new file mode 100644
index 0000000000000000000000000000000000000000..1020637e737a3c72ba6a48f2d1228717470ba862
--- /dev/null
+++ b/mmseg/models/segmentors/depth_estimator.py
@@ -0,0 +1,392 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+from typing import List, Optional
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmengine.logging import print_log
+from mmengine.structures import PixelData
+from torch import Tensor
+
+from mmseg.registry import MODELS
+from mmseg.structures import SegDataSample
+from mmseg.utils import (ConfigType, OptConfigType, OptMultiConfig,
+                         OptSampleList, SampleList, add_prefix)
+from ..utils import resize
+from .encoder_decoder import EncoderDecoder
+
+
+@MODELS.register_module()
+class DepthEstimator(EncoderDecoder):
+    """Encoder Decoder depth estimator.
+
+    EncoderDecoder typically consists of backbone, decode_head, auxiliary_head.
+    Note that auxiliary_head is only used for deep supervision during training,
+    and can be discarded during inference.
+
+    1. The ``loss`` method is used to calculate the loss of the model,
+    which includes two steps: (1) Extracts features to obtain the feature maps
+    (2) Call the decode head loss function to forward decode head model and
+    calculate losses.
+
+    .. code:: text
+
+        loss(): extract_feat() -> _decode_head_forward_train() -> _auxiliary_head_forward_train (optional)
+        _decode_head_forward_train(): decode_head.loss()
+        _auxiliary_head_forward_train(): auxiliary_head.loss (optional)
+
+    2. The ``predict`` method is used to predict depth estimation results,
+    which includes two steps: (1) Run inference function to obtain the list of
+    depth (2) Call post-processing function to obtain list of
+    ``SegDataSample`` including ``pred_depth_map``.
+
+    .. code:: text
+
+        predict(): inference() -> postprocess_result()
+        inference(): whole_inference()/slide_inference()
+        whole_inference()/slide_inference(): encode_decode()
+        encode_decode(): extract_feat() -> decode_head.predict()
+
+    3. The ``_forward`` method is used to output the tensor by running the model,
+    which includes two steps: (1) Extracts features to obtain the feature maps
+    (2) Call the decode head forward function to forward decode head model.
+
+    .. code:: text
+
+        _forward(): extract_feat() -> _decode_head.forward()
+
+    Args:
+
+        backbone (ConfigType): The config for the backbone of depth estimator.
+ decode_head (ConfigType): The config for the decode head of depth estimator. + neck (OptConfigType): The config for the neck of depth estimator. + Defaults to None. + auxiliary_head (OptConfigType): The config for the auxiliary head of + depth estimator. Defaults to None. + train_cfg (OptConfigType): The config for training. Defaults to None. + test_cfg (OptConfigType): The config for testing. Defaults to None. + data_preprocessor (dict, optional): The pre-process config of + :class:`BaseDataPreprocessor`. + pretrained (str, optional): The path for pretrained model. + Defaults to None. + init_cfg (dict, optional): The weight initialized config for + :class:`BaseModule`. + """ # noqa: E501 + + def __init__(self, + backbone: ConfigType, + decode_head: ConfigType, + neck: OptConfigType = None, + auxiliary_head: OptConfigType = None, + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + data_preprocessor: OptConfigType = None, + pretrained: Optional[str] = None, + init_cfg: OptMultiConfig = None): + super().__init__( + backbone=backbone, + decode_head=decode_head, + neck=neck, + auxiliary_head=auxiliary_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + data_preprocessor=data_preprocessor, + pretrained=pretrained, + init_cfg=init_cfg) + + def extract_feat(self, + inputs: Tensor, + batch_img_metas: Optional[List[dict]] = None) -> Tensor: + """Extract features from images.""" + + if getattr(self.backbone, 'class_embed_select', False) and \ + isinstance(batch_img_metas, list) and \ + 'category_id' in batch_img_metas[0]: + cat_ids = [meta['category_id'] for meta in batch_img_metas] + cat_ids = torch.tensor(cat_ids).to(inputs.device) + inputs = (inputs, cat_ids) + + x = self.backbone(inputs) + if self.with_neck: + x = self.neck(x) + return x + + def encode_decode(self, inputs: Tensor, + batch_img_metas: List[dict]) -> Tensor: + """Encode images with backbone and decode into a depth map of the same + size as input.""" + x = self.extract_feat(inputs, batch_img_metas) + depth = self.decode_head.predict(x, batch_img_metas, self.test_cfg) + + return depth + + def _decode_head_forward_train(self, inputs: List[Tensor], + data_samples: SampleList) -> dict: + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + loss_decode = self.decode_head.loss(inputs, data_samples, + self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode')) + return losses + + def _auxiliary_head_forward_train(self, inputs: List[Tensor], + data_samples: SampleList) -> dict: + """Run forward function and calculate loss for auxiliary head in + training.""" + losses = dict() + if isinstance(self.auxiliary_head, nn.ModuleList): + for idx, aux_head in enumerate(self.auxiliary_head): + loss_aux = aux_head.loss(inputs, data_samples, self.train_cfg) + losses.update(add_prefix(loss_aux, f'aux_{idx}')) + else: + loss_aux = self.auxiliary_head.loss(inputs, data_samples, + self.train_cfg) + losses.update(add_prefix(loss_aux, 'aux')) + + return losses + + def loss(self, inputs: Tensor, data_samples: SampleList) -> dict: + """Calculate losses from a batch of inputs and data samples. + + Args: + inputs (Tensor): Input images. + data_samples (list[:obj:`SegDataSample`]): The seg data samples. + It usually includes information such as `metainfo` and + `gt_depth_map`. 
+ + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + if data_samples is not None: + batch_img_metas = [ + data_sample.metainfo for data_sample in data_samples + ] + else: + batch_img_metas = [ + dict( + ori_shape=inputs.shape[2:], + img_shape=inputs.shape[2:], + pad_shape=inputs.shape[2:], + padding_size=[0, 0, 0, 0]) + ] * inputs.shape[0] + + x = self.extract_feat(inputs, batch_img_metas) + + losses = dict() + + loss_decode = self._decode_head_forward_train(x, data_samples) + losses.update(loss_decode) + + if self.with_auxiliary_head: + loss_aux = self._auxiliary_head_forward_train(x, data_samples) + losses.update(loss_aux) + + return losses + + def predict(self, + inputs: Tensor, + data_samples: OptSampleList = None) -> SampleList: + """Predict results from a batch of inputs and data samples with post- + processing. + + Args: + inputs (Tensor): Inputs with shape (N, C, H, W). + data_samples (List[:obj:`SegDataSample`], optional): The seg data + samples. It usually includes information such as `metainfo` + and `gt_depth_map`. + + Returns: + list[:obj:`SegDataSample`]: Depth estimation results of the + input images. Each SegDataSample usually contain: + + - ``pred_depth_map``(PixelData): Prediction of depth estimation. + """ + if data_samples is not None: + batch_img_metas = [ + data_sample.metainfo for data_sample in data_samples + ] + else: + batch_img_metas = [ + dict( + ori_shape=inputs.shape[2:], + img_shape=inputs.shape[2:], + pad_shape=inputs.shape[2:], + padding_size=[0, 0, 0, 0]) + ] * inputs.shape[0] + + depth = self.inference(inputs, batch_img_metas) + + return self.postprocess_result(depth, data_samples) + + def _forward(self, + inputs: Tensor, + data_samples: OptSampleList = None) -> Tensor: + """Network forward process. + + Args: + inputs (Tensor): Inputs with shape (N, C, H, W). + data_samples (List[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `metainfo` and `gt_depth_map`. + + Returns: + Tensor: Forward output of model without any post-processes. + """ + x = self.extract_feat(inputs) + return self.decode_head.forward(x) + + def slide_flip_inference(self, inputs: Tensor, + batch_img_metas: List[dict]) -> Tensor: + """Inference by sliding-window with overlap and flip. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. + + Args: + inputs (tensor): the tensor should have a shape NxCxHxW, + which contains all images in the batch. + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The depth estimation results.
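+ + Example (an illustrative sketch, not part of the original file; assumes ``model`` is a built DepthEstimator whose ``test_cfg`` provides ``crop_size`` and ``stride``, e.g. dict(mode='slide_flip', crop_size=(512, 512), stride=(341, 341))): + + >>> depth = model.slide_flip_inference(inputs, batch_img_metas) + >>> depth.shape[-2:] == inputs.shape[-2:] + True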
+ """ + + h_stride, w_stride = self.test_cfg.stride + h_crop, w_crop = self.test_cfg.crop_size + batch_size, _, h_img, w_img = inputs.size() + out_channels = self.out_channels + h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 + w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 + preds = inputs.new_zeros((batch_size, out_channels, h_img, w_img)) + count_mat = inputs.new_zeros((batch_size, 1, h_img, w_img)) + for h_idx in range(h_grids): + for w_idx in range(w_grids): + y1 = h_idx * h_stride + x1 = w_idx * w_stride + y2 = min(y1 + h_crop, h_img) + x2 = min(x1 + w_crop, w_img) + y1 = max(y2 - h_crop, 0) + x1 = max(x2 - w_crop, 0) + crop_img = inputs[:, :, y1:y2, x1:x2] + # change the image shape to patch shape + batch_img_metas[0]['img_shape'] = crop_img.shape[2:] + # the output of encode_decode is depth tensor map + # with shape [N, C, H, W] + crop_depth_map = self.encode_decode(crop_img, batch_img_metas) + + # average out the original and flipped prediction + crop_depth_map_flip = self.encode_decode( + crop_img.flip(dims=(3, )), batch_img_metas) + crop_depth_map_flip = crop_depth_map_flip.flip(dims=(3, )) + crop_depth_map = (crop_depth_map + crop_depth_map_flip) / 2.0 + + preds += F.pad(crop_depth_map, + (int(x1), int(preds.shape[3] - x2), int(y1), + int(preds.shape[2] - y2))) + + count_mat[:, :, y1:y2, x1:x2] += 1 + assert (count_mat == 0).sum() == 0 + depth = preds / count_mat + + return depth + + def inference(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor: + """Inference with slide/whole style. + + Args: + inputs (Tensor): The input image of shape (N, 3, H, W). + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', 'pad_shape', and 'padding_size'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The depth estimation results. + """ + assert self.test_cfg.get('mode', 'whole') in ['slide', 'whole', + 'slide_flip'], \ + f'Only "slide", "slide_flip" or "whole" test mode are ' \ + f'supported, but got {self.test_cfg["mode"]}.' + ori_shape = batch_img_metas[0]['ori_shape'] + if not all(_['ori_shape'] == ori_shape for _ in batch_img_metas): + print_log( + 'Image shapes are different in the batch.', + logger='current', + level=logging.WARN) + if self.test_cfg.mode == 'slide': + depth_map = self.slide_inference(inputs, batch_img_metas) + if self.test_cfg.mode == 'slide_flip': + depth_map = self.slide_flip_inference(inputs, batch_img_metas) + else: + depth_map = self.whole_inference(inputs, batch_img_metas) + + return depth_map + + def postprocess_result(self, + depth: Tensor, + data_samples: OptSampleList = None) -> SampleList: + """ Convert results list to `SegDataSample`. + Args: + depth (Tensor): The depth estimation results. + data_samples (list[:obj:`SegDataSample`]): The seg data samples. + It usually includes information such as `metainfo` and + `gt_depth_map`. Default to None. + Returns: + list[:obj:`SegDataSample`]: Depth estomation results of the + input images. Each SegDataSample usually contain: + + - ``pred_depth_map``(PixelData): Prediction of depth estimation. 
+ """ + batch_size, C, H, W = depth.shape + + if data_samples is None: + data_samples = [SegDataSample() for _ in range(batch_size)] + only_prediction = True + else: + only_prediction = False + + for i in range(batch_size): + if not only_prediction: + img_meta = data_samples[i].metainfo + # remove padding area + if 'img_padding_size' not in img_meta: + padding_size = img_meta.get('padding_size', [0] * 4) + else: + padding_size = img_meta['img_padding_size'] + padding_left, padding_right, padding_top, padding_bottom =\ + padding_size + # i_depth shape is 1, C, H, W after remove padding + i_depth = depth[i:i + 1, :, padding_top:H - padding_bottom, + padding_left:W - padding_right] + + flip = img_meta.get('flip', None) + if flip: + flip_direction = img_meta.get('flip_direction', None) + assert flip_direction in ['horizontal', 'vertical'] + if flip_direction == 'horizontal': + i_depth = i_depth.flip(dims=(3, )) + else: + i_depth = i_depth.flip(dims=(2, )) + + # resize as original shape + i_depth = resize( + i_depth, + size=img_meta['ori_shape'], + mode='bilinear', + align_corners=self.align_corners, + warning=False).squeeze(0) + else: + i_depth = depth[i] + + data_samples[i].set_data( + {'pred_depth_map': PixelData(**{'data': i_depth})}) + + return data_samples diff --git a/mmseg/models/segmentors/encoder_decoder.py b/mmseg/models/segmentors/encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..fa4050e0b736f98c17629a93e2f70be1d7e84fbb --- /dev/null +++ b/mmseg/models/segmentors/encoder_decoder.py @@ -0,0 +1,364 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging +from typing import List, Optional + +import torch.nn as nn +import torch.nn.functional as F +from mmengine.logging import print_log +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.utils import (ConfigType, OptConfigType, OptMultiConfig, + OptSampleList, SampleList, add_prefix) +from .base import BaseSegmentor + + +@MODELS.register_module() +class EncoderDecoder(BaseSegmentor): + """Encoder Decoder segmentors. + + EncoderDecoder typically consists of backbone, decode_head, auxiliary_head. + Note that auxiliary_head is only used for deep supervision during training, + which could be dumped during inference. + + 1. The ``loss`` method is used to calculate the loss of model, + which includes two steps: (1) Extracts features to obtain the feature maps + (2) Call the decode head loss function to forward decode head model and + calculate losses. + + .. code:: text + + loss(): extract_feat() -> _decode_head_forward_train() -> _auxiliary_head_forward_train (optional) + _decode_head_forward_train(): decode_head.loss() + _auxiliary_head_forward_train(): auxiliary_head.loss (optional) + + 2. The ``predict`` method is used to predict segmentation results, + which includes two steps: (1) Run inference function to obtain the list of + seg_logits (2) Call post-processing function to obtain list of + ``SegDataSample`` including ``pred_sem_seg`` and ``seg_logits``. + + .. code:: text + + predict(): inference() -> postprocess_result() + infercen(): whole_inference()/slide_inference() + whole_inference()/slide_inference(): encoder_decoder() + encoder_decoder(): extract_feat() -> decode_head.predict() + + 3. The ``_forward`` method is used to output the tensor by running the model, + which includes two steps: (1) Extracts features to obtain the feature maps + (2)Call the decode head forward function to forward decode head model. + + .. 
code:: text + + _forward(): extract_feat() -> _decode_head.forward() + + Args: + + backbone (ConfigType): The config for the backbone of segmentor. + decode_head (ConfigType): The config for the decode head of segmentor. + neck (OptConfigType): The config for the neck of segmentor. + Defaults to None. + auxiliary_head (OptConfigType): The config for the auxiliary head of + segmentor. Defaults to None. + train_cfg (OptConfigType): The config for training. Defaults to None. + test_cfg (OptConfigType): The config for testing. Defaults to None. + data_preprocessor (dict, optional): The pre-process config of + :class:`BaseDataPreprocessor`. + pretrained (str, optional): The path for pretrained model. + Defaults to None. + init_cfg (dict, optional): The weight initialized config for + :class:`BaseModule`. + """ # noqa: E501 + + def __init__(self, + backbone: ConfigType, + decode_head: ConfigType, + neck: OptConfigType = None, + auxiliary_head: OptConfigType = None, + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + data_preprocessor: OptConfigType = None, + pretrained: Optional[str] = None, + init_cfg: OptMultiConfig = None): + super().__init__( + data_preprocessor=data_preprocessor, init_cfg=init_cfg) + if pretrained is not None: + assert backbone.get('pretrained') is None, \ + 'both backbone and segmentor set pretrained weight' + backbone.pretrained = pretrained + self.backbone = MODELS.build(backbone) + if neck is not None: + self.neck = MODELS.build(neck) + self._init_decode_head(decode_head) + self._init_auxiliary_head(auxiliary_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + assert self.with_decode_head + + def _init_decode_head(self, decode_head: ConfigType) -> None: + """Initialize ``decode_head``""" + self.decode_head = MODELS.build(decode_head) + self.align_corners = self.decode_head.align_corners + self.num_classes = self.decode_head.num_classes + self.out_channels = self.decode_head.out_channels + + def _init_auxiliary_head(self, auxiliary_head: ConfigType) -> None: + """Initialize ``auxiliary_head``""" + if auxiliary_head is not None: + if isinstance(auxiliary_head, list): + self.auxiliary_head = nn.ModuleList() + for head_cfg in auxiliary_head: + self.auxiliary_head.append(MODELS.build(head_cfg)) + else: + self.auxiliary_head = MODELS.build(auxiliary_head) + + def extract_feat(self, inputs: Tensor) -> List[Tensor]: + """Extract features from images.""" + x = self.backbone(inputs) + if self.with_neck: + x = self.neck(x) + return x + + def encode_decode(self, inputs: Tensor, + batch_img_metas: List[dict]) -> Tensor: + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(inputs) + seg_logits = self.decode_head.predict(x, batch_img_metas, + self.test_cfg) + + return seg_logits + + def _decode_head_forward_train(self, inputs: List[Tensor], + data_samples: SampleList) -> dict: + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + loss_decode = self.decode_head.loss(inputs, data_samples, + self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode')) + return losses + + def _auxiliary_head_forward_train(self, inputs: List[Tensor], + data_samples: SampleList) -> dict: + """Run forward function and calculate loss for auxiliary head in + training.""" + losses = dict() + if isinstance(self.auxiliary_head, nn.ModuleList): + for idx, aux_head in enumerate(self.auxiliary_head): + loss_aux = aux_head.loss(inputs, data_samples,
self.train_cfg) + losses.update(add_prefix(loss_aux, f'aux_{idx}')) + else: + loss_aux = self.auxiliary_head.loss(inputs, data_samples, + self.train_cfg) + losses.update(add_prefix(loss_aux, 'aux')) + + return losses + + def loss(self, inputs: Tensor, data_samples: SampleList) -> dict: + """Calculate losses from a batch of inputs and data samples. + + Args: + inputs (Tensor): Input images. + data_samples (list[:obj:`SegDataSample`]): The seg data samples. + It usually includes information such as `metainfo` and + `gt_sem_seg`. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + + x = self.extract_feat(inputs) + + losses = dict() + + loss_decode = self._decode_head_forward_train(x, data_samples) + losses.update(loss_decode) + + if self.with_auxiliary_head: + loss_aux = self._auxiliary_head_forward_train(x, data_samples) + losses.update(loss_aux) + + return losses + + def predict(self, + inputs: Tensor, + data_samples: OptSampleList = None) -> SampleList: + """Predict results from a batch of inputs and data samples with post- + processing. + + Args: + inputs (Tensor): Inputs with shape (N, C, H, W). + data_samples (List[:obj:`SegDataSample`], optional): The seg data + samples. It usually includes information such as `metainfo` + and `gt_sem_seg`. + + Returns: + list[:obj:`SegDataSample`]: Segmentation results of the + input images. Each SegDataSample usually contain: + + - ``pred_sem_seg``(PixelData): Prediction of semantic segmentation. + - ``seg_logits``(PixelData): Predicted logits of semantic + segmentation before normalization. + """ + if data_samples is not None: + batch_img_metas = [ + data_sample.metainfo for data_sample in data_samples + ] + else: + batch_img_metas = [ + dict( + ori_shape=inputs.shape[2:], + img_shape=inputs.shape[2:], + pad_shape=inputs.shape[2:], + padding_size=[0, 0, 0, 0]) + ] * inputs.shape[0] + + seg_logits = self.inference(inputs, batch_img_metas) + + return self.postprocess_result(seg_logits, data_samples) + + def _forward(self, + inputs: Tensor, + data_samples: OptSampleList = None) -> Tensor: + """Network forward process. + + Args: + inputs (Tensor): Inputs with shape (N, C, H, W). + data_samples (List[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `metainfo` and `gt_sem_seg`. + + Returns: + Tensor: Forward output of model without any post-processes. + """ + x = self.extract_feat(inputs) + return self.decode_head.forward(x) + + def slide_inference(self, inputs: Tensor, + batch_img_metas: List[dict]) -> Tensor: + """Inference by sliding-window with overlap. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. + + Args: + inputs (tensor): the tensor should have a shape NxCxHxW, + which contains all images in the batch. + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The segmentation results, seg_logits from model of each + input image. 
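+ + Example (a worked instance of the grid arithmetic used below; the sizes h_img=512, h_crop=256 and h_stride=171 are assumptions for illustration): + + >>> max(512 - 256 + 171 - 1, 0) // 171 + 1 # h_grids + 3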
+ """ + + h_stride, w_stride = self.test_cfg.stride + h_crop, w_crop = self.test_cfg.crop_size + batch_size, _, h_img, w_img = inputs.size() + out_channels = self.out_channels + h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 + w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 + preds = inputs.new_zeros((batch_size, out_channels, h_img, w_img)) + count_mat = inputs.new_zeros((batch_size, 1, h_img, w_img)) + for h_idx in range(h_grids): + for w_idx in range(w_grids): + y1 = h_idx * h_stride + x1 = w_idx * w_stride + y2 = min(y1 + h_crop, h_img) + x2 = min(x1 + w_crop, w_img) + y1 = max(y2 - h_crop, 0) + x1 = max(x2 - w_crop, 0) + crop_img = inputs[:, :, y1:y2, x1:x2] + # change the image shape to patch shape + batch_img_metas[0]['img_shape'] = crop_img.shape[2:] + # the output of encode_decode is seg logits tensor map + # with shape [N, C, H, W] + crop_seg_logit = self.encode_decode(crop_img, batch_img_metas) + preds += F.pad(crop_seg_logit, + (int(x1), int(preds.shape[3] - x2), int(y1), + int(preds.shape[2] - y2))) + + count_mat[:, :, y1:y2, x1:x2] += 1 + assert (count_mat == 0).sum() == 0 + seg_logits = preds / count_mat + + return seg_logits + + def whole_inference(self, inputs: Tensor, + batch_img_metas: List[dict]) -> Tensor: + """Inference with full image. + + Args: + inputs (Tensor): The tensor should have a shape NxCxHxW, which + contains all images in the batch. + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The segmentation results, seg_logits from model of each + input image. + """ + + seg_logits = self.encode_decode(inputs, batch_img_metas) + + return seg_logits + + def inference(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor: + """Inference with slide/whole style. + + Args: + inputs (Tensor): The input image of shape (N, 3, H, W). + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', 'pad_shape', and 'padding_size'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The segmentation results, seg_logits from model of each + input image. + """ + assert self.test_cfg.get('mode', 'whole') in ['slide', 'whole'], \ + f'Only "slide" or "whole" test mode are supported, but got ' \ + f'{self.test_cfg["mode"]}.' + ori_shape = batch_img_metas[0]['ori_shape'] + if not all(_['ori_shape'] == ori_shape for _ in batch_img_metas): + print_log( + 'Image shapes are different in the batch.', + logger='current', + level=logging.WARN) + if self.test_cfg.mode == 'slide': + seg_logit = self.slide_inference(inputs, batch_img_metas) + else: + seg_logit = self.whole_inference(inputs, batch_img_metas) + + return seg_logit + + def aug_test(self, inputs, batch_img_metas, rescale=True): + """Test with augmentations. + + Only rescale=True is supported. 
+ """ + # aug_test rescale all imgs back to ori_shape for now + assert rescale + # to save memory, we get augmented seg logit inplace + seg_logit = self.inference(inputs[0], batch_img_metas[0], rescale) + for i in range(1, len(inputs)): + cur_seg_logit = self.inference(inputs[i], batch_img_metas[i], + rescale) + seg_logit += cur_seg_logit + seg_logit /= len(inputs) + seg_pred = seg_logit.argmax(dim=1) + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred diff --git a/mmseg/models/segmentors/multimodal_encoder_decoder.py b/mmseg/models/segmentors/multimodal_encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..75aa8b9b17688cb5f54da08f9300af82b3339967 --- /dev/null +++ b/mmseg/models/segmentors/multimodal_encoder_decoder.py @@ -0,0 +1,350 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Optional + +import torch.nn.functional as F +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.utils import (ConfigType, OptConfigType, OptMultiConfig, + OptSampleList, SampleList, add_prefix) +from .base import BaseSegmentor + + +@MODELS.register_module() +class MultimodalEncoderDecoder(BaseSegmentor): + """Multimodal Encoder-Decoder segmentors. + + Multimodal segmentation architecture is used for open-vocabulary + semantic segmentation with combining the visual and language + pretrain models. It consists of a image_encoder (backbone) to extract + visual feature, a text encoder to extract text feature, and a decode + head to generate semantic maps. + Note that the deep supervision during training is implemented in decode head. + + 1. The ``loss`` method is used to calculate the loss of model, + which includes two steps: (1) Extracts features to obtain the feature maps + (2) Call the decode head loss function to forward decode head model and + calculate losses. + + .. code:: text + + loss(): extract_feat() -> _decode_head_forward_train() + _decode_head_forward_train(): decode_head.loss() + + 2. The ``predict`` method is used to predict segmentation results, + which includes two steps: (1) Run inference function to obtain the list of + seg_logits (2) Call post-processing function to obtain list of + ``SegDataSampel`` including ``pred_sem_seg`` and ``seg_logits``. + + .. code:: text + + predict(): inference() -> postprocess_result() + inference(): whole_inference()/slide_inference() + whole_inference()/slide_inference(): encoder_decoder() + encoder_decoder(): extract_feat() -> decode_head.predict() + + 3. The ``_forward`` method is used to output the tensor by running the model, + which includes two steps: (1) Extracts features to obtain the feature maps + (2)Call the decode head forward function to forward decode head model. + + .. code:: text + + _forward(): extract_feat() -> _decode_head.forward() + + Args: + + image_encoder (ConfigType): The config for the visual encoder of segmentor. + text_encoder ((ConfigType): The config for the text encoder of segmentor. + decode_head (ConfigType): The config for the decode head of segmentor. + train_cfg (OptConfigType): The config for training. Defaults to None. + test_cfg (OptConfigType): The config for testing. Defaults to None. + data_preprocessor (dict, optional): The pre-process config of + :class:`BaseDataPreprocessor`. + pretrained (str, optional): The path for pretrained model. + Defaults to None. + asymetric_input (bool): whether to use different size of input for image encoder + and decode head. Defaults to False. 
+ encoder_resolution (float): resize scale of input images for image encoder. + Defaults to None. + init_cfg (dict, optional): The weight initialized config for + :class:`BaseModule`. + """ # noqa: E501 + + def __init__(self, + image_encoder: ConfigType, + text_encoder: ConfigType, + decode_head: ConfigType, + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + data_preprocessor: OptConfigType = None, + pretrained: Optional[str] = None, + asymetric_input: bool = True, + encoder_resolution: Optional[float] = None, + init_cfg: OptMultiConfig = None): + super().__init__( + data_preprocessor=data_preprocessor, init_cfg=init_cfg) + if pretrained is not None: + image_encoder.init_cfg = dict( + type='Pretrained_Part', checkpoint=pretrained) + text_encoder.init_cfg = dict( + type='Pretrained_Part', checkpoint=pretrained) + decode_head.init_cfg = dict( + type='Pretrained_Part', checkpoint=pretrained) + + if asymetric_input: + assert encoder_resolution is not None, \ + 'if asymetric_input is set True, ' \ + 'encoder_resolution must be a certain value' + self.asymetric_input = asymetric_input + self.encoder_resolution = encoder_resolution + self.image_encoder = MODELS.build(image_encoder) + self.text_encoder = MODELS.build(text_encoder) + self._init_decode_head(decode_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + assert self.with_decode_head + + def _init_decode_head(self, decode_head: ConfigType) -> None: + """Initialize ``decode_head``""" + self.decode_head = MODELS.build(decode_head) + self.align_corners = self.decode_head.align_corners + self.num_classes = self.decode_head.num_classes + self.out_channels = self.decode_head.out_channels + + def extract_feat(self, inputs: Tensor) -> List[Tensor]: + """Extract visual features from images.""" + x = self.image_encoder(inputs) + return x + + def encode_decode(self, inputs: Tensor, + batch_img_metas: List[dict]) -> Tensor: + """Encode the name of classes with text_encoder and encode images with + image_encoder. + + Then decode the class embedding and visual feature into a semantic + segmentation map of the same size as input. + """ + classifier_embeds = self.text_encoder() + clip_inputs = inputs + if self.asymetric_input: + clip_inputs = F.interpolate( + inputs, scale_factor=self.encoder_resolution, mode='bilinear') + x = self.image_encoder(clip_inputs) + seg_logits = self.decode_head.predict([inputs, x, classifier_embeds], + batch_img_metas, self.test_cfg) + + return seg_logits + + def _decode_head_forward_train(self, inputs: List[Tensor], + data_samples: SampleList) -> dict: + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + loss_decode = self.decode_head.loss(inputs, data_samples, + self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode')) + return losses + + def loss(self, inputs: Tensor, data_samples: SampleList) -> dict: + """Calculate losses from a batch of inputs and data samples. + + Args: + inputs (Tensor): Input images. + data_samples (list[:obj:`SegDataSample`]): The seg data samples. + It usually includes information such as `metainfo` and + `gt_sem_seg`.
+ + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + classifier_embeds = self.text_encoder() + clip_inputs = inputs + if self.asymetric_input: + clip_inputs = F.interpolate( + inputs, scale_factor=self.encoder_resolution, mode='bilinear') + x = self.image_encoder(clip_inputs) + + losses = dict() + + loss_decode = self._decode_head_forward_train( + [inputs, x, classifier_embeds], data_samples) + losses.update(loss_decode) + + return losses + + def predict(self, + inputs: Tensor, + data_samples: OptSampleList = None) -> SampleList: + """Predict results from a batch of inputs and data samples with post- + processing. + + Args: + inputs (Tensor): Inputs with shape (N, C, H, W). + data_samples (List[:obj:`SegDataSample`], optional): The seg data + samples. It usually includes information such as `metainfo` + and `gt_sem_seg`. + + Returns: + list[:obj:`SegDataSample`]: Segmentation results of the + input images. Each SegDataSample usually contain: + + - ``pred_sem_seg``(PixelData): Prediction of semantic segmentation. + - ``seg_logits``(PixelData): Predicted logits of semantic + segmentation before normalization. + """ + if data_samples is not None: + batch_img_metas = [ + data_sample.metainfo for data_sample in data_samples + ] + else: + batch_img_metas = [ + dict( + ori_shape=inputs.shape[2:], + img_shape=inputs.shape[2:], + pad_shape=inputs.shape[2:], + padding_size=[0, 0, 0, 0]) + ] * inputs.shape[0] + + seg_logits = self.inference(inputs, batch_img_metas) + + return self.postprocess_result(seg_logits, data_samples) + + def _forward(self, + inputs: Tensor, + data_samples: OptSampleList = None) -> Tensor: + """Network forward process. + + Args: + inputs (Tensor): Inputs with shape (N, C, H, W). + data_samples (List[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `metainfo` and `gt_sem_seg`. + + Returns: + Tensor: Forward output of model without any post-processes. + """ + x = self.extract_feat(inputs) + return self.decode_head.forward(x) + + def slide_inference(self, inputs: Tensor, + batch_img_metas: List[dict]) -> Tensor: + """Inference by sliding-window with overlap. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. + + Args: + inputs (tensor): the tensor should have a shape NxCxHxW, + which contains all images in the batch. + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The segmentation results, seg_logits from model of each + input image. 
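+ + Example (a minimal sketch of the ``F.pad`` convention relied on below; the 4-tuple pads the last two dims as (left, right, top, bottom)): + + >>> import torch + >>> import torch.nn.functional as F + >>> F.pad(torch.ones(1, 1, 2, 2), (1, 1, 0, 2)).shape + torch.Size([1, 1, 4, 4])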
+ """ + + h_stride, w_stride = self.test_cfg.stride + h_crop, w_crop = self.test_cfg.crop_size + batch_size, _, h_img, w_img = inputs.size() + out_channels = self.out_channels + h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 + w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 + preds = inputs.new_zeros((batch_size, out_channels, h_img, w_img)) + count_mat = inputs.new_zeros((batch_size, 1, h_img, w_img)) + for h_idx in range(h_grids): + for w_idx in range(w_grids): + y1 = h_idx * h_stride + x1 = w_idx * w_stride + y2 = min(y1 + h_crop, h_img) + x2 = min(x1 + w_crop, w_img) + y1 = max(y2 - h_crop, 0) + x1 = max(x2 - w_crop, 0) + crop_img = inputs[:, :, y1:y2, x1:x2] + # change the image shape to patch shape + batch_img_metas[0]['img_shape'] = crop_img.shape[2:] + # the output of encode_decode is seg logits tensor map + # with shape [N, C, H, W] + crop_seg_logit = self.encode_decode(crop_img, batch_img_metas) + preds += F.pad(crop_seg_logit, + (int(x1), int(preds.shape[3] - x2), int(y1), + int(preds.shape[2] - y2))) + + count_mat[:, :, y1:y2, x1:x2] += 1 + assert (count_mat == 0).sum() == 0 + seg_logits = preds / count_mat + + return seg_logits + + def whole_inference(self, inputs: Tensor, + batch_img_metas: List[dict]) -> Tensor: + """Inference with full image. + + Args: + inputs (Tensor): The tensor should have a shape NxCxHxW, which + contains all images in the batch. + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The segmentation results, seg_logits from model of each + input image. + """ + + seg_logits = self.encode_decode(inputs, batch_img_metas) + + return seg_logits + + def inference(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor: + """Inference with slide/whole style. + + Args: + inputs (Tensor): The input image of shape (N, 3, H, W). + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', 'pad_shape', and 'padding_size'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The segmentation results, seg_logits from model of each + input image. + """ + + assert self.test_cfg.mode in ['slide', 'whole'] + ori_shape = batch_img_metas[0]['ori_shape'] + assert all(_['ori_shape'] == ori_shape for _ in batch_img_metas) + if self.test_cfg.mode == 'slide': + seg_logit = self.slide_inference(inputs, batch_img_metas) + else: + seg_logit = self.whole_inference(inputs, batch_img_metas) + + return seg_logit + + def aug_test(self, inputs, batch_img_metas, rescale=True): + """Test with augmentations. + + Only rescale=True is supported. 
+ """ + # aug_test rescale all imgs back to ori_shape for now + assert rescale + # to save memory, we get augmented seg logit inplace + seg_logit = self.inference(inputs[0], batch_img_metas[0], rescale) + for i in range(1, len(inputs)): + cur_seg_logit = self.inference(inputs[i], batch_img_metas[i], + rescale) + seg_logit += cur_seg_logit + seg_logit /= len(inputs) + seg_pred = seg_logit.argmax(dim=1) + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred diff --git a/mmseg/models/segmentors/seg_tta.py b/mmseg/models/segmentors/seg_tta.py new file mode 100644 index 0000000000000000000000000000000000000000..63ef61d223a572dec4fc3e43e1550b98cd2e7302 --- /dev/null +++ b/mmseg/models/segmentors/seg_tta.py @@ -0,0 +1,47 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List + +import torch +from mmengine.model import BaseTTAModel +from mmengine.structures import PixelData + +from mmseg.registry import MODELS +from mmseg.utils import SampleList + + +@MODELS.register_module() +class SegTTAModel(BaseTTAModel): + + def merge_preds(self, data_samples_list: List[SampleList]) -> SampleList: + """Merge predictions of enhanced data to one prediction. + + Args: + data_samples_list (List[SampleList]): List of predictions + of all enhanced data. + + Returns: + SampleList: Merged prediction. + """ + predictions = [] + for data_samples in data_samples_list: + seg_logits = data_samples[0].seg_logits.data + logits = torch.zeros(seg_logits.shape).to(seg_logits) + for data_sample in data_samples: + seg_logit = data_sample.seg_logits.data + if self.module.out_channels > 1: + logits += seg_logit.softmax(dim=0) + else: + logits += seg_logit.sigmoid() + logits /= len(data_samples) + if self.module.out_channels == 1: + seg_pred = (logits > self.module.decode_head.threshold + ).to(logits).squeeze(1) + else: + seg_pred = logits.argmax(dim=0) + data_sample.set_data({'pred_sem_seg': PixelData(data=seg_pred)}) + if hasattr(data_samples[0], 'gt_sem_seg'): + data_sample.set_data( + {'gt_sem_seg': data_samples[0].gt_sem_seg}) + data_sample.set_metainfo({'img_path': data_samples[0].img_path}) + predictions.append(data_sample) + return predictions diff --git a/mmseg/models/text_encoder/__init__.py b/mmseg/models/text_encoder/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..199856d9d79800cbcd9aa7b77223a6528c6b7e0a --- /dev/null +++ b/mmseg/models/text_encoder/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .clip_text_encoder import CLIPTextEncoder + +__all__ = ['CLIPTextEncoder'] diff --git a/mmseg/models/text_encoder/clip_text_encoder.py b/mmseg/models/text_encoder/clip_text_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..1a18b86395ebcf0443e9aab05f4454acada98990 --- /dev/null +++ b/mmseg/models/text_encoder/clip_text_encoder.py @@ -0,0 +1,229 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List + +import numpy as np +import torch +import torch.nn as nn +from mmcv.cnn import build_norm_layer +from mmcv.cnn.bricks.transformer import BaseTransformerLayer +from mmengine.model import BaseModule, ModuleList +from mmengine.runner.checkpoint import CheckpointLoader, load_state_dict +from torch.nn import functional as F + +from mmseg.registry import MODELS +from mmseg.utils import get_classes, get_predefined_templates, tokenizer + + +@MODELS.register_module() +class CLIPTextEncoder(BaseModule): + """A text encoder with transformer architecture to encode the label text. + + Modified from https://github.com/MendelXu/SAN/blob/main/san/model/clip_utils/classifier.py # noqa:E501 + Copyright (c) 2023 MendelXu. + Licensed under the MIT License + + Args: + dataset_name: (str|None): The name of the dataset to which + the data belongs. + vocabulary: (List[str]|None): The list of class names. Default: None. + templates: (List[str]|None): The prompt template used for labels. + Default: None. + total_vocab_size: (int): Number of all words used by the pre-trained + model. Default: 49408 (CLIP). + context_length: (int): The max length of prompt text. + Default: 77 (CLIP). + embed_dims: (int): Width of transformer model. Default: 512. + num_layers: (int): Depth of transformer. Default: 12, + num_heads: (int): Number of attention heads in transformer. + Default: 8, + mlp_ratio: (int) Ratio of mlp hidden dim to embedding dim in + transformer. Default: 4, + output_dims: (int) Dim of output text embeddings. Default: 512, + cache_feature: (bool) Whether to save class embeddings in cache. + Default: True, + cat_bg: (bool) Whether to add background embedding. Default: True. + norm_cfg (dict|None): Config for norm layer. Default: dict(type='LN') + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None.
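+ + Example (a hedged sketch, not from the original file; assumes 'ade20k' and 'vild' are valid names for ``get_classes`` and ``get_predefined_templates``): + + >>> text_encoder = CLIPTextEncoder(dataset_name='ade20k', templates='vild') + >>> class_embeds = text_encoder() # (num_classes + 1, output_dims) when cat_bg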
+ """ + + def __init__(self, + dataset_name: str = None, + vocabulary: List[str] = None, + templates: str = 'vild', + total_vocab_size: int = 49408, + context_length: int = 77, + embed_dims: int = 512, + num_layers: int = 12, + num_heads: int = 8, + mlp_ratio: int = 4, + output_dims: int = 512, + cache_feature: bool = True, + cat_bg: bool = True, + norm_cfg: dict = dict(type='LN'), + init_cfg: dict = None): + super().__init__(init_cfg) + if isinstance(templates, List): + self.templates = templates + else: + self.templates = get_predefined_templates(templates) + + assert dataset_name is not None or vocabulary is not None, \ + "text_encoder required either 'dataset_name' or 'vocabulary'" + assert dataset_name is None or vocabulary is None, \ + "there is conflict between 'dataset_name' and 'vocabulary'" + self.dataset_name = dataset_name + self.vocabulary = vocabulary + self.num_pos = context_length + self.token_embedding = nn.Embedding(total_vocab_size, embed_dims) + self.positional_embedding = nn.Parameter( + torch.empty(context_length, embed_dims)) + self.text_projection = nn.Parameter( + torch.empty(embed_dims, output_dims)) + self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) + self.transformer = ModuleList() + self.register_buffer( + 'attn_mask', self.build_attention_mask(), persistent=False) + for i in range(num_layers): + self.transformer.append( + BaseTransformerLayer( + attn_cfgs=dict( + type='MultiheadAttention', + embed_dims=embed_dims, + num_heads=num_heads, + batch_first=False, + bias=True), + ffn_cfgs=dict( + type='FFN', + embed_dims=embed_dims, + feedforward_channels=mlp_ratio * embed_dims, + act_cfg=dict(type='QuickGELU')), + operation_order=('norm', 'self_attn', 'norm', 'ffn'))) + self.ln_final = build_norm_layer( + norm_cfg, embed_dims, postfix='_final')[1] + + self.cache_feature = cache_feature + if self.cache_feature: + self.cache = {} + + self._freeze() + + self.cat_bg = cat_bg + if self.cat_bg: + self.bg_embed = nn.Parameter( + torch.randn(1, self.text_projection.shape[1])) + + @property + def ln_final(self): + return getattr(self, self.final_name) + + def build_attention_mask(self): + """lazily create causal attention mask, with full attention between the + tokens. 
+ + pytorch uses additive attention mask; fill with -inf + """ + mask = torch.empty(self.num_pos, self.num_pos) + mask.fill_(float('-inf')) + mask.triu_(1) # zero out the lower diagonal + return mask + + def _freeze(self): + for param in self.parameters(): + param.requires_grad = False + + def init_weights(self): + if self.cat_bg: + nn.init.normal_( + self.bg_embed, + std=self.bg_embed.shape[1]**-0.5, + ) + if isinstance(self.init_cfg, dict) and \ + self.init_cfg.get('type') == 'Pretrained_Part': + checkpoint = CheckpointLoader.load_checkpoint( + self.init_cfg['checkpoint'], logger=None, map_location='cpu') + + state_dict = checkpoint.copy() + para_prefix = 'text_encoder' + prefix_len = len(para_prefix) + 1 + for k, v in checkpoint.items(): + state_dict.pop(k) + if para_prefix in k: + state_dict[k[prefix_len:]] = v + + load_state_dict(self, state_dict, strict=False, logger=None) + + else: + super().init_weights() + + @torch.no_grad() + def encode_text(self, text, normalize=False): + """encode class token.""" + + embed_device = self.token_embedding.weight.device + x = self.token_embedding( + text.to(embed_device)) # [batch_size, n_ctx, d_model] + x = x + self.positional_embedding + x = x.permute(1, 0, 2) # NLD -> LND + for block in self.transformer: + x = block(query=x, attn_masks=self.attn_mask) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x) # [batch_size, n_ctx, transformer.width] + # take features from the eot embedding + # (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), + text.argmax(dim=-1)] @ self.text_projection + return F.normalize(x, dim=-1) if normalize else x + + def template_encode(self, vocabulary): + """Prompt engineering.""" + text_embed_bucket = [] + for template in self.templates: + text_inputs = tokenizer.tokenize( + [template.format(noun) for noun in vocabulary]) + text_embed = self.encode_text(text_inputs, normalize=True) + text_embed_bucket.append(text_embed) + text_embed = torch.stack(text_embed_bucket).mean(dim=0) + text_embed = text_embed / text_embed.norm(dim=-1, keepdim=True) + return text_embed + + def forward(self): + """Forward function.""" + if self.dataset_name is None: # encoding vocabulary directly + class_names = self.vocabulary + if self.cache_feature: + new_classes = [ + word for word in class_names if word not in self.cache + ] + if len(new_classes) > 0: + class_embeds = self.template_encode(new_classes) + self.cache.update(dict(zip(new_classes, class_embeds))) + class_embeds = torch.stack( + [self.cache[word] for word in class_names]) + else: + class_embeds = self.template_encode(class_names) + + else: # encoding the classes of the dataset + class_names = get_classes(self.dataset_name) + if class_names[0] == 'background': + class_names = class_names[1:] + if self.cache_feature: + if self.dataset_name not in self.cache: + class_embeds = self.template_encode(class_names) + self.cache[self.dataset_name] = class_embeds + else: + class_embeds = self.cache[self.dataset_name] + else: + class_embeds = self.template_encode(class_names) + + if self.cat_bg: + class_embeds = torch.cat([class_embeds, self.bg_embed]) + class_embeds = F.normalize(class_embeds, p=2, dim=-1) + return self.logit_scale.exp() * class_embeds + + +@MODELS.register_module() +class QuickGELU(nn.Module): + # From https://github.com/openai/CLIP/blob/main/clip/model.py + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) diff --git a/mmseg/models/utils/__init__.py b/mmseg/models/utils/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..c0751b17c02de14e9bf1bfc02230d507a143e9c0 --- /dev/null +++ b/mmseg/models/utils/__init__.py @@ -0,0 +1,27 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .basic_block import BasicBlock, Bottleneck +from .embed import PatchEmbed +from .encoding import Encoding +from .inverted_residual import InvertedResidual, InvertedResidualV3 +from .make_divisible import make_divisible +from .point_sample import get_uncertain_point_coords_with_randomness +from .ppm import DAPPM, PAPPM +from .res_layer import ResLayer +from .se_layer import SELayer +from .self_attention_block import SelfAttentionBlock +from .shape_convert import (nchw2nlc2nchw, nchw_to_nlc, nlc2nchw2nlc, + nlc_to_nchw) +from .up_conv_block import UpConvBlock + +# isort: off +from .wrappers import Upsample, resize +from .san_layers import MLP, LayerNorm2d, cross_attn_layer + +__all__ = [ + 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', + 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'PatchEmbed', + 'nchw_to_nlc', 'nlc_to_nchw', 'nchw2nlc2nchw', 'nlc2nchw2nlc', 'Encoding', + 'Upsample', 'resize', 'DAPPM', 'PAPPM', 'BasicBlock', 'Bottleneck', + 'cross_attn_layer', 'LayerNorm2d', 'MLP', + 'get_uncertain_point_coords_with_randomness' +] diff --git a/mmseg/models/utils/basic_block.py b/mmseg/models/utils/basic_block.py new file mode 100644 index 0000000000000000000000000000000000000000..4e1ad8146dd200c5f1e543adf22ada654ee196a4 --- /dev/null +++ b/mmseg/models/utils/basic_block.py @@ -0,0 +1,143 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional + +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.utils import OptConfigType + + +class BasicBlock(BaseModule): + """Basic block from `ResNet <https://arxiv.org/abs/1512.03385>`_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. + downsample (nn.Module, optional): Downsample operation on identity.
Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). + act_cfg_out (dict, optional): Config dict for activation layer at the + last of the block. Default: dict(type='ReLU', inplace=True). + init_cfg (dict, optional): Initialization config dict. Default: None. + """ + + expansion = 1 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: nn.Module = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv1 = ConvModule( + in_channels, + channels, + kernel_size=3, + stride=stride, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=None) + self.downsample = downsample + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + + def forward(self, x: Tensor) -> Tensor: + residual = x + out = self.conv1(x) + out = self.conv2(out) + + if self.downsample: + residual = self.downsample(x) + + out += residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out + + +class Bottleneck(BaseModule): + """Bottleneck block from `ResNet <https://arxiv.org/abs/1512.03385>`_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. + downsample (nn.Module, optional): Downsample operation on identity. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). + act_cfg_out (dict, optional): Config dict for activation layer at + the last of the block. Default: None. + init_cfg (dict, optional): Initialization config dict. Default: None. + """ + + expansion = 2 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = None, + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv1 = ConvModule( + in_channels, channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + 3, + stride, + 1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv3 = ConvModule( + channels, + channels * self.expansion, + 1, + norm_cfg=norm_cfg, + act_cfg=None) + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + self.downsample = downsample + + def forward(self, x: Tensor) -> Tensor: + residual = x + + out = self.conv1(x) + out = self.conv2(out) + out = self.conv3(out) + + if self.downsample: + residual = self.downsample(x) + + out += residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out diff --git a/mmseg/models/utils/embed.py b/mmseg/models/utils/embed.py new file mode 100644 index 0000000000000000000000000000000000000000..aef0a40b0a87bb6616db96fe2c72c19cc6f5b366 --- /dev/null +++ b/mmseg/models/utils/embed.py @@ -0,0 +1,330 @@ +# Copyright (c) OpenMMLab. All rights reserved.
+import math +from typing import Sequence + +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import build_conv_layer, build_norm_layer +from mmengine.model import BaseModule +from mmengine.utils import to_2tuple + + +class AdaptivePadding(nn.Module): + """Applies padding to input (if needed) so that the input can get fully + covered by the filter you specified. It supports two modes, "same" and + "corner". The "same" mode is the same as the "SAME" padding mode in + TensorFlow, padding zeros around the input. The "corner" mode pads zeros + to the bottom right. + + Args: + kernel_size (int | tuple): Size of the kernel. + stride (int | tuple): Stride of the filter. Default: 1. + dilation (int | tuple): Spacing between kernel elements. + Default: 1. + padding (str): Support "same" and "corner"; the "corner" mode + pads zeros to the bottom right, and the "same" mode + pads zeros around the input. Default: "corner". + Example: + >>> kernel_size = 16 + >>> stride = 16 + >>> dilation = 1 + >>> input = torch.rand(1, 1, 15, 17) + >>> adap_pad = AdaptivePadding( + >>> kernel_size=kernel_size, + >>> stride=stride, + >>> dilation=dilation, + >>> padding="corner") + >>> out = adap_pad(input) + >>> assert (out.shape[2], out.shape[3]) == (16, 32) + >>> input = torch.rand(1, 1, 16, 17) + >>> out = adap_pad(input) + >>> assert (out.shape[2], out.shape[3]) == (16, 32) + """ + + def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'): + + super().__init__() + + assert padding in ('same', 'corner') + + kernel_size = to_2tuple(kernel_size) + stride = to_2tuple(stride) + dilation = to_2tuple(dilation) + + self.padding = padding + self.kernel_size = kernel_size + self.stride = stride + self.dilation = dilation + + def get_pad_shape(self, input_shape): + input_h, input_w = input_shape + kernel_h, kernel_w = self.kernel_size + stride_h, stride_w = self.stride + output_h = math.ceil(input_h / stride_h) + output_w = math.ceil(input_w / stride_w) + pad_h = max((output_h - 1) * stride_h + + (kernel_h - 1) * self.dilation[0] + 1 - input_h, 0) + pad_w = max((output_w - 1) * stride_w + + (kernel_w - 1) * self.dilation[1] + 1 - input_w, 0) + return pad_h, pad_w + + def forward(self, x): + pad_h, pad_w = self.get_pad_shape(x.size()[-2:]) + if pad_h > 0 or pad_w > 0: + if self.padding == 'corner': + x = F.pad(x, [0, pad_w, 0, pad_h]) + elif self.padding == 'same': + x = F.pad(x, [ + pad_w // 2, pad_w - pad_w // 2, pad_h // 2, + pad_h - pad_h // 2 + ]) + return x + + +class PatchEmbed(BaseModule): + """Image to Patch Embedding. + + We use a conv layer to implement PatchEmbed. + + Args: + in_channels (int): The num of input channels. Default: 3 + embed_dims (int): The dimensions of embedding. Default: 768 + conv_type (str): The config dict for embedding + conv layer type selection. Default: "Conv2d". + kernel_size (int): The kernel_size of embedding conv. Default: 16. + stride (int, optional): The slide stride of embedding conv. + Default: None (Would be set as `kernel_size`). + padding (int | tuple | string ): The padding length of + embedding conv. When it is a string, it means the mode + of adaptive padding, support "same" and "corner" now. + Default: "corner". + dilation (int): The dilation rate of embedding conv. Default: 1. + bias (bool): Bias of embed conv. Default: True. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: None. + input_size (int | tuple | None): The size of input, which will be + used to calculate the out size. Only works when `dynamic_size` + is False. Default: None.
+ init_cfg (`mmengine.ConfigDict`, optional): The Config for + initialization. Default: None. + """ + + def __init__(self, + in_channels=3, + embed_dims=768, + conv_type='Conv2d', + kernel_size=16, + stride=None, + padding='corner', + dilation=1, + bias=True, + norm_cfg=None, + input_size=None, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + self.embed_dims = embed_dims + if stride is None: + stride = kernel_size + + kernel_size = to_2tuple(kernel_size) + stride = to_2tuple(stride) + dilation = to_2tuple(dilation) + + if isinstance(padding, str): + self.adap_padding = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + # disable the padding of conv + padding = 0 + else: + self.adap_padding = None + padding = to_2tuple(padding) + + self.projection = build_conv_layer( + dict(type=conv_type), + in_channels=in_channels, + out_channels=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + if norm_cfg is not None: + self.norm = build_norm_layer(norm_cfg, embed_dims)[1] + else: + self.norm = None + + if input_size: + input_size = to_2tuple(input_size) + # `init_out_size` would be used outside to + # calculate the num_patches + # when `use_abs_pos_embed` outside + self.init_input_size = input_size + if self.adap_padding: + pad_h, pad_w = self.adap_padding.get_pad_shape(input_size) + input_h, input_w = input_size + input_h = input_h + pad_h + input_w = input_w + pad_w + input_size = (input_h, input_w) + + # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html + h_out = (input_size[0] + 2 * padding[0] - dilation[0] * + (kernel_size[0] - 1) - 1) // stride[0] + 1 + w_out = (input_size[1] + 2 * padding[1] - dilation[1] * + (kernel_size[1] - 1) - 1) // stride[1] + 1 + self.init_out_size = (h_out, w_out) + else: + self.init_input_size = None + self.init_out_size = None + + def forward(self, x): + """ + Args: + x (Tensor): Has shape (B, C, H, W). In most case, C is 3. + + Returns: + tuple: Contains merged results and its spatial shape. + + - x (Tensor): Has shape (B, out_h * out_w, embed_dims) + - out_size (tuple[int]): Spatial shape of x, arrange as + (out_h, out_w). + """ + + if self.adap_padding: + x = self.adap_padding(x) + + x = self.projection(x) + out_size = (x.shape[2], x.shape[3]) + x = x.flatten(2).transpose(1, 2) + if self.norm is not None: + x = self.norm(x) + return x, out_size + + +class PatchMerging(BaseModule): + """Merge patch feature map. + + This layer groups feature map by kernel_size, and applies norm and linear + layers to the grouped feature map. Our implementation uses `nn.Unfold` to + merge patch, which is about 25% faster than original implementation. + Instead, we need to modify pretrained models for compatibility. + + Args: + in_channels (int): The num of input channels. + out_channels (int): The num of output channels. + kernel_size (int | tuple, optional): the kernel size in the unfold + layer. Defaults to 2. + stride (int | tuple, optional): the stride of the sliding blocks in the + unfold layer. Default: None. (Would be set as `kernel_size`) + padding (int | tuple | string ): The padding length of + embedding conv. When it is a string, it means the mode + of adaptive padding, support "same" and "corner" now. + Default: "corner". + dilation (int | tuple, optional): dilation parameter in the unfold + layer. Default: 1. + bias (bool, optional): Whether to add bias in linear layer or not. + Defaults: False. 
+ norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='LN'). + init_cfg (dict, optional): The extra config for initialization. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size=2, + stride=None, + padding='corner', + dilation=1, + bias=False, + norm_cfg=dict(type='LN'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.in_channels = in_channels + self.out_channels = out_channels + if stride: + stride = stride + else: + stride = kernel_size + + kernel_size = to_2tuple(kernel_size) + stride = to_2tuple(stride) + dilation = to_2tuple(dilation) + + if isinstance(padding, str): + self.adap_padding = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + # disable the padding of unfold + padding = 0 + else: + self.adap_padding = None + + padding = to_2tuple(padding) + self.sampler = nn.Unfold( + kernel_size=kernel_size, + dilation=dilation, + padding=padding, + stride=stride) + + sample_dim = kernel_size[0] * kernel_size[1] * in_channels + + if norm_cfg is not None: + self.norm = build_norm_layer(norm_cfg, sample_dim)[1] + else: + self.norm = None + + self.reduction = nn.Linear(sample_dim, out_channels, bias=bias) + + def forward(self, x, input_size): + """ + Args: + x (Tensor): Has shape (B, H*W, C_in). + input_size (tuple[int]): The spatial shape of x, arrange as (H, W). + Default: None. + + Returns: + tuple: Contains merged results and its spatial shape. + + - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out) + - out_size (tuple[int]): Spatial shape of x, arrange as + (Merged_H, Merged_W). + """ + B, L, C = x.shape + assert isinstance(input_size, Sequence), f'Expect ' \ + f'input_size is ' \ + f'`Sequence` ' \ + f'but get {input_size}' + + H, W = input_size + assert L == H * W, 'input feature has wrong size' + + x = x.view(B, H, W, C).permute([0, 3, 1, 2]) # B, C, H, W + # Use nn.Unfold to merge patch. About 25% faster than original method, + # but need to modify pretrained model for compatibility + + if self.adap_padding: + x = self.adap_padding(x) + H, W = x.shape[-2:] + + x = self.sampler(x) + # if kernel_size=2 and stride=2, x should has shape (B, 4*C, H/2*W/2) + + out_h = (H + 2 * self.sampler.padding[0] - self.sampler.dilation[0] * + (self.sampler.kernel_size[0] - 1) - + 1) // self.sampler.stride[0] + 1 + out_w = (W + 2 * self.sampler.padding[1] - self.sampler.dilation[1] * + (self.sampler.kernel_size[1] - 1) - + 1) // self.sampler.stride[1] + 1 + + output_size = (out_h, out_w) + x = x.transpose(1, 2) # B, H/2*W/2, 4*C + x = self.norm(x) if self.norm else x + x = self.reduction(x) + return x, output_size diff --git a/mmseg/models/utils/encoding.py b/mmseg/models/utils/encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..ee4f0574fbc1957cf8da591a0e4befd6d8a125d3 --- /dev/null +++ b/mmseg/models/utils/encoding.py @@ -0,0 +1,75 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.nn import functional as F + + +class Encoding(nn.Module): + """Encoding Layer: a learnable residual encoder. + + Input is of shape (batch_size, channels, height, width). + Output is of shape (batch_size, num_codes, channels). + + Args: + channels: dimension of the features or feature channels + num_codes: number of code words + """ + + def __init__(self, channels, num_codes): + super().__init__() + # init codewords and smoothing factor + self.channels, self.num_codes = channels, num_codes + std = 1. 
/ ((num_codes * channels)**0.5) + # [num_codes, channels] + self.codewords = nn.Parameter( + torch.empty(num_codes, channels, + dtype=torch.float).uniform_(-std, std), + requires_grad=True) + # [num_codes] + self.scale = nn.Parameter( + torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), + requires_grad=True) + + @staticmethod + def scaled_l2(x, codewords, scale): + num_codes, channels = codewords.size() + batch_size = x.size(0) + reshaped_scale = scale.view((1, 1, num_codes)) + expanded_x = x.unsqueeze(2).expand( + (batch_size, x.size(1), num_codes, channels)) + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + + scaled_l2_norm = reshaped_scale * ( + expanded_x - reshaped_codewords).pow(2).sum(dim=3) + return scaled_l2_norm + + @staticmethod + def aggregate(assignment_weights, x, codewords): + num_codes, channels = codewords.size() + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + batch_size = x.size(0) + + expanded_x = x.unsqueeze(2).expand( + (batch_size, x.size(1), num_codes, channels)) + encoded_feat = (assignment_weights.unsqueeze(3) * + (expanded_x - reshaped_codewords)).sum(dim=1) + return encoded_feat + + def forward(self, x): + assert x.dim() == 4 and x.size(1) == self.channels + # [batch_size, channels, height, width] + batch_size = x.size(0) + # [batch_size, height x width, channels] + x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() + # assignment_weights: [batch_size, channels, num_codes] + assignment_weights = F.softmax( + self.scaled_l2(x, self.codewords, self.scale), dim=2) + # aggregate + encoded_feat = self.aggregate(assignment_weights, x, self.codewords) + return encoded_feat + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \ + f'x{self.channels})' + return repr_str diff --git a/mmseg/models/utils/inverted_residual.py b/mmseg/models/utils/inverted_residual.py new file mode 100644 index 0000000000000000000000000000000000000000..56190b3bfe7cc8fe98bf34c3812db18dd34a8f02 --- /dev/null +++ b/mmseg/models/utils/inverted_residual.py @@ -0,0 +1,213 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv.cnn import ConvModule +from torch import nn +from torch.utils import checkpoint as cp + +from .se_layer import SELayer + + +class InvertedResidual(nn.Module): + """InvertedResidual block for MobileNetV2. + + Args: + in_channels (int): The input channels of the InvertedResidual block. + out_channels (int): The output channels of the InvertedResidual block. + stride (int): Stride of the middle (first) 3x3 convolution. + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + dilation (int): Dilation rate of depthwise conv. Default: 1 + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, + in_channels, + out_channels, + stride, + expand_ratio, + dilation=1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + with_cp=False, + **kwargs): + super().__init__() + self.stride = stride + assert stride in [1, 2], f'stride must in [1, 2]. 
' \ + f'But received {stride}.' + self.with_cp = with_cp + self.use_res_connect = self.stride == 1 and in_channels == out_channels + hidden_dim = int(round(in_channels * expand_ratio)) + + layers = [] + if expand_ratio != 1: + layers.append( + ConvModule( + in_channels=in_channels, + out_channels=hidden_dim, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs)) + layers.extend([ + ConvModule( + in_channels=hidden_dim, + out_channels=hidden_dim, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + groups=hidden_dim, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs), + ConvModule( + in_channels=hidden_dim, + out_channels=out_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None, + **kwargs) + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + + def _inner_forward(x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InvertedResidualV3(nn.Module): + """Inverted Residual Block for MobileNetV3. + + Args: + in_channels (int): The input channels of this Module. + out_channels (int): The output channels of this Module. + mid_channels (int): The input channels of the depthwise convolution. + kernel_size (int): The kernel size of the depthwise convolution. + Default: 3. + stride (int): The stride of the depthwise convolution. Default: 1. + se_cfg (dict): Config dict for se layer. Default: None, which means no + se layer. + with_expand_conv (bool): Use expand conv or not. If set False, + mid_channels must be the same with in_channels. Default: True. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. 
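+
+    Example (an illustrative sketch; the channel choices below are arbitrary
+    but satisfy the constraints documented above):
+        >>> import torch
+        >>> block = InvertedResidualV3(
+        ...     in_channels=32, out_channels=32, mid_channels=128)
+        >>> block(torch.rand(2, 32, 64, 64)).shape
+        torch.Size([2, 32, 64, 64])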
+ """ + + def __init__(self, + in_channels, + out_channels, + mid_channels, + kernel_size=3, + stride=1, + se_cfg=None, + with_expand_conv=True, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + with_cp=False): + super().__init__() + self.with_res_shortcut = (stride == 1 and in_channels == out_channels) + assert stride in [1, 2] + self.with_cp = with_cp + self.with_se = se_cfg is not None + self.with_expand_conv = with_expand_conv + + if self.with_se: + assert isinstance(se_cfg, dict) + if not self.with_expand_conv: + assert mid_channels == in_channels + + if self.with_expand_conv: + self.expand_conv = ConvModule( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.depthwise_conv = ConvModule( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + padding=kernel_size // 2, + groups=mid_channels, + conv_cfg=dict( + type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + if self.with_se: + self.se = SELayer(**se_cfg) + + self.linear_conv = ConvModule( + in_channels=mid_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, x): + + def _inner_forward(x): + out = x + + if self.with_expand_conv: + out = self.expand_conv(out) + + out = self.depthwise_conv(out) + + if self.with_se: + out = self.se(out) + + out = self.linear_conv(out) + + if self.with_res_shortcut: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out diff --git a/mmseg/models/utils/make_divisible.py b/mmseg/models/utils/make_divisible.py new file mode 100644 index 0000000000000000000000000000000000000000..ed42c2eeea2a6aed03a0be5516b8d1ef1139e486 --- /dev/null +++ b/mmseg/models/utils/make_divisible.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +def make_divisible(value, divisor, min_value=None, min_ratio=0.9): + """Make divisible function. + + This function rounds the channel number to the nearest value that can be + divisible by the divisor. It is taken from the original tf repo. It ensures + that all layers have a channel number that is divisible by divisor. It can + be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa + + Args: + value (int): The original channel number. + divisor (int): The divisor to fully divide the channel number. + min_value (int): The minimum value of the output channel. + Default: None, means that the minimum value equal to the divisor. + min_ratio (float): The minimum ratio of the rounded channel number to + the original channel number. Default: 0.9. + + Returns: + int: The modified output channel number. + """ + + if min_value is None: + min_value = divisor + new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than (1-min_ratio). + if new_value < min_ratio * value: + new_value += divisor + return new_value diff --git a/mmseg/models/utils/point_sample.py b/mmseg/models/utils/point_sample.py new file mode 100644 index 0000000000000000000000000000000000000000..1afc957f3da7d1dc030c21d40311c768c6952ea4 --- /dev/null +++ b/mmseg/models/utils/point_sample.py @@ -0,0 +1,88 @@ +# Copyright (c) OpenMMLab. 
All rights reserved.
+import torch
+from mmcv.ops import point_sample
+from torch import Tensor
+
+
+def get_uncertainty(mask_preds: Tensor, labels: Tensor) -> Tensor:
+    """Estimate uncertainty based on pred logits.
+
+    We estimate uncertainty as L1 distance between 0.0 and the logits
+    prediction in 'mask_preds' for the foreground class in `classes`.
+
+    Args:
+        mask_preds (Tensor): mask prediction logits, shape (num_rois,
+            num_classes, mask_height, mask_width).
+
+        labels (Tensor): Either predicted or ground truth label for
+            each predicted mask, of length num_rois.
+
+    Returns:
+        scores (Tensor): Uncertainty scores with the most uncertain
+            locations having the highest uncertainty score,
+            shape (num_rois, 1, mask_height, mask_width)
+    """
+    if mask_preds.shape[1] == 1:
+        gt_class_logits = mask_preds.clone()
+    else:
+        inds = torch.arange(mask_preds.shape[0], device=mask_preds.device)
+        gt_class_logits = mask_preds[inds, labels].unsqueeze(1)
+    return -torch.abs(gt_class_logits)
+
+
+def get_uncertain_point_coords_with_randomness(
+        mask_preds: Tensor, labels: Tensor, num_points: int,
+        oversample_ratio: float, importance_sample_ratio: float) -> Tensor:
+    """Get ``num_points`` most uncertain points with random points during
+    train.
+
+    Sample points in [0, 1] x [0, 1] coordinate space based on their
+    uncertainty. The uncertainties are calculated for each point using
+    'get_uncertainty()' function that takes point's logit prediction as
+    input.
+
+    Args:
+        mask_preds (Tensor): A tensor of shape (num_rois, num_classes,
+            mask_height, mask_width) for class-specific or class-agnostic
+            prediction.
+        labels (Tensor): The ground truth class for each instance.
+        num_points (int): The number of points to sample.
+        oversample_ratio (float): Oversampling parameter.
+        importance_sample_ratio (float): Ratio of points that are sampled
+            via importance sampling.
+
+    Returns:
+        point_coords (Tensor): A tensor of shape (num_rois, num_points, 2)
+            that contains the coordinates of sampled points.
+    """
+    assert oversample_ratio >= 1
+    assert 0 <= importance_sample_ratio <= 1
+    batch_size = mask_preds.shape[0]
+    num_sampled = int(num_points * oversample_ratio)
+    point_coords = torch.rand(
+        batch_size, num_sampled, 2, device=mask_preds.device)
+    point_logits = point_sample(mask_preds, point_coords)
+    # It is crucial to calculate uncertainty based on the sampled
+    # prediction value for the points. Calculating uncertainties of the
+    # coarse predictions first and sampling them for points leads to
+    # incorrect results. To illustrate this: assume uncertainty func(
+    # logits)=-abs(logits), a sampled point between two coarse
+    # predictions with -1 and 1 logits has 0 logits, and therefore 0
+    # uncertainty value. However, if we calculate uncertainties for the
+    # coarse predictions first, both will have -1 uncertainty,
+    # and sampled point will get -1 uncertainty.
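+    # point_uncertainties below has shape (batch_size, 1, num_sampled): one
+    # negated-absolute-logit score per candidate point for the labelled class.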
+    point_uncertainties = get_uncertainty(point_logits, labels)
+    num_uncertain_points = int(importance_sample_ratio * num_points)
+    num_random_points = num_points - num_uncertain_points
+    idx = torch.topk(
+        point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1]
+    shift = num_sampled * torch.arange(
+        batch_size, dtype=torch.long, device=mask_preds.device)
+    idx += shift[:, None]
+    point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view(
+        batch_size, num_uncertain_points, 2)
+    if num_random_points > 0:
+        rand_roi_coords = torch.rand(
+            batch_size, num_random_points, 2, device=mask_preds.device)
+        point_coords = torch.cat((point_coords, rand_roi_coords), dim=1)
+    return point_coords
diff --git a/mmseg/models/utils/ppm.py b/mmseg/models/utils/ppm.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fe6ff26fae6869b989cecde96af3ceff1a37b38
--- /dev/null
+++ b/mmseg/models/utils/ppm.py
@@ -0,0 +1,193 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, List
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+from mmengine.model import BaseModule, ModuleList, Sequential
+from torch import Tensor
+
+
+class DAPPM(BaseModule):
+    """DAPPM module in DDRNet.
+
+    Args:
+        in_channels (int): Input channels.
+        branch_channels (int): Branch channels.
+        out_channels (int): Output channels.
+        num_scales (int): Number of scales.
+        kernel_sizes (list[int]): Kernel sizes of each scale.
+        strides (list[int]): Strides of each scale.
+        paddings (list[int]): Paddings of each scale.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN', momentum=0.1).
+        act_cfg (dict): Config dict for activation layer in ConvModule.
+            Default: dict(type='ReLU', inplace=True).
+        conv_cfg (dict): Config dict for convolution layer in ConvModule.
+            Default: dict(order=('norm', 'act', 'conv'), bias=False).
+        upsample_mode (str): Upsample mode. Default: 'bilinear'.
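+
+    Example (an illustrative sketch; the channel/scale choices are arbitrary
+    but consistent with the arguments above):
+        >>> import torch
+        >>> dappm = DAPPM(64, 32, 128, num_scales=5)
+        >>> dappm(torch.rand(2, 64, 8, 8)).shape
+        torch.Size([2, 128, 8, 8])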
+ """ + + def __init__(self, + in_channels: int, + branch_channels: int, + out_channels: int, + num_scales: int, + kernel_sizes: List[int] = [5, 9, 17], + strides: List[int] = [2, 4, 8], + paddings: List[int] = [2, 4, 8], + norm_cfg: Dict = dict(type='BN', momentum=0.1), + act_cfg: Dict = dict(type='ReLU', inplace=True), + conv_cfg: Dict = dict( + order=('norm', 'act', 'conv'), bias=False), + upsample_mode: str = 'bilinear'): + super().__init__() + + self.num_scales = num_scales + self.unsample_mode = upsample_mode + self.in_channels = in_channels + self.branch_channels = branch_channels + self.out_channels = out_channels + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.conv_cfg = conv_cfg + + self.scales = ModuleList([ + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ]) + for i in range(1, num_scales - 1): + self.scales.append( + Sequential(*[ + nn.AvgPool2d( + kernel_size=kernel_sizes[i - 1], + stride=strides[i - 1], + padding=paddings[i - 1]), + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ])) + self.scales.append( + Sequential(*[ + nn.AdaptiveAvgPool2d((1, 1)), + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ])) + self.processes = ModuleList() + for i in range(num_scales - 1): + self.processes.append( + ConvModule( + branch_channels, + branch_channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg)) + + self.compression = ConvModule( + branch_channels * num_scales, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + + self.shortcut = ConvModule( + in_channels, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + + def forward(self, inputs: Tensor): + feats = [] + feats.append(self.scales[0](inputs)) + + for i in range(1, self.num_scales): + feat_up = F.interpolate( + self.scales[i](inputs), + size=inputs.shape[2:], + mode=self.unsample_mode) + feats.append(self.processes[i - 1](feat_up + feats[i - 1])) + + return self.compression(torch.cat(feats, + dim=1)) + self.shortcut(inputs) + + +class PAPPM(DAPPM): + """PAPPM module in `PIDNet `_. + + Args: + in_channels (int): Input channels. + branch_channels (int): Branch channels. + out_channels (int): Output channels. + num_scales (int): Number of scales. + kernel_sizes (list[int]): Kernel sizes of each scale. + strides (list[int]): Strides of each scale. + paddings (list[int]): Paddings of each scale. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', momentum=0.1). + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU', inplace=True). + conv_cfg (dict): Config dict for convolution layer in ConvModule. + Default: dict(order=('norm', 'act', 'conv'), bias=False). + upsample_mode (str): Upsample mode. Default: 'bilinear'. 
+ """ + + def __init__(self, + in_channels: int, + branch_channels: int, + out_channels: int, + num_scales: int, + kernel_sizes: List[int] = [5, 9, 17], + strides: List[int] = [2, 4, 8], + paddings: List[int] = [2, 4, 8], + norm_cfg: Dict = dict(type='BN', momentum=0.1), + act_cfg: Dict = dict(type='ReLU', inplace=True), + conv_cfg: Dict = dict( + order=('norm', 'act', 'conv'), bias=False), + upsample_mode: str = 'bilinear'): + super().__init__(in_channels, branch_channels, out_channels, + num_scales, kernel_sizes, strides, paddings, norm_cfg, + act_cfg, conv_cfg, upsample_mode) + + self.processes = ConvModule( + self.branch_channels * (self.num_scales - 1), + self.branch_channels * (self.num_scales - 1), + kernel_size=3, + padding=1, + groups=self.num_scales - 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + **self.conv_cfg) + + def forward(self, inputs: Tensor): + x_ = self.scales[0](inputs) + feats = [] + for i in range(1, self.num_scales): + feat_up = F.interpolate( + self.scales[i](inputs), + size=inputs.shape[2:], + mode=self.unsample_mode, + align_corners=False) + feats.append(feat_up + x_) + scale_out = self.processes(torch.cat(feats, dim=1)) + return self.compression(torch.cat([x_, scale_out], + dim=1)) + self.shortcut(inputs) diff --git a/mmseg/models/utils/res_layer.py b/mmseg/models/utils/res_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..3dd7a6f75a168f2f7e3c61f82d309b1cf0d502bc --- /dev/null +++ b/mmseg/models/utils/res_layer.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv.cnn import build_conv_layer, build_norm_layer +from mmengine.model import Sequential +from torch import nn as nn + + +class ResLayer(Sequential): + """ResLayer to build ResNet style backbone. + + Args: + block (nn.Module): block used to build ResLayer. + inplanes (int): inplanes of block. + planes (int): planes of block. + num_blocks (int): number of blocks. + stride (int): stride of the first block. Default: 1 + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. Default: False + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='BN') + multi_grid (int | None): Multi grid dilation rates of last + stage. 
Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + """ + + def __init__(self, + block, + inplanes, + planes, + num_blocks, + stride=1, + dilation=1, + avg_down=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + multi_grid=None, + contract_dilation=False, + **kwargs): + self.block = block + + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = [] + conv_stride = stride + if avg_down: + conv_stride = 1 + downsample.append( + nn.AvgPool2d( + kernel_size=stride, + stride=stride, + ceil_mode=True, + count_include_pad=False)) + downsample.extend([ + build_conv_layer( + conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=conv_stride, + bias=False), + build_norm_layer(norm_cfg, planes * block.expansion)[1] + ]) + downsample = nn.Sequential(*downsample) + + layers = [] + if multi_grid is None: + if dilation > 1 and contract_dilation: + first_dilation = dilation // 2 + else: + first_dilation = dilation + else: + first_dilation = multi_grid[0] + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=stride, + dilation=first_dilation, + downsample=downsample, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=1, + dilation=dilation if multi_grid is None else multi_grid[i], + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + super().__init__(*layers) diff --git a/mmseg/models/utils/san_layers.py b/mmseg/models/utils/san_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..2267686daf62658c5dc81408e0a399c43aee83aa --- /dev/null +++ b/mmseg/models/utils/san_layers.py @@ -0,0 +1,418 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Modified from https://github.com/MendelXu/SAN/blob/main/san/model/attn_helper.py # noqa: E501 +# Copyright (c) 2023 MendelXu. +# Licensed under the MIT License + +import warnings +from typing import Optional + +import torch +from mmcv.cnn.bricks.transformer import BaseTransformerLayer +from torch import Tensor, nn +from torch.nn import functional as F + + +def cross_attn_with_self_bias( + query: Tensor, + key: Tensor, + value: Tensor, + embed_dim_to_check: int, + num_heads: int, + in_proj_weight: Tensor, + in_proj_bias: Tensor, + bias_k: Optional[Tensor], + bias_v: Optional[Tensor], + add_zero_attn: bool, + dropout_p: float, + out_proj_weight: Tensor, + out_proj_bias: Tensor, + training: bool = True, + key_padding_mask: Optional[Tensor] = None, + need_weights: bool = True, + attn_mask: Optional[Tensor] = None, + use_separate_proj_weight: bool = False, + q_proj_weight: Optional[Tensor] = None, + k_proj_weight: Optional[Tensor] = None, + v_proj_weight: Optional[Tensor] = None, + static_k: Optional[Tensor] = None, + static_v: Optional[Tensor] = None, +): + """Forward function of multi-head attention. Modified from + multi_head_attention_forward in + https://github.com/pytorch/pytorch/blob/main/torch/nn/functional.py. + + Args: + query, key, value: map a query and a set of key-value pairs to an output. + See "Attention Is All You Need" for more details. + embed_dim_to_check: total dimension of the model. + num_heads: parallel attention heads. + in_proj_weight, in_proj_bias: input projection weight and bias. + bias_k, bias_v: bias of the key and value sequences to be added at dim=0. + add_zero_attn: add a new batch of zeros to the key and + value sequences at dim=1. 
+ dropout_p: probability of an element to be zeroed. + out_proj_weight, out_proj_bias: the output projection weight and bias. + training: apply dropout if is ``True``. + key_padding_mask: if provided, specified padding elements in the key will + be ignored by the attention. This is an binary mask. When the value is True, + the corresponding value on the attention layer will be filled with -inf. + need_weights: output attn_output_weights. + Default: `True` + Note: `needs_weight` defaults to `True`, but should be set to `False` + For best performance when attention weights are not needed. + *Setting needs_weights to `True` + leads to a significant performance degradation.* + attn_mask: 2D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all + the batches while a 3D mask allows to specify a different mask for the entries of each batch. + use_separate_proj_weight: the function accept the proj. weights for query, key, + and value in different forms. If false, in_proj_weight will be used, which is + a combination of q_proj_weight, k_proj_weight, v_proj_weight. + q_proj_weight, k_proj_weight, v_proj_weight, in_proj_bias: input projection weight and bias. + static_k, static_v: static key and value used for attention operators. + """ # noqa: E501 + tgt_len, bsz, embed_dim = query.size() + assert embed_dim == embed_dim_to_check + # allow MHA to have different sizes for the feature dimension + assert key.size(0) == value.size(0) and key.size(1) == value.size(1) + + head_dim = embed_dim // num_heads + assert head_dim * num_heads == embed_dim, \ + 'embed_dim must be divisible by num_heads' + scaling = float(head_dim)**-0.5 + + if not use_separate_proj_weight: + if (query is key or torch.equal( + query, key)) and (key is value or torch.equal(key, value)): + # self-attention + raise NotImplementedError('self-attention is not implemented') + + elif key is value or torch.equal(key, value): + # encoder-decoder attention + # This is inline in_proj function + # with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = F.linear(query, _w, _b) + + if key is None: + assert value is None + k = None + v = None + q_k = None + q_v = None + else: + # This is inline in_proj function with + # in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + k, v = F.linear(key, _w, _b).chunk(2, dim=-1) + q_k, q_v = F.linear(query, _w, _b).chunk(2, dim=-1) + else: + # This is inline in_proj function with + # in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = F.linear(query, _w, _b) + + # This is inline in_proj function with + # in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = embed_dim * 2 + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + k = F.linear(key, _w, _b) + q_k = F.linear(query, _w, _b) + # This is inline in_proj function with + # in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim * 2 + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + v = F.linear(value, _w, _b) + q_v = F.linear(query, _w, _b) + else: + q_proj_weight_non_opt = \ + torch.jit._unwrap_optional(q_proj_weight) + len1, len2 = q_proj_weight_non_opt.size() + 
assert len1 == embed_dim and len2 == query.size(-1) + + k_proj_weight_non_opt = \ + torch.jit._unwrap_optional(k_proj_weight) + len1, len2 = k_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == key.size(-1) + + v_proj_weight_non_opt = \ + torch.jit._unwrap_optional(v_proj_weight) + len1, len2 = v_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == value.size(-1) + + if in_proj_bias is not None: + q = F.linear(query, q_proj_weight_non_opt, + in_proj_bias[0:embed_dim]) + k = F.linear(key, k_proj_weight_non_opt, + in_proj_bias[embed_dim:(embed_dim * 2)]) + v = F.linear(value, v_proj_weight_non_opt, + in_proj_bias[(embed_dim * 2):]) + else: + q = F.linear(query, q_proj_weight_non_opt, in_proj_bias) + k = F.linear(key, k_proj_weight_non_opt, in_proj_bias) + v = F.linear(value, v_proj_weight_non_opt, in_proj_bias) + q = q * scaling + + if attn_mask is not None: + assert ( + attn_mask.dtype == torch.float32 + or attn_mask.dtype == torch.float64 + or attn_mask.dtype == torch.float16 + or attn_mask.dtype == torch.uint8 or attn_mask.dtype == torch.bool + ), 'Only float, byte, and bool types are supported for ' \ + 'attn_mask, not {}'.format(attn_mask.dtype) + if attn_mask.dtype == torch.uint8: + warnings.warn('Byte tensor for attn_mask in nn.MultiheadAttention ' + 'is deprecated. Use bool tensor instead.') + attn_mask = attn_mask.to(torch.bool) + + if attn_mask.dim() == 2: + attn_mask = attn_mask.unsqueeze(0) + if list(attn_mask.size()) != [1, query.size(0), key.size(0)]: + raise RuntimeError( + 'The size of the 2D attn_mask is not correct.') + elif attn_mask.dim() == 3: + if list(attn_mask.size()) != [ + bsz * num_heads, + query.size(0), key.size(0) + ]: + raise RuntimeError( + 'The size of the 3D attn_mask is not correct.') + else: + raise RuntimeError( + "attn_mask's dimension {} is not supported".format( + attn_mask.dim())) + # attn_mask's dim is 3 now. + + # convert ByteTensor key_padding_mask to bool + if key_padding_mask is not None and key_padding_mask.dtype == torch.uint8: + warnings.warn( + 'Byte tensor for key_padding_mask in nn.MultiheadAttention ' + 'is deprecated. Use bool tensor instead.') + key_padding_mask = key_padding_mask.to(torch.bool) + + if bias_k is not None and bias_v is not None: + if static_k is None and static_v is None: + k = torch.cat([k, bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = F.pad(attn_mask, (0, 1)) + if key_padding_mask is not None: + key_padding_mask = F.pad(key_padding_mask, (0, 1)) + else: + assert static_k is None, 'bias cannot be added to static key.' + assert static_v is None, 'bias cannot be added to static value.' 
+ else: + assert bias_k is None + assert bias_v is None + + q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1) + if k is not None: + k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) + q_k = q_k.contiguous().view(tgt_len, bsz * num_heads, + head_dim).transpose(0, 1) + if v is not None: + v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) + q_v = q_v.contiguous().view(tgt_len, bsz * num_heads, + head_dim).transpose(0, 1) + + if static_k is not None: + assert static_k.size(0) == bsz * num_heads + assert static_k.size(2) == head_dim + k = static_k + + if static_v is not None: + assert static_v.size(0) == bsz * num_heads + assert static_v.size(2) == head_dim + v = static_v + + src_len = k.size(1) + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if add_zero_attn: + src_len += 1 + k = torch.cat( + [ + k, + torch.zeros( + (k.size(0), 1) + k.size()[2:], + dtype=k.dtype, + device=k.device), + ], + dim=1, + ) + v = torch.cat( + [ + v, + torch.zeros( + (v.size(0), 1) + v.size()[2:], + dtype=v.dtype, + device=v.device), + ], + dim=1, + ) + if attn_mask is not None: + attn_mask = F.pad(attn_mask, (0, 1)) + if key_padding_mask is not None: + key_padding_mask = F.pad(key_padding_mask, (0, 1)) + + attn_output_weights = torch.bmm(q, k.transpose(1, 2)) + assert list( + attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len] + + if attn_mask is not None: + if attn_mask.dtype == torch.bool: + attn_output_weights.masked_fill_(attn_mask, float('-inf')) + else: + attn_output_weights += attn_mask + + if key_padding_mask is not None: + attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, + src_len) + attn_output_weights = attn_output_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + float('-inf'), + ) + attn_output_weights = attn_output_weights.view(bsz * num_heads, + tgt_len, src_len) + # attn_out_weights: [bsz * num_heads, tgt_len, src_len] + # ->[bsz * num_heads, tgt_len, src_len+1] + self_weight = (q * q_k).sum( + dim=-1, keepdim=True) # [bsz * num_heads, tgt_len, 1] + total_attn_output_weights = torch.cat([attn_output_weights, self_weight], + dim=-1) + total_attn_output_weights = F.softmax(total_attn_output_weights, dim=-1) + total_attn_output_weights = F.dropout( + total_attn_output_weights, p=dropout_p, training=training) + attn_output_weights = \ + total_attn_output_weights[:, :, : -1] + # [bsz * num_heads, tgt_len, src_len] + self_weight = \ + total_attn_output_weights[:, :, -1:] # [bsz * num_heads, tgt_len, 1] + + attn_output = torch.bmm(attn_output_weights, + v) # [bsz * num_heads, tgt_len, head_dim] + attn_output = (attn_output + self_weight * q_v + ) # [bsz * num_heads, tgt_len, head_dim] + assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim] + attn_output = attn_output.transpose(0, 1).contiguous().view( + tgt_len, bsz, embed_dim) + attn_output = F.linear(attn_output, out_proj_weight, out_proj_bias) + + if need_weights: + # average attention weights over heads + attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, + src_len) + return attn_output, attn_output_weights # .sum(dim=1) / num_heads + else: + return attn_output, None + + +def cross_attn_layer(tf_layer: BaseTransformerLayer, x, mem, attn_bias): + """Implementation of transformer layer with cross attention. The cross + attention shares the embedding weights with self-attention of tf_layer. 
+ Args: + tf_layer: (TransformerEncoderLayer): The Module of transformer layer. + x (Tensor): query [K,N,C] + mem (Tensor): key and value [L,N,C] + attn_bias (Tensor): attention bias [N*num_head,K,L] + + Return: + x (Tensor): cross attention output [K,N,C] + """ + self_attn_layer = tf_layer.attentions[0].attn + attn_layer_paras = { + 'embed_dim_to_check': self_attn_layer.embed_dim, + 'num_heads': self_attn_layer.num_heads, + 'in_proj_weight': self_attn_layer.in_proj_weight, + 'in_proj_bias': self_attn_layer.in_proj_bias, + 'bias_k': self_attn_layer.bias_k, + 'bias_v': self_attn_layer.bias_v, + 'add_zero_attn': self_attn_layer.add_zero_attn, + 'dropout_p': self_attn_layer.dropout, + 'out_proj_weight': self_attn_layer.out_proj.weight, + 'out_proj_bias': self_attn_layer.out_proj.bias, + 'training': self_attn_layer.training + } + + q_x = tf_layer.norms[0](x) + k_x = v_x = tf_layer.norms[0](mem) + x = x + cross_attn_with_self_bias( + q_x, + k_x, + v_x, + attn_mask=attn_bias, + need_weights=False, + **attn_layer_paras)[0] + x = tf_layer.ffns[0](tf_layer.norms[1](x), identity=x) + return x + + +class LayerNorm2d(nn.Module): + """A LayerNorm variant, popularized by Transformers, that performs point- + wise mean and variance normalization over the channel dimension for inputs + that have shape (batch_size, channels, height, width). + + https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa B950 + """ + + def __init__(self, normalized_shape, eps=1e-6): + super().__init__() + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.bias = nn.Parameter(torch.zeros(normalized_shape)) + self.eps = eps + self.normalized_shape = (normalized_shape, ) + + def forward(self, x: torch.Tensor): + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None] * x + self.bias[:, None, None] + return x + + +class MLP(nn.Module): + """Very simple multi-layer perceptron (also called FFN)""" + + def __init__(self, + input_dim, + hidden_dim, + output_dim, + num_layers, + affine_func=nn.Linear): + super().__init__() + self.num_layers = num_layers + h = [hidden_dim] * (num_layers - 1) + self.layers = nn.ModuleList( + affine_func(n, k) + for n, k in zip([input_dim] + h, h + [output_dim])) + + def forward(self, x: torch.Tensor): + for i, layer in enumerate(self.layers): + x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) + return x diff --git a/mmseg/models/utils/se_layer.py b/mmseg/models/utils/se_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..0ff632cfea728a7ffd99f1578c828c588d78f3db --- /dev/null +++ b/mmseg/models/utils/se_layer.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmengine.utils import is_tuple_of + +from .make_divisible import make_divisible + + +class SELayer(nn.Module): + """Squeeze-and-Excitation Module. + + Args: + channels (int): The input (and output) channels of the SE layer. + ratio (int): Squeeze ratio in SELayer, the intermediate channel will be + ``int(channels/ratio)``. Default: 16. + conv_cfg (None or dict): Config dict for convolution layer. + Default: None, which means using conv2d. + act_cfg (dict or Sequence[dict]): Config dict for activation layer. + If act_cfg is a dict, two activation layers will be configured + by this dict. 
If act_cfg is a sequence of dicts, the first + activation layer will be configured by the first dict and the + second activation layer will be configured by the second dict. + Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, + divisor=6.0)). + """ + + def __init__(self, + channels, + ratio=16, + conv_cfg=None, + act_cfg=(dict(type='ReLU'), + dict(type='HSigmoid', bias=3.0, divisor=6.0))): + super().__init__() + if isinstance(act_cfg, dict): + act_cfg = (act_cfg, act_cfg) + assert len(act_cfg) == 2 + assert is_tuple_of(act_cfg, dict) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + self.conv1 = ConvModule( + in_channels=channels, + out_channels=make_divisible(channels // ratio, 8), + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[0]) + self.conv2 = ConvModule( + in_channels=make_divisible(channels // ratio, 8), + out_channels=channels, + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[1]) + + def forward(self, x): + out = self.global_avgpool(x) + out = self.conv1(out) + out = self.conv2(out) + return x * out diff --git a/mmseg/models/utils/self_attention_block.py b/mmseg/models/utils/self_attention_block.py new file mode 100644 index 0000000000000000000000000000000000000000..5bb6e8284e599637c12553e27199338a820709e3 --- /dev/null +++ b/mmseg/models/utils/self_attention_block.py @@ -0,0 +1,161 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmcv.cnn import ConvModule +from mmengine.model.weight_init import constant_init +from torch import nn as nn +from torch.nn import functional as F + + +class SelfAttentionBlock(nn.Module): + """General self-attention block/non-local block. + + Please refer to https://arxiv.org/abs/1706.03762 for details about key, + query and value. + + Args: + key_in_channels (int): Input channels of key feature. + query_in_channels (int): Input channels of query feature. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_downsample (nn.Module): Query downsample module. + key_downsample (nn.Module): Key downsample module. + key_query_num_convs (int): Number of convs for key/query projection. + value_num_convs (int): Number of convs for value projection. + matmul_norm (bool): Whether normalize attention map with sqrt of + channels + with_out (bool): Whether use out projection. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. 
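+
+    Example (an illustrative sketch; this base class takes no defaults, so
+    every argument is spelled out explicitly):
+        >>> import torch
+        >>> block = SelfAttentionBlock(
+        ...     key_in_channels=64, query_in_channels=64, channels=16,
+        ...     out_channels=64, share_key_query=False,
+        ...     query_downsample=None, key_downsample=None,
+        ...     key_query_num_convs=2, value_out_num_convs=1,
+        ...     key_query_norm=False, value_out_norm=False,
+        ...     matmul_norm=True, with_out=True, conv_cfg=None,
+        ...     norm_cfg=None, act_cfg=None)
+        >>> out = block(torch.rand(2, 64, 16, 16), torch.rand(2, 64, 16, 16))
+        >>> out.shape
+        torch.Size([2, 64, 16, 16])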
+ """ + + def __init__(self, key_in_channels, query_in_channels, channels, + out_channels, share_key_query, query_downsample, + key_downsample, key_query_num_convs, value_out_num_convs, + key_query_norm, value_out_norm, matmul_norm, with_out, + conv_cfg, norm_cfg, act_cfg): + super().__init__() + if share_key_query: + assert key_in_channels == query_in_channels + self.key_in_channels = key_in_channels + self.query_in_channels = query_in_channels + self.out_channels = out_channels + self.channels = channels + self.share_key_query = share_key_query + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.key_project = self.build_project( + key_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if share_key_query: + self.query_project = self.key_project + else: + self.query_project = self.build_project( + query_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.value_project = self.build_project( + key_in_channels, + channels if with_out else out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if with_out: + self.out_project = self.build_project( + channels, + out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + else: + self.out_project = None + + self.query_downsample = query_downsample + self.key_downsample = key_downsample + self.matmul_norm = matmul_norm + + self.init_weights() + + def init_weights(self): + """Initialize weight of later layer.""" + if self.out_project is not None: + if not isinstance(self.out_project, ConvModule): + constant_init(self.out_project, 0) + + def build_project(self, in_channels, channels, num_convs, use_conv_module, + conv_cfg, norm_cfg, act_cfg): + """Build projection layer for key/query/value/out.""" + if use_conv_module: + convs = [ + ConvModule( + in_channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + ] + for _ in range(num_convs - 1): + convs.append( + ConvModule( + channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + else: + convs = [nn.Conv2d(in_channels, channels, 1)] + for _ in range(num_convs - 1): + convs.append(nn.Conv2d(channels, channels, 1)) + if len(convs) > 1: + convs = nn.Sequential(*convs) + else: + convs = convs[0] + return convs + + def forward(self, query_feats, key_feats): + """Forward function.""" + batch_size = query_feats.size(0) + query = self.query_project(query_feats) + if self.query_downsample is not None: + query = self.query_downsample(query) + query = query.reshape(*query.shape[:2], -1) + query = query.permute(0, 2, 1).contiguous() + + key = self.key_project(key_feats) + value = self.value_project(key_feats) + if self.key_downsample is not None: + key = self.key_downsample(key) + value = self.key_downsample(value) + key = key.reshape(*key.shape[:2], -1) + value = value.reshape(*value.shape[:2], -1) + value = value.permute(0, 2, 1).contiguous() + + sim_map = torch.matmul(query, key) + if self.matmul_norm: + sim_map = (self.channels**-.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.matmul(sim_map, value) + context = context.permute(0, 2, 1).contiguous() + context = context.reshape(batch_size, -1, 
*query_feats.shape[2:]) + if self.out_project is not None: + context = self.out_project(context) + return context diff --git a/mmseg/models/utils/shape_convert.py b/mmseg/models/utils/shape_convert.py new file mode 100644 index 0000000000000000000000000000000000000000..cce1e220b645d4b02df1ec2d9ed3137c8acba707 --- /dev/null +++ b/mmseg/models/utils/shape_convert.py @@ -0,0 +1,107 @@ +# Copyright (c) OpenMMLab. All rights reserved. +def nlc_to_nchw(x, hw_shape): + """Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor. + + Args: + x (Tensor): The input tensor of shape [N, L, C] before conversion. + hw_shape (Sequence[int]): The height and width of output feature map. + + Returns: + Tensor: The output tensor of shape [N, C, H, W] after conversion. + """ + H, W = hw_shape + assert len(x.shape) == 3 + B, L, C = x.shape + assert L == H * W, 'The seq_len doesn\'t match H, W' + return x.transpose(1, 2).reshape(B, C, H, W) + + +def nchw_to_nlc(x): + """Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor. + + Args: + x (Tensor): The input tensor of shape [N, C, H, W] before conversion. + + Returns: + Tensor: The output tensor of shape [N, L, C] after conversion. + """ + assert len(x.shape) == 4 + return x.flatten(2).transpose(1, 2).contiguous() + + +def nchw2nlc2nchw(module, x, contiguous=False, **kwargs): + """Flatten [N, C, H, W] shape tensor `x` to [N, L, C] shape tensor. Use the + reshaped tensor as the input of `module`, and the convert the output of + `module`, whose shape is. + + [N, L, C], to [N, C, H, W]. + + Args: + module (Callable): A callable object the takes a tensor + with shape [N, L, C] as input. + x (Tensor): The input tensor of shape [N, C, H, W]. + contiguous: + contiguous (Bool): Whether to make the tensor contiguous + after each shape transform. + + Returns: + Tensor: The output tensor of shape [N, C, H, W]. + + Example: + >>> import torch + >>> import torch.nn as nn + >>> norm = nn.LayerNorm(4) + >>> feature_map = torch.rand(4, 4, 5, 5) + >>> output = nchw2nlc2nchw(norm, feature_map) + """ + B, C, H, W = x.shape + if not contiguous: + x = x.flatten(2).transpose(1, 2) + x = module(x, **kwargs) + x = x.transpose(1, 2).reshape(B, C, H, W) + else: + x = x.flatten(2).transpose(1, 2).contiguous() + x = module(x, **kwargs) + x = x.transpose(1, 2).reshape(B, C, H, W).contiguous() + return x + + +def nlc2nchw2nlc(module, x, hw_shape, contiguous=False, **kwargs): + """Convert [N, L, C] shape tensor `x` to [N, C, H, W] shape tensor. Use the + reshaped tensor as the input of `module`, and convert the output of + `module`, whose shape is. + + [N, C, H, W], to [N, L, C]. + + Args: + module (Callable): A callable object the takes a tensor + with shape [N, C, H, W] as input. + x (Tensor): The input tensor of shape [N, L, C]. + hw_shape: (Sequence[int]): The height and width of the + feature map with shape [N, C, H, W]. + contiguous (Bool): Whether to make the tensor contiguous + after each shape transform. + + Returns: + Tensor: The output tensor of shape [N, L, C]. 
+ + Example: + >>> import torch + >>> import torch.nn as nn + >>> conv = nn.Conv2d(16, 16, 3, 1, 1) + >>> feature_map = torch.rand(4, 25, 16) + >>> output = nlc2nchw2nlc(conv, feature_map, (5, 5)) + """ + H, W = hw_shape + assert len(x.shape) == 3 + B, L, C = x.shape + assert L == H * W, 'The seq_len doesn\'t match H, W' + if not contiguous: + x = x.transpose(1, 2).reshape(B, C, H, W) + x = module(x, **kwargs) + x = x.flatten(2).transpose(1, 2) + else: + x = x.transpose(1, 2).reshape(B, C, H, W).contiguous() + x = module(x, **kwargs) + x = x.flatten(2).transpose(1, 2).contiguous() + return x diff --git a/mmseg/models/utils/up_conv_block.py b/mmseg/models/utils/up_conv_block.py new file mode 100644 index 0000000000000000000000000000000000000000..4fa3b598de96d53c169232d9c89ac458f6921e8d --- /dev/null +++ b/mmseg/models/utils/up_conv_block.py @@ -0,0 +1,102 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, build_upsample_layer + + +class UpConvBlock(nn.Module): + """Upsample convolution block in decoder for UNet. + + This upsample convolution block consists of one upsample module + followed by one convolution block. The upsample module expands the + high-level low-resolution feature map and the convolution block fuses + the upsampled high-level low-resolution feature map and the low-level + high-resolution feature map from encoder. + + Args: + conv_block (nn.Sequential): Sequential of convolutional layers. + in_channels (int): Number of input channels of the high-level + skip_channels (int): Number of input channels of the low-level + high-resolution feature map from encoder. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers in the conv_block. + Default: 2. + stride (int): Stride of convolutional layer in conv_block. Default: 1. + dilation (int): Dilation rate of convolutional layer in conv_block. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). If the size of + high-level feature map is the same as that of skip feature map + (low-level feature map from encoder), it does not need upsample the + high-level feature map and the upsample_cfg is None. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__(self, + conv_block, + in_channels, + skip_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + dcn=None, + plugins=None): + super().__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' 
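+        # The upsample module (built below) maps in_channels -> skip_channels,
+        # and its output is concatenated with the skip feature in forward(),
+        # hence the conv block consumes 2 * skip_channels input channels.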
+
+        self.conv_block = conv_block(
+            in_channels=2 * skip_channels,
+            out_channels=out_channels,
+            num_convs=num_convs,
+            stride=stride,
+            dilation=dilation,
+            with_cp=with_cp,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg,
+            dcn=None,
+            plugins=None)
+        if upsample_cfg is not None:
+            self.upsample = build_upsample_layer(
+                cfg=upsample_cfg,
+                in_channels=in_channels,
+                out_channels=skip_channels,
+                with_cp=with_cp,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg)
+        else:
+            self.upsample = ConvModule(
+                in_channels,
+                skip_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg)
+
+    def forward(self, skip, x):
+        """Forward function."""
+
+        x = self.upsample(x)
+        out = torch.cat([skip, x], dim=1)
+        out = self.conv_block(out)
+
+        return out
diff --git a/mmseg/models/utils/wrappers.py b/mmseg/models/utils/wrappers.py
new file mode 100644
index 0000000000000000000000000000000000000000..abbd0c029623b4f480a067e4b78adfec234ef8d0
--- /dev/null
+++ b/mmseg/models/utils/wrappers.py
@@ -0,0 +1,51 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def resize(input,
+           size=None,
+           scale_factor=None,
+           mode='nearest',
+           align_corners=None,
+           warning=True):
+    if warning:
+        if size is not None and align_corners:
+            input_h, input_w = tuple(int(x) for x in input.shape[2:])
+            output_h, output_w = tuple(int(x) for x in size)
+            if output_h > input_h or output_w > input_w:
+                if ((output_h > 1 and output_w > 1 and input_h > 1
+                     and input_w > 1) and (output_h - 1) % (input_h - 1)
+                        and (output_w - 1) % (input_w - 1)):
+                    warnings.warn(
+                        f'When align_corners={align_corners}, '
+                        'the output would be more aligned if '
+                        f'input size {(input_h, input_w)} is `x+1` and '
+                        f'out size {(output_h, output_w)} is `nx+1`')
+    return F.interpolate(input, size, scale_factor, mode, align_corners)
+
+
+class Upsample(nn.Module):
+
+    def __init__(self,
+                 size=None,
+                 scale_factor=None,
+                 mode='nearest',
+                 align_corners=None):
+        super().__init__()
+        self.size = size
+        if isinstance(scale_factor, tuple):
+            self.scale_factor = tuple(
+                float(factor) for factor in scale_factor)
+        else:
+            self.scale_factor = float(scale_factor) if scale_factor else None
+        self.mode = mode
+        self.align_corners = align_corners
+
+    def forward(self, x):
+        if not self.size:
+            size = [int(t * self.scale_factor) for t in x.shape[-2:]]
+        else:
+            size = self.size
+        return resize(x, size, None, self.mode, self.align_corners)
diff --git a/mmseg/registry/__init__.py b/mmseg/registry/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee514d1a2a2bdd54a0a9b017ec227160ee502be5
--- /dev/null
+++ b/mmseg/registry/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) OpenMMLab. All rights reserved.
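The `resize` wrapper above is a thin shim over `F.interpolate` that additionally warns when `align_corners=True` is used with sizes unlikely to align. A minimal illustrative sketch of both wrappers (assuming torch and mmseg are installed; the import path matches the file added above):

import torch
from mmseg.models.utils.wrappers import Upsample, resize

x = torch.rand(1, 3, 32, 32)

# Functional form: align_corners=False, so no alignment warning is emitted.
y = resize(x, size=(64, 64), mode='bilinear', align_corners=False)
assert y.shape == (1, 3, 64, 64)

# Module form: the output size is derived from scale_factor at call time.
up = Upsample(scale_factor=2, mode='nearest')
assert up(x).shape == (1, 3, 64, 64)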
+from .registry import (DATA_SAMPLERS, DATASETS, EVALUATOR, HOOKS, INFERENCERS, + LOG_PROCESSORS, LOOPS, METRICS, MODEL_WRAPPERS, MODELS, + OPTIM_WRAPPER_CONSTRUCTORS, OPTIM_WRAPPERS, OPTIMIZERS, + PARAM_SCHEDULERS, RUNNER_CONSTRUCTORS, RUNNERS, + TASK_UTILS, TRANSFORMS, VISBACKENDS, VISUALIZERS, + WEIGHT_INITIALIZERS) + +__all__ = [ + 'HOOKS', 'DATASETS', 'DATA_SAMPLERS', 'TRANSFORMS', 'MODELS', + 'WEIGHT_INITIALIZERS', 'OPTIMIZERS', 'OPTIM_WRAPPER_CONSTRUCTORS', + 'TASK_UTILS', 'PARAM_SCHEDULERS', 'METRICS', 'MODEL_WRAPPERS', + 'VISBACKENDS', 'VISUALIZERS', 'RUNNERS', 'RUNNER_CONSTRUCTORS', 'LOOPS', + 'EVALUATOR', 'LOG_PROCESSORS', 'OPTIM_WRAPPERS', 'INFERENCERS' +] diff --git a/mmseg/registry/__pycache__/__init__.cpython-39.pyc b/mmseg/registry/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..355d2c0212adab7c7d749903931244a621d36f48 Binary files /dev/null and b/mmseg/registry/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/registry/__pycache__/registry.cpython-39.pyc b/mmseg/registry/__pycache__/registry.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f15d280fe7fa7c2691daa92f363849b5b0503d7 Binary files /dev/null and b/mmseg/registry/__pycache__/registry.cpython-39.pyc differ diff --git a/mmseg/registry/registry.py b/mmseg/registry/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..37b6a776095856c2fab0101b5b0ec8ed7e8fa8f2 --- /dev/null +++ b/mmseg/registry/registry.py @@ -0,0 +1,118 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""MMSegmentation provides 21 registry nodes to support using modules across +projects. Each node is a child of the root registry in MMEngine. + +More details can be found at +https://mmengine.readthedocs.io/en/latest/advanced_tutorials/registry.html. 
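+
+A minimal usage sketch (names are illustrative):
+
+    >>> from mmseg.registry import MODELS
+    >>> @MODELS.register_module()
+    ... class ToyHead:
+    ...     pass
+    >>> head = MODELS.build(dict(type='ToyHead'))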
+""" + +from mmengine.registry import DATA_SAMPLERS as MMENGINE_DATA_SAMPLERS +from mmengine.registry import DATASETS as MMENGINE_DATASETS +from mmengine.registry import EVALUATOR as MMENGINE_EVALUATOR +from mmengine.registry import HOOKS as MMENGINE_HOOKS +from mmengine.registry import INFERENCERS as MMENGINE_INFERENCERS +from mmengine.registry import LOG_PROCESSORS as MMENGINE_LOG_PROCESSORS +from mmengine.registry import LOOPS as MMENGINE_LOOPS +from mmengine.registry import METRICS as MMENGINE_METRICS +from mmengine.registry import MODEL_WRAPPERS as MMENGINE_MODEL_WRAPPERS +from mmengine.registry import MODELS as MMENGINE_MODELS +from mmengine.registry import \ + OPTIM_WRAPPER_CONSTRUCTORS as MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS +from mmengine.registry import OPTIM_WRAPPERS as MMENGINE_OPTIM_WRAPPERS +from mmengine.registry import OPTIMIZERS as MMENGINE_OPTIMIZERS +from mmengine.registry import PARAM_SCHEDULERS as MMENGINE_PARAM_SCHEDULERS +from mmengine.registry import \ + RUNNER_CONSTRUCTORS as MMENGINE_RUNNER_CONSTRUCTORS +from mmengine.registry import RUNNERS as MMENGINE_RUNNERS +from mmengine.registry import TASK_UTILS as MMENGINE_TASK_UTILS +from mmengine.registry import TRANSFORMS as MMENGINE_TRANSFORMS +from mmengine.registry import VISBACKENDS as MMENGINE_VISBACKENDS +from mmengine.registry import VISUALIZERS as MMENGINE_VISUALIZERS +from mmengine.registry import \ + WEIGHT_INITIALIZERS as MMENGINE_WEIGHT_INITIALIZERS +from mmengine.registry import Registry + +# manage all kinds of runners like `EpochBasedRunner` and `IterBasedRunner` +RUNNERS = Registry('runner', parent=MMENGINE_RUNNERS) +# manage runner constructors that define how to initialize runners +RUNNER_CONSTRUCTORS = Registry( + 'runner constructor', parent=MMENGINE_RUNNER_CONSTRUCTORS) +# manage all kinds of loops like `EpochBasedTrainLoop` +LOOPS = Registry('loop', parent=MMENGINE_LOOPS) +# manage all kinds of hooks like `CheckpointHook` +HOOKS = Registry( + 'hook', parent=MMENGINE_HOOKS, locations=['mmseg.engine.hooks']) + +# manage data-related modules +DATASETS = Registry( + 'dataset', parent=MMENGINE_DATASETS, locations=['mmseg.datasets']) +DATA_SAMPLERS = Registry('data sampler', parent=MMENGINE_DATA_SAMPLERS) +TRANSFORMS = Registry( + 'transform', + parent=MMENGINE_TRANSFORMS, + locations=['mmseg.datasets.transforms']) + +# mangage all kinds of modules inheriting `nn.Module` +MODELS = Registry('model', parent=MMENGINE_MODELS, locations=['mmseg.models']) +# mangage all kinds of model wrappers like 'MMDistributedDataParallel' +MODEL_WRAPPERS = Registry( + 'model_wrapper', + parent=MMENGINE_MODEL_WRAPPERS, + locations=['mmseg.models']) +# mangage all kinds of weight initialization modules like `Uniform` +WEIGHT_INITIALIZERS = Registry( + 'weight initializer', + parent=MMENGINE_WEIGHT_INITIALIZERS, + locations=['mmseg.models']) + +# mangage all kinds of optimizers like `SGD` and `Adam` +OPTIMIZERS = Registry( + 'optimizer', + parent=MMENGINE_OPTIMIZERS, + locations=['mmseg.engine.optimizers']) +# manage optimizer wrapper +OPTIM_WRAPPERS = Registry( + 'optim_wrapper', + parent=MMENGINE_OPTIM_WRAPPERS, + locations=['mmseg.engine.optimizers']) +# manage constructors that customize the optimization hyperparameters. 
+OPTIM_WRAPPER_CONSTRUCTORS = Registry(
+    'optimizer wrapper constructor',
+    parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS,
+    locations=['mmseg.engine.optimizers'])
+# manage all kinds of parameter schedulers like `MultiStepLR`
+PARAM_SCHEDULERS = Registry(
+    'parameter scheduler',
+    parent=MMENGINE_PARAM_SCHEDULERS,
+    locations=['mmseg.engine.schedulers'])
+
+# manage all kinds of metrics
+METRICS = Registry(
+    'metric', parent=MMENGINE_METRICS, locations=['mmseg.evaluation'])
+# manage evaluator
+EVALUATOR = Registry(
+    'evaluator', parent=MMENGINE_EVALUATOR, locations=['mmseg.evaluation'])
+
+# manage task-specific modules like ohem pixel sampler
+TASK_UTILS = Registry(
+    'task util', parent=MMENGINE_TASK_UTILS, locations=['mmseg.models'])
+
+# manage visualizer
+VISUALIZERS = Registry(
+    'visualizer',
+    parent=MMENGINE_VISUALIZERS,
+    locations=['mmseg.visualization'])
+# manage visualizer backend
+VISBACKENDS = Registry(
+    'vis_backend',
+    parent=MMENGINE_VISBACKENDS,
+    locations=['mmseg.visualization'])
+
+# manage log processor
+LOG_PROCESSORS = Registry(
+    'log_processor',
+    parent=MMENGINE_LOG_PROCESSORS,
+    locations=['mmseg.visualization'])
+
+# manage inferencer
+INFERENCERS = Registry('inferencer', parent=MMENGINE_INFERENCERS)
diff --git a/mmseg/structures/__init__.py b/mmseg/structures/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..63d118dca3ebcff30ca241f9378475bcce072627
--- /dev/null
+++ b/mmseg/structures/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .sampler import BasePixelSampler, OHEMPixelSampler, build_pixel_sampler
+from .seg_data_sample import SegDataSample
+
+__all__ = [
+    'SegDataSample', 'BasePixelSampler', 'OHEMPixelSampler',
+    'build_pixel_sampler'
+]
diff --git a/mmseg/structures/__pycache__/__init__.cpython-39.pyc b/mmseg/structures/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d216f5f0cec1e450957fabc002e62b6511fab67f
Binary files /dev/null and b/mmseg/structures/__pycache__/__init__.cpython-39.pyc differ
diff --git a/mmseg/structures/__pycache__/seg_data_sample.cpython-39.pyc b/mmseg/structures/__pycache__/seg_data_sample.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ad1e7e168946f85878ea4ec933603be5a71a4ffb
Binary files /dev/null and b/mmseg/structures/__pycache__/seg_data_sample.cpython-39.pyc differ
diff --git a/mmseg/structures/sampler/__init__.py b/mmseg/structures/sampler/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..91d762d1b4552b391ece046fa3d094409011bcec
--- /dev/null
+++ b/mmseg/structures/sampler/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) OpenMMLab. All rights reserved.
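+# Usage sketch (config values are illustrative): samplers are built from
+# config dicts, with the decode head passed in as the ``context``:
+#
+#   sampler = build_pixel_sampler(
+#       dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000),
+#       context=decode_head)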
+from .base_pixel_sampler import BasePixelSampler +from .builder import build_pixel_sampler +from .ohem_pixel_sampler import OHEMPixelSampler + +__all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] diff --git a/mmseg/structures/sampler/__pycache__/__init__.cpython-39.pyc b/mmseg/structures/sampler/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2105057f939b2c2166090f1cb9a997c6f8f60c90 Binary files /dev/null and b/mmseg/structures/sampler/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/structures/sampler/__pycache__/base_pixel_sampler.cpython-39.pyc b/mmseg/structures/sampler/__pycache__/base_pixel_sampler.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..367e7805bfbe8d7deabbfa949042512609992494 Binary files /dev/null and b/mmseg/structures/sampler/__pycache__/base_pixel_sampler.cpython-39.pyc differ diff --git a/mmseg/structures/sampler/__pycache__/builder.cpython-39.pyc b/mmseg/structures/sampler/__pycache__/builder.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72a532c70d7b932a87367df30d68d7c2be409e24 Binary files /dev/null and b/mmseg/structures/sampler/__pycache__/builder.cpython-39.pyc differ diff --git a/mmseg/structures/sampler/__pycache__/ohem_pixel_sampler.cpython-39.pyc b/mmseg/structures/sampler/__pycache__/ohem_pixel_sampler.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5d05f0d2e67ed85ea7f7a01f25de4b94db33f83 Binary files /dev/null and b/mmseg/structures/sampler/__pycache__/ohem_pixel_sampler.cpython-39.pyc differ diff --git a/mmseg/structures/sampler/base_pixel_sampler.py b/mmseg/structures/sampler/base_pixel_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..03672cd478a2e464cc734ae92686c86f219da0a9 --- /dev/null +++ b/mmseg/structures/sampler/base_pixel_sampler.py @@ -0,0 +1,13 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta, abstractmethod + + +class BasePixelSampler(metaclass=ABCMeta): + """Base class of pixel sampler.""" + + def __init__(self, **kwargs): + pass + + @abstractmethod + def sample(self, seg_logit, seg_label): + """Placeholder for sample function.""" diff --git a/mmseg/structures/sampler/builder.py b/mmseg/structures/sampler/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..48e14790264a3d4c4ff54d84e5bab67b1623a1df --- /dev/null +++ b/mmseg/structures/sampler/builder.py @@ -0,0 +1,14 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +from mmseg.registry import TASK_UTILS + +PIXEL_SAMPLERS = TASK_UTILS + + +def build_pixel_sampler(cfg, **default_args): + """Build pixel sampler for segmentation map.""" + warnings.warn( + '``build_pixel_sampler`` would be deprecated soon, please use ' + '``mmseg.registry.TASK_UTILS.build()`` ') + return TASK_UTILS.build(cfg, default_args=default_args) diff --git a/mmseg/structures/sampler/ohem_pixel_sampler.py b/mmseg/structures/sampler/ohem_pixel_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..a974273cab504be269e7f391e23a521b97bd8588 --- /dev/null +++ b/mmseg/structures/sampler/ohem_pixel_sampler.py @@ -0,0 +1,85 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
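+# OHEM in short: rather than weighting every pixel equally in the loss, keep
+# only the hardest pixels (lowest predicted confidence for the ground-truth
+# class, or highest loss) and give them weight 1, everything else weight 0.
+# Shape contract: sample() maps (N, C, H, W) logits and (N, 1, H, W) labels
+# to an (N, H, W) weight map.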
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .base_pixel_sampler import BasePixelSampler
+from .builder import PIXEL_SAMPLERS
+
+
+@PIXEL_SAMPLERS.register_module()
+class OHEMPixelSampler(BasePixelSampler):
+    """Online Hard Example Mining Sampler for segmentation.
+
+    Args:
+        context (nn.Module): The context of sampler, subclass of
+            :obj:`BaseDecodeHead`.
+        thresh (float, optional): The threshold for hard example selection;
+            predictions whose confidence falls below it are treated as hard
+            examples. If not specified, the hard examples are the pixels
+            with the top ``min_kept`` losses. Default: None.
+        min_kept (int, optional): The minimum number of predictions to keep.
+            Default: 100000.
+    """
+
+    def __init__(self, context, thresh=None, min_kept=100000):
+        super().__init__()
+        self.context = context
+        assert min_kept > 1
+        self.thresh = thresh
+        self.min_kept = min_kept
+
+    def sample(self, seg_logit, seg_label):
+        """Sample pixels that have high loss or low prediction confidence.
+
+        Args:
+            seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W)
+            seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W)
+
+        Returns:
+            torch.Tensor: segmentation weight, shape (N, H, W)
+        """
+        with torch.no_grad():
+            assert seg_logit.shape[2:] == seg_label.shape[2:]
+            assert seg_label.shape[1] == 1
+            seg_label = seg_label.squeeze(1).long()
+            batch_kept = self.min_kept * seg_label.size(0)
+            valid_mask = seg_label != self.context.ignore_index
+            seg_weight = seg_logit.new_zeros(size=seg_label.size())
+            valid_seg_weight = seg_weight[valid_mask]
+            if self.thresh is not None:
+                seg_prob = F.softmax(seg_logit, dim=1)
+
+                tmp_seg_label = seg_label.clone().unsqueeze(1)
+                tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0
+                seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1)
+                sort_prob, sort_indices = seg_prob[valid_mask].sort()
+
+                if sort_prob.numel() > 0:
+                    min_threshold = sort_prob[min(batch_kept,
+                                                  sort_prob.numel() - 1)]
+                else:
+                    min_threshold = 0.0
+                threshold = max(min_threshold, self.thresh)
+                valid_seg_weight[seg_prob[valid_mask] < threshold] = 1.
+            else:
+                if not isinstance(self.context.loss_decode, nn.ModuleList):
+                    losses_decode = [self.context.loss_decode]
+                else:
+                    losses_decode = self.context.loss_decode
+                losses = 0.0
+                for loss_module in losses_decode:
+                    losses += loss_module(
+                        seg_logit,
+                        seg_label,
+                        weight=None,
+                        ignore_index=self.context.ignore_index,
+                        reduction_override='none')
+
+                # faster than topk according to https://github.com/pytorch/pytorch/issues/22812  # noqa
+                _, sort_indices = losses[valid_mask].sort(descending=True)
+                valid_seg_weight[sort_indices[:batch_kept]] = 1.
+
+            seg_weight[valid_mask] = valid_seg_weight
+
+            return seg_weight
diff --git a/mmseg/structures/seg_data_sample.py b/mmseg/structures/seg_data_sample.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce68b5474330e2149d7d1c4de2d2406ae5b0345e
--- /dev/null
+++ b/mmseg/structures/seg_data_sample.py
@@ -0,0 +1,92 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.structures import BaseDataElement, PixelData
+
+
+class SegDataSample(BaseDataElement):
+    """A data structure interface of MMSegmentation. It is used as an
+    interface between different components.
+
+    The attributes in ``SegDataSample`` are divided into several parts:
+
+    - ``gt_sem_seg``(PixelData): Ground truth of semantic segmentation.
+    - ``pred_sem_seg``(PixelData): Prediction of semantic segmentation.
+    - ``seg_logits``(PixelData): Predicted logits of semantic segmentation.
+
+    Examples:
+        >>> import torch
+        >>> import numpy as np
+        >>> from mmengine.structures import PixelData
+        >>> from mmseg.structures import SegDataSample
+
+        >>> data_sample = SegDataSample()
+        >>> img_meta = dict(img_shape=(4, 4, 3),
+        ...                 pad_shape=(4, 4, 3))
+        >>> gt_segmentations = PixelData(metainfo=img_meta)
+        >>> gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
+        >>> data_sample.gt_sem_seg = gt_segmentations
+        >>> assert 'img_shape' in data_sample.gt_sem_seg.metainfo_keys()
+        >>> data_sample.gt_sem_seg.shape
+        (4, 4)
+        >>> print(data_sample)
+        <SegDataSample(
+            META INFORMATION
+            img_shape: (4, 4, 3)
+            pad_shape: (4, 4, 3)
+            DATA FIELDS
+            gt_sem_seg: <PixelData(...)>
+        ) at 0x1c2aae44d60>
+
+        >>> data_sample = SegDataSample()
+        >>> gt_sem_seg_data = dict(sem_seg=torch.rand(1, 4, 4))
+        >>> gt_sem_seg = PixelData(**gt_sem_seg_data)
+        >>> data_sample.gt_sem_seg = gt_sem_seg
+        >>> assert 'gt_sem_seg' in data_sample
+        >>> assert 'sem_seg' in data_sample.gt_sem_seg
+    """
+
+    @property
+    def gt_sem_seg(self) -> PixelData:
+        return self._gt_sem_seg
+
+    @gt_sem_seg.setter
+    def gt_sem_seg(self, value: PixelData) -> None:
+        self.set_field(value, '_gt_sem_seg', dtype=PixelData)
+
+    @gt_sem_seg.deleter
+    def gt_sem_seg(self) -> None:
+        del self._gt_sem_seg
+
+    @property
+    def pred_sem_seg(self) -> PixelData:
+        return self._pred_sem_seg
+
+    @pred_sem_seg.setter
+    def pred_sem_seg(self, value: PixelData) -> None:
+        self.set_field(value, '_pred_sem_seg', dtype=PixelData)
+
+    @pred_sem_seg.deleter
+    def pred_sem_seg(self) -> None:
+        del self._pred_sem_seg
+
+    @property
+    def seg_logits(self) -> PixelData:
+        return self._seg_logits
+
+    @seg_logits.setter
+    def seg_logits(self, value: PixelData) -> None:
+        self.set_field(value, '_seg_logits', dtype=PixelData)
+
+    @seg_logits.deleter
+    def seg_logits(self) -> None:
+        del self._seg_logits
diff --git a/mmseg/utils/__init__.py b/mmseg/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a2af58c6e0316d6f961df81160f3fc61a8a29e3
--- /dev/null
+++ b/mmseg/utils/__init__.py
@@ -0,0 +1,70 @@
+# Copyright (c) OpenMMLab. All rights reserved.
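+# Two of the small helpers re-exported here, for orientation (outputs follow
+# from the definitions in this package):
+#
+#   >>> from mmseg.utils import add_prefix, get_classes
+#   >>> add_prefix({'loss_ce': 0.4}, 'decode')
+#   {'decode.loss_ce': 0.4}
+#   >>> len(get_classes('cityscapes'))
+#   19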
+# yapf: disable +from .class_names import (ade_classes, ade_palette, bdd100k_classes, + bdd100k_palette, cityscapes_classes, + cityscapes_palette, cocostuff_classes, + cocostuff_palette, dataset_aliases, get_classes, + get_palette, isaid_classes, isaid_palette, + loveda_classes, loveda_palette, potsdam_classes, + potsdam_palette, stare_classes, stare_palette, + synapse_classes, synapse_palette, vaihingen_classes, + vaihingen_palette, voc_classes, voc_palette) +# yapf: enable +from .collect_env import collect_env +from .get_templates import get_predefined_templates +from .io import datafrombytes +from .misc import add_prefix, stack_batch +from .set_env import register_all_modules +from .tokenizer import tokenize +from .typing_utils import (ConfigType, ForwardResults, MultiConfig, + OptConfigType, OptMultiConfig, OptSampleList, + SampleList, TensorDict, TensorList) + +# isort: off +from .mask_classification import MatchMasks, seg_data_to_instance_data + +__all__ = [ + 'collect_env', + 'register_all_modules', + 'stack_batch', + 'add_prefix', + 'ConfigType', + 'OptConfigType', + 'MultiConfig', + 'OptMultiConfig', + 'SampleList', + 'OptSampleList', + 'TensorDict', + 'TensorList', + 'ForwardResults', + 'cityscapes_classes', + 'ade_classes', + 'voc_classes', + 'cocostuff_classes', + 'loveda_classes', + 'potsdam_classes', + 'vaihingen_classes', + 'isaid_classes', + 'stare_classes', + 'cityscapes_palette', + 'ade_palette', + 'voc_palette', + 'cocostuff_palette', + 'loveda_palette', + 'potsdam_palette', + 'vaihingen_palette', + 'isaid_palette', + 'stare_palette', + 'dataset_aliases', + 'get_classes', + 'get_palette', + 'datafrombytes', + 'synapse_palette', + 'synapse_classes', + 'get_predefined_templates', + 'tokenize', + 'seg_data_to_instance_data', + 'MatchMasks', + 'bdd100k_classes', + 'bdd100k_palette', +] diff --git a/mmseg/utils/__pycache__/__init__.cpython-39.pyc b/mmseg/utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa8c73988ad26fae27b6b26ea5f84d1764f4f754 Binary files /dev/null and b/mmseg/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/mmseg/utils/__pycache__/class_names.cpython-39.pyc b/mmseg/utils/__pycache__/class_names.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e7b9895b3cd66c958a118da86cd1ee5826fc137 Binary files /dev/null and b/mmseg/utils/__pycache__/class_names.cpython-39.pyc differ diff --git a/mmseg/utils/__pycache__/collect_env.cpython-39.pyc b/mmseg/utils/__pycache__/collect_env.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac97927329c8d80c233a6c01826913024a6db8f6 Binary files /dev/null and b/mmseg/utils/__pycache__/collect_env.cpython-39.pyc differ diff --git a/mmseg/utils/__pycache__/get_templates.cpython-39.pyc b/mmseg/utils/__pycache__/get_templates.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94c64444a81872a81239ca6e98a0e3f80aed33f8 Binary files /dev/null and b/mmseg/utils/__pycache__/get_templates.cpython-39.pyc differ diff --git a/mmseg/utils/__pycache__/io.cpython-39.pyc b/mmseg/utils/__pycache__/io.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ca798cae6b68e557ba9463c3dde7fc4a8b7ba49 Binary files /dev/null and b/mmseg/utils/__pycache__/io.cpython-39.pyc differ diff --git a/mmseg/utils/__pycache__/mask_classification.cpython-39.pyc b/mmseg/utils/__pycache__/mask_classification.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..0cc4090f334ed52c9858b4c83b002f478230b8fb Binary files /dev/null and b/mmseg/utils/__pycache__/mask_classification.cpython-39.pyc differ diff --git a/mmseg/utils/__pycache__/misc.cpython-39.pyc b/mmseg/utils/__pycache__/misc.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ba0b72223d633105c5958aca99b162cc45418dc Binary files /dev/null and b/mmseg/utils/__pycache__/misc.cpython-39.pyc differ diff --git a/mmseg/utils/__pycache__/set_env.cpython-39.pyc b/mmseg/utils/__pycache__/set_env.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e7942499be64cab392acdfab83edea0ec31e0f9 Binary files /dev/null and b/mmseg/utils/__pycache__/set_env.cpython-39.pyc differ diff --git a/mmseg/utils/__pycache__/tokenizer.cpython-39.pyc b/mmseg/utils/__pycache__/tokenizer.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc7d192ed9fe63ebc597db5088eb612ce7c999f0 Binary files /dev/null and b/mmseg/utils/__pycache__/tokenizer.cpython-39.pyc differ diff --git a/mmseg/utils/__pycache__/typing_utils.cpython-39.pyc b/mmseg/utils/__pycache__/typing_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a559269b30fabb8b8efe9f606898632579a9936e Binary files /dev/null and b/mmseg/utils/__pycache__/typing_utils.cpython-39.pyc differ diff --git a/mmseg/utils/bpe_simple_vocab_16e6.txt.gz b/mmseg/utils/bpe_simple_vocab_16e6.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a15856e00a06a9fbed8cdd34d2393fea4a3113 --- /dev/null +++ b/mmseg/utils/bpe_simple_vocab_16e6.txt.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:924691ac288e54409236115652ad4aa250f48203de50a9e4722a6ecd48d6804a +size 1356917 diff --git a/mmseg/utils/class_names.py b/mmseg/utils/class_names.py new file mode 100644 index 0000000000000000000000000000000000000000..5ab35f99dcabd886b40e88188d9395fff557ffc2 --- /dev/null +++ b/mmseg/utils/class_names.py @@ -0,0 +1,529 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
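+# Lookup in this module is alias-based: get_classes('ade20k') and
+# get_classes('ade') both resolve to ade_classes() through the
+# ``dataset_aliases`` table at the bottom of this file, and palettes
+# resolve the same way via the ``*_palette()`` functions.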
+from mmengine.utils import is_str + + +def cityscapes_classes(): + """Cityscapes class names for external use.""" + return [ + 'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', + 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', + 'bicycle' + ] + + +def ade_classes(): + """ADE20K class names for external use.""" + return [ + 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', + 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', + 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', + 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', + 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', + 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', + 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', + 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', + 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', + 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', + 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', + 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', + 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', + 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', + 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', + 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', + 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', + 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', + 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag' + ] + + +def voc_classes(): + """Pascal VOC class names for external use.""" + return [ + 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', + 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor' + ] + + +def pcontext_classes(): + """Pascal Context class names for external use.""" + return [ + 'aeroplane', 'bag', 'bed', 'bedclothes', 'bench', 'bicycle', 'bird', + 'boat', 'book', 'bottle', 'building', 'bus', 'cabinet', 'car', 'cat', + 'ceiling', 'chair', 'cloth', 'computer', 'cow', 'cup', 'curtain', + 'dog', 'door', 'fence', 'floor', 'flower', 'food', 'grass', 'ground', + 'horse', 'keyboard', 'light', 'motorbike', 'mountain', 'mouse', + 'person', 'plate', 'platform', 'pottedplant', 'road', 'rock', 'sheep', + 'shelves', 'sidewalk', 'sign', 'sky', 'snow', 'sofa', 'table', 'track', + 'train', 'tree', 'truck', 'tvmonitor', 'wall', 'water', 'window', + 'wood' + ] + + +def cocostuff_classes(): + """CocoStuff class names for external use.""" + return [ + 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', + 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', + 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', + 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', + 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', + 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', + 'surfboard', 'tennis racket', 
'bottle', 'wine glass', 'cup', 'fork', + 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', + 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', + 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', + 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', + 'scissors', 'teddy bear', 'hair drier', 'toothbrush', 'banner', + 'blanket', 'branch', 'bridge', 'building-other', 'bush', 'cabinet', + 'cage', 'cardboard', 'carpet', 'ceiling-other', 'ceiling-tile', + 'cloth', 'clothes', 'clouds', 'counter', 'cupboard', 'curtain', + 'desk-stuff', 'dirt', 'door-stuff', 'fence', 'floor-marble', + 'floor-other', 'floor-stone', 'floor-tile', 'floor-wood', 'flower', + 'fog', 'food-other', 'fruit', 'furniture-other', 'grass', 'gravel', + 'ground-other', 'hill', 'house', 'leaves', 'light', 'mat', 'metal', + 'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net', 'paper', + 'pavement', 'pillow', 'plant-other', 'plastic', 'platform', + 'playingfield', 'railing', 'railroad', 'river', 'road', 'rock', 'roof', + 'rug', 'salad', 'sand', 'sea', 'shelf', 'sky-other', 'skyscraper', + 'snow', 'solid-other', 'stairs', 'stone', 'straw', 'structural-other', + 'table', 'tent', 'textile-other', 'towel', 'tree', 'vegetable', + 'wall-brick', 'wall-concrete', 'wall-other', 'wall-panel', + 'wall-stone', 'wall-tile', 'wall-wood', 'water-other', 'waterdrops', + 'window-blind', 'window-other', 'wood' + ] + + +def loveda_classes(): + """LoveDA class names for external use.""" + return [ + 'background', 'building', 'road', 'water', 'barren', 'forest', + 'agricultural' + ] + + +def potsdam_classes(): + """Potsdam class names for external use.""" + return [ + 'impervious_surface', 'building', 'low_vegetation', 'tree', 'car', + 'clutter' + ] + + +def vaihingen_classes(): + """Vaihingen class names for external use.""" + return [ + 'impervious_surface', 'building', 'low_vegetation', 'tree', 'car', + 'clutter' + ] + + +def isaid_classes(): + """iSAID class names for external use.""" + return [ + 'background', 'ship', 'store_tank', 'baseball_diamond', 'tennis_court', + 'basketball_court', 'Ground_Track_Field', 'Bridge', 'Large_Vehicle', + 'Small_Vehicle', 'Helicopter', 'Swimming_pool', 'Roundabout', + 'Soccer_ball_field', 'plane', 'Harbor' + ] + + +def stare_classes(): + """stare class names for external use.""" + return ['background', 'vessel'] + + +def mapillary_v1_classes(): + """mapillary_v1 class names for external use.""" + return [ + 'Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier', + 'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking', + 'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane', 'Sidewalk', + 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist', + 'Other Rider', 'Lane Marking - Crosswalk', 'Lane Marking - General', + 'Mountain', 'Sand', 'Sky', 'Snow', 'Terrain', 'Vegetation', 'Water', + 'Banner', 'Bench', 'Bike Rack', 'Billboard', 'Catch Basin', + 'CCTV Camera', 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', + 'Phone Booth', 'Pothole', 'Street Light', 'Pole', 'Traffic Sign Frame', + 'Utility Pole', 'Traffic Light', 'Traffic Sign (Back)', + 'Traffic Sign (Front)', 'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', + 'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', + 'Truck', 'Wheeled Slow', 'Car Mount', 'Ego Vehicle', 'Unlabeled' + ] + + +def mapillary_v1_palette(): + """mapillary_v1_ palette for external use.""" + return 
[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], + [180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255], + [140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232], + [150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60], + [255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128], + [255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180], + [190, 255, 255], [152, 251, 152], [107, 142, 35], [0, 170, 30], + [255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220], + [220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40], + [33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150], + [210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80], + [250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20], + [119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142], + [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], + [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10], [0, 0, 0]] + + +def mapillary_v2_classes(): + """mapillary_v2 class names for external use.""" + return [ + 'Bird', 'Ground Animal', 'Ambiguous Barrier', 'Concrete Block', 'Curb', + 'Fence', 'Guard Rail', 'Barrier', 'Road Median', 'Road Side', + 'Lane Separator', 'Temporary Barrier', 'Wall', 'Bike Lane', + 'Crosswalk - Plain', 'Curb Cut', 'Driveway', 'Parking', + 'Parking Aisle', 'Pedestrian Area', 'Rail Track', 'Road', + 'Road Shoulder', 'Service Lane', 'Sidewalk', 'Traffic Island', + 'Bridge', 'Building', 'Garage', 'Tunnel', 'Person', 'Person Group', + 'Bicyclist', 'Motorcyclist', 'Other Rider', + 'Lane Marking - Dashed Line', 'Lane Marking - Straight Line', + 'Lane Marking - Zigzag Line', 'Lane Marking - Ambiguous', + 'Lane Marking - Arrow (Left)', 'Lane Marking - Arrow (Other)', + 'Lane Marking - Arrow (Right)', + 'Lane Marking - Arrow (Split Left or Straight)', + 'Lane Marking - Arrow (Split Right or Straight)', + 'Lane Marking - Arrow (Straight)', 'Lane Marking - Crosswalk', + 'Lane Marking - Give Way (Row)', 'Lane Marking - Give Way (Single)', + 'Lane Marking - Hatched (Chevron)', + 'Lane Marking - Hatched (Diagonal)', 'Lane Marking - Other', + 'Lane Marking - Stop Line', 'Lane Marking - Symbol (Bicycle)', + 'Lane Marking - Symbol (Other)', 'Lane Marking - Text', + 'Lane Marking (only) - Dashed Line', 'Lane Marking (only) - Crosswalk', + 'Lane Marking (only) - Other', 'Lane Marking (only) - Test', + 'Mountain', 'Sand', 'Sky', 'Snow', 'Terrain', 'Vegetation', 'Water', + 'Banner', 'Bench', 'Bike Rack', 'Catch Basin', 'CCTV Camera', + 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', 'Parking Meter', + 'Phone Booth', 'Pothole', 'Signage - Advertisement', + 'Signage - Ambiguous', 'Signage - Back', 'Signage - Information', + 'Signage - Other', 'Signage - Store', 'Street Light', 'Pole', + 'Pole Group', 'Traffic Sign Frame', 'Utility Pole', 'Traffic Cone', + 'Traffic Light - General (Single)', 'Traffic Light - Pedestrians', + 'Traffic Light - General (Upright)', + 'Traffic Light - General (Horizontal)', 'Traffic Light - Cyclists', + 'Traffic Light - Other', 'Traffic Sign - Ambiguous', + 'Traffic Sign (Back)', 'Traffic Sign - Direction (Back)', + 'Traffic Sign - Direction (Front)', 'Traffic Sign (Front)', + 'Traffic Sign - Parking', 'Traffic Sign - Temporary (Back)', + 'Traffic Sign - Temporary (Front)', 'Trash Can', 'Bicycle', 'Boat', + 'Bus', 'Car', 'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', + 'Trailer', 'Truck', 'Vehicle Group', 'Wheeled Slow', 'Water Valve', + 
'Car Mount', 'Dynamic', 'Ego Vehicle', 'Ground', 'Static', 'Unlabeled' + ] + + +def mapillary_v2_palette(): + """mapillary_v2_ palette for external use.""" + return [[165, 42, 42], [0, 192, 0], [250, 170, 31], [250, 170, 32], + [196, 196, 196], [190, 153, 153], [180, 165, 180], [90, 120, 150], + [250, 170, 33], [250, 170, 34], [128, 128, 128], [250, 170, 35], + [102, 102, 156], [128, 64, 255], [140, 140, 200], [170, 170, 170], + [250, 170, 36], [250, 170, 160], [250, 170, 37], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], [110, 110, 110], + [244, 35, 232], [128, 196, 128], [150, 100, 100], [70, 70, 70], + [150, 150, 150], [150, 120, 90], [220, 20, 60], [220, 20, 60], + [255, 0, 0], [255, 0, 100], [255, 0, 200], [255, 255, 255], + [255, 255, 255], [250, 170, 29], [250, 170, 28], [250, 170, 26], + [250, 170, 25], [250, 170, 24], [250, 170, 22], [250, 170, 21], + [250, 170, 20], [255, 255, 255], [250, 170, 19], [250, 170, 18], + [250, 170, 12], [250, 170, 11], [255, 255, 255], [255, 255, 255], + [250, 170, 16], [250, 170, 15], [250, 170, 15], [255, 255, 255], + [255, 255, 255], [255, 255, 255], [255, 255, 255], [64, 170, 64], + [230, 160, 50], [70, 130, 180], [190, 255, 255], [152, 251, 152], + [107, 142, 35], [0, 170, 30], [255, 255, 128], [250, 0, 30], + [100, 140, 180], [220, 128, 128], [222, 40, 40], [100, 170, 30], + [40, 40, 40], [33, 33, 33], [100, 128, 160], [20, 20, 255], + [142, 0, 0], [70, 100, 150], [250, 171, 30], [250, 172, 30], + [250, 173, 30], [250, 174, 30], [250, 175, 30], [250, 176, 30], + [210, 170, 100], [153, 153, 153], [153, 153, 153], [128, 128, 128], + [0, 0, 80], [210, 60, 60], [250, 170, 30], [250, 170, 30], + [250, 170, 30], [250, 170, 30], [250, 170, 30], [250, 170, 30], + [192, 192, 192], [192, 192, 192], [192, 192, 192], [220, 220, 0], + [220, 220, 0], [0, 0, 196], [192, 192, 192], [220, 220, 0], + [140, 140, 20], [119, 11, 32], [150, 0, 255], [0, 60, 100], + [0, 0, 142], [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], + [0, 0, 110], [0, 0, 70], [0, 0, 142], [0, 0, 192], [170, 170, 170], + [32, 32, 32], [111, 74, 0], [120, 10, 10], [81, 0, 81], + [111, 111, 0], [0, 0, 0]] + + +def cityscapes_palette(): + """Cityscapes palette for external use.""" + return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], + [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100], + [0, 0, 230], [119, 11, 32]] + + +def ade_palette(): + """ADE20K palette for external use.""" + return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 
200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], + [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], + [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], + [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], + [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], + [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], + [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], + [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], + [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], + [102, 255, 0], [92, 0, 255]] + + +def voc_palette(): + """Pascal VOC palette for external use.""" + return [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + +def pcontext_palette(): + """Pascal Context palette for external use.""" + return [[180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], + [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], + [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], + [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], + [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], + [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], + [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], + [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], + [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], + [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], + [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], + [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], + [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], + [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], + [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + +def cocostuff_palette(): + """CocoStuff palette for external use.""" + return [[0, 192, 64], [0, 192, 64], [0, 64, 96], [128, 192, 192], + [0, 64, 64], [0, 192, 224], [0, 192, 192], [128, 192, 64], + [0, 192, 96], [128, 192, 64], [128, 32, 192], [0, 0, 224], + [0, 0, 64], [0, 160, 192], [128, 0, 96], [128, 0, 192], + [0, 32, 192], [128, 128, 224], [0, 0, 192], [128, 160, 192], + [128, 128, 0], [128, 0, 32], [128, 32, 0], [128, 0, 128], + [64, 128, 32], [0, 160, 0], [0, 0, 0], [192, 128, 160], [0, 32, 0], + [0, 128, 128], [64, 128, 160], [128, 160, 0], [0, 128, 0], + [192, 128, 32], [128, 96, 128], [0, 0, 128], [64, 0, 32], + [0, 224, 128], [128, 0, 0], [192, 0, 160], [0, 96, 128], + [128, 128, 128], [64, 0, 160], [128, 224, 128], [128, 128, 64], + [192, 0, 32], [128, 96, 0], [128, 0, 192], [0, 128, 
32], + [64, 224, 0], [0, 0, 64], [128, 128, 160], [64, 96, 0], + [0, 128, 192], [0, 128, 160], [192, 224, 0], [0, 128, 64], + [128, 128, 32], [192, 32, 128], [0, 64, 192], [0, 0, 32], + [64, 160, 128], [128, 64, 64], [128, 0, 160], [64, 32, 128], + [128, 192, 192], [0, 0, 160], [192, 160, 128], [128, 192, 0], + [128, 0, 96], [192, 32, 0], [128, 64, 128], [64, 128, 96], + [64, 160, 0], [0, 64, 0], [192, 128, 224], [64, 32, 0], + [0, 192, 128], [64, 128, 224], [192, 160, 0], [0, 192, 0], + [192, 128, 96], [192, 96, 128], [0, 64, 128], [64, 0, 96], + [64, 224, 128], [128, 64, 0], [192, 0, 224], [64, 96, 128], + [128, 192, 128], [64, 0, 224], [192, 224, 128], [128, 192, 64], + [192, 0, 96], [192, 96, 0], [128, 64, 192], [0, 128, 96], + [0, 224, 0], [64, 64, 64], [128, 128, 224], [0, 96, 0], + [64, 192, 192], [0, 128, 224], [128, 224, 0], [64, 192, 64], + [128, 128, 96], [128, 32, 128], [64, 0, 192], [0, 64, 96], + [0, 160, 128], [192, 0, 64], [128, 64, 224], [0, 32, 128], + [192, 128, 192], [0, 64, 224], [128, 160, 128], [192, 128, 0], + [128, 64, 32], [128, 32, 64], [192, 0, 128], [64, 192, 32], + [0, 160, 64], [64, 0, 0], [192, 192, 160], [0, 32, 64], + [64, 128, 128], [64, 192, 160], [128, 160, 64], [64, 128, 0], + [192, 192, 32], [128, 96, 192], [64, 0, 128], [64, 64, 32], + [0, 224, 192], [192, 0, 0], [192, 64, 160], [0, 96, 192], + [192, 128, 128], [64, 64, 160], [128, 224, 192], [192, 128, 64], + [192, 64, 32], [128, 96, 64], [192, 0, 192], [0, 192, 32], + [64, 224, 64], [64, 0, 64], [128, 192, 160], [64, 96, 64], + [64, 128, 192], [0, 192, 160], [192, 224, 64], [64, 128, 64], + [128, 192, 32], [192, 32, 192], [64, 64, 192], [0, 64, 32], + [64, 160, 192], [192, 64, 64], [128, 64, 160], [64, 32, 192], + [192, 192, 192], [0, 64, 160], [192, 160, 192], [192, 192, 0], + [128, 64, 96], [192, 32, 64], [192, 64, 128], [64, 192, 96], + [64, 160, 64], [64, 64, 0]] + + +def loveda_palette(): + """LoveDA palette for external use.""" + return [[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255], + [159, 129, 183], [0, 255, 0], [255, 195, 128]] + + +def potsdam_palette(): + """Potsdam palette for external use.""" + return [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], + [255, 255, 0], [255, 0, 0]] + + +def vaihingen_palette(): + """Vaihingen palette for external use.""" + return [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], + [255, 255, 0], [255, 0, 0]] + + +def isaid_palette(): + """iSAID palette for external use.""" + return [[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127], + [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, + 127], [0, 0, 127], + [0, 0, 191], [0, 0, 255], [0, 191, 127], [0, 127, 191], + [0, 127, 255], [0, 100, 155]] + + +def stare_palette(): + """STARE palette for external use.""" + return [[120, 120, 120], [6, 230, 230]] + + +def synapse_palette(): + """Synapse palette for external use.""" + return [[0, 0, 0], [0, 0, 255], [0, 255, 0], [255, 0, 0], [0, 255, 255], + [255, 0, 255], [255, 255, 0], [60, 255, 255], [240, 240, 240]] + + +def synapse_classes(): + """Synapse class names for external use.""" + return [ + 'background', 'aorta', 'gallbladder', 'left_kidney', 'right_kidney', + 'liver', 'pancreas', 'spleen', 'stomach' + ] + + +def lip_classes(): + """LIP class names for external use.""" + return [ + 'background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', + 'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt', + 'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe', + 'rightShoe' + ] + + +def 
lip_palette():
+    """LIP palette for external use."""
+    # RGB triples, one per entry of ``lip_classes()`` (the standard 20-color
+    # LIP visualization palette; values assumed from common LIP tooling).
+    return [[0, 0, 0], [128, 0, 0], [255, 0, 0], [0, 85, 0], [170, 0, 51],
+            [255, 85, 0], [0, 0, 85], [0, 119, 221], [85, 85, 0],
+            [0, 85, 85], [85, 51, 0], [52, 86, 128], [0, 128, 0],
+            [0, 0, 255], [51, 170, 221], [0, 255, 255], [85, 255, 170],
+            [170, 255, 85], [255, 255, 0], [255, 170, 0]]
+
+
+def bdd100k_classes():
+    """BDD100K class names for external use (the class names are compatible
+    with Cityscapes)."""
+    return [
+        'road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
+        'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
+        'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
+        'bicycle'
+    ]
+
+
+def bdd100k_palette():
+    """BDD100K palette for external use (same as Cityscapes)."""
+    return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156],
+            [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0],
+            [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60],
+            [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100],
+            [0, 0, 230], [119, 11, 32]]
+
+
+dataset_aliases = {
+    'cityscapes': ['cityscapes'],
+    'ade': ['ade', 'ade20k'],
+    'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'],
+    'pcontext': ['pcontext', 'pascal_context', 'voc2010'],
+    'loveda': ['loveda'],
+    'potsdam': ['potsdam'],
+    'vaihingen': ['vaihingen'],
+    'cocostuff': [
+        'cocostuff', 'cocostuff10k', 'cocostuff164k', 'coco-stuff',
+        'coco-stuff10k', 'coco-stuff164k', 'coco_stuff', 'coco_stuff10k',
+        'coco_stuff164k'
+    ],
+    'isaid': ['isaid', 'iSAID'],
+    'stare': ['stare', 'STARE'],
+    'lip': ['LIP', 'lip'],
+    'mapillary_v1': ['mapillary_v1'],
+    'mapillary_v2': ['mapillary_v2'],
+    'bdd100k': ['bdd100k']
+}
+
+
+def get_classes(dataset):
+    """Get class names of a dataset."""
+    alias2name = {}
+    for name, aliases in dataset_aliases.items():
+        for alias in aliases:
+            alias2name[alias] = name
+
+    if is_str(dataset):
+        if dataset in alias2name:
+            labels = eval(alias2name[dataset] + '_classes()')
+        else:
+            raise ValueError(f'Unrecognized dataset: {dataset}')
+    else:
+        raise TypeError(f'dataset must be a str, but got {type(dataset)}')
+    return labels
+
+
+def get_palette(dataset):
+    """Get class palette (RGB) of a dataset."""
+    alias2name = {}
+    for name, aliases in dataset_aliases.items():
+        for alias in aliases:
+            alias2name[alias] = name
+
+    if is_str(dataset):
+        if dataset in alias2name:
+            labels = eval(alias2name[dataset] + '_palette()')
+        else:
+            raise ValueError(f'Unrecognized dataset: {dataset}')
+    else:
+        raise TypeError(f'dataset must be a str, but got {type(dataset)}')
+    return labels
diff --git a/mmseg/utils/collect_env.py b/mmseg/utils/collect_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5d6ea290283e3af2f29475f82d225072cf39d99
--- /dev/null
+++ b/mmseg/utils/collect_env.py
@@ -0,0 +1,18 @@
+# Copyright (c) OpenMMLab. All rights reserved.
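+# Running this module directly prints one ``name: value`` line per entry,
+# e.g. (values are illustrative):
+#
+#   MMSegmentation: 1.x.x+abc1234
+#   PyTorch: 2.1.0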
+from mmengine.utils import get_git_hash +from mmengine.utils.dl_utils import collect_env as collect_base_env + +import mmseg + + +def collect_env(): + """Collect the information of the running environments.""" + env_info = collect_base_env() + env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' + + return env_info + + +if __name__ == '__main__': + for name, val in collect_env().items(): + print(f'{name}: {val}') diff --git a/mmseg/utils/get_templates.py b/mmseg/utils/get_templates.py new file mode 100644 index 0000000000000000000000000000000000000000..7e9032ba96cbe750134676fe46fc26fb607779f5 --- /dev/null +++ b/mmseg/utils/get_templates.py @@ -0,0 +1,109 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List + +PREDEFINED_TEMPLATES = { + 'imagenet': [ + 'a bad photo of a {}.', + 'a photo of many {}.', + 'a sculpture of a {}.', + 'a photo of the hard to see {}.', + 'a low resolution photo of the {}.', + 'a rendering of a {}.', + 'graffiti of a {}.', + 'a bad photo of the {}.', + 'a cropped photo of the {}.', + 'a tattoo of a {}.', + 'the embroidered {}.', + 'a photo of a hard to see {}.', + 'a bright photo of a {}.', + 'a photo of a clean {}.', + 'a photo of a dirty {}.', + 'a dark photo of the {}.', + 'a drawing of a {}.', + 'a photo of my {}.', + 'the plastic {}.', + 'a photo of the cool {}.', + 'a close-up photo of a {}.', + 'a black and white photo of the {}.', + 'a painting of the {}.', + 'a painting of a {}.', + 'a pixelated photo of the {}.', + 'a sculpture of the {}.', + 'a bright photo of the {}.', + 'a cropped photo of a {}.', + 'a plastic {}.', + 'a photo of the dirty {}.', + 'a jpeg corrupted photo of a {}.', + 'a blurry photo of the {}.', + 'a photo of the {}.', + 'a good photo of the {}.', + 'a rendering of the {}.', + 'a {} in a video game.', + 'a photo of one {}.', + 'a doodle of a {}.', + 'a close-up photo of the {}.', + 'a photo of a {}.', + 'the origami {}.', + 'the {} in a video game.', + 'a sketch of a {}.', + 'a doodle of the {}.', + 'a origami {}.', + 'a low resolution photo of a {}.', + 'the toy {}.', + 'a rendition of the {}.', + 'a photo of the clean {}.', + 'a photo of a large {}.', + 'a rendition of a {}.', + 'a photo of a nice {}.', + 'a photo of a weird {}.', + 'a blurry photo of a {}.', + 'a cartoon {}.', + 'art of a {}.', + 'a sketch of the {}.', + 'a embroidered {}.', + 'a pixelated photo of a {}.', + 'itap of the {}.', + 'a jpeg corrupted photo of the {}.', + 'a good photo of a {}.', + 'a plushie {}.', + 'a photo of the nice {}.', + 'a photo of the small {}.', + 'a photo of the weird {}.', + 'the cartoon {}.', + 'art of the {}.', + 'a drawing of the {}.', + 'a photo of the large {}.', + 'a black and white photo of a {}.', + 'the plushie {}.', + 'a dark photo of a {}.', + 'itap of a {}.', + 'graffiti of the {}.', + 'a toy {}.', + 'itap of my {}.', + 'a photo of a cool {}.', + 'a photo of a small {}.', + 'a tattoo of the {}.', + ], + 'vild': [ + 'a photo of a {}.', + 'This is a photo of a {}', + 'There is a {} in the scene', + 'There is the {} in the scene', + 'a photo of a {} in the scene', + 'a photo of a small {}.', + 'a photo of a medium {}.', + 'a photo of a large {}.', + 'This is a photo of a small {}.', + 'This is a photo of a medium {}.', + 'This is a photo of a large {}.', + 'There is a small {} in the scene.', + 'There is a medium {} in the scene.', + 'There is a large {} in the scene.', + ], +} + + +def get_predefined_templates(template_set_name: str) -> List[str]: + if template_set_name not in 
PREDEFINED_TEMPLATES:
+        raise ValueError(f'Template set {template_set_name} not found')
+    return PREDEFINED_TEMPLATES[template_set_name]
diff --git a/mmseg/utils/io.py b/mmseg/utils/io.py
new file mode 100644
index 0000000000000000000000000000000000000000..7029c3cddda02c89cbb50cee9f8b7e7fa57378d9
--- /dev/null
+++ b/mmseg/utils/io.py
@@ -0,0 +1,42 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import gzip
+import io
+import pickle
+
+import cv2
+import numpy as np
+
+
+def datafrombytes(content: bytes, backend: str = 'numpy') -> np.ndarray:
+    """Data decoding from bytes.
+
+    Args:
+        content (bytes): The data bytes got from files or other streams.
+        backend (str): The data decoding backend type. Options are 'numpy',
+            'nifti', 'cv2' and 'pickle'. Defaults to 'numpy'.
+
+    Returns:
+        numpy.ndarray: Loaded data array.
+    """
+    if backend == 'pickle':
+        data = pickle.loads(content)
+    else:
+        with io.BytesIO(content) as f:
+            if backend == 'nifti':
+                f = gzip.open(f)
+                try:
+                    from nibabel import FileHolder, Nifti1Image
+                except ImportError:
+                    raise ImportError(
+                        'nifti file io depends on nibabel, please run '
+                        '`pip install nibabel` to install it')
+                fh = FileHolder(fileobj=f)
+                data = Nifti1Image.from_file_map({'header': fh, 'image': fh})
+                data = Nifti1Image.from_bytes(data.to_bytes()).get_fdata()
+            elif backend == 'numpy':
+                data = np.load(f)
+            elif backend == 'cv2':
+                data = np.frombuffer(f.read(), dtype=np.uint8)
+                data = cv2.imdecode(data, cv2.IMREAD_UNCHANGED)
+            else:
+                raise ValueError(f'Unsupported backend: {backend}')
+    return data
diff --git a/mmseg/utils/mask_classification.py b/mmseg/utils/mask_classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..205d5259754abfe07e0d84ae0739cf08043815ff
--- /dev/null
+++ b/mmseg/utils/mask_classification.py
@@ -0,0 +1,205 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Tuple
+
+import torch
+from mmcv.ops import point_sample
+from mmengine.structures import InstanceData
+from torch import Tensor
+
+from mmseg.registry import TASK_UTILS
+from mmseg.utils import ConfigType, SampleList
+
+
+def seg_data_to_instance_data(ignore_index: int,
+                              batch_data_samples: SampleList):
+    """Convert the paradigm of ground truth from semantic segmentation to
+    instance segmentation.
+
+    Args:
+        ignore_index (int): The label index to be ignored.
+        batch_data_samples (List[SegDataSample]): The Data
+            Samples. It usually includes information such as
+            `gt_sem_seg`.
+
+    Returns:
+        List[InstanceData]: Batch of gt_instance. Each element usually
+            includes ``labels``, the unique ground truth label ids of an
+            image with shape (num_gt, ), and ``masks``, the ground truth
+            masks of the instances of an image with shape (num_gt, h, w).
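+
+    Example (shapes only, illustrative): a (1, H, W) ground truth containing
+    class ids 0, 3 and 255 with ``ignore_index=255`` yields one InstanceData
+    whose ``labels`` has shape (2, ) and whose ``masks`` has shape (2, H, W).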
+ """ + batch_gt_instances = [] + + for data_sample in batch_data_samples: + gt_sem_seg = data_sample.gt_sem_seg.data + classes = torch.unique( + gt_sem_seg, + sorted=False, + return_inverse=False, + return_counts=False) + + # remove ignored region + gt_labels = classes[classes != ignore_index] + + masks = [] + for class_id in gt_labels: + masks.append(gt_sem_seg == class_id) + + if len(masks) == 0: + gt_masks = torch.zeros( + (0, gt_sem_seg.shape[-2], + gt_sem_seg.shape[-1])).to(gt_sem_seg).long() + else: + gt_masks = torch.stack(masks).squeeze(1).long() + + instance_data = InstanceData(labels=gt_labels, masks=gt_masks) + batch_gt_instances.append(instance_data) + return batch_gt_instances + + +class MatchMasks: + """Match the predictions to category labels. + + Args: + num_points (int): the number of sampled points to compute cost. + num_queries (int): the number of prediction masks. + num_classes (int): the number of classes. + assigner (BaseAssigner): the assigner to compute matching. + """ + + def __init__(self, + num_points: int, + num_queries: int, + num_classes: int, + assigner: ConfigType = None): + assert assigner is not None, "\'assigner\' in decode_head.train_cfg" \ + 'cannot be None' + assert num_points > 0, 'num_points should be a positive integer.' + self.num_points = num_points + self.num_queries = num_queries + self.num_classes = num_classes + self.assigner = TASK_UTILS.build(assigner) + + def get_targets(self, cls_scores: List[Tensor], mask_preds: List[Tensor], + batch_gt_instances: List[InstanceData]) -> Tuple: + """Compute best mask matches for all images for a decoder layer. + + Args: + cls_scores (List[Tensor]): Mask score logits from a single + decoder layer for all images. Each with shape (num_queries, + cls_out_channels). + mask_preds (List[Tensor]): Mask logits from a single decoder + layer for all images. Each with shape (num_queries, h, w). + batch_gt_instances (List[InstanceData]): each contains + ``labels`` and ``masks``. + + Returns: + tuple: a tuple containing the following targets. + + - labels (List[Tensor]): Labels of all images.\ + Each with shape (num_queries, ). + - mask_targets (List[Tensor]): Mask targets of\ + all images. Each with shape (num_queries, h, w). + - mask_weights (List[Tensor]): Mask weights of\ + all images. Each with shape (num_queries, ). + - avg_factor (int): Average factor that is used to + average the loss. `avg_factor` is usually equal + to the number of positive priors. + """ + batch_size = cls_scores.shape[0] + results = dict({ + 'labels': [], + 'mask_targets': [], + 'mask_weights': [], + }) + for i in range(batch_size): + labels, mask_targets, mask_weights\ + = self._get_targets_single(cls_scores[i], + mask_preds[i], + batch_gt_instances[i]) + results['labels'].append(labels) + results['mask_targets'].append(mask_targets) + results['mask_weights'].append(mask_weights) + + # shape (batch_size, num_queries) + labels = torch.stack(results['labels'], dim=0) + # shape (batch_size, num_gts, h, w) + mask_targets = torch.cat(results['mask_targets'], dim=0) + # shape (batch_size, num_queries) + mask_weights = torch.stack(results['mask_weights'], dim=0) + + avg_factor = sum( + [len(gt_instances.labels) for gt_instances in batch_gt_instances]) + + res = (labels, mask_targets, mask_weights, avg_factor) + + return res + + def _get_targets_single(self, cls_score: Tensor, mask_pred: Tensor, + gt_instances: InstanceData) \ + -> Tuple[Tensor, Tensor, Tensor]: + """Compute a set of best mask matches for one image. 
+
+        Args:
+            cls_score (Tensor): Mask score logits from a single decoder layer
+                for one image. Shape (num_queries, cls_out_channels).
+            mask_pred (Tensor): Mask logits for a single decoder layer for one
+                image. Shape (num_queries, h, w).
+            gt_instances (:obj:`InstanceData`): It contains ``labels`` and
+                ``masks``.
+
+        Returns:
+            tuple[Tensor]: A tuple containing the following for one image.
+
+                - labels (Tensor): Labels of each image. \
+                    shape (num_queries, ).
+                - mask_targets (Tensor): Mask targets of each image. \
+                    shape (num_queries, h, w).
+                - mask_weights (Tensor): Mask weights of each image. \
+                    shape (num_queries, ).
+        """
+        gt_labels = gt_instances.labels
+        gt_masks = gt_instances.masks
+        # when "gt_labels" is empty, classify all queries to background
+        if len(gt_labels) == 0:
+            labels = gt_labels.new_full((self.num_queries, ),
+                                        self.num_classes,
+                                        dtype=torch.long)
+            mask_targets = gt_labels
+            mask_weights = gt_labels.new_zeros((self.num_queries, ))
+            return labels, mask_targets, mask_weights
+        # sample points
+        num_queries = cls_score.shape[0]
+        num_gts = gt_labels.shape[0]
+
+        point_coords = torch.rand((1, self.num_points, 2),
+                                  device=cls_score.device)
+        # shape (num_queries, num_points)
+        mask_points_pred = point_sample(
+            mask_pred.unsqueeze(1), point_coords.repeat(num_queries, 1,
+                                                        1)).squeeze(1)
+        # shape (num_gts, num_points)
+        gt_points_masks = point_sample(
+            gt_masks.unsqueeze(1).float(), point_coords.repeat(num_gts, 1,
+                                                               1)).squeeze(1)
+
+        sampled_gt_instances = InstanceData(
+            labels=gt_labels, masks=gt_points_masks)
+        sampled_pred_instances = InstanceData(
+            scores=cls_score, masks=mask_points_pred)
+        # assign and sample
+        matched_query_inds, matched_label_inds = self.assigner.assign(
+            pred_instances=sampled_pred_instances,
+            gt_instances=sampled_gt_instances)
+        labels = gt_labels.new_full((self.num_queries, ),
+                                    self.num_classes,
+                                    dtype=torch.long)
+        labels[matched_query_inds] = gt_labels[matched_label_inds]
+
+        mask_weights = gt_labels.new_zeros((self.num_queries, ))
+        mask_weights[matched_query_inds] = 1
+        mask_targets = gt_masks[matched_label_inds]
+
+        return labels, mask_targets, mask_weights
diff --git a/mmseg/utils/misc.py b/mmseg/utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfc469e8320d375135846cfb0474a0fc8d9b15d0
--- /dev/null
+++ b/mmseg/utils/misc.py
@@ -0,0 +1,128 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Optional, Union
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from .typing_utils import SampleList
+
+
+def add_prefix(inputs, prefix):
+    """Add prefix for dict.
+
+    Args:
+        inputs (dict): The input dict with str keys.
+        prefix (str): The prefix to add.
+
+    Returns:
+        dict: The dict with keys updated with ``prefix``.
+    """
+
+    outputs = dict()
+    for name, value in inputs.items():
+        outputs[f'{prefix}.{name}'] = value
+
+    return outputs
+
+
+def stack_batch(inputs: List[torch.Tensor],
+                data_samples: Optional[SampleList] = None,
+                size: Optional[tuple] = None,
+                size_divisor: Optional[int] = None,
+                pad_val: Union[int, float] = 0,
+                seg_pad_val: Union[int, float] = 255) -> torch.Tensor:
+    """Stack multiple inputs to form a batch and pad the images and
+    gt_sem_segs to the max shape, using the right-bottom padding mode.
+
+    Args:
+        inputs (List[Tensor]): The input tensors; each is a CHW 3D tensor.
+        data_samples (list[:obj:`SegDataSample`]): The list of data samples.
+            It usually includes information such as `gt_sem_seg`.
diff --git a/mmseg/utils/misc.py b/mmseg/utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfc469e8320d375135846cfb0474a0fc8d9b15d0
--- /dev/null
+++ b/mmseg/utils/misc.py
@@ -0,0 +1,128 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Optional, Union
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from .typing_utils import SampleList
+
+
+def add_prefix(inputs, prefix):
+    """Add prefix for dict.
+
+    Args:
+        inputs (dict): The input dict with str keys.
+        prefix (str): The prefix to add.
+
+    Returns:
+        dict: The dict with keys updated with ``prefix``.
+    """
+
+    outputs = dict()
+    for name, value in inputs.items():
+        outputs[f'{prefix}.{name}'] = value
+
+    return outputs
+
+
+def stack_batch(inputs: List[torch.Tensor],
+                data_samples: Optional[SampleList] = None,
+                size: Optional[tuple] = None,
+                size_divisor: Optional[int] = None,
+                pad_val: Union[int, float] = 0,
+                seg_pad_val: Union[int, float] = 255) -> torch.Tensor:
+    """Stack multiple inputs to form a batch and pad the images and
+    gt_sem_segs to the max shape, using right-bottom padding mode.
+
+    Args:
+        inputs (List[Tensor]): The input multiple tensors. Each is a
+            CHW 3D-tensor.
+        data_samples (list[:obj:`SegDataSample`]): The list of data samples.
+            It usually includes information such as `gt_sem_seg`.
+        size (tuple, optional): Fixed padding size.
+        size_divisor (int, optional): The divisor of padded size.
+        pad_val (int, float): The image padding value. Defaults to 0.
+        seg_pad_val (int, float): The segmentation padding value.
+            Defaults to 255.
+
+    Returns:
+        Tensor: The 4D-tensor of stacked, padded inputs.
+        List[:obj:`SegDataSample`]: The data samples after padding of the
+            gt_seg_map.
+    """
+    assert isinstance(inputs, list), \
+        f'Expected input type to be list, but got {type(inputs)}'
+    assert len({tensor.ndim for tensor in inputs}) == 1, \
+        f'Expected the dimensions of all inputs to be the same, ' \
+        f'but got {[tensor.ndim for tensor in inputs]}'
+    assert inputs[0].ndim == 3, f'Expected tensor dimension to be 3, ' \
+        f'but got {inputs[0].ndim}'
+    assert len({tensor.shape[0] for tensor in inputs}) == 1, \
+        f'Expected the channels of all inputs to be the same, ' \
+        f'but got {[tensor.shape[0] for tensor in inputs]}'
+
+    # only one of size and size_divisor should be valid
+    assert (size is not None) ^ (size_divisor is not None), \
+        'only one of size and size_divisor should be valid'
+
+    padded_inputs = []
+    padded_samples = []
+    inputs_sizes = [(img.shape[-2], img.shape[-1]) for img in inputs]
+    max_size = np.stack(inputs_sizes).max(0)
+    if size_divisor is not None and size_divisor > 1:
+        # the last two dims are H,W, both subject to divisibility requirement
+        max_size = (max_size +
+                    (size_divisor - 1)) // size_divisor * size_divisor
+
+    for i in range(len(inputs)):
+        tensor = inputs[i]
+        if size is not None:
+            width = max(size[-1] - tensor.shape[-1], 0)
+            height = max(size[-2] - tensor.shape[-2], 0)
+            # (padding_left, padding_right, padding_top, padding_bottom)
+            padding_size = (0, width, 0, height)
+        elif size_divisor is not None:
+            width = max(max_size[-1] - tensor.shape[-1], 0)
+            height = max(max_size[-2] - tensor.shape[-2], 0)
+            padding_size = (0, width, 0, height)
+        else:
+            padding_size = [0, 0, 0, 0]
+
+        # pad img
+        pad_img = F.pad(tensor, padding_size, value=pad_val)
+        padded_inputs.append(pad_img)
+        # pad gt_sem_seg
+        if data_samples is not None:
+            data_sample = data_samples[i]
+            pad_shape = None
+            if 'gt_sem_seg' in data_sample:
+                gt_sem_seg = data_sample.gt_sem_seg.data
+                del data_sample.gt_sem_seg.data
+                data_sample.gt_sem_seg.data = F.pad(
+                    gt_sem_seg, padding_size, value=seg_pad_val)
+                pad_shape = data_sample.gt_sem_seg.shape
+            if 'gt_edge_map' in data_sample:
+                gt_edge_map = data_sample.gt_edge_map.data
+                del data_sample.gt_edge_map.data
+                data_sample.gt_edge_map.data = F.pad(
+                    gt_edge_map, padding_size, value=seg_pad_val)
+                pad_shape = data_sample.gt_edge_map.shape
+            if 'gt_depth_map' in data_sample:
+                gt_depth_map = data_sample.gt_depth_map.data
+                del data_sample.gt_depth_map.data
+                data_sample.gt_depth_map.data = F.pad(
+                    gt_depth_map, padding_size, value=seg_pad_val)
+                pad_shape = data_sample.gt_depth_map.shape
+            data_sample.set_metainfo({
+                'img_shape': tensor.shape[-2:],
+                'pad_shape': pad_shape,
+                'padding_size': padding_size
+            })
+            padded_samples.append(data_sample)
+        else:
+            padded_samples.append(
+                dict(
+                    img_padding_size=padding_size,
+                    pad_shape=pad_img.shape[-2:]))
+
+    return torch.stack(padded_inputs, dim=0), padded_samples
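A quick usage sketch of `stack_batch` (hypothetical sizes; assumes the function is importable as `mmseg.utils.misc.stack_batch`, matching the file path above):

```python
import torch
from mmseg.utils.misc import stack_batch

imgs = [torch.rand(3, 480, 512), torch.rand(3, 512, 480)]
# exactly one of `size` / `size_divisor` may be given
batch, samples = stack_batch(imgs, size_divisor=32, pad_val=0)
print(batch.shape)  # torch.Size([2, 3, 512, 512]) -- right/bottom padded
for s in samples:   # without data_samples, padding info comes back as dicts
    print(s['img_padding_size'], s['pad_shape'])
```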
diff --git a/mmseg/utils/set_env.py b/mmseg/utils/set_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..c948950d62a7463295c1055a27a9a0ce881d9fad
--- /dev/null
+++ b/mmseg/utils/set_env.py
@@ -0,0 +1,40 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import datetime
+import warnings
+
+from mmengine import DefaultScope
+
+
+def register_all_modules(init_default_scope: bool = True) -> None:
+    """Register all modules in mmseg into the registries.
+
+    Args:
+        init_default_scope (bool): Whether to initialize the mmseg default
+            scope. When `init_default_scope=True`, the global default scope
+            will be set to `mmseg`, and all registries will build modules
+            from mmseg's registry node. To understand more about the
+            registry, please refer to
+            https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/registry.md
+            Defaults to True.
+    """  # noqa
+    import mmseg.datasets  # noqa: F401,F403
+    import mmseg.engine  # noqa: F401,F403
+    import mmseg.evaluation  # noqa: F401,F403
+    import mmseg.models  # noqa: F401,F403
+    import mmseg.structures  # noqa: F401,F403
+
+    if init_default_scope:
+        never_created = DefaultScope.get_current_instance() is None \
+            or not DefaultScope.check_instance_created('mmseg')
+        if never_created:
+            DefaultScope.get_instance('mmseg', scope_name='mmseg')
+            return
+        current_scope = DefaultScope.get_current_instance()
+        if current_scope.scope_name != 'mmseg':
+            warnings.warn('The current default scope '
+                          f'"{current_scope.scope_name}" is not "mmseg", '
+                          '`register_all_modules` will force the current '
+                          'default scope to be "mmseg". If this is not '
+                          'expected, please set `init_default_scope=False`.')
+            # avoid name conflict
+            new_instance_name = f'mmseg-{datetime.datetime.now()}'
+            DefaultScope.get_instance(new_instance_name, scope_name='mmseg')
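A sketch of how the scope guard above behaves in practice (assuming `register_all_modules` is re-exported from `mmseg.utils`, as in upstream mmseg):

```python
from mmengine import DefaultScope

from mmseg.utils import register_all_modules

register_all_modules(init_default_scope=True)
print(DefaultScope.get_current_instance().scope_name)  # 'mmseg'

# A second call is harmless: the 'mmseg' scope already exists and is
# current, so nothing changes. If another OpenMMLab library had switched
# the scope in the meantime, the warning above would fire and a fresh
# 'mmseg-<timestamp>' scope would be created to force 'mmseg' back.
register_all_modules()
```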
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer: + + def __init__(self, bpe_path: str = default_bpe(), special_tokens=None): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode('utf-8').split('\n') + merges = merges[1:49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + '' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + if not special_tokens: + special_tokens = ['', ''] + else: + special_tokens = ['', '' + ] + special_tokens + vocab.extend(special_tokens) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {t: t for t in special_tokens} + special = '|'.join(special_tokens) + self.pat = re.compile( + special + + r"""|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE) + + self.vocab_size = len(self.encoder) + self.all_special_ids = [self.encoder[t] for t in special_tokens] + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + '', ) + pairs = get_pairs(word) + + if not pairs: + return token + '' + + while True: + bigram = min( + pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: # noqa: E722, E261 + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[ + i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] + for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] + for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode( + 'utf-8', errors='replace').replace('', ' ') + return text + + +_tokenizer = SimpleTokenizer() + + +def decode(output_ids: torch.Tensor): + output_ids = output_ids.cpu().numpy() + return _tokenizer.decode(output_ids) + + +def tokenize(texts: Union[str, List[str]], + context_length: int = 77) -> torch.LongTensor: + """Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, + shape = [number of 
+
+
+def tokenize(texts: Union[str, List[str]],
+             context_length: int = 77) -> torch.LongTensor:
+    """Returns the tokenized representation of given input string(s)
+
+    Parameters
+    ----------
+    texts : Union[str, List[str]]
+        An input string or a list of input strings to tokenize
+    context_length : int
+        The context length to use; all CLIP models use 77 as the context
+        length
+
+    Returns
+    -------
+    A two-dimensional tensor containing the resulting tokens,
+    shape = [number of input strings, context_length]
+    """
+    if isinstance(texts, str):
+        texts = [texts]
+
+    sot_token = _tokenizer.encoder['<start_of_text>']
+    eot_token = _tokenizer.encoder['<end_of_text>']
+    all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token]
+                  for text in texts]
+    result = torch.zeros(len(all_tokens), context_length, dtype=torch.long)
+
+    for i, tokens in enumerate(all_tokens):
+        if len(tokens) > context_length:
+            tokens = tokens[:context_length]  # Truncate
+            tokens[-1] = eot_token
+        result[i, :len(tokens)] = torch.tensor(tokens)
+
+    return result
+
+
+class HFTokenizer:
+    """HuggingFace tokenizer wrapper."""
+
+    def __init__(self, tokenizer_name: str):
+        from transformers import AutoTokenizer
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+
+    def save_pretrained(self, dest):
+        self.tokenizer.save_pretrained(dest)
+
+    def __call__(self,
+                 texts: Union[str, List[str]],
+                 context_length: int = 77) -> torch.Tensor:
+        # same cleaning as for the default tokenizer, except lowercasing;
+        # adding lower() (for case-sensitive tokenizers) would make it
+        # more robust but less sensitive to nuance
+        if isinstance(texts, str):
+            texts = [texts]
+        texts = [whitespace_clean(basic_clean(text)) for text in texts]
+        input_ids = self.tokenizer(
+            texts,
+            return_tensors='pt',
+            max_length=context_length,
+            padding='max_length',
+            truncation=True,
+        ).input_ids
+        return input_ids
diff --git a/mmseg/utils/typing_utils.py b/mmseg/utils/typing_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..fba7d3b92bba8301171d2a0fffadfabfcd112976
--- /dev/null
+++ b/mmseg/utils/typing_utils.py
@@ -0,0 +1,25 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+"""Collecting some commonly used type hints in mmseg."""
+from typing import Dict, List, Optional, Sequence, Tuple, Union
+
+import torch
+from mmengine.config import ConfigDict
+
+from mmseg.structures import SegDataSample
+
+# Type hint of config data
+ConfigType = Union[ConfigDict, dict]
+OptConfigType = Optional[ConfigType]
+# Type hint of one or more config data
+MultiConfig = Union[ConfigType, Sequence[ConfigType]]
+OptMultiConfig = Optional[MultiConfig]
+
+SampleList = Sequence[SegDataSample]
+OptSampleList = Optional[SampleList]
+
+# Type hint of Tensor
+TensorDict = Dict[str, torch.Tensor]
+TensorList = Sequence[torch.Tensor]
+
+ForwardResults = Union[Dict[str, torch.Tensor], List[SegDataSample],
+                       Tuple[torch.Tensor], torch.Tensor]
diff --git a/mmseg/version.py b/mmseg/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..b76bb4580ddfa0ba0ba13fa4896c49bac9cef65a
--- /dev/null
+++ b/mmseg/version.py
@@ -0,0 +1,18 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+
+__version__ = '1.2.2'
+
+
+def parse_version_info(version_str):
+    version_info = []
+    for x in version_str.split('.'):
+        if x.isdigit():
+            version_info.append(int(x))
+        elif x.find('rc') != -1:
+            patch_version = x.split('rc')
+            version_info.append(int(patch_version[0]))
+            version_info.append(f'rc{patch_version[1]}')
+    return tuple(version_info)
+
+
+version_info = parse_version_info(__version__)
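For reference, what `parse_version_info` yields for the two version shapes it handles: release components become ints, and an `rc` suffix is split out as a string:

```python
print(parse_version_info('1.2.2'))     # (1, 2, 2)
print(parse_version_info('1.0.0rc6'))  # (1, 0, 0, 'rc6')
```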
diff --git a/mmseg/visualization/__init__.py b/mmseg/visualization/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cbb211e5243aafb4ab3d91f6a6f7ce0735b13a9
--- /dev/null
+++ b/mmseg/visualization/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .local_visualizer import SegLocalVisualizer
+
+__all__ = ['SegLocalVisualizer']
diff --git a/mmseg/visualization/__pycache__/__init__.cpython-39.pyc b/mmseg/visualization/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..032c7bf253d97fc9a2537aaa5816bc7b787e8700
Binary files /dev/null and b/mmseg/visualization/__pycache__/__init__.cpython-39.pyc differ
diff --git a/mmseg/visualization/__pycache__/local_visualizer.cpython-39.pyc b/mmseg/visualization/__pycache__/local_visualizer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f001c9169040807567cb530891129c864c165734
Binary files /dev/null and b/mmseg/visualization/__pycache__/local_visualizer.cpython-39.pyc differ
diff --git a/mmseg/visualization/local_visualizer.py b/mmseg/visualization/local_visualizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee3d652c7bbe9d93ca481fb7a7ed4bb976eec80d
--- /dev/null
+++ b/mmseg/visualization/local_visualizer.py
@@ -0,0 +1,349 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, List, Optional
+
+import cv2
+import mmcv
+import numpy as np
+import torch
+from mmengine.dist import master_only
+from mmengine.structures import PixelData
+from mmengine.visualization import Visualizer
+
+from mmseg.registry import VISUALIZERS
+from mmseg.structures import SegDataSample
+from mmseg.utils import get_classes, get_palette
+
+
+@VISUALIZERS.register_module()
+class SegLocalVisualizer(Visualizer):
+    """Local Visualizer.
+
+    Args:
+        name (str): Name of the instance. Defaults to 'visualizer'.
+        image (np.ndarray, optional): the origin image to draw. The format
+            should be RGB. Defaults to None.
+        vis_backends (list, optional): Visual backend config list.
+            Defaults to None.
+        save_dir (str, optional): Save file dir for all storage backends.
+            If it is None, the backend storage will not save any data.
+        classes (list, optional): Input classes for result rendering, as the
+            prediction of segmentation model is a segment map with label
+            indices, `classes` is a list which includes items corresponding
+            to the label indices. If classes is not defined, visualizer will
+            take `cityscapes` classes by default. Defaults to None.
+        palette (list, optional): Input palette for result rendering, which
+            is a list of colors corresponding to the classes.
+            Defaults to None.
+        dataset_name (str, optional): Dataset name or alias; the visualizer
+            will use the meta information of the dataset i.e. classes and
+            palette, but the `classes` and `palette` have higher priority.
+            Defaults to None.
+        alpha (int, float): The transparency of segmentation mask.
+            Defaults to 0.8.
+
+    Examples:
+        >>> import numpy as np
+        >>> import torch
+        >>> from mmengine.structures import PixelData
+        >>> from mmseg.structures import SegDataSample
+        >>> from mmseg.visualization import SegLocalVisualizer
+
+        >>> seg_local_visualizer = SegLocalVisualizer()
+        >>> image = np.random.randint(0, 256,
+        ...                           size=(10, 12, 3)).astype('uint8')
+        >>> gt_sem_seg_data = dict(data=torch.randint(0, 2, (1, 10, 12)))
+        >>> gt_sem_seg = PixelData(**gt_sem_seg_data)
+        >>> gt_seg_data_sample = SegDataSample()
+        >>> gt_seg_data_sample.gt_sem_seg = gt_sem_seg
+        >>> seg_local_visualizer.dataset_meta = dict(
+        >>>     classes=('background', 'foreground'),
+        >>>     palette=[[120, 120, 120], [6, 230, 230]])
+        >>> seg_local_visualizer.add_datasample('visualizer_example',
+        ...                                     image, gt_seg_data_sample)
+        >>> seg_local_visualizer.add_datasample(
+        ...     'visualizer_example', image,
+        ...     gt_seg_data_sample, show=True)
+    """  # noqa
+
+    def __init__(self,
+                 name: str = 'visualizer',
+                 image: Optional[np.ndarray] = None,
+                 vis_backends: Optional[Dict] = None,
+                 save_dir: Optional[str] = None,
+                 classes: Optional[List] = None,
+                 palette: Optional[List] = None,
+                 dataset_name: Optional[str] = None,
+                 alpha: float = 0.8,
+                 **kwargs):
+        super().__init__(name, image, vis_backends, save_dir, **kwargs)
+        self.alpha: float = alpha
+        # set_dataset_meta expects (classes, palette, dataset_name),
+        # so classes must be passed first
+        self.set_dataset_meta(classes, palette, dataset_name)
+
+    def _get_center_loc(self, mask: np.ndarray) -> np.ndarray:
+        """Get semantic seg center coordinate.
+
+        Args:
+            mask (np.ndarray): The binary mask of one class, taken from
+                sem_seg.
+        """
+        loc = np.argwhere(mask == 1)
+
+        loc_sort = np.array(
+            sorted(loc.tolist(), key=lambda row: (row[0], row[1])))
+        y_list = loc_sort[:, 0]
+        unique, indices, counts = np.unique(
+            y_list, return_index=True, return_counts=True)
+        y_loc = unique[counts.argmax()]
+        y_most_freq_loc = loc[loc_sort[:, 0] == y_loc]
+        center_num = len(y_most_freq_loc) // 2
+        x = y_most_freq_loc[center_num][1]
+        y = y_most_freq_loc[center_num][0]
+        return np.array([x, y])
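A tiny check of the `_get_center_loc` heuristic above (toy 5x5 mask; it picks the most populated row, then that row's middle pixel, returned as (x, y); assumes mmseg and mmengine are importable so the visualizer can be instantiated):

```python
import numpy as np

mask = np.zeros((5, 5), dtype=np.uint8)
mask[1:4, 1:4] = 1  # a 3x3 blob
# rows 1-3 each hold 3 pixels; the first most frequent row is 1, and the
# middle of its pixels (columns 1..3) is column 2 -> (x, y) = (2, 1)
print(SegLocalVisualizer()._get_center_loc(mask))  # [2 1]
```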
+ """ + num_classes = len(classes) + + sem_seg = sem_seg.cpu().data + ids = np.unique(sem_seg)[::-1] + legal_indices = ids < num_classes + ids = ids[legal_indices] + labels = np.array(ids, dtype=np.int64) + + colors = [palette[label] for label in labels] + + mask = np.zeros_like(image, dtype=np.uint8) + for label, color in zip(labels, colors): + mask[sem_seg[0] == label, :] = color + + if with_labels: + font = cv2.FONT_HERSHEY_SIMPLEX + # (0,1] to change the size of the text relative to the image + scale = 0.05 + fontScale = min(image.shape[0], image.shape[1]) / (25 / scale) + fontColor = (255, 255, 255) + if image.shape[0] < 300 or image.shape[1] < 300: + thickness = 1 + rectangleThickness = 1 + else: + thickness = 2 + rectangleThickness = 2 + lineType = 2 + + if isinstance(sem_seg[0], torch.Tensor): + masks = sem_seg[0].numpy() == labels[:, None, None] + else: + masks = sem_seg[0] == labels[:, None, None] + masks = masks.astype(np.uint8) + for mask_num in range(len(labels)): + classes_id = labels[mask_num] + classes_color = colors[mask_num] + loc = self._get_center_loc(masks[mask_num]) + text = classes[classes_id] + (label_width, label_height), baseline = cv2.getTextSize( + text, font, fontScale, thickness) + mask = cv2.rectangle(mask, loc, + (loc[0] + label_width + baseline, + loc[1] + label_height + baseline), + classes_color, -1) + mask = cv2.rectangle(mask, loc, + (loc[0] + label_width + baseline, + loc[1] + label_height + baseline), + (0, 0, 0), rectangleThickness) + mask = cv2.putText(mask, text, (loc[0], loc[1] + label_height), + font, fontScale, fontColor, thickness, + lineType) + color_seg = (image * (1 - self.alpha) + mask * self.alpha).astype( + np.uint8) + self.set_image(color_seg) + return color_seg + + def _draw_depth_map(self, image: np.ndarray, + depth_map: PixelData) -> np.ndarray: + """Draws a depth map on a given image. + + This function takes an image and a depth map as input, + renders the depth map, and concatenates it with the original image. + Finally, it updates the internal image state of the visualizer with + the concatenated result. + + Args: + image (np.ndarray): The original image where the depth map will + be drawn. The array should be in the format HxWx3 where H is + the height, W is the width. + + depth_map (PixelData): Depth map to be drawn. The depth map + should be in the form of a PixelData object. It will be + converted to a torch tensor if it is a numpy array. + + Returns: + np.ndarray: The concatenated image with the depth map drawn. + + Example: + >>> depth_map_data = PixelData(data=torch.rand(1, 10, 10)) + >>> image = np.random.randint(0, 256, + >>> size=(10, 10, 3)).astype('uint8') + >>> visualizer = SegLocalVisualizer() + >>> visualizer._draw_depth_map(image, depth_map_data) + """ + depth_map = depth_map.cpu().data + if isinstance(depth_map, np.ndarray): + depth_map = torch.from_numpy(depth_map) + if depth_map.ndim == 2: + depth_map = depth_map[None] + + depth_map = self.draw_featmap(depth_map, resize_shape=image.shape[:2]) + out_image = np.concatenate((image, depth_map), axis=0) + self.set_image(out_image) + return out_image + + def set_dataset_meta(self, + classes: Optional[List] = None, + palette: Optional[List] = None, + dataset_name: Optional[str] = None) -> None: + """Set meta information to visualizer. + + Args: + classes (list, optional): Input classes for result rendering, as + the prediction of segmentation model is a segment map with + label indices, `classes` is a list which includes items + responding to the label indices. 
+    def set_dataset_meta(self,
+                         classes: Optional[List] = None,
+                         palette: Optional[List] = None,
+                         dataset_name: Optional[str] = None) -> None:
+        """Set meta information to visualizer.
+
+        Args:
+            classes (list, optional): Input classes for result rendering, as
+                the prediction of segmentation model is a segment map with
+                label indices, `classes` is a list which includes items
+                corresponding to the label indices. If classes is not
+                defined, visualizer will take `cityscapes` classes by
+                default. Defaults to None.
+            palette (list, optional): Input palette for result rendering,
+                which is a list of colors corresponding to the classes.
+                Defaults to None.
+            dataset_name (str, optional): Dataset name or alias; the
+                visualizer will use the meta information of the dataset i.e.
+                classes and palette, but the `classes` and `palette` have
+                higher priority. Defaults to None.
+        """
+        # Set default value. When calling
+        # `SegLocalVisualizer().dataset_meta=xxx`,
+        # it will override the default value.
+        if dataset_name is None:
+            dataset_name = 'cityscapes'
+        classes = classes if classes else get_classes(dataset_name)
+        palette = palette if palette else get_palette(dataset_name)
+        assert len(classes) == len(
+            palette), 'The length of classes should be equal to palette'
+        self.dataset_meta: dict = {'classes': classes, 'palette': palette}
+
+    @master_only
+    def add_datasample(
+            self,
+            name: str,
+            image: np.ndarray,
+            data_sample: Optional[SegDataSample] = None,
+            draw_gt: bool = True,
+            draw_pred: bool = True,
+            show: bool = False,
+            wait_time: float = 0,
+            # TODO: Supported in mmengine's Visualizer.
+            out_file: Optional[str] = None,
+            step: int = 0,
+            with_labels: Optional[bool] = True) -> None:
+        """Draw datasample and save to all backends.
+
+        - If GT and prediction are plotted at the same time, they are
+          displayed in a stitched image where the left image is the
+          ground truth and the right image is the prediction.
+        - If ``show`` is True, all storage backends are ignored, and
+          the images will be displayed in a local window.
+        - If ``out_file`` is specified, the drawn image will be
+          saved to ``out_file``. It is usually used when the display
+          is not available.
+
+        Args:
+            name (str): The image identifier.
+            image (np.ndarray): The image to draw.
+            data_sample (:obj:`SegDataSample`, optional): The GT and/or
+                prediction SegDataSample. Defaults to None.
+            draw_gt (bool): Whether to draw GT SegDataSample.
+                Defaults to True.
+            draw_pred (bool): Whether to draw Prediction SegDataSample.
+                Defaults to True.
+            show (bool): Whether to display the drawn image.
+                Defaults to False.
+            wait_time (float): The interval of show (s). Defaults to 0.
+            out_file (str): Path to output file. Defaults to None.
+            step (int): Global step value to record. Defaults to 0.
+            with_labels (bool, optional): Add semantic labels in
+                visualization result. Defaults to True.
+        """
+        classes = self.dataset_meta.get('classes', None)
+        palette = self.dataset_meta.get('palette', None)
+
+        gt_img_data = None
+        pred_img_data = None
+
+        if draw_gt and data_sample is not None:
+            if 'gt_sem_seg' in data_sample:
+                assert classes is not None, 'class information is ' \
+                    'not provided when ' \
+                    'visualizing semantic ' \
+                    'segmentation results.'
+                gt_img_data = self._draw_sem_seg(image, data_sample.gt_sem_seg,
+                                                 classes, palette, with_labels)
+
+            if 'gt_depth_map' in data_sample:
+                gt_img_data = gt_img_data if gt_img_data is not None else image
+                gt_img_data = self._draw_depth_map(gt_img_data,
+                                                   data_sample.gt_depth_map)
+
+        if draw_pred and data_sample is not None:
+
+            if 'pred_sem_seg' in data_sample:
+
+                assert classes is not None, 'class information is ' \
+                    'not provided when ' \
+                    'visualizing semantic ' \
+                    'segmentation results.'
+ pred_img_data = self._draw_sem_seg(image, + data_sample.pred_sem_seg, + classes, palette, + with_labels) + + if 'pred_depth_map' in data_sample: + pred_img_data = pred_img_data if pred_img_data is not None \ + else image + pred_img_data = self._draw_depth_map( + pred_img_data, data_sample.pred_depth_map) + + if gt_img_data is not None and pred_img_data is not None: + drawn_img = np.concatenate((gt_img_data, pred_img_data), axis=1) + elif gt_img_data is not None: + drawn_img = gt_img_data + else: + drawn_img = pred_img_data + + if show: + self.show(drawn_img, win_name=name, wait_time=wait_time) + + if out_file is not None: + mmcv.imwrite(mmcv.rgb2bgr(drawn_img), out_file) + else: + self.add_image(name, drawn_img, step) diff --git a/model_flops/snnet_flops_setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.json b/model_flops/snnet_flops_setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.json new file mode 100644 index 0000000000000000000000000000000000000000..0114f323d145b3301269a2de56e44150277d3086 --- /dev/null +++ b/model_flops/snnet_flops_setr_naive_512x512_160k_b16_ade20k_deit_3_s_l_224_snnetv2.json @@ -0,0 +1,136 @@ +{ + "0": 33522505728, + "1": 363157291008, + "2": 335562224128, + "3": 308067427328, + "4": 280572630528, + "5": 253077833728, + "6": 225986083328, + "7": 198491286528, + "8": 170996489728, + "9": 143501692928, + "10": 116006896128, + "11": 88512099328, + "12": 61017302528, + "14": 61923665408, + "15": 89418462208, + "16": 116913259008, + "17": 144408055808, + "18": 171499806208, + "19": 198994603008, + "20": 226489399808, + "21": 253984196608, + "22": 281478993408, + "23": 308973790208, + "24": 336468587008, + "25": 336468587008, + "26": 308973790208, + "27": 281478993408, + "28": 254387243008, + "29": 226892446208, + "30": 199397649408, + "31": 171902852608, + "32": 144408055808, + "33": 116913259008, + "34": 89418462208, + "35": 336468587008, + "36": 308973790208, + "37": 281882039808, + "38": 254387243008, + "39": 226892446208, + "40": 199397649408, + "41": 171902852608, + "42": 144408055808, + "43": 116913259008, + "44": 336468587008, + "45": 309376836608, + "46": 281882039808, + "47": 254387243008, + "48": 226892446208, + "49": 199397649408, + "50": 171902852608, + "51": 144408055808, + "52": 336871633408, + "53": 309376836608, + "54": 281882039808, + "55": 254387243008, + "56": 226892446208, + "57": 199397649408, + "58": 171902852608, + "59": 336468587008, + "60": 308973790208, + "61": 281478993408, + "62": 253984196608, + "63": 226489399808, + "64": 198994603008, + "65": 336468587008, + "66": 308973790208, + "67": 281478993408, + "68": 253984196608, + "69": 226489399808, + "70": 336468587008, + "71": 308973790208, + "72": 281478993408, + "73": 253984196608, + "74": 336468587008, + "75": 308973790208, + "76": 281478993408, + "77": 336468587008, + "78": 308973790208, + "79": 336468587008, + "80": 61823395328, + "81": 89318192128, + "82": 116812988928, + "83": 143904739328, + "84": 171399536128, + "85": 198894332928, + "86": 226389129728, + "87": 253883926528, + "88": 281378723328, + "89": 308873520128, + "90": 61823395328, + "91": 89318192128, + "92": 116409942528, + "93": 143904739328, + "94": 171399536128, + "95": 198894332928, + "96": 226389129728, + "97": 253883926528, + "98": 281378723328, + "99": 61823395328, + "100": 88915145728, + "101": 116409942528, + "102": 143904739328, + "103": 171399536128, + "104": 198894332928, + "105": 226389129728, + "106": 253883926528, + "107": 61420348928, + "108": 88915145728, + "109": 116409942528, + "110": 
143904739328,
+    "111": 171399536128,
+    "112": 198894332928,
+    "113": 226389129728,
+    "114": 61823395328,
+    "115": 89318192128,
+    "116": 116812988928,
+    "117": 144307785728,
+    "118": 171802582528,
+    "119": 199297379328,
+    "120": 61823395328,
+    "121": 89318192128,
+    "122": 116812988928,
+    "123": 144307785728,
+    "124": 171802582528,
+    "125": 61823395328,
+    "126": 89318192128,
+    "127": 116812988928,
+    "128": 144307785728,
+    "129": 61823395328,
+    "130": 89318192128,
+    "131": 116812988928,
+    "132": 61823395328,
+    "133": 89318192128,
+    "134": 61823395328
+}
\ No newline at end of file
diff --git a/packages.txt b/packages.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a9f1eea092d5e971b5475b82ee835cec7f196bad
--- /dev/null
+++ b/packages.txt
@@ -0,0 +1 @@
+ffmpeg
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f56cc4b805699a42cfde49003310753189e25a11
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+torch
+opencv-python
+mmengine
+plotly
+gradio
+mmcv==2.1.0
+scipy
+ftfy
\ No newline at end of file
diff --git a/results/eval_single_scale_20230507_235400.json b/results/eval_single_scale_20230507_235400.json
new file mode 100644
index 0000000000000000000000000000000000000000..74a56a5a306bc998043c31d1fd581af6e8213a58
--- /dev/null
+++ b/results/eval_single_scale_20230507_235400.json
@@ -0,0 +1,41274 @@
+{
+    "0": {
+        "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py",
+        "metric": {
+            "aAcc": 0.8181999999999999,
+            "mIoU": 0.4557,
+            "mAcc": 0.573,
+            "IoU.wall": 0.7586000061035156,
+            "IoU.building": 0.8181999969482422,
+            "IoU.sky": 0.9370999908447266,
+            "IoU.floor": 0.8023999786376953,
+            "IoU.tree": 0.7375,
+            "IoU.ceiling": 0.8263999938964843,
+            "IoU.road": 0.8202999877929688,
+            "IoU.bed ": 0.8651999664306641,
+            "IoU.windowpane": 0.607599983215332,
+            "IoU.grass": 0.6552999877929687,
+            "IoU.cabinet": 0.579900016784668,
+            "IoU.sidewalk": 0.630099983215332,
+            "IoU.person": 0.7694999694824218,
+            "IoU.earth": 0.32990001678466796,
+            "IoU.door": 0.45279998779296876,
+            "IoU.table": 0.5493000030517579,
+            "IoU.mountain": 0.5745999908447266,
+            "IoU.plant": 0.49720001220703125,
+            "IoU.curtain": 0.7312000274658204,
+            "IoU.chair": 0.5125999832153321,
+            "IoU.car": 0.8208000183105468,
+            "IoU.water": 0.5208000183105469,
+            "IoU.painting": 0.6731999969482422,
+            "IoU.sofa": 0.6183000183105469,
+            "IoU.shelf": 0.4159000015258789,
+            "IoU.house": 0.4890999984741211,
+            "IoU.sea": 0.6391999816894531,
+            "IoU.mirror": 0.6505000305175781,
+            "IoU.rug": 0.6647000122070312,
+            "IoU.field": 0.30829999923706053,
+            "IoU.armchair": 0.42209999084472655,
+            "IoU.seat": 0.6083000183105469,
+            "IoU.fence": 0.3793000030517578,
+            "IoU.desk": 0.4829999923706055,
+            "IoU.rock": 0.40669998168945315,
+            "IoU.wardrobe": 0.47150001525878904,
+            "IoU.lamp": 0.54,
+            "IoU.bathtub": 0.7741000366210937,
+            "IoU.railing": 0.2982999992370605,
+            "IoU.cushion": 0.5286999893188477,
+            "IoU.base": 0.2684000015258789,
+            "IoU.box": 0.21950000762939453,
+            "IoU.column": 0.43509998321533205,
+            "IoU.signboard": 0.33130001068115233,
+            "IoU.chest of drawers": 0.31040000915527344,
+            "IoU.counter": 0.271299991607666,
+            "IoU.sand": 0.3868999862670898,
+            "IoU.sink": 0.6648999786376953,
+            "IoU.skyscraper": 0.5261999893188477,
+            "IoU.fireplace": 0.6981999969482422,
+            "IoU.refrigerator": 0.7494999694824219,
+            "IoU.grandstand": 0.4518000030517578,
+            "IoU.path": 0.21889999389648437,
+            "IoU.stairs": 
0.2667000007629394, + "IoU.runway": 0.6741999816894532, + "IoU.case": 0.5193999862670898, + "IoU.pool table": 0.9125, + "IoU.pillow": 0.5515000152587891, + "IoU.screen door": 0.6256000137329102, + "IoU.stairway": 0.29459999084472654, + "IoU.river": 0.25040000915527344, + "IoU.bridge": 0.6868000030517578, + "IoU.bookcase": 0.34240001678466797, + "IoU.blind": 0.42150001525878905, + "IoU.coffee table": 0.5736999893188477, + "IoU.toilet": 0.8166000366210937, + "IoU.flower": 0.3390999984741211, + "IoU.book": 0.43099998474121093, + "IoU.hill": 0.0840999984741211, + "IoU.bench": 0.4268000030517578, + "IoU.countertop": 0.5572999954223633, + "IoU.stove": 0.7120999908447265, + "IoU.palm": 0.45720001220703127, + "IoU.kitchen island": 0.31090000152587893, + "IoU.computer": 0.6215999984741211, + "IoU.swivel chair": 0.46830001831054685, + "IoU.boat": 0.7038999938964844, + "IoU.bar": 0.47310001373291016, + "IoU.arcade machine": 0.33110000610351564, + "IoU.hovel": 0.5, + "IoU.bus": 0.8325, + "IoU.towel": 0.5502000045776367, + "IoU.light": 0.327599983215332, + "IoU.truck": 0.2043000030517578, + "IoU.tower": 0.29600000381469727, + "IoU.chandelier": 0.5808000183105468, + "IoU.awning": 0.36400001525878906, + "IoU.streetlight": 0.14, + "IoU.booth": 0.35770000457763673, + "IoU.television receiver": 0.6234999847412109, + "IoU.airplane": 0.6216999816894532, + "IoU.dirt track": 0.10350000381469726, + "IoU.apparel": 0.33169998168945314, + "IoU.pole": 0.16209999084472657, + "IoU.land": 0.026800000667572023, + "IoU.bannister": 0.08970000267028809, + "IoU.escalator": 0.24879999160766603, + "IoU.ottoman": 0.4772999954223633, + "IoU.bottle": 0.330099983215332, + "IoU.buffet": 0.3997000122070313, + "IoU.poster": 0.21690000534057619, + "IoU.stage": 0.21069999694824218, + "IoU.van": 0.3988999938964844, + "IoU.ship": 0.7108000183105468, + "IoU.fountain": 0.1931999969482422, + "IoU.conveyer belt": 0.5652999877929688, + "IoU.canopy": 0.2027000045776367, + "IoU.washer": 0.710999984741211, + "IoU.plaything": 0.25510000228881835, + "IoU.swimming pool": 0.6134000015258789, + "IoU.stool": 0.23889999389648436, + "IoU.barrel": 0.5204000091552734, + "IoU.basket": 0.2102000045776367, + "IoU.waterfall": 0.6443000030517578, + "IoU.tent": 0.9130999755859375, + "IoU.bag": 0.09609999656677246, + "IoU.minibike": 0.5602999877929687, + "IoU.cradle": 0.7808000183105469, + "IoU.oven": 0.206200008392334, + "IoU.ball": 0.3761999893188477, + "IoU.food": 0.5583000183105469, + "IoU.step": 0.125, + "IoU.tank": 0.4684000015258789, + "IoU.trade name": 0.2453000068664551, + "IoU.microwave": 0.3820000076293945, + "IoU.pot": 0.35080001831054686, + "IoU.animal": 0.5886000061035156, + "IoU.bicycle": 0.47630001068115235, + "IoU.lake": 0.15, + "IoU.dishwasher": 0.5463000106811523, + "IoU.screen": 0.611500015258789, + "IoU.blanket": 0.13489999771118164, + "IoU.sculpture": 0.4697999954223633, + "IoU.hood": 0.482400016784668, + "IoU.sconce": 0.34509998321533203, + "IoU.vase": 0.2706999969482422, + "IoU.traffic light": 0.2673999977111816, + "IoU.tray": 0.028900001049041748, + "IoU.ashcan": 0.316200008392334, + "IoU.fan": 0.49209999084472655, + "IoU.pier": 0.34439998626708984, + "IoU.crt screen": 0.0, + "IoU.plate": 0.4336000061035156, + "IoU.monitor": 0.02380000114440918, + "IoU.bulletin board": 0.3081999969482422, + "IoU.shower": 0.002199999988079071, + "IoU.radiator": 0.5331999969482422, + "IoU.glass": 0.06639999866485596, + "IoU.clock": 0.243700008392334, + "IoU.flag": 0.34169998168945315, + "Acc.wall": 0.8776000213623046, + "Acc.building": 0.9191999816894532, 
+ "Acc.sky": 0.9769000244140625, + "Acc.floor": 0.9033000183105468, + "Acc.tree": 0.8686000061035156, + "Acc.ceiling": 0.9180999755859375, + "Acc.road": 0.8976000213623047, + "Acc.bed ": 0.9433000183105469, + "Acc.windowpane": 0.7530000305175781, + "Acc.grass": 0.8080999755859375, + "Acc.cabinet": 0.6866999816894531, + "Acc.sidewalk": 0.7826999664306641, + "Acc.person": 0.9154000091552734, + "Acc.earth": 0.4534000015258789, + "Acc.door": 0.6227999877929687, + "Acc.table": 0.7116000366210937, + "Acc.mountain": 0.7087000274658203, + "Acc.plant": 0.6208000183105469, + "Acc.curtain": 0.841500015258789, + "Acc.chair": 0.642300033569336, + "Acc.car": 0.9130999755859375, + "Acc.water": 0.6713999938964844, + "Acc.painting": 0.857699966430664, + "Acc.sofa": 0.7786000061035157, + "Acc.shelf": 0.6245999908447266, + "Acc.house": 0.607599983215332, + "Acc.sea": 0.907300033569336, + "Acc.mirror": 0.7408999633789063, + "Acc.rug": 0.7366999816894532, + "Acc.field": 0.5349000167846679, + "Acc.armchair": 0.6405000305175781, + "Acc.seat": 0.8058000183105469, + "Acc.fence": 0.49709999084472656, + "Acc.desk": 0.6919000244140625, + "Acc.rock": 0.66, + "Acc.wardrobe": 0.5947000122070313, + "Acc.lamp": 0.6687000274658204, + "Acc.bathtub": 0.830999984741211, + "Acc.railing": 0.4540000152587891, + "Acc.cushion": 0.6816000366210937, + "Acc.base": 0.41970001220703124, + "Acc.box": 0.3034000015258789, + "Acc.column": 0.5533000183105469, + "Acc.signboard": 0.445, + "Acc.chest of drawers": 0.5641999816894532, + "Acc.counter": 0.37220001220703125, + "Acc.sand": 0.530099983215332, + "Acc.sink": 0.7583999633789062, + "Acc.skyscraper": 0.6072999954223632, + "Acc.fireplace": 0.9026000213623047, + "Acc.refrigerator": 0.8494999694824219, + "Acc.grandstand": 0.6816999816894531, + "Acc.path": 0.2997999954223633, + "Acc.stairs": 0.3479000091552734, + "Acc.runway": 0.8247000122070313, + "Acc.case": 0.6570999908447266, + "Acc.pool table": 0.9613999938964843, + "Acc.pillow": 0.6522000122070313, + "Acc.screen door": 0.7126000213623047, + "Acc.stairway": 0.40869998931884766, + "Acc.river": 0.43990001678466795, + "Acc.bridge": 0.8338999938964844, + "Acc.bookcase": 0.5495000076293945, + "Acc.blind": 0.4897999954223633, + "Acc.coffee table": 0.7805999755859375, + "Acc.toilet": 0.8837999725341796, + "Acc.flower": 0.5279000091552735, + "Acc.book": 0.5963000106811523, + "Acc.hill": 0.16149999618530272, + "Acc.bench": 0.514900016784668, + "Acc.countertop": 0.6994999694824219, + "Acc.stove": 0.7872000122070313, + "Acc.palm": 0.6661000061035156, + "Acc.kitchen island": 0.6577999877929688, + "Acc.computer": 0.7644000244140625, + "Acc.swivel chair": 0.6152999877929688, + "Acc.boat": 0.8416999816894531, + "Acc.bar": 0.6252999877929688, + "Acc.arcade machine": 0.37009998321533205, + "Acc.hovel": 0.5602999877929687, + "Acc.bus": 0.9048000335693359, + "Acc.towel": 0.7181999969482422, + "Acc.light": 0.3565999984741211, + "Acc.truck": 0.27360000610351565, + "Acc.tower": 0.41209999084472654, + "Acc.chandelier": 0.7370999908447265, + "Acc.awning": 0.43689998626708987, + "Acc.streetlight": 0.16190000534057616, + "Acc.booth": 0.44279998779296875, + "Acc.television receiver": 0.7376000213623047, + "Acc.airplane": 0.7002999877929688, + "Acc.dirt track": 0.18069999694824218, + "Acc.apparel": 0.47830001831054686, + "Acc.pole": 0.20540000915527343, + "Acc.land": 0.035, + "Acc.bannister": 0.11670000076293946, + "Acc.escalator": 0.28829999923706057, + "Acc.ottoman": 0.6272000122070313, + "Acc.bottle": 0.5700999832153321, + "Acc.buffet": 0.45369998931884764, + 
"Acc.poster": 0.3240000152587891, + "Acc.stage": 0.32049999237060545, + "Acc.van": 0.5172000122070313, + "Acc.ship": 0.8426000213623047, + "Acc.fountain": 0.20219999313354492, + "Acc.conveyer belt": 0.7908999633789062, + "Acc.canopy": 0.2989999961853027, + "Acc.washer": 0.7279000091552734, + "Acc.plaything": 0.4259000015258789, + "Acc.swimming pool": 0.8162000274658203, + "Acc.stool": 0.3057999992370605, + "Acc.barrel": 0.6131999969482422, + "Acc.basket": 0.2673999977111816, + "Acc.waterfall": 0.72, + "Acc.tent": 0.9905999755859375, + "Acc.bag": 0.11670000076293946, + "Acc.minibike": 0.674000015258789, + "Acc.cradle": 0.9684999847412109, + "Acc.oven": 0.5365000152587891, + "Acc.ball": 0.45560001373291015, + "Acc.food": 0.7077999877929687, + "Acc.step": 0.151899995803833, + "Acc.tank": 0.5806999969482421, + "Acc.trade name": 0.29770000457763673, + "Acc.microwave": 0.42770000457763674, + "Acc.pot": 0.41330001831054686, + "Acc.animal": 0.630099983215332, + "Acc.bicycle": 0.7216000366210937, + "Acc.lake": 0.15619999885559083, + "Acc.dishwasher": 0.6361000061035156, + "Acc.screen": 0.9305999755859375, + "Acc.blanket": 0.1447000026702881, + "Acc.sculpture": 0.6172999954223632, + "Acc.hood": 0.5345000076293945, + "Acc.sconce": 0.41639999389648436, + "Acc.vase": 0.36639999389648437, + "Acc.traffic light": 0.43270000457763674, + "Acc.tray": 0.04, + "Acc.ashcan": 0.4581999969482422, + "Acc.fan": 0.6368999862670899, + "Acc.pier": 0.5306999969482422, + "Acc.crt screen": 0.0, + "Acc.plate": 0.5613000106811523, + "Acc.monitor": 0.03210000038146973, + "Acc.bulletin board": 0.4286999893188477, + "Acc.shower": 0.009599999785423278, + "Acc.radiator": 0.605, + "Acc.glass": 0.06949999809265137, + "Acc.clock": 0.281200008392334, + "Acc.flag": 0.4109999847412109 + } + }, + "1": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8382, + "mIoU": 0.5135000000000001, + "mAcc": 0.6426999999999999, + "IoU.wall": 0.7844999694824218, + "IoU.building": 0.8337999725341797, + "IoU.sky": 0.9388999938964844, + "IoU.floor": 0.8220999908447265, + "IoU.tree": 0.7430999755859375, + "IoU.ceiling": 0.8411000061035157, + "IoU.road": 0.8363999938964843, + "IoU.bed ": 0.903499984741211, + "IoU.windowpane": 0.6277999877929688, + "IoU.grass": 0.6787999725341797, + "IoU.cabinet": 0.630099983215332, + "IoU.sidewalk": 0.6619000244140625, + "IoU.person": 0.8123000335693359, + "IoU.earth": 0.3902000045776367, + "IoU.door": 0.5191999816894531, + "IoU.table": 0.6095999908447266, + "IoU.mountain": 0.5818999862670898, + "IoU.plant": 0.5056999969482422, + "IoU.curtain": 0.7566999816894531, + "IoU.chair": 0.5868000030517578, + "IoU.car": 0.85, + "IoU.water": 0.6009000015258789, + "IoU.painting": 0.72, + "IoU.sofa": 0.7101000213623047, + "IoU.shelf": 0.42279998779296873, + "IoU.house": 0.5220999908447266, + "IoU.sea": 0.6662000274658203, + "IoU.mirror": 0.7129000091552734, + "IoU.rug": 0.668499984741211, + "IoU.field": 0.38479999542236326, + "IoU.armchair": 0.4868000030517578, + "IoU.seat": 0.6495999908447265, + "IoU.fence": 0.47959999084472654, + "IoU.desk": 0.5056000137329102, + "IoU.rock": 0.5311999893188477, + "IoU.wardrobe": 0.5700999832153321, + "IoU.lamp": 0.5897999954223633, + "IoU.bathtub": 0.8604000091552735, + "IoU.railing": 0.39029998779296876, + "IoU.cushion": 0.6131000137329101, + "IoU.base": 0.3390999984741211, + "IoU.box": 0.2777000045776367, + "IoU.column": 0.4881999969482422, + "IoU.signboard": 0.3579000091552734, + "IoU.chest of 
drawers": 0.3363999938964844, + "IoU.counter": 0.322599983215332, + "IoU.sand": 0.5252000045776367, + "IoU.sink": 0.7225, + "IoU.skyscraper": 0.49849998474121093, + "IoU.fireplace": 0.7256999969482422, + "IoU.refrigerator": 0.7241000366210938, + "IoU.grandstand": 0.510099983215332, + "IoU.path": 0.241200008392334, + "IoU.stairs": 0.27280000686645506, + "IoU.runway": 0.730999984741211, + "IoU.case": 0.5513000106811523, + "IoU.pool table": 0.9202999877929687, + "IoU.pillow": 0.5877999877929687, + "IoU.screen door": 0.6775, + "IoU.stairway": 0.3309000015258789, + "IoU.river": 0.16899999618530273, + "IoU.bridge": 0.5666999816894531, + "IoU.bookcase": 0.32310001373291014, + "IoU.blind": 0.4275, + "IoU.coffee table": 0.5775, + "IoU.toilet": 0.8113999938964844, + "IoU.flower": 0.3813000106811523, + "IoU.book": 0.4688999938964844, + "IoU.hill": 0.130600004196167, + "IoU.bench": 0.5193999862670898, + "IoU.countertop": 0.5793999862670899, + "IoU.stove": 0.7322000122070312, + "IoU.palm": 0.5315999984741211, + "IoU.kitchen island": 0.46130001068115234, + "IoU.computer": 0.7555000305175781, + "IoU.swivel chair": 0.5722999954223633, + "IoU.boat": 0.6556999969482422, + "IoU.bar": 0.571500015258789, + "IoU.arcade machine": 0.8022000122070313, + "IoU.hovel": 0.5197999954223633, + "IoU.bus": 0.9026999664306641, + "IoU.towel": 0.6393999862670898, + "IoU.light": 0.4518000030517578, + "IoU.truck": 0.36279998779296874, + "IoU.tower": 0.32060001373291014, + "IoU.chandelier": 0.6444000244140625, + "IoU.awning": 0.30739999771118165, + "IoU.streetlight": 0.22209999084472656, + "IoU.booth": 0.395, + "IoU.television receiver": 0.6981999969482422, + "IoU.airplane": 0.6461000061035156, + "IoU.dirt track": 0.007699999809265137, + "IoU.apparel": 0.33220001220703127, + "IoU.pole": 0.16850000381469726, + "IoU.land": 0.03619999885559082, + "IoU.bannister": 0.1459000015258789, + "IoU.escalator": 0.5393000030517578, + "IoU.ottoman": 0.4981999969482422, + "IoU.bottle": 0.3434000015258789, + "IoU.buffet": 0.5445000076293945, + "IoU.poster": 0.26059999465942385, + "IoU.stage": 0.17639999389648436, + "IoU.van": 0.3890999984741211, + "IoU.ship": 0.27290000915527346, + "IoU.fountain": 0.2825, + "IoU.conveyer belt": 0.7273000335693359, + "IoU.canopy": 0.26780000686645505, + "IoU.washer": 0.7418000030517579, + "IoU.plaything": 0.4047999954223633, + "IoU.swimming pool": 0.7163999938964843, + "IoU.stool": 0.3877000045776367, + "IoU.barrel": 0.4936999893188477, + "IoU.basket": 0.3615999984741211, + "IoU.waterfall": 0.5047000122070312, + "IoU.tent": 0.9147000122070312, + "IoU.bag": 0.17819999694824218, + "IoU.minibike": 0.7327999877929687, + "IoU.cradle": 0.8158999633789062, + "IoU.oven": 0.33049999237060546, + "IoU.ball": 0.5311999893188477, + "IoU.food": 0.577599983215332, + "IoU.step": 0.11039999961853027, + "IoU.tank": 0.5668999862670898, + "IoU.trade name": 0.21239999771118165, + "IoU.microwave": 0.7616999816894531, + "IoU.pot": 0.5036000061035156, + "IoU.animal": 0.6733999633789063, + "IoU.bicycle": 0.5929000091552734, + "IoU.lake": 0.5845000076293946, + "IoU.dishwasher": 0.6783000183105469, + "IoU.screen": 0.5604999923706054, + "IoU.blanket": 0.19700000762939454, + "IoU.sculpture": 0.7222000122070312, + "IoU.hood": 0.5406999969482422, + "IoU.sconce": 0.3686000061035156, + "IoU.vase": 0.3647999954223633, + "IoU.traffic light": 0.30870000839233397, + "IoU.tray": 0.12300000190734864, + "IoU.ashcan": 0.40950000762939454, + "IoU.fan": 0.5468000030517578, + "IoU.pier": 0.23200000762939454, + "IoU.crt screen": 0.043299999237060544, + 
"IoU.plate": 0.5127999877929688, + "IoU.monitor": 0.2343000030517578, + "IoU.bulletin board": 0.5079000091552734, + "IoU.shower": 0.013200000524520875, + "IoU.radiator": 0.6281000137329101, + "IoU.glass": 0.1743000030517578, + "IoU.clock": 0.3693000030517578, + "IoU.flag": 0.5120000076293946, + "Acc.wall": 0.879000015258789, + "Acc.building": 0.9345999908447266, + "Acc.sky": 0.9643000030517578, + "Acc.floor": 0.8955999755859375, + "Acc.tree": 0.8913999938964844, + "Acc.ceiling": 0.9059999847412109, + "Acc.road": 0.8987999725341796, + "Acc.bed ": 0.9683000183105469, + "Acc.windowpane": 0.7841999816894532, + "Acc.grass": 0.8048000335693359, + "Acc.cabinet": 0.7544999694824219, + "Acc.sidewalk": 0.810199966430664, + "Acc.person": 0.9316000366210937, + "Acc.earth": 0.5481999969482422, + "Acc.door": 0.6762000274658203, + "Acc.table": 0.7612999725341797, + "Acc.mountain": 0.7119000244140625, + "Acc.plant": 0.5916999816894531, + "Acc.curtain": 0.8779000091552734, + "Acc.chair": 0.725999984741211, + "Acc.car": 0.9330000305175781, + "Acc.water": 0.7306999969482422, + "Acc.painting": 0.8805999755859375, + "Acc.sofa": 0.8411000061035157, + "Acc.shelf": 0.5581999969482422, + "Acc.house": 0.650199966430664, + "Acc.sea": 0.8466000366210937, + "Acc.mirror": 0.8244000244140625, + "Acc.rug": 0.803499984741211, + "Acc.field": 0.590900001525879, + "Acc.armchair": 0.6751000213623047, + "Acc.seat": 0.8552999877929688, + "Acc.fence": 0.6465000152587891, + "Acc.desk": 0.7608000183105469, + "Acc.rock": 0.7180000305175781, + "Acc.wardrobe": 0.732699966430664, + "Acc.lamp": 0.7641000366210937, + "Acc.bathtub": 0.9012999725341797, + "Acc.railing": 0.49740001678466794, + "Acc.cushion": 0.7376000213623047, + "Acc.base": 0.6277999877929688, + "Acc.box": 0.3575, + "Acc.column": 0.5947000122070313, + "Acc.signboard": 0.45419998168945314, + "Acc.chest of drawers": 0.6347000122070312, + "Acc.counter": 0.40759998321533203, + "Acc.sand": 0.7651000213623047, + "Acc.sink": 0.7875, + "Acc.skyscraper": 0.6243000030517578, + "Acc.fireplace": 0.9259999847412109, + "Acc.refrigerator": 0.8231999969482422, + "Acc.grandstand": 0.7270999908447265, + "Acc.path": 0.3554000091552734, + "Acc.stairs": 0.3788000106811523, + "Acc.runway": 0.9611000061035156, + "Acc.case": 0.6961000061035156, + "Acc.pool table": 0.98, + "Acc.pillow": 0.6841000366210938, + "Acc.screen door": 0.7616000366210938, + "Acc.stairway": 0.4993000030517578, + "Acc.river": 0.3472999954223633, + "Acc.bridge": 0.6704000091552734, + "Acc.bookcase": 0.5270000076293946, + "Acc.blind": 0.4779000091552734, + "Acc.coffee table": 0.8627999877929687, + "Acc.toilet": 0.9077999877929688, + "Acc.flower": 0.5402999877929687, + "Acc.book": 0.6798999786376954, + "Acc.hill": 0.2493000030517578, + "Acc.bench": 0.6111999893188477, + "Acc.countertop": 0.7236000061035156, + "Acc.stove": 0.865, + "Acc.palm": 0.7483999633789062, + "Acc.kitchen island": 0.7558000183105469, + "Acc.computer": 0.9066000366210938, + "Acc.swivel chair": 0.7731999969482422, + "Acc.boat": 0.86, + "Acc.bar": 0.7275, + "Acc.arcade machine": 0.8818000030517578, + "Acc.hovel": 0.562599983215332, + "Acc.bus": 0.9523000335693359, + "Acc.towel": 0.8015000152587891, + "Acc.light": 0.5697999954223633, + "Acc.truck": 0.48150001525878905, + "Acc.tower": 0.5127999877929688, + "Acc.chandelier": 0.8088999938964844, + "Acc.awning": 0.3515999984741211, + "Acc.streetlight": 0.34580001831054685, + "Acc.booth": 0.43790000915527344, + "Acc.television receiver": 0.8177999877929687, + "Acc.airplane": 0.7061000061035156, + "Acc.dirt track": 
0.035499999523162844, + "Acc.apparel": 0.4333000183105469, + "Acc.pole": 0.22079999923706053, + "Acc.land": 0.07530000209808349, + "Acc.bannister": 0.20260000228881836, + "Acc.escalator": 0.7875, + "Acc.ottoman": 0.7119000244140625, + "Acc.bottle": 0.5252000045776367, + "Acc.buffet": 0.7180000305175781, + "Acc.poster": 0.316200008392334, + "Acc.stage": 0.4161000061035156, + "Acc.van": 0.47830001831054686, + "Acc.ship": 0.2815999984741211, + "Acc.fountain": 0.29069999694824217, + "Acc.conveyer belt": 0.949800033569336, + "Acc.canopy": 0.2982999992370605, + "Acc.washer": 0.7893000030517578, + "Acc.plaything": 0.5691999816894531, + "Acc.swimming pool": 0.8986000061035156, + "Acc.stool": 0.5545000076293946, + "Acc.barrel": 0.6509999847412109, + "Acc.basket": 0.517400016784668, + "Acc.waterfall": 0.7401000213623047, + "Acc.tent": 0.9891999816894531, + "Acc.bag": 0.2015999984741211, + "Acc.minibike": 0.825, + "Acc.cradle": 0.9730999755859375, + "Acc.oven": 0.48709999084472655, + "Acc.ball": 0.6243000030517578, + "Acc.food": 0.6284999847412109, + "Acc.step": 0.15350000381469728, + "Acc.tank": 0.6587000274658203, + "Acc.trade name": 0.2281999969482422, + "Acc.microwave": 0.8463999938964843, + "Acc.pot": 0.6125, + "Acc.animal": 0.7104000091552735, + "Acc.bicycle": 0.7727999877929688, + "Acc.lake": 0.7780999755859375, + "Acc.dishwasher": 0.7976000213623047, + "Acc.screen": 0.7420999908447266, + "Acc.blanket": 0.2475, + "Acc.sculpture": 0.8225, + "Acc.hood": 0.6940000152587891, + "Acc.sconce": 0.4791999816894531, + "Acc.vase": 0.5654000091552734, + "Acc.traffic light": 0.5168999862670899, + "Acc.tray": 0.18239999771118165, + "Acc.ashcan": 0.5661000061035156, + "Acc.fan": 0.7672000122070313, + "Acc.pier": 0.4386000061035156, + "Acc.crt screen": 0.11140000343322753, + "Acc.plate": 0.7356999969482422, + "Acc.monitor": 0.2809000015258789, + "Acc.bulletin board": 0.704800033569336, + "Acc.shower": 0.05, + "Acc.radiator": 0.7506999969482422, + "Acc.glass": 0.19079999923706054, + "Acc.clock": 0.45619998931884764, + "Acc.flag": 0.585999984741211 + } + }, + "2": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8358, + "mIoU": 0.5075999999999999, + "mAcc": 0.6374, + "IoU.wall": 0.7818000030517578, + "IoU.building": 0.8308999633789063, + "IoU.sky": 0.9376000213623047, + "IoU.floor": 0.8225, + "IoU.tree": 0.7412999725341797, + "IoU.ceiling": 0.8387000274658203, + "IoU.road": 0.8325, + "IoU.bed ": 0.9015000152587891, + "IoU.windowpane": 0.6327999877929688, + "IoU.grass": 0.6731999969482422, + "IoU.cabinet": 0.6229999923706054, + "IoU.sidewalk": 0.6604000091552734, + "IoU.person": 0.808499984741211, + "IoU.earth": 0.375, + "IoU.door": 0.5147999954223633, + "IoU.table": 0.605099983215332, + "IoU.mountain": 0.5618999862670898, + "IoU.plant": 0.49639999389648437, + "IoU.curtain": 0.7501000213623047, + "IoU.chair": 0.579900016784668, + "IoU.car": 0.8462999725341797, + "IoU.water": 0.5990999984741211, + "IoU.painting": 0.7194000244140625, + "IoU.sofa": 0.6962999725341796, + "IoU.shelf": 0.42259998321533204, + "IoU.house": 0.49080001831054687, + "IoU.sea": 0.6745999908447265, + "IoU.mirror": 0.7043000030517578, + "IoU.rug": 0.6751000213623047, + "IoU.field": 0.39029998779296876, + "IoU.armchair": 0.46029998779296877, + "IoU.seat": 0.6441000366210937, + "IoU.fence": 0.48060001373291017, + "IoU.desk": 0.49380001068115237, + "IoU.rock": 0.5043000030517578, + "IoU.wardrobe": 0.5684000015258789, + "IoU.lamp": 0.580999984741211, + 
"IoU.bathtub": 0.8697000122070313, + "IoU.railing": 0.38869998931884764, + "IoU.cushion": 0.6056999969482422, + "IoU.base": 0.337599983215332, + "IoU.box": 0.28049999237060547, + "IoU.column": 0.49779998779296875, + "IoU.signboard": 0.3536000061035156, + "IoU.chest of drawers": 0.3268999862670898, + "IoU.counter": 0.3215000152587891, + "IoU.sand": 0.5393999862670898, + "IoU.sink": 0.7188999938964844, + "IoU.skyscraper": 0.5031999969482421, + "IoU.fireplace": 0.7337000274658203, + "IoU.refrigerator": 0.725999984741211, + "IoU.grandstand": 0.5036999893188476, + "IoU.path": 0.2352000045776367, + "IoU.stairs": 0.27430000305175783, + "IoU.runway": 0.7380999755859375, + "IoU.case": 0.5497999954223632, + "IoU.pool table": 0.9173999786376953, + "IoU.pillow": 0.5915999984741211, + "IoU.screen door": 0.6694999694824219, + "IoU.stairway": 0.3340999984741211, + "IoU.river": 0.18229999542236328, + "IoU.bridge": 0.6070999908447265, + "IoU.bookcase": 0.3218000030517578, + "IoU.blind": 0.42790000915527343, + "IoU.coffee table": 0.5811000061035156, + "IoU.toilet": 0.7980000305175782, + "IoU.flower": 0.3693000030517578, + "IoU.book": 0.46369998931884765, + "IoU.hill": 0.13739999771118164, + "IoU.bench": 0.5081999969482421, + "IoU.countertop": 0.594900016784668, + "IoU.stove": 0.7251999664306641, + "IoU.palm": 0.5131000137329101, + "IoU.kitchen island": 0.4615000152587891, + "IoU.computer": 0.7586000061035156, + "IoU.swivel chair": 0.5377000045776367, + "IoU.boat": 0.6444999694824218, + "IoU.bar": 0.5661999893188476, + "IoU.arcade machine": 0.773499984741211, + "IoU.hovel": 0.49700000762939456, + "IoU.bus": 0.9008999633789062, + "IoU.towel": 0.6297000122070312, + "IoU.light": 0.44459999084472657, + "IoU.truck": 0.2756999969482422, + "IoU.tower": 0.31739999771118166, + "IoU.chandelier": 0.6395000076293945, + "IoU.awning": 0.29329999923706057, + "IoU.streetlight": 0.21829999923706056, + "IoU.booth": 0.41139999389648435, + "IoU.television receiver": 0.7030999755859375, + "IoU.airplane": 0.6429000091552735, + "IoU.dirt track": 0.009599999785423278, + "IoU.apparel": 0.3345999908447266, + "IoU.pole": 0.16030000686645507, + "IoU.land": 0.03490000009536743, + "IoU.bannister": 0.14920000076293946, + "IoU.escalator": 0.514000015258789, + "IoU.ottoman": 0.4933000183105469, + "IoU.bottle": 0.33299999237060546, + "IoU.buffet": 0.5163999938964844, + "IoU.poster": 0.25129999160766603, + "IoU.stage": 0.1815999984741211, + "IoU.van": 0.4079000091552734, + "IoU.ship": 0.3706999969482422, + "IoU.fountain": 0.2588999938964844, + "IoU.conveyer belt": 0.7380000305175781, + "IoU.canopy": 0.2747999954223633, + "IoU.washer": 0.7036000061035156, + "IoU.plaything": 0.2593000030517578, + "IoU.swimming pool": 0.6901999664306641, + "IoU.stool": 0.40560001373291016, + "IoU.barrel": 0.49040000915527343, + "IoU.basket": 0.34970001220703123, + "IoU.waterfall": 0.5022000122070313, + "IoU.tent": 0.8758999633789063, + "IoU.bag": 0.18329999923706056, + "IoU.minibike": 0.7120999908447265, + "IoU.cradle": 0.8420999908447265, + "IoU.oven": 0.3164999961853027, + "IoU.ball": 0.5379999923706055, + "IoU.food": 0.5475, + "IoU.step": 0.11390000343322754, + "IoU.tank": 0.5804999923706055, + "IoU.trade name": 0.21850000381469725, + "IoU.microwave": 0.7494999694824219, + "IoU.pot": 0.48779998779296874, + "IoU.animal": 0.6725, + "IoU.bicycle": 0.5941999816894531, + "IoU.lake": 0.5827999877929687, + "IoU.dishwasher": 0.6776000213623047, + "IoU.screen": 0.5261999893188477, + "IoU.blanket": 0.185, + "IoU.sculpture": 0.677300033569336, + "IoU.hood": 
0.5252000045776367, + "IoU.sconce": 0.36880001068115237, + "IoU.vase": 0.36450000762939455, + "IoU.traffic light": 0.29209999084472654, + "IoU.tray": 0.11489999771118165, + "IoU.ashcan": 0.37209999084472656, + "IoU.fan": 0.5511000061035156, + "IoU.pier": 0.20120000839233398, + "IoU.crt screen": 0.08489999771118165, + "IoU.plate": 0.509000015258789, + "IoU.monitor": 0.2725, + "IoU.bulletin board": 0.44970001220703126, + "IoU.shower": 0.011499999761581422, + "IoU.radiator": 0.6361000061035156, + "IoU.glass": 0.1722999954223633, + "IoU.clock": 0.35380001068115235, + "IoU.flag": 0.472400016784668, + "Acc.wall": 0.8761000061035156, + "Acc.building": 0.9330000305175781, + "Acc.sky": 0.9620999908447265, + "Acc.floor": 0.8941000366210937, + "Acc.tree": 0.8930999755859375, + "Acc.ceiling": 0.9031999969482422, + "Acc.road": 0.8930000305175781, + "Acc.bed ": 0.967699966430664, + "Acc.windowpane": 0.7887999725341797, + "Acc.grass": 0.816500015258789, + "Acc.cabinet": 0.7526000213623046, + "Acc.sidewalk": 0.8130000305175781, + "Acc.person": 0.9312999725341797, + "Acc.earth": 0.5233000183105468, + "Acc.door": 0.6708000183105469, + "Acc.table": 0.7616999816894531, + "Acc.mountain": 0.6927999877929687, + "Acc.plant": 0.5856000137329102, + "Acc.curtain": 0.8775, + "Acc.chair": 0.7305999755859375, + "Acc.car": 0.9315000152587891, + "Acc.water": 0.7280000305175781, + "Acc.painting": 0.8787999725341797, + "Acc.sofa": 0.8398999786376953, + "Acc.shelf": 0.5568000030517578, + "Acc.house": 0.6320999908447266, + "Acc.sea": 0.8618000030517579, + "Acc.mirror": 0.8226000213623047, + "Acc.rug": 0.8120999908447266, + "Acc.field": 0.5875, + "Acc.armchair": 0.6368999862670899, + "Acc.seat": 0.8548999786376953, + "Acc.fence": 0.6463999938964844, + "Acc.desk": 0.7508000183105469, + "Acc.rock": 0.7083000183105469, + "Acc.wardrobe": 0.7491000366210937, + "Acc.lamp": 0.7623999786376953, + "Acc.bathtub": 0.9076999664306641, + "Acc.railing": 0.494900016784668, + "Acc.cushion": 0.7333999633789062, + "Acc.base": 0.6138000106811523, + "Acc.box": 0.36529998779296874, + "Acc.column": 0.6052999877929688, + "Acc.signboard": 0.4465999984741211, + "Acc.chest of drawers": 0.6227999877929687, + "Acc.counter": 0.41200000762939454, + "Acc.sand": 0.7709999847412109, + "Acc.sink": 0.7880999755859375, + "Acc.skyscraper": 0.6016999816894532, + "Acc.fireplace": 0.9204000091552734, + "Acc.refrigerator": 0.8277999877929687, + "Acc.grandstand": 0.7227999877929687, + "Acc.path": 0.35080001831054686, + "Acc.stairs": 0.38099998474121094, + "Acc.runway": 0.9625, + "Acc.case": 0.6922000122070312, + "Acc.pool table": 0.9795999908447266, + "Acc.pillow": 0.6919000244140625, + "Acc.screen door": 0.7423000335693359, + "Acc.stairway": 0.5002000045776367, + "Acc.river": 0.3584000015258789, + "Acc.bridge": 0.7137999725341797, + "Acc.bookcase": 0.5263999938964844, + "Acc.blind": 0.475, + "Acc.coffee table": 0.8572000122070312, + "Acc.toilet": 0.9069999694824219, + "Acc.flower": 0.549099998474121, + "Acc.book": 0.6711000061035156, + "Acc.hill": 0.26440000534057617, + "Acc.bench": 0.6031999969482422, + "Acc.countertop": 0.7225, + "Acc.stove": 0.855999984741211, + "Acc.palm": 0.7066000366210937, + "Acc.kitchen island": 0.7655000305175781, + "Acc.computer": 0.9058000183105469, + "Acc.swivel chair": 0.7083999633789062, + "Acc.boat": 0.8381999969482422, + "Acc.bar": 0.7263999938964844, + "Acc.arcade machine": 0.8401999664306641, + "Acc.hovel": 0.5377000045776367, + "Acc.bus": 0.9547000122070313, + "Acc.towel": 0.8020999908447266, + "Acc.light": 0.5697999954223633, + 
"Acc.truck": 0.36759998321533205, + "Acc.tower": 0.5322999954223633, + "Acc.chandelier": 0.8130000305175781, + "Acc.awning": 0.34330001831054685, + "Acc.streetlight": 0.34189998626708984, + "Acc.booth": 0.4525, + "Acc.television receiver": 0.8230000305175781, + "Acc.airplane": 0.7055000305175781, + "Acc.dirt track": 0.04519999980926514, + "Acc.apparel": 0.42529998779296874, + "Acc.pole": 0.206200008392334, + "Acc.land": 0.06579999923706055, + "Acc.bannister": 0.20709999084472655, + "Acc.escalator": 0.7362000274658204, + "Acc.ottoman": 0.7034999847412109, + "Acc.bottle": 0.5129999923706055, + "Acc.buffet": 0.6854000091552734, + "Acc.poster": 0.29520000457763673, + "Acc.stage": 0.4341999816894531, + "Acc.van": 0.49340000152587893, + "Acc.ship": 0.3938999938964844, + "Acc.fountain": 0.2665999984741211, + "Acc.conveyer belt": 0.9455999755859374, + "Acc.canopy": 0.30620000839233397, + "Acc.washer": 0.7591000366210937, + "Acc.plaything": 0.3833000183105469, + "Acc.swimming pool": 0.8926000213623047, + "Acc.stool": 0.5654999923706054, + "Acc.barrel": 0.6558999633789062, + "Acc.basket": 0.5079999923706054, + "Acc.waterfall": 0.7369999694824219, + "Acc.tent": 0.9895999908447266, + "Acc.bag": 0.20690000534057618, + "Acc.minibike": 0.8273000335693359, + "Acc.cradle": 0.9730000305175781, + "Acc.oven": 0.48150001525878905, + "Acc.ball": 0.6370999908447266, + "Acc.food": 0.6084999847412109, + "Acc.step": 0.15720000267028808, + "Acc.tank": 0.6568000030517578, + "Acc.trade name": 0.235, + "Acc.microwave": 0.8337000274658203, + "Acc.pot": 0.5968999862670898, + "Acc.animal": 0.7080000305175781, + "Acc.bicycle": 0.7754000091552734, + "Acc.lake": 0.7726000213623047, + "Acc.dishwasher": 0.7929000091552735, + "Acc.screen": 0.7284999847412109, + "Acc.blanket": 0.23290000915527342, + "Acc.sculpture": 0.7729000091552735, + "Acc.hood": 0.6777999877929688, + "Acc.sconce": 0.4734999847412109, + "Acc.vase": 0.5475, + "Acc.traffic light": 0.5008000183105469, + "Acc.tray": 0.17540000915527343, + "Acc.ashcan": 0.5209999847412109, + "Acc.fan": 0.7686000061035156, + "Acc.pier": 0.4531999969482422, + "Acc.crt screen": 0.19840000152587892, + "Acc.plate": 0.7355999755859375, + "Acc.monitor": 0.3306999969482422, + "Acc.bulletin board": 0.6806999969482422, + "Acc.shower": 0.05, + "Acc.radiator": 0.7355999755859375, + "Acc.glass": 0.1890999984741211, + "Acc.clock": 0.4165999984741211, + "Acc.flag": 0.5454999923706054 + } + }, + "3": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8355, + "mIoU": 0.5069, + "mAcc": 0.6377, + "IoU.wall": 0.7813999938964844, + "IoU.building": 0.8294000244140625, + "IoU.sky": 0.9387999725341797, + "IoU.floor": 0.8176999664306641, + "IoU.tree": 0.7441000366210937, + "IoU.ceiling": 0.8381999969482422, + "IoU.road": 0.8351000213623047, + "IoU.bed ": 0.9038999938964843, + "IoU.windowpane": 0.6304999923706055, + "IoU.grass": 0.6644000244140625, + "IoU.cabinet": 0.625, + "IoU.sidewalk": 0.6630999755859375, + "IoU.person": 0.8086000061035157, + "IoU.earth": 0.37990001678466795, + "IoU.door": 0.5204999923706055, + "IoU.table": 0.6036999893188476, + "IoU.mountain": 0.5761999893188476, + "IoU.plant": 0.5013000106811524, + "IoU.curtain": 0.7545999908447265, + "IoU.chair": 0.5743000030517578, + "IoU.car": 0.844800033569336, + "IoU.water": 0.6134000015258789, + "IoU.painting": 0.7173999786376953, + "IoU.sofa": 0.7038999938964844, + "IoU.shelf": 0.42529998779296874, + "IoU.house": 0.5, + "IoU.sea": 0.6838999938964844, + 
"IoU.mirror": 0.6980000305175781, + "IoU.rug": 0.66, + "IoU.field": 0.3706999969482422, + "IoU.armchair": 0.46349998474121096, + "IoU.seat": 0.6455000305175781, + "IoU.fence": 0.4790999984741211, + "IoU.desk": 0.4972999954223633, + "IoU.rock": 0.5002999877929688, + "IoU.wardrobe": 0.5915999984741211, + "IoU.lamp": 0.5793999862670899, + "IoU.bathtub": 0.8584999847412109, + "IoU.railing": 0.38669998168945313, + "IoU.cushion": 0.6068000030517579, + "IoU.base": 0.33529998779296877, + "IoU.box": 0.26110000610351564, + "IoU.column": 0.49139999389648437, + "IoU.signboard": 0.3502000045776367, + "IoU.chest of drawers": 0.3413999938964844, + "IoU.counter": 0.3363999938964844, + "IoU.sand": 0.5399000167846679, + "IoU.sink": 0.721500015258789, + "IoU.skyscraper": 0.5222999954223633, + "IoU.fireplace": 0.7458999633789063, + "IoU.refrigerator": 0.7311000061035157, + "IoU.grandstand": 0.5181999969482421, + "IoU.path": 0.23819999694824218, + "IoU.stairs": 0.26540000915527345, + "IoU.runway": 0.6551000213623047, + "IoU.case": 0.555, + "IoU.pool table": 0.9108999633789062, + "IoU.pillow": 0.5891999816894531, + "IoU.screen door": 0.6495999908447265, + "IoU.stairway": 0.30809999465942384, + "IoU.river": 0.14979999542236327, + "IoU.bridge": 0.6145999908447266, + "IoU.bookcase": 0.3218999862670898, + "IoU.blind": 0.4341999816894531, + "IoU.coffee table": 0.5752999877929688, + "IoU.toilet": 0.7902999877929687, + "IoU.flower": 0.39930000305175783, + "IoU.book": 0.46650001525878904, + "IoU.hill": 0.14010000228881836, + "IoU.bench": 0.48970001220703124, + "IoU.countertop": 0.6097000122070313, + "IoU.stove": 0.7372000122070312, + "IoU.palm": 0.510099983215332, + "IoU.kitchen island": 0.45529998779296876, + "IoU.computer": 0.7654000091552734, + "IoU.swivel chair": 0.5538999938964844, + "IoU.boat": 0.6347999954223633, + "IoU.bar": 0.5697999954223633, + "IoU.arcade machine": 0.7862000274658203, + "IoU.hovel": 0.5483000183105469, + "IoU.bus": 0.9058000183105469, + "IoU.towel": 0.6111999893188477, + "IoU.light": 0.45540000915527346, + "IoU.truck": 0.27049999237060546, + "IoU.tower": 0.2904999923706055, + "IoU.chandelier": 0.6433000183105468, + "IoU.awning": 0.30139999389648436, + "IoU.streetlight": 0.213700008392334, + "IoU.booth": 0.4234999847412109, + "IoU.television receiver": 0.7076000213623047, + "IoU.airplane": 0.6598999786376953, + "IoU.dirt track": 0.013700000047683715, + "IoU.apparel": 0.3559999847412109, + "IoU.pole": 0.1621999931335449, + "IoU.land": 0.04840000152587891, + "IoU.bannister": 0.14949999809265135, + "IoU.escalator": 0.51, + "IoU.ottoman": 0.4809999847412109, + "IoU.bottle": 0.3259000015258789, + "IoU.buffet": 0.4886000061035156, + "IoU.poster": 0.3231999969482422, + "IoU.stage": 0.193799991607666, + "IoU.van": 0.41509998321533204, + "IoU.ship": 0.2331999969482422, + "IoU.fountain": 0.2822999954223633, + "IoU.conveyer belt": 0.7383999633789062, + "IoU.canopy": 0.25510000228881835, + "IoU.washer": 0.7173999786376953, + "IoU.plaything": 0.26920000076293943, + "IoU.swimming pool": 0.6375, + "IoU.stool": 0.3972999954223633, + "IoU.barrel": 0.48009998321533204, + "IoU.basket": 0.35959999084472655, + "IoU.waterfall": 0.49009998321533205, + "IoU.tent": 0.8059999847412109, + "IoU.bag": 0.17659999847412108, + "IoU.minibike": 0.7081999969482422, + "IoU.cradle": 0.8306999969482421, + "IoU.oven": 0.30010000228881833, + "IoU.ball": 0.5527000045776367, + "IoU.food": 0.5531000137329102, + "IoU.step": 0.11170000076293946, + "IoU.tank": 0.5720000076293945, + "IoU.trade name": 0.20709999084472655, + 
"IoU.microwave": 0.7208000183105469, + "IoU.pot": 0.5070000076293946, + "IoU.animal": 0.6605999755859375, + "IoU.bicycle": 0.5841999816894531, + "IoU.lake": 0.6179000091552734, + "IoU.dishwasher": 0.654000015258789, + "IoU.screen": 0.5615999984741211, + "IoU.blanket": 0.181200008392334, + "IoU.sculpture": 0.7191999816894531, + "IoU.hood": 0.5370999908447266, + "IoU.sconce": 0.36450000762939455, + "IoU.vase": 0.3659999847412109, + "IoU.traffic light": 0.2880999946594238, + "IoU.tray": 0.12140000343322754, + "IoU.ashcan": 0.3604000091552734, + "IoU.fan": 0.5545000076293946, + "IoU.pier": 0.22190000534057616, + "IoU.crt screen": 0.08069999694824219, + "IoU.plate": 0.515, + "IoU.monitor": 0.2402000045776367, + "IoU.bulletin board": 0.47369998931884766, + "IoU.shower": 0.009399999976158141, + "IoU.radiator": 0.637400016784668, + "IoU.glass": 0.17030000686645508, + "IoU.clock": 0.38369998931884763, + "IoU.flag": 0.4458000183105469, + "Acc.wall": 0.8763999938964844, + "Acc.building": 0.9318000030517578, + "Acc.sky": 0.9636000061035156, + "Acc.floor": 0.8918000030517578, + "Acc.tree": 0.8908999633789062, + "Acc.ceiling": 0.9048000335693359, + "Acc.road": 0.8941000366210937, + "Acc.bed ": 0.9693000030517578, + "Acc.windowpane": 0.7875, + "Acc.grass": 0.8013999938964844, + "Acc.cabinet": 0.7477999877929687, + "Acc.sidewalk": 0.8136000061035156, + "Acc.person": 0.9280000305175782, + "Acc.earth": 0.5368999862670898, + "Acc.door": 0.6737000274658204, + "Acc.table": 0.7580000305175781, + "Acc.mountain": 0.7012999725341796, + "Acc.plant": 0.5870999908447265, + "Acc.curtain": 0.8769999694824219, + "Acc.chair": 0.7198999786376953, + "Acc.car": 0.9301000213623047, + "Acc.water": 0.7512999725341797, + "Acc.painting": 0.8763999938964844, + "Acc.sofa": 0.8468000030517578, + "Acc.shelf": 0.564900016784668, + "Acc.house": 0.6423999786376953, + "Acc.sea": 0.8508999633789063, + "Acc.mirror": 0.8127999877929688, + "Acc.rug": 0.8022000122070313, + "Acc.field": 0.6068000030517579, + "Acc.armchair": 0.6468000030517578, + "Acc.seat": 0.8551999664306641, + "Acc.fence": 0.6441999816894531, + "Acc.desk": 0.7545999908447265, + "Acc.rock": 0.6944999694824219, + "Acc.wardrobe": 0.7679000091552735, + "Acc.lamp": 0.759000015258789, + "Acc.bathtub": 0.8991999816894531, + "Acc.railing": 0.4893000030517578, + "Acc.cushion": 0.7283000183105469, + "Acc.base": 0.6186999893188476, + "Acc.box": 0.3402000045776367, + "Acc.column": 0.5893999862670899, + "Acc.signboard": 0.44299999237060544, + "Acc.chest of drawers": 0.6466000366210938, + "Acc.counter": 0.4265999984741211, + "Acc.sand": 0.7520999908447266, + "Acc.sink": 0.7905000305175781, + "Acc.skyscraper": 0.6306000137329102, + "Acc.fireplace": 0.9308000183105469, + "Acc.refrigerator": 0.8426000213623047, + "Acc.grandstand": 0.7216000366210937, + "Acc.path": 0.36529998779296874, + "Acc.stairs": 0.3740999984741211, + "Acc.runway": 0.8665000152587891, + "Acc.case": 0.705, + "Acc.pool table": 0.9766999816894532, + "Acc.pillow": 0.6898999786376954, + "Acc.screen door": 0.7256999969482422, + "Acc.stairway": 0.4638999938964844, + "Acc.river": 0.29329999923706057, + "Acc.bridge": 0.7234999847412109, + "Acc.bookcase": 0.52, + "Acc.blind": 0.48700000762939455, + "Acc.coffee table": 0.8516999816894532, + "Acc.toilet": 0.9081999969482422, + "Acc.flower": 0.5870999908447265, + "Acc.book": 0.6694000244140625, + "Acc.hill": 0.25559999465942385, + "Acc.bench": 0.5818999862670898, + "Acc.countertop": 0.7390000152587891, + "Acc.stove": 0.8637000274658203, + "Acc.palm": 0.7131999969482422, + 
"Acc.kitchen island": 0.7691000366210937, + "Acc.computer": 0.9091000366210937, + "Acc.swivel chair": 0.7362999725341797, + "Acc.boat": 0.8625, + "Acc.bar": 0.7404000091552735, + "Acc.arcade machine": 0.8527999877929687, + "Acc.hovel": 0.5977000045776367, + "Acc.bus": 0.9588999938964844, + "Acc.towel": 0.788499984741211, + "Acc.light": 0.5806000137329101, + "Acc.truck": 0.3547999954223633, + "Acc.tower": 0.5334000015258789, + "Acc.chandelier": 0.8198999786376953, + "Acc.awning": 0.3427000045776367, + "Acc.streetlight": 0.33529998779296877, + "Acc.booth": 0.45680000305175783, + "Acc.television receiver": 0.8280999755859375, + "Acc.airplane": 0.7222000122070312, + "Acc.dirt track": 0.06920000076293946, + "Acc.apparel": 0.4559000015258789, + "Acc.pole": 0.20659999847412108, + "Acc.land": 0.09930000305175782, + "Acc.bannister": 0.20989999771118165, + "Acc.escalator": 0.7295999908447266, + "Acc.ottoman": 0.6975, + "Acc.bottle": 0.5036000061035156, + "Acc.buffet": 0.6522000122070313, + "Acc.poster": 0.3741999816894531, + "Acc.stage": 0.4527000045776367, + "Acc.van": 0.500099983215332, + "Acc.ship": 0.24309999465942383, + "Acc.fountain": 0.2909000015258789, + "Acc.conveyer belt": 0.9512000274658203, + "Acc.canopy": 0.2725, + "Acc.washer": 0.7694999694824218, + "Acc.plaything": 0.3895999908447266, + "Acc.swimming pool": 0.8868000030517578, + "Acc.stool": 0.5611999893188476, + "Acc.barrel": 0.6508999633789062, + "Acc.basket": 0.5083000183105468, + "Acc.waterfall": 0.7105999755859375, + "Acc.tent": 0.9894999694824219, + "Acc.bag": 0.20110000610351564, + "Acc.minibike": 0.8127999877929688, + "Acc.cradle": 0.9741999816894531, + "Acc.oven": 0.4779999923706055, + "Acc.ball": 0.6381000137329101, + "Acc.food": 0.6141999816894531, + "Acc.step": 0.15760000228881835, + "Acc.tank": 0.6537999725341797, + "Acc.trade name": 0.22170000076293944, + "Acc.microwave": 0.8029000091552735, + "Acc.pot": 0.6165000152587891, + "Acc.animal": 0.6933000183105469, + "Acc.bicycle": 0.7808000183105469, + "Acc.lake": 0.8108999633789062, + "Acc.dishwasher": 0.7809999847412109, + "Acc.screen": 0.7519000244140625, + "Acc.blanket": 0.23100000381469726, + "Acc.sculpture": 0.8123000335693359, + "Acc.hood": 0.6933000183105469, + "Acc.sconce": 0.47310001373291016, + "Acc.vase": 0.5604999923706054, + "Acc.traffic light": 0.48770000457763674, + "Acc.tray": 0.18579999923706056, + "Acc.ashcan": 0.5020000076293946, + "Acc.fan": 0.7712999725341797, + "Acc.pier": 0.44049999237060544, + "Acc.crt screen": 0.20180000305175783, + "Acc.plate": 0.7415000152587891, + "Acc.monitor": 0.2904999923706055, + "Acc.bulletin board": 0.705, + "Acc.shower": 0.05, + "Acc.radiator": 0.7469999694824219, + "Acc.glass": 0.18760000228881835, + "Acc.clock": 0.44290000915527344, + "Acc.flag": 0.5077000045776368 + } + }, + "4": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8340000000000001, + "mIoU": 0.4993, + "mAcc": 0.6288, + "IoU.wall": 0.7787999725341797, + "IoU.building": 0.8283999633789062, + "IoU.sky": 0.9381999969482422, + "IoU.floor": 0.8173000335693359, + "IoU.tree": 0.7451000213623047, + "IoU.ceiling": 0.8362000274658203, + "IoU.road": 0.8358000183105468, + "IoU.bed ": 0.9011000061035156, + "IoU.windowpane": 0.6277000045776367, + "IoU.grass": 0.6805999755859375, + "IoU.cabinet": 0.6222000122070312, + "IoU.sidewalk": 0.658499984741211, + "IoU.person": 0.8061000061035156, + "IoU.earth": 0.3758000183105469, + "IoU.door": 0.5165999984741211, + "IoU.table": 
0.591500015258789, + "IoU.mountain": 0.571500015258789, + "IoU.plant": 0.5013999938964844, + "IoU.curtain": 0.7533000183105468, + "IoU.chair": 0.5631000137329102, + "IoU.car": 0.8418000030517578, + "IoU.water": 0.629900016784668, + "IoU.painting": 0.7131999969482422, + "IoU.sofa": 0.6997000122070313, + "IoU.shelf": 0.41200000762939454, + "IoU.house": 0.4672999954223633, + "IoU.sea": 0.6990000152587891, + "IoU.mirror": 0.6961000061035156, + "IoU.rug": 0.6619000244140625, + "IoU.field": 0.3431999969482422, + "IoU.armchair": 0.45240001678466796, + "IoU.seat": 0.6327000045776368, + "IoU.fence": 0.4627000045776367, + "IoU.desk": 0.49290000915527343, + "IoU.rock": 0.445099983215332, + "IoU.wardrobe": 0.6016999816894532, + "IoU.lamp": 0.5684000015258789, + "IoU.bathtub": 0.8612000274658204, + "IoU.railing": 0.3863999938964844, + "IoU.cushion": 0.5934999847412109, + "IoU.base": 0.3256999969482422, + "IoU.box": 0.26079999923706054, + "IoU.column": 0.48720001220703124, + "IoU.signboard": 0.3445999908447266, + "IoU.chest of drawers": 0.33549999237060546, + "IoU.counter": 0.34709999084472654, + "IoU.sand": 0.5193000030517578, + "IoU.sink": 0.7080999755859375, + "IoU.skyscraper": 0.5479999923706055, + "IoU.fireplace": 0.7426000213623047, + "IoU.refrigerator": 0.7504000091552734, + "IoU.grandstand": 0.515, + "IoU.path": 0.24770000457763672, + "IoU.stairs": 0.2811000061035156, + "IoU.runway": 0.6626999664306641, + "IoU.case": 0.5641999816894532, + "IoU.pool table": 0.9148000335693359, + "IoU.pillow": 0.5922999954223633, + "IoU.screen door": 0.638400001525879, + "IoU.stairway": 0.3164999961853027, + "IoU.river": 0.18729999542236328, + "IoU.bridge": 0.5904000091552735, + "IoU.bookcase": 0.32040000915527345, + "IoU.blind": 0.45029998779296876, + "IoU.coffee table": 0.5643000030517578, + "IoU.toilet": 0.7919999694824219, + "IoU.flower": 0.38880001068115233, + "IoU.book": 0.45680000305175783, + "IoU.hill": 0.1427999973297119, + "IoU.bench": 0.4711000061035156, + "IoU.countertop": 0.6072999954223632, + "IoU.stove": 0.7572000122070313, + "IoU.palm": 0.5252999877929687, + "IoU.kitchen island": 0.4375, + "IoU.computer": 0.7448000335693359, + "IoU.swivel chair": 0.5379000091552735, + "IoU.boat": 0.6345999908447265, + "IoU.bar": 0.5145999908447265, + "IoU.arcade machine": 0.8052999877929687, + "IoU.hovel": 0.4602000045776367, + "IoU.bus": 0.902300033569336, + "IoU.towel": 0.6258000183105469, + "IoU.light": 0.44669998168945313, + "IoU.truck": 0.23190000534057617, + "IoU.tower": 0.293700008392334, + "IoU.chandelier": 0.634900016784668, + "IoU.awning": 0.2951000022888184, + "IoU.streetlight": 0.2034000015258789, + "IoU.booth": 0.380099983215332, + "IoU.television receiver": 0.6480000305175782, + "IoU.airplane": 0.6190000152587891, + "IoU.dirt track": 0.03210000038146973, + "IoU.apparel": 0.35259998321533204, + "IoU.pole": 0.15939999580383302, + "IoU.land": 0.0325, + "IoU.bannister": 0.13270000457763673, + "IoU.escalator": 0.5177000045776368, + "IoU.ottoman": 0.49200000762939455, + "IoU.bottle": 0.3160000038146973, + "IoU.buffet": 0.44409999847412107, + "IoU.poster": 0.176200008392334, + "IoU.stage": 0.1775, + "IoU.van": 0.4006999969482422, + "IoU.ship": 0.21040000915527343, + "IoU.fountain": 0.2904000091552734, + "IoU.conveyer belt": 0.7279000091552734, + "IoU.canopy": 0.2823999977111816, + "IoU.washer": 0.702699966430664, + "IoU.plaything": 0.2393000030517578, + "IoU.swimming pool": 0.6805999755859375, + "IoU.stool": 0.3747999954223633, + "IoU.barrel": 0.47189998626708984, + "IoU.basket": 0.34889999389648435, + 
"IoU.waterfall": 0.4495000076293945, + "IoU.tent": 0.8716999816894532, + "IoU.bag": 0.18870000839233397, + "IoU.minibike": 0.7077999877929687, + "IoU.cradle": 0.819800033569336, + "IoU.oven": 0.3288999938964844, + "IoU.ball": 0.5215000152587891, + "IoU.food": 0.5247000122070312, + "IoU.step": 0.12, + "IoU.tank": 0.5668000030517578, + "IoU.trade name": 0.2027000045776367, + "IoU.microwave": 0.6645999908447265, + "IoU.pot": 0.48439998626708985, + "IoU.animal": 0.650199966430664, + "IoU.bicycle": 0.5693999862670899, + "IoU.lake": 0.6040000152587891, + "IoU.dishwasher": 0.5993999862670898, + "IoU.screen": 0.5515999984741211, + "IoU.blanket": 0.15619999885559083, + "IoU.sculpture": 0.675199966430664, + "IoU.hood": 0.5629999923706055, + "IoU.sconce": 0.3708000183105469, + "IoU.vase": 0.36380001068115236, + "IoU.traffic light": 0.2745999908447266, + "IoU.tray": 0.099399995803833, + "IoU.ashcan": 0.4006999969482422, + "IoU.fan": 0.5322999954223633, + "IoU.pier": 0.2343000030517578, + "IoU.crt screen": 0.07690000057220459, + "IoU.plate": 0.5034999847412109, + "IoU.monitor": 0.20600000381469727, + "IoU.bulletin board": 0.4818000030517578, + "IoU.shower": 0.009399999976158141, + "IoU.radiator": 0.6040000152587891, + "IoU.glass": 0.16709999084472657, + "IoU.clock": 0.3636000061035156, + "IoU.flag": 0.4366999816894531, + "Acc.wall": 0.8747000122070312, + "Acc.building": 0.9341000366210938, + "Acc.sky": 0.9625, + "Acc.floor": 0.893499984741211, + "Acc.tree": 0.8941000366210937, + "Acc.ceiling": 0.9038999938964843, + "Acc.road": 0.8944000244140625, + "Acc.bed ": 0.9680999755859375, + "Acc.windowpane": 0.7830999755859375, + "Acc.grass": 0.8286000061035156, + "Acc.cabinet": 0.7498000335693359, + "Acc.sidewalk": 0.8152999877929688, + "Acc.person": 0.9265000152587891, + "Acc.earth": 0.5365000152587891, + "Acc.door": 0.658499984741211, + "Acc.table": 0.7494999694824219, + "Acc.mountain": 0.7051999664306641, + "Acc.plant": 0.595, + "Acc.curtain": 0.875, + "Acc.chair": 0.7131999969482422, + "Acc.car": 0.9291000366210938, + "Acc.water": 0.7706999969482422, + "Acc.painting": 0.8706999969482422, + "Acc.sofa": 0.8437000274658203, + "Acc.shelf": 0.5479000091552735, + "Acc.house": 0.5920000076293945, + "Acc.sea": 0.8461000061035157, + "Acc.mirror": 0.8048999786376954, + "Acc.rug": 0.8018000030517578, + "Acc.field": 0.500999984741211, + "Acc.armchair": 0.6361999893188477, + "Acc.seat": 0.8473999786376953, + "Acc.fence": 0.6218999862670899, + "Acc.desk": 0.7445999908447266, + "Acc.rock": 0.6315000152587891, + "Acc.wardrobe": 0.7844999694824218, + "Acc.lamp": 0.7476000213623046, + "Acc.bathtub": 0.9026999664306641, + "Acc.railing": 0.48630001068115236, + "Acc.cushion": 0.7151000213623047, + "Acc.base": 0.6068999862670899, + "Acc.box": 0.34200000762939453, + "Acc.column": 0.5886000061035156, + "Acc.signboard": 0.4333000183105469, + "Acc.chest of drawers": 0.6343999862670898, + "Acc.counter": 0.44049999237060544, + "Acc.sand": 0.7455999755859375, + "Acc.sink": 0.7791000366210937, + "Acc.skyscraper": 0.6497000122070312, + "Acc.fireplace": 0.9265000152587891, + "Acc.refrigerator": 0.8637000274658203, + "Acc.grandstand": 0.7083999633789062, + "Acc.path": 0.37200000762939456, + "Acc.stairs": 0.3818999862670898, + "Acc.runway": 0.8705999755859375, + "Acc.case": 0.7152999877929688, + "Acc.pool table": 0.9713999938964843, + "Acc.pillow": 0.6979000091552734, + "Acc.screen door": 0.7345999908447266, + "Acc.stairway": 0.47189998626708984, + "Acc.river": 0.3838999938964844, + "Acc.bridge": 0.6975, + "Acc.bookcase": 
0.5368999862670898, + "Acc.blind": 0.5136000061035156, + "Acc.coffee table": 0.8411000061035157, + "Acc.toilet": 0.9047000122070312, + "Acc.flower": 0.5420000076293945, + "Acc.book": 0.667300033569336, + "Acc.hill": 0.25239999771118166, + "Acc.bench": 0.5629000091552734, + "Acc.countertop": 0.729800033569336, + "Acc.stove": 0.8715000152587891, + "Acc.palm": 0.7461000061035157, + "Acc.kitchen island": 0.7430000305175781, + "Acc.computer": 0.9081999969482422, + "Acc.swivel chair": 0.7204000091552735, + "Acc.boat": 0.8277999877929687, + "Acc.bar": 0.6823999786376953, + "Acc.arcade machine": 0.9087000274658203, + "Acc.hovel": 0.5025, + "Acc.bus": 0.9619000244140625, + "Acc.towel": 0.8022000122070313, + "Acc.light": 0.5734999847412109, + "Acc.truck": 0.32, + "Acc.tower": 0.4941999816894531, + "Acc.chandelier": 0.8219000244140625, + "Acc.awning": 0.34180000305175784, + "Acc.streetlight": 0.3127000045776367, + "Acc.booth": 0.42150001525878905, + "Acc.television receiver": 0.7830999755859375, + "Acc.airplane": 0.6815000152587891, + "Acc.dirt track": 0.11020000457763672, + "Acc.apparel": 0.44799999237060545, + "Acc.pole": 0.2075, + "Acc.land": 0.06510000228881836, + "Acc.bannister": 0.18829999923706053, + "Acc.escalator": 0.7715000152587891, + "Acc.ottoman": 0.6966999816894531, + "Acc.bottle": 0.4906999969482422, + "Acc.buffet": 0.5888999938964844, + "Acc.poster": 0.20059999465942382, + "Acc.stage": 0.41819999694824217, + "Acc.van": 0.4925, + "Acc.ship": 0.22520000457763673, + "Acc.fountain": 0.30059999465942383, + "Acc.conveyer belt": 0.9420999908447265, + "Acc.canopy": 0.3028000068664551, + "Acc.washer": 0.7437000274658203, + "Acc.plaything": 0.36279998779296874, + "Acc.swimming pool": 0.8791000366210937, + "Acc.stool": 0.5556000137329101, + "Acc.barrel": 0.6479000091552735, + "Acc.basket": 0.48330001831054686, + "Acc.waterfall": 0.648499984741211, + "Acc.tent": 0.9881999969482422, + "Acc.bag": 0.21420000076293946, + "Acc.minibike": 0.8066999816894531, + "Acc.cradle": 0.9752999877929688, + "Acc.oven": 0.5663000106811523, + "Acc.ball": 0.6068999862670899, + "Acc.food": 0.5888999938964844, + "Acc.step": 0.163799991607666, + "Acc.tank": 0.6486000061035156, + "Acc.trade name": 0.21600000381469728, + "Acc.microwave": 0.739800033569336, + "Acc.pot": 0.5916999816894531, + "Acc.animal": 0.6826999664306641, + "Acc.bicycle": 0.7793000030517578, + "Acc.lake": 0.7702999877929687, + "Acc.dishwasher": 0.7461000061035157, + "Acc.screen": 0.7526000213623046, + "Acc.blanket": 0.19969999313354492, + "Acc.sculpture": 0.7808000183105469, + "Acc.hood": 0.7156999969482422, + "Acc.sconce": 0.48970001220703124, + "Acc.vase": 0.5702000045776368, + "Acc.traffic light": 0.4647999954223633, + "Acc.tray": 0.14979999542236327, + "Acc.ashcan": 0.5391999816894532, + "Acc.fan": 0.7691000366210937, + "Acc.pier": 0.4388999938964844, + "Acc.crt screen": 0.19350000381469726, + "Acc.plate": 0.7323999786376953, + "Acc.monitor": 0.26360000610351564, + "Acc.bulletin board": 0.6779000091552735, + "Acc.shower": 0.05, + "Acc.radiator": 0.7058999633789063, + "Acc.glass": 0.1843000030517578, + "Acc.clock": 0.41150001525878904, + "Acc.flag": 0.49459999084472656 + } + }, + "5": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8317, + "mIoU": 0.4928, + "mAcc": 0.6242, + "IoU.wall": 0.7730999755859375, + "IoU.building": 0.8304000091552735, + "IoU.sky": 0.9380999755859375, + "IoU.floor": 0.8141999816894532, + "IoU.tree": 0.7462999725341797, + 
"IoU.ceiling": 0.8308999633789063, + "IoU.road": 0.8305999755859375, + "IoU.bed ": 0.8923999786376953, + "IoU.windowpane": 0.6247999954223633, + "IoU.grass": 0.6926999664306641, + "IoU.cabinet": 0.6077999877929687, + "IoU.sidewalk": 0.6463999938964844, + "IoU.person": 0.7994999694824219, + "IoU.earth": 0.3793000030517578, + "IoU.door": 0.5011000061035156, + "IoU.table": 0.5838999938964844, + "IoU.mountain": 0.5908000183105468, + "IoU.plant": 0.4988000106811523, + "IoU.curtain": 0.7208000183105469, + "IoU.chair": 0.552599983215332, + "IoU.car": 0.8383999633789062, + "IoU.water": 0.5933000183105469, + "IoU.painting": 0.7105000305175782, + "IoU.sofa": 0.7119000244140625, + "IoU.shelf": 0.40759998321533203, + "IoU.house": 0.552400016784668, + "IoU.sea": 0.6833000183105469, + "IoU.mirror": 0.7012000274658203, + "IoU.rug": 0.6616999816894531, + "IoU.field": 0.36259998321533204, + "IoU.armchair": 0.46900001525878904, + "IoU.seat": 0.635099983215332, + "IoU.fence": 0.4384000015258789, + "IoU.desk": 0.48759998321533204, + "IoU.rock": 0.45299999237060545, + "IoU.wardrobe": 0.5722000122070312, + "IoU.lamp": 0.5527999877929688, + "IoU.bathtub": 0.8570999908447265, + "IoU.railing": 0.37810001373291013, + "IoU.cushion": 0.591500015258789, + "IoU.base": 0.33040000915527346, + "IoU.box": 0.24709999084472656, + "IoU.column": 0.4620000076293945, + "IoU.signboard": 0.34240001678466797, + "IoU.chest of drawers": 0.31489999771118166, + "IoU.counter": 0.33310001373291015, + "IoU.sand": 0.44959999084472657, + "IoU.sink": 0.6966000366210937, + "IoU.skyscraper": 0.5270000076293946, + "IoU.fireplace": 0.7319999694824219, + "IoU.refrigerator": 0.7808000183105469, + "IoU.grandstand": 0.5179000091552735, + "IoU.path": 0.24649999618530274, + "IoU.stairs": 0.2671999931335449, + "IoU.runway": 0.7005999755859375, + "IoU.case": 0.5409999847412109, + "IoU.pool table": 0.914000015258789, + "IoU.pillow": 0.5793999862670899, + "IoU.screen door": 0.6527999877929688, + "IoU.stairway": 0.3104999923706055, + "IoU.river": 0.18950000762939453, + "IoU.bridge": 0.6381999969482421, + "IoU.bookcase": 0.3454000091552734, + "IoU.blind": 0.4363000106811523, + "IoU.coffee table": 0.5668999862670898, + "IoU.toilet": 0.7751000213623047, + "IoU.flower": 0.33, + "IoU.book": 0.4506999969482422, + "IoU.hill": 0.13189999580383302, + "IoU.bench": 0.48639999389648436, + "IoU.countertop": 0.5906999969482422, + "IoU.stove": 0.7645999908447265, + "IoU.palm": 0.5291999816894531, + "IoU.kitchen island": 0.4465999984741211, + "IoU.computer": 0.750999984741211, + "IoU.swivel chair": 0.47970001220703123, + "IoU.boat": 0.6727999877929688, + "IoU.bar": 0.5115999984741211, + "IoU.arcade machine": 0.6987000274658203, + "IoU.hovel": 0.47939998626708985, + "IoU.bus": 0.8980999755859375, + "IoU.towel": 0.6420999908447266, + "IoU.light": 0.4325, + "IoU.truck": 0.3006999969482422, + "IoU.tower": 0.31379999160766603, + "IoU.chandelier": 0.6163000106811524, + "IoU.awning": 0.28049999237060547, + "IoU.streetlight": 0.19440000534057617, + "IoU.booth": 0.42220001220703124, + "IoU.television receiver": 0.6477999877929688, + "IoU.airplane": 0.6002000045776367, + "IoU.dirt track": 0.023399999141693117, + "IoU.apparel": 0.37659999847412107, + "IoU.pole": 0.13550000190734862, + "IoU.land": 0.03980000019073486, + "IoU.bannister": 0.10819999694824219, + "IoU.escalator": 0.49130001068115237, + "IoU.ottoman": 0.4515999984741211, + "IoU.bottle": 0.3436000061035156, + "IoU.buffet": 0.48869998931884767, + "IoU.poster": 0.22959999084472657, + "IoU.stage": 0.16430000305175782, + 
"IoU.van": 0.4234000015258789, + "IoU.ship": 0.2645999908447266, + "IoU.fountain": 0.23799999237060546, + "IoU.conveyer belt": 0.6887999725341797, + "IoU.canopy": 0.20530000686645508, + "IoU.washer": 0.7252999877929688, + "IoU.plaything": 0.22010000228881835, + "IoU.swimming pool": 0.5827999877929687, + "IoU.stool": 0.3509999847412109, + "IoU.barrel": 0.4759000015258789, + "IoU.basket": 0.3193000030517578, + "IoU.waterfall": 0.5154000091552734, + "IoU.tent": 0.7961000061035156, + "IoU.bag": 0.16430000305175782, + "IoU.minibike": 0.6758000183105469, + "IoU.cradle": 0.8129000091552734, + "IoU.oven": 0.3609000015258789, + "IoU.ball": 0.49189998626708986, + "IoU.food": 0.4881999969482422, + "IoU.step": 0.043600001335144044, + "IoU.tank": 0.5515000152587891, + "IoU.trade name": 0.2234000015258789, + "IoU.microwave": 0.721500015258789, + "IoU.pot": 0.435, + "IoU.animal": 0.619900016784668, + "IoU.bicycle": 0.5672000122070312, + "IoU.lake": 0.6836000061035157, + "IoU.dishwasher": 0.634000015258789, + "IoU.screen": 0.5018999862670899, + "IoU.blanket": 0.17290000915527343, + "IoU.sculpture": 0.6383000183105468, + "IoU.hood": 0.5191999816894531, + "IoU.sconce": 0.37209999084472656, + "IoU.vase": 0.349900016784668, + "IoU.traffic light": 0.27100000381469724, + "IoU.tray": 0.08210000038146972, + "IoU.ashcan": 0.40189998626708984, + "IoU.fan": 0.5356000137329101, + "IoU.pier": 0.18639999389648437, + "IoU.crt screen": 0.059699997901916504, + "IoU.plate": 0.4825, + "IoU.monitor": 0.20649999618530274, + "IoU.bulletin board": 0.4659000015258789, + "IoU.shower": 0.009399999976158141, + "IoU.radiator": 0.5818000030517578, + "IoU.glass": 0.1340999984741211, + "IoU.clock": 0.3079999923706055, + "IoU.flag": 0.429900016784668, + "Acc.wall": 0.8719999694824219, + "Acc.building": 0.9370999908447266, + "Acc.sky": 0.9612999725341796, + "Acc.floor": 0.8887000274658203, + "Acc.tree": 0.8959999847412109, + "Acc.ceiling": 0.8980999755859375, + "Acc.road": 0.8883999633789063, + "Acc.bed ": 0.9623000335693359, + "Acc.windowpane": 0.7676999664306641, + "Acc.grass": 0.8343000030517578, + "Acc.cabinet": 0.732699966430664, + "Acc.sidewalk": 0.8116999816894531, + "Acc.person": 0.9211000061035156, + "Acc.earth": 0.5379000091552735, + "Acc.door": 0.6581999969482422, + "Acc.table": 0.7454000091552735, + "Acc.mountain": 0.7201000213623047, + "Acc.plant": 0.5845000076293946, + "Acc.curtain": 0.8643000030517578, + "Acc.chair": 0.7016000366210937, + "Acc.car": 0.9298000335693359, + "Acc.water": 0.7351000213623047, + "Acc.painting": 0.8673999786376954, + "Acc.sofa": 0.856500015258789, + "Acc.shelf": 0.5459999847412109, + "Acc.house": 0.6644000244140625, + "Acc.sea": 0.8654000091552735, + "Acc.mirror": 0.8105000305175781, + "Acc.rug": 0.8056999969482422, + "Acc.field": 0.5156999969482422, + "Acc.armchair": 0.6520999908447266, + "Acc.seat": 0.8425, + "Acc.fence": 0.6029999923706054, + "Acc.desk": 0.76, + "Acc.rock": 0.6233000183105468, + "Acc.wardrobe": 0.7698999786376953, + "Acc.lamp": 0.7406999969482422, + "Acc.bathtub": 0.909000015258789, + "Acc.railing": 0.48580001831054687, + "Acc.cushion": 0.7144000244140625, + "Acc.base": 0.6270000076293946, + "Acc.box": 0.31739999771118166, + "Acc.column": 0.5497000122070312, + "Acc.signboard": 0.42900001525878906, + "Acc.chest of drawers": 0.6227000045776367, + "Acc.counter": 0.427400016784668, + "Acc.sand": 0.7122000122070312, + "Acc.sink": 0.7655999755859375, + "Acc.skyscraper": 0.6195000076293945, + "Acc.fireplace": 0.9305999755859375, + "Acc.refrigerator": 0.8859999847412109, + 
"Acc.grandstand": 0.7183000183105469, + "Acc.path": 0.3675, + "Acc.stairs": 0.3716999816894531, + "Acc.runway": 0.9163999938964844, + "Acc.case": 0.6705000305175781, + "Acc.pool table": 0.9733999633789062, + "Acc.pillow": 0.6908999633789062, + "Acc.screen door": 0.7341000366210938, + "Acc.stairway": 0.45669998168945314, + "Acc.river": 0.36020000457763673, + "Acc.bridge": 0.7630999755859375, + "Acc.bookcase": 0.5227000045776368, + "Acc.blind": 0.5188000106811523, + "Acc.coffee table": 0.8477999877929687, + "Acc.toilet": 0.9055999755859375, + "Acc.flower": 0.4975, + "Acc.book": 0.6523999786376953, + "Acc.hill": 0.24899999618530275, + "Acc.bench": 0.5863000106811523, + "Acc.countertop": 0.7147000122070313, + "Acc.stove": 0.8801999664306641, + "Acc.palm": 0.7498999786376953, + "Acc.kitchen island": 0.7501999664306641, + "Acc.computer": 0.908499984741211, + "Acc.swivel chair": 0.6934999847412109, + "Acc.boat": 0.8537999725341797, + "Acc.bar": 0.6612000274658203, + "Acc.arcade machine": 0.7919999694824219, + "Acc.hovel": 0.5229000091552735, + "Acc.bus": 0.9644999694824219, + "Acc.towel": 0.7947000122070312, + "Acc.light": 0.5531000137329102, + "Acc.truck": 0.41459999084472654, + "Acc.tower": 0.5236000061035156, + "Acc.chandelier": 0.797300033569336, + "Acc.awning": 0.3315000152587891, + "Acc.streetlight": 0.29489999771118164, + "Acc.booth": 0.45490001678466796, + "Acc.television receiver": 0.784000015258789, + "Acc.airplane": 0.6630999755859375, + "Acc.dirt track": 0.10970000267028808, + "Acc.apparel": 0.48889999389648436, + "Acc.pole": 0.17020000457763673, + "Acc.land": 0.07989999771118164, + "Acc.bannister": 0.14819999694824218, + "Acc.escalator": 0.7238999938964844, + "Acc.ottoman": 0.6691000366210937, + "Acc.bottle": 0.5554000091552734, + "Acc.buffet": 0.6451000213623047, + "Acc.poster": 0.2705999946594238, + "Acc.stage": 0.38799999237060545, + "Acc.van": 0.5147999954223633, + "Acc.ship": 0.2755999946594238, + "Acc.fountain": 0.24559999465942384, + "Acc.conveyer belt": 0.9279000091552735, + "Acc.canopy": 0.25190000534057616, + "Acc.washer": 0.7441000366210937, + "Acc.plaything": 0.34400001525878904, + "Acc.swimming pool": 0.8851000213623047, + "Acc.stool": 0.5059999847412109, + "Acc.barrel": 0.6505999755859375, + "Acc.basket": 0.4509000015258789, + "Acc.waterfall": 0.6936000061035156, + "Acc.tent": 0.9891000366210938, + "Acc.bag": 0.18860000610351563, + "Acc.minibike": 0.7862000274658203, + "Acc.cradle": 0.9736000061035156, + "Acc.oven": 0.552400016784668, + "Acc.ball": 0.5708000183105468, + "Acc.food": 0.5445000076293945, + "Acc.step": 0.06420000076293945, + "Acc.tank": 0.6513999938964844, + "Acc.trade name": 0.2402000045776367, + "Acc.microwave": 0.8066999816894531, + "Acc.pot": 0.5316999816894531, + "Acc.animal": 0.650199966430664, + "Acc.bicycle": 0.7545999908447265, + "Acc.lake": 0.7663999938964844, + "Acc.dishwasher": 0.7438999938964844, + "Acc.screen": 0.7702999877929687, + "Acc.blanket": 0.21059999465942383, + "Acc.sculpture": 0.7922000122070313, + "Acc.hood": 0.6722000122070313, + "Acc.sconce": 0.46419998168945314, + "Acc.vase": 0.5559000015258789, + "Acc.traffic light": 0.4656999969482422, + "Acc.tray": 0.15369999885559082, + "Acc.ashcan": 0.5420999908447266, + "Acc.fan": 0.7448000335693359, + "Acc.pier": 0.4508000183105469, + "Acc.crt screen": 0.16540000915527345, + "Acc.plate": 0.7037999725341797, + "Acc.monitor": 0.24729999542236328, + "Acc.bulletin board": 0.7038999938964844, + "Acc.shower": 0.05, + "Acc.radiator": 0.6970999908447265, + "Acc.glass": 0.14710000038146973, + 
"Acc.clock": 0.36700000762939455, + "Acc.flag": 0.504900016784668 + } + }, + "6": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8264, + "mIoU": 0.4792, + "mAcc": 0.6101, + "IoU.wall": 0.7711000061035156, + "IoU.building": 0.8220999908447265, + "IoU.sky": 0.9380000305175781, + "IoU.floor": 0.8031999969482422, + "IoU.tree": 0.7433999633789062, + "IoU.ceiling": 0.829800033569336, + "IoU.road": 0.8272000122070312, + "IoU.bed ": 0.8837999725341796, + "IoU.windowpane": 0.6172999954223632, + "IoU.grass": 0.6887999725341797, + "IoU.cabinet": 0.5947999954223633, + "IoU.sidewalk": 0.6480000305175782, + "IoU.person": 0.7918000030517578, + "IoU.earth": 0.36759998321533205, + "IoU.door": 0.49720001220703125, + "IoU.table": 0.5731000137329102, + "IoU.mountain": 0.5690000152587891, + "IoU.plant": 0.4966999816894531, + "IoU.curtain": 0.7155000305175782, + "IoU.chair": 0.5413999938964844, + "IoU.car": 0.8343000030517578, + "IoU.water": 0.5504999923706054, + "IoU.painting": 0.6983999633789062, + "IoU.sofa": 0.6761000061035156, + "IoU.shelf": 0.41720001220703123, + "IoU.house": 0.494900016784668, + "IoU.sea": 0.6363000106811524, + "IoU.mirror": 0.6583000183105469, + "IoU.rug": 0.6412999725341797, + "IoU.field": 0.3103000068664551, + "IoU.armchair": 0.4420000076293945, + "IoU.seat": 0.6165999984741211, + "IoU.fence": 0.3933000183105469, + "IoU.desk": 0.4884999847412109, + "IoU.rock": 0.4463999938964844, + "IoU.wardrobe": 0.5515000152587891, + "IoU.lamp": 0.5527999877929688, + "IoU.bathtub": 0.8538999938964844, + "IoU.railing": 0.3479000091552734, + "IoU.cushion": 0.5541999816894532, + "IoU.base": 0.3122999954223633, + "IoU.box": 0.24459999084472656, + "IoU.column": 0.46939998626708984, + "IoU.signboard": 0.3315999984741211, + "IoU.chest of drawers": 0.3447000122070312, + "IoU.counter": 0.38159999847412107, + "IoU.sand": 0.47450000762939454, + "IoU.sink": 0.6827999877929688, + "IoU.skyscraper": 0.5409000015258789, + "IoU.fireplace": 0.7565000152587891, + "IoU.refrigerator": 0.7530999755859376, + "IoU.grandstand": 0.500099983215332, + "IoU.path": 0.26, + "IoU.stairs": 0.2459000015258789, + "IoU.runway": 0.6754000091552734, + "IoU.case": 0.5311999893188477, + "IoU.pool table": 0.9109999847412109, + "IoU.pillow": 0.5356999969482422, + "IoU.screen door": 0.6641999816894532, + "IoU.stairway": 0.2772999954223633, + "IoU.river": 0.18040000915527343, + "IoU.bridge": 0.6729000091552735, + "IoU.bookcase": 0.3468000030517578, + "IoU.blind": 0.43740001678466794, + "IoU.coffee table": 0.550099983215332, + "IoU.toilet": 0.7811000061035156, + "IoU.flower": 0.31260000228881835, + "IoU.book": 0.4465000152587891, + "IoU.hill": 0.12640000343322755, + "IoU.bench": 0.42939998626708986, + "IoU.countertop": 0.5931999969482422, + "IoU.stove": 0.7083999633789062, + "IoU.palm": 0.5045000076293945, + "IoU.kitchen island": 0.4443999862670898, + "IoU.computer": 0.7287999725341797, + "IoU.swivel chair": 0.48189998626708985, + "IoU.boat": 0.6708999633789062, + "IoU.bar": 0.49990001678466794, + "IoU.arcade machine": 0.7244999694824219, + "IoU.hovel": 0.3143000030517578, + "IoU.bus": 0.8637999725341797, + "IoU.towel": 0.619900016784668, + "IoU.light": 0.40900001525878904, + "IoU.truck": 0.22389999389648438, + "IoU.tower": 0.2706999969482422, + "IoU.chandelier": 0.6191999816894531, + "IoU.awning": 0.26149999618530273, + "IoU.streetlight": 0.18440000534057618, + "IoU.booth": 0.395099983215332, + "IoU.television receiver": 0.6693000030517579, + 
"IoU.airplane": 0.5713000106811523, + "IoU.dirt track": 0.0005999999865889549, + "IoU.apparel": 0.33529998779296877, + "IoU.pole": 0.11939999580383301, + "IoU.land": 0.03980000019073486, + "IoU.bannister": 0.10090000152587891, + "IoU.escalator": 0.49709999084472656, + "IoU.ottoman": 0.4490000152587891, + "IoU.bottle": 0.34369998931884765, + "IoU.buffet": 0.527400016784668, + "IoU.poster": 0.19930000305175782, + "IoU.stage": 0.19299999237060547, + "IoU.van": 0.43529998779296875, + "IoU.ship": 0.20469999313354492, + "IoU.fountain": 0.21139999389648437, + "IoU.conveyer belt": 0.6973999786376953, + "IoU.canopy": 0.21559999465942384, + "IoU.washer": 0.7370999908447265, + "IoU.plaything": 0.23260000228881836, + "IoU.swimming pool": 0.6086000061035156, + "IoU.stool": 0.37599998474121094, + "IoU.barrel": 0.36, + "IoU.basket": 0.313700008392334, + "IoU.waterfall": 0.6309999847412109, + "IoU.tent": 0.7976999664306641, + "IoU.bag": 0.13739999771118164, + "IoU.minibike": 0.6852999877929687, + "IoU.cradle": 0.7955000305175781, + "IoU.oven": 0.3213000106811523, + "IoU.ball": 0.4840999984741211, + "IoU.food": 0.5209000015258789, + "IoU.step": 0.084399995803833, + "IoU.tank": 0.5420999908447266, + "IoU.trade name": 0.20700000762939452, + "IoU.microwave": 0.7355999755859375, + "IoU.pot": 0.42150001525878905, + "IoU.animal": 0.6077000045776367, + "IoU.bicycle": 0.5770999908447265, + "IoU.lake": 0.14239999771118164, + "IoU.dishwasher": 0.5518000030517578, + "IoU.screen": 0.561500015258789, + "IoU.blanket": 0.11739999771118165, + "IoU.sculpture": 0.5747999954223633, + "IoU.hood": 0.5318999862670899, + "IoU.sconce": 0.3397999954223633, + "IoU.vase": 0.335, + "IoU.traffic light": 0.26129999160766604, + "IoU.tray": 0.07760000228881836, + "IoU.ashcan": 0.4061000061035156, + "IoU.fan": 0.5177000045776368, + "IoU.pier": 0.15760000228881835, + "IoU.crt screen": 0.06730000019073486, + "IoU.plate": 0.4915999984741211, + "IoU.monitor": 0.23420000076293945, + "IoU.bulletin board": 0.45689998626708983, + "IoU.shower": 0.012999999523162841, + "IoU.radiator": 0.5275999832153321, + "IoU.glass": 0.12020000457763672, + "IoU.clock": 0.2953000068664551, + "IoU.flag": 0.5261999893188477, + "Acc.wall": 0.8762999725341797, + "Acc.building": 0.9287999725341797, + "Acc.sky": 0.9616000366210937, + "Acc.floor": 0.8859999847412109, + "Acc.tree": 0.8947000122070312, + "Acc.ceiling": 0.8943000030517578, + "Acc.road": 0.8881999969482421, + "Acc.bed ": 0.961500015258789, + "Acc.windowpane": 0.7627999877929688, + "Acc.grass": 0.8413999938964843, + "Acc.cabinet": 0.7231999969482422, + "Acc.sidewalk": 0.8080999755859375, + "Acc.person": 0.9180999755859375, + "Acc.earth": 0.519000015258789, + "Acc.door": 0.635, + "Acc.table": 0.7362999725341797, + "Acc.mountain": 0.705, + "Acc.plant": 0.5808000183105468, + "Acc.curtain": 0.8579000091552734, + "Acc.chair": 0.6883999633789063, + "Acc.car": 0.9266000366210938, + "Acc.water": 0.6861000061035156, + "Acc.painting": 0.8526000213623047, + "Acc.sofa": 0.8330000305175781, + "Acc.shelf": 0.5708000183105468, + "Acc.house": 0.600999984741211, + "Acc.sea": 0.8166999816894531, + "Acc.mirror": 0.7870999908447266, + "Acc.rug": 0.7808000183105469, + "Acc.field": 0.465, + "Acc.armchair": 0.6245999908447266, + "Acc.seat": 0.8483000183105469, + "Acc.fence": 0.5406000137329101, + "Acc.desk": 0.7662000274658203, + "Acc.rock": 0.6059000015258789, + "Acc.wardrobe": 0.763499984741211, + "Acc.lamp": 0.7488999938964844, + "Acc.bathtub": 0.9108999633789062, + "Acc.railing": 0.4609000015258789, + "Acc.cushion": 
0.6919999694824219, + "Acc.base": 0.6004999923706055, + "Acc.box": 0.3170000076293945, + "Acc.column": 0.5718999862670898, + "Acc.signboard": 0.4161000061035156, + "Acc.chest of drawers": 0.6372000122070313, + "Acc.counter": 0.48709999084472655, + "Acc.sand": 0.7155999755859375, + "Acc.sink": 0.764000015258789, + "Acc.skyscraper": 0.6458000183105469, + "Acc.fireplace": 0.905, + "Acc.refrigerator": 0.8537999725341797, + "Acc.grandstand": 0.720999984741211, + "Acc.path": 0.385099983215332, + "Acc.stairs": 0.35900001525878905, + "Acc.runway": 0.8959999847412109, + "Acc.case": 0.6705999755859375, + "Acc.pool table": 0.974800033569336, + "Acc.pillow": 0.6379000091552735, + "Acc.screen door": 0.7711000061035156, + "Acc.stairway": 0.41759998321533204, + "Acc.river": 0.43740001678466794, + "Acc.bridge": 0.8188999938964844, + "Acc.bookcase": 0.5558000183105469, + "Acc.blind": 0.5306999969482422, + "Acc.coffee table": 0.8566999816894532, + "Acc.toilet": 0.8968000030517578, + "Acc.flower": 0.48619998931884767, + "Acc.book": 0.6295000076293945, + "Acc.hill": 0.21260000228881837, + "Acc.bench": 0.5347000122070312, + "Acc.countertop": 0.7423000335693359, + "Acc.stove": 0.8373000335693359, + "Acc.palm": 0.7172000122070312, + "Acc.kitchen island": 0.8191999816894531, + "Acc.computer": 0.8854000091552734, + "Acc.swivel chair": 0.6963999938964843, + "Acc.boat": 0.8408999633789063, + "Acc.bar": 0.602599983215332, + "Acc.arcade machine": 0.8159999847412109, + "Acc.hovel": 0.34180000305175784, + "Acc.bus": 0.9458999633789062, + "Acc.towel": 0.7733000183105468, + "Acc.light": 0.5152000045776367, + "Acc.truck": 0.2993000030517578, + "Acc.tower": 0.48130001068115236, + "Acc.chandelier": 0.7993000030517579, + "Acc.awning": 0.32869998931884764, + "Acc.streetlight": 0.27170000076293943, + "Acc.booth": 0.43349998474121093, + "Acc.television receiver": 0.769000015258789, + "Acc.airplane": 0.6565000152587891, + "Acc.dirt track": 0.0014000000059604645, + "Acc.apparel": 0.427400016784668, + "Acc.pole": 0.15199999809265136, + "Acc.land": 0.06380000114440917, + "Acc.bannister": 0.14859999656677247, + "Acc.escalator": 0.6836000061035157, + "Acc.ottoman": 0.6548000335693359, + "Acc.bottle": 0.5654999923706054, + "Acc.buffet": 0.7104000091552735, + "Acc.poster": 0.2563999938964844, + "Acc.stage": 0.4266999816894531, + "Acc.van": 0.5220999908447266, + "Acc.ship": 0.21389999389648437, + "Acc.fountain": 0.22239999771118163, + "Acc.conveyer belt": 0.9458999633789062, + "Acc.canopy": 0.2972999954223633, + "Acc.washer": 0.7633999633789063, + "Acc.plaything": 0.3452000045776367, + "Acc.swimming pool": 0.8651000213623047, + "Acc.stool": 0.49810001373291013, + "Acc.barrel": 0.6512000274658203, + "Acc.basket": 0.4086000061035156, + "Acc.waterfall": 0.8658999633789063, + "Acc.tent": 0.99, + "Acc.bag": 0.149399995803833, + "Acc.minibike": 0.7905999755859375, + "Acc.cradle": 0.9733000183105469, + "Acc.oven": 0.46, + "Acc.ball": 0.5263999938964844, + "Acc.food": 0.5838000106811524, + "Acc.step": 0.11060000419616699, + "Acc.tank": 0.6452999877929687, + "Acc.trade name": 0.2225, + "Acc.microwave": 0.8325, + "Acc.pot": 0.5063999938964844, + "Acc.animal": 0.6402999877929687, + "Acc.bicycle": 0.7430000305175781, + "Acc.lake": 0.16399999618530273, + "Acc.dishwasher": 0.712699966430664, + "Acc.screen": 0.8037000274658204, + "Acc.blanket": 0.13350000381469726, + "Acc.sculpture": 0.685199966430664, + "Acc.hood": 0.6934999847412109, + "Acc.sconce": 0.4265999984741211, + "Acc.vase": 0.5243000030517578, + "Acc.traffic light": 0.4677000045776367, + 
"Acc.tray": 0.140600004196167, + "Acc.ashcan": 0.5361999893188476, + "Acc.fan": 0.7076000213623047, + "Acc.pier": 0.4259999847412109, + "Acc.crt screen": 0.17889999389648437, + "Acc.plate": 0.6969999694824218, + "Acc.monitor": 0.2731999969482422, + "Acc.bulletin board": 0.6894000244140625, + "Acc.shower": 0.05, + "Acc.radiator": 0.6379999923706055, + "Acc.glass": 0.13199999809265137, + "Acc.clock": 0.33950000762939453, + "Acc.flag": 0.5970999908447265 + } + }, + "7": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8198000000000001, + "mIoU": 0.4654, + "mAcc": 0.5963, + "IoU.wall": 0.759000015258789, + "IoU.building": 0.8193000030517578, + "IoU.sky": 0.9370999908447266, + "IoU.floor": 0.8036000061035157, + "IoU.tree": 0.7391000366210938, + "IoU.ceiling": 0.8236000061035156, + "IoU.road": 0.8269000244140625, + "IoU.bed ": 0.8706999969482422, + "IoU.windowpane": 0.6074000167846679, + "IoU.grass": 0.6933999633789063, + "IoU.cabinet": 0.5654000091552734, + "IoU.sidewalk": 0.642300033569336, + "IoU.person": 0.7822000122070313, + "IoU.earth": 0.3618000030517578, + "IoU.door": 0.4361999893188477, + "IoU.table": 0.5518000030517578, + "IoU.mountain": 0.582400016784668, + "IoU.plant": 0.49939998626708987, + "IoU.curtain": 0.7066000366210937, + "IoU.chair": 0.519000015258789, + "IoU.car": 0.8176999664306641, + "IoU.water": 0.5547000122070312, + "IoU.painting": 0.6887000274658203, + "IoU.sofa": 0.6473999786376953, + "IoU.shelf": 0.4204000091552734, + "IoU.house": 0.5247000122070312, + "IoU.sea": 0.6555999755859375, + "IoU.mirror": 0.6343999862670898, + "IoU.rug": 0.675, + "IoU.field": 0.28850000381469726, + "IoU.armchair": 0.4218000030517578, + "IoU.seat": 0.6084000015258789, + "IoU.fence": 0.38540000915527345, + "IoU.desk": 0.48439998626708985, + "IoU.rock": 0.4495000076293945, + "IoU.wardrobe": 0.5133000183105468, + "IoU.lamp": 0.5370999908447266, + "IoU.bathtub": 0.8068000030517578, + "IoU.railing": 0.34040000915527346, + "IoU.cushion": 0.5458000183105469, + "IoU.base": 0.29950000762939455, + "IoU.box": 0.23100000381469726, + "IoU.column": 0.455, + "IoU.signboard": 0.3243000030517578, + "IoU.chest of drawers": 0.32110000610351563, + "IoU.counter": 0.332599983215332, + "IoU.sand": 0.39180000305175783, + "IoU.sink": 0.6570999908447266, + "IoU.skyscraper": 0.5552999877929687, + "IoU.fireplace": 0.7202999877929688, + "IoU.refrigerator": 0.6870999908447266, + "IoU.grandstand": 0.4929999923706055, + "IoU.path": 0.2175, + "IoU.stairs": 0.22690000534057617, + "IoU.runway": 0.725999984741211, + "IoU.case": 0.5097000122070312, + "IoU.pool table": 0.9130999755859375, + "IoU.pillow": 0.5365999984741211, + "IoU.screen door": 0.5788000106811524, + "IoU.stairway": 0.28200000762939453, + "IoU.river": 0.17540000915527343, + "IoU.bridge": 0.6423999786376953, + "IoU.bookcase": 0.3093000030517578, + "IoU.blind": 0.3972999954223633, + "IoU.coffee table": 0.5338999938964843, + "IoU.toilet": 0.8187000274658203, + "IoU.flower": 0.37849998474121094, + "IoU.book": 0.4306999969482422, + "IoU.hill": 0.0709000015258789, + "IoU.bench": 0.39369998931884764, + "IoU.countertop": 0.5302000045776367, + "IoU.stove": 0.6901999664306641, + "IoU.palm": 0.47310001373291016, + "IoU.kitchen island": 0.34700000762939454, + "IoU.computer": 0.6313999938964844, + "IoU.swivel chair": 0.4856999969482422, + "IoU.boat": 0.6430000305175781, + "IoU.bar": 0.487400016784668, + "IoU.arcade machine": 0.40369998931884765, + "IoU.hovel": 0.3104999923706055, + 
"IoU.bus": 0.8441000366210938, + "IoU.towel": 0.5788000106811524, + "IoU.light": 0.4331000137329102, + "IoU.truck": 0.2115999984741211, + "IoU.tower": 0.29139999389648436, + "IoU.chandelier": 0.5922000122070312, + "IoU.awning": 0.2865999984741211, + "IoU.streetlight": 0.17979999542236327, + "IoU.booth": 0.42770000457763674, + "IoU.television receiver": 0.6444999694824218, + "IoU.airplane": 0.5590000152587891, + "IoU.dirt track": 0.14420000076293946, + "IoU.apparel": 0.3159000015258789, + "IoU.pole": 0.12020000457763672, + "IoU.land": 0.08229999542236328, + "IoU.bannister": 0.10609999656677246, + "IoU.escalator": 0.34650001525878904, + "IoU.ottoman": 0.4013999938964844, + "IoU.bottle": 0.16100000381469726, + "IoU.buffet": 0.5041999816894531, + "IoU.poster": 0.2510000038146973, + "IoU.stage": 0.16590000152587892, + "IoU.van": 0.3958000183105469, + "IoU.ship": 0.6819999694824219, + "IoU.fountain": 0.20459999084472658, + "IoU.conveyer belt": 0.5808000183105468, + "IoU.canopy": 0.22430000305175782, + "IoU.washer": 0.7451000213623047, + "IoU.plaything": 0.23469999313354492, + "IoU.swimming pool": 0.7, + "IoU.stool": 0.3333000183105469, + "IoU.barrel": 0.392599983215332, + "IoU.basket": 0.24299999237060546, + "IoU.waterfall": 0.6843000030517579, + "IoU.tent": 0.8537000274658203, + "IoU.bag": 0.11670000076293946, + "IoU.minibike": 0.6719000244140625, + "IoU.cradle": 0.8094000244140624, + "IoU.oven": 0.26850000381469724, + "IoU.ball": 0.3956999969482422, + "IoU.food": 0.505099983215332, + "IoU.step": 0.10270000457763671, + "IoU.tank": 0.47650001525878904, + "IoU.trade name": 0.22540000915527345, + "IoU.microwave": 0.67, + "IoU.pot": 0.3306999969482422, + "IoU.animal": 0.6104000091552735, + "IoU.bicycle": 0.5763999938964843, + "IoU.lake": 0.14229999542236327, + "IoU.dishwasher": 0.5036000061035156, + "IoU.screen": 0.5320999908447266, + "IoU.blanket": 0.1331999969482422, + "IoU.sculpture": 0.5102000045776367, + "IoU.hood": 0.49779998779296875, + "IoU.sconce": 0.3163999938964844, + "IoU.vase": 0.3114999961853027, + "IoU.traffic light": 0.26549999237060545, + "IoU.tray": 0.06420000076293945, + "IoU.ashcan": 0.38540000915527345, + "IoU.fan": 0.4993000030517578, + "IoU.pier": 0.19510000228881835, + "IoU.crt screen": 0.028900001049041748, + "IoU.plate": 0.4734000015258789, + "IoU.monitor": 0.17459999084472655, + "IoU.bulletin board": 0.44790000915527345, + "IoU.shower": 0.009100000262260438, + "IoU.radiator": 0.5354999923706054, + "IoU.glass": 0.08319999694824219, + "IoU.clock": 0.27780000686645506, + "IoU.flag": 0.595999984741211, + "Acc.wall": 0.8675, + "Acc.building": 0.9252999877929687, + "Acc.sky": 0.9595999908447266, + "Acc.floor": 0.884000015258789, + "Acc.tree": 0.8919000244140625, + "Acc.ceiling": 0.8909999847412109, + "Acc.road": 0.8823999786376953, + "Acc.bed ": 0.9616000366210937, + "Acc.windowpane": 0.7708999633789062, + "Acc.grass": 0.829800033569336, + "Acc.cabinet": 0.695, + "Acc.sidewalk": 0.8180999755859375, + "Acc.person": 0.9138999938964844, + "Acc.earth": 0.5206000137329102, + "Acc.door": 0.5765999984741211, + "Acc.table": 0.7218000030517578, + "Acc.mountain": 0.7340000152587891, + "Acc.plant": 0.5918999862670898, + "Acc.curtain": 0.8437999725341797, + "Acc.chair": 0.6670999908447266, + "Acc.car": 0.9226999664306641, + "Acc.water": 0.699800033569336, + "Acc.painting": 0.8502999877929688, + "Acc.sofa": 0.8280999755859375, + "Acc.shelf": 0.5852000045776368, + "Acc.house": 0.6576000213623047, + "Acc.sea": 0.8080000305175781, + "Acc.mirror": 0.7537000274658203, + "Acc.rug": 
0.8031999969482422, + "Acc.field": 0.40900001525878904, + "Acc.armchair": 0.5897000122070313, + "Acc.seat": 0.8187000274658203, + "Acc.fence": 0.5504000091552734, + "Acc.desk": 0.785199966430664, + "Acc.rock": 0.600999984741211, + "Acc.wardrobe": 0.6993000030517578, + "Acc.lamp": 0.7320999908447265, + "Acc.bathtub": 0.855, + "Acc.railing": 0.4520000076293945, + "Acc.cushion": 0.6841000366210938, + "Acc.base": 0.5602999877929687, + "Acc.box": 0.311299991607666, + "Acc.column": 0.5706999969482421, + "Acc.signboard": 0.4031999969482422, + "Acc.chest of drawers": 0.6523999786376953, + "Acc.counter": 0.4306999969482422, + "Acc.sand": 0.619900016784668, + "Acc.sink": 0.7445999908447266, + "Acc.skyscraper": 0.6513999938964844, + "Acc.fireplace": 0.8863999938964844, + "Acc.refrigerator": 0.8106999969482422, + "Acc.grandstand": 0.7216999816894532, + "Acc.path": 0.3177000045776367, + "Acc.stairs": 0.36020000457763673, + "Acc.runway": 0.9673999786376953, + "Acc.case": 0.6773999786376953, + "Acc.pool table": 0.9725, + "Acc.pillow": 0.6179999923706054, + "Acc.screen door": 0.6466000366210938, + "Acc.stairway": 0.41959999084472654, + "Acc.river": 0.42689998626708986, + "Acc.bridge": 0.8573000335693359, + "Acc.bookcase": 0.49770000457763675, + "Acc.blind": 0.4545999908447266, + "Acc.coffee table": 0.8579000091552734, + "Acc.toilet": 0.8975, + "Acc.flower": 0.5406999969482422, + "Acc.book": 0.61, + "Acc.hill": 0.13420000076293945, + "Acc.bench": 0.5043999862670898, + "Acc.countertop": 0.6554000091552734, + "Acc.stove": 0.813499984741211, + "Acc.palm": 0.7125, + "Acc.kitchen island": 0.8076000213623047, + "Acc.computer": 0.7643000030517578, + "Acc.swivel chair": 0.6737000274658204, + "Acc.boat": 0.8505000305175782, + "Acc.bar": 0.6662000274658203, + "Acc.arcade machine": 0.4659000015258789, + "Acc.hovel": 0.31510000228881835, + "Acc.bus": 0.9344999694824219, + "Acc.towel": 0.7337000274658203, + "Acc.light": 0.5306999969482422, + "Acc.truck": 0.3004999923706055, + "Acc.tower": 0.49270000457763674, + "Acc.chandelier": 0.775, + "Acc.awning": 0.36220001220703124, + "Acc.streetlight": 0.25540000915527344, + "Acc.booth": 0.47869998931884766, + "Acc.television receiver": 0.7679000091552735, + "Acc.airplane": 0.6544999694824218, + "Acc.dirt track": 0.21360000610351562, + "Acc.apparel": 0.41450000762939454, + "Acc.pole": 0.15069999694824218, + "Acc.land": 0.13430000305175782, + "Acc.bannister": 0.17389999389648436, + "Acc.escalator": 0.44310001373291014, + "Acc.ottoman": 0.6106999969482422, + "Acc.bottle": 0.18729999542236328, + "Acc.buffet": 0.7094000244140625, + "Acc.poster": 0.31239999771118165, + "Acc.stage": 0.4513999938964844, + "Acc.van": 0.4672000122070312, + "Acc.ship": 0.7004000091552735, + "Acc.fountain": 0.21729999542236328, + "Acc.conveyer belt": 0.9262000274658203, + "Acc.canopy": 0.3145000076293945, + "Acc.washer": 0.7616000366210938, + "Acc.plaything": 0.34830001831054686, + "Acc.swimming pool": 0.8541000366210938, + "Acc.stool": 0.4961999893188477, + "Acc.barrel": 0.7368000030517579, + "Acc.basket": 0.3422999954223633, + "Acc.waterfall": 0.8779000091552734, + "Acc.tent": 0.9941999816894531, + "Acc.bag": 0.12789999961853027, + "Acc.minibike": 0.773499984741211, + "Acc.cradle": 0.9783000183105469, + "Acc.oven": 0.49200000762939455, + "Acc.ball": 0.4393000030517578, + "Acc.food": 0.577400016784668, + "Acc.step": 0.1361999988555908, + "Acc.tank": 0.567400016784668, + "Acc.trade name": 0.24469999313354493, + "Acc.microwave": 0.7580000305175781, + "Acc.pot": 0.38729999542236326, + "Acc.animal": 
0.6448000335693359, + "Acc.bicycle": 0.7537999725341797, + "Acc.lake": 0.17700000762939452, + "Acc.dishwasher": 0.6145999908447266, + "Acc.screen": 0.7558000183105469, + "Acc.blanket": 0.15689999580383301, + "Acc.sculpture": 0.6181000137329101, + "Acc.hood": 0.6356999969482422, + "Acc.sconce": 0.39430000305175783, + "Acc.vase": 0.5011999893188477, + "Acc.traffic light": 0.47619998931884766, + "Acc.tray": 0.11170000076293946, + "Acc.ashcan": 0.5445999908447265, + "Acc.fan": 0.7040000152587891, + "Acc.pier": 0.4311999893188477, + "Acc.crt screen": 0.10390000343322754, + "Acc.plate": 0.6476999664306641, + "Acc.monitor": 0.20629999160766602, + "Acc.bulletin board": 0.6891000366210938, + "Acc.shower": 0.053400001525878905, + "Acc.radiator": 0.6566000366210938, + "Acc.glass": 0.08989999771118164, + "Acc.clock": 0.330099983215332, + "Acc.flag": 0.6551000213623047 + } + }, + "8": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8144, + "mIoU": 0.4545, + "mAcc": 0.5852, + "IoU.wall": 0.7551000213623047, + "IoU.building": 0.8169999694824219, + "IoU.sky": 0.9372000122070312, + "IoU.floor": 0.8027999877929688, + "IoU.tree": 0.7266999816894532, + "IoU.ceiling": 0.8233999633789062, + "IoU.road": 0.8105000305175781, + "IoU.bed ": 0.8644999694824219, + "IoU.windowpane": 0.6088000106811523, + "IoU.grass": 0.6626999664306641, + "IoU.cabinet": 0.5670999908447265, + "IoU.sidewalk": 0.6288000106811523, + "IoU.person": 0.7712000274658203, + "IoU.earth": 0.34099998474121096, + "IoU.door": 0.4358000183105469, + "IoU.table": 0.5504000091552734, + "IoU.mountain": 0.5752999877929688, + "IoU.plant": 0.49810001373291013, + "IoU.curtain": 0.7240000152587891, + "IoU.chair": 0.5015999984741211, + "IoU.car": 0.8129000091552734, + "IoU.water": 0.5056000137329102, + "IoU.painting": 0.6709999847412109, + "IoU.sofa": 0.6154999923706055, + "IoU.shelf": 0.4361999893188477, + "IoU.house": 0.5140999984741211, + "IoU.sea": 0.5481000137329102, + "IoU.mirror": 0.6393000030517578, + "IoU.rug": 0.6652999877929687, + "IoU.field": 0.26510000228881836, + "IoU.armchair": 0.34970001220703123, + "IoU.seat": 0.5947000122070313, + "IoU.fence": 0.34439998626708984, + "IoU.desk": 0.4356999969482422, + "IoU.rock": 0.46180000305175783, + "IoU.wardrobe": 0.5409000015258789, + "IoU.lamp": 0.5156000137329102, + "IoU.bathtub": 0.7944999694824219, + "IoU.railing": 0.3184000015258789, + "IoU.cushion": 0.5013999938964844, + "IoU.base": 0.306299991607666, + "IoU.box": 0.22049999237060547, + "IoU.column": 0.44970001220703126, + "IoU.signboard": 0.32099998474121094, + "IoU.chest of drawers": 0.31040000915527344, + "IoU.counter": 0.2868000030517578, + "IoU.sand": 0.4020999908447266, + "IoU.sink": 0.6441999816894531, + "IoU.skyscraper": 0.6122999954223632, + "IoU.fireplace": 0.6851000213623046, + "IoU.refrigerator": 0.6905000305175781, + "IoU.grandstand": 0.38869998931884764, + "IoU.path": 0.24110000610351562, + "IoU.stairs": 0.18600000381469728, + "IoU.runway": 0.7195999908447266, + "IoU.case": 0.4791999816894531, + "IoU.pool table": 0.9091000366210937, + "IoU.pillow": 0.4986000061035156, + "IoU.screen door": 0.5936000061035156, + "IoU.stairway": 0.23350000381469727, + "IoU.river": 0.154399995803833, + "IoU.bridge": 0.6168000030517579, + "IoU.bookcase": 0.3206999969482422, + "IoU.blind": 0.367400016784668, + "IoU.coffee table": 0.509000015258789, + "IoU.toilet": 0.8105000305175781, + "IoU.flower": 0.365, + "IoU.book": 0.4161000061035156, + "IoU.hill": 
0.06570000171661378, + "IoU.bench": 0.3934000015258789, + "IoU.countertop": 0.4822999954223633, + "IoU.stove": 0.6726000213623047, + "IoU.palm": 0.44970001220703126, + "IoU.kitchen island": 0.34130001068115234, + "IoU.computer": 0.6556999969482422, + "IoU.swivel chair": 0.43560001373291013, + "IoU.boat": 0.5870000076293945, + "IoU.bar": 0.4931000137329102, + "IoU.arcade machine": 0.3970000076293945, + "IoU.hovel": 0.4704000091552734, + "IoU.bus": 0.8041000366210938, + "IoU.towel": 0.5481999969482422, + "IoU.light": 0.4154000091552734, + "IoU.truck": 0.2640999984741211, + "IoU.tower": 0.3090999984741211, + "IoU.chandelier": 0.5829000091552734, + "IoU.awning": 0.28459999084472654, + "IoU.streetlight": 0.17610000610351562, + "IoU.booth": 0.42790000915527343, + "IoU.television receiver": 0.6074000167846679, + "IoU.airplane": 0.5599000167846679, + "IoU.dirt track": 0.13079999923706054, + "IoU.apparel": 0.3081999969482422, + "IoU.pole": 0.13989999771118164, + "IoU.land": 0.07960000038146972, + "IoU.bannister": 0.07010000228881835, + "IoU.escalator": 0.2893000030517578, + "IoU.ottoman": 0.3818000030517578, + "IoU.bottle": 0.225, + "IoU.buffet": 0.5875, + "IoU.poster": 0.2959000015258789, + "IoU.stage": 0.1361999988555908, + "IoU.van": 0.3938999938964844, + "IoU.ship": 0.890199966430664, + "IoU.fountain": 0.10930000305175781, + "IoU.conveyer belt": 0.6762000274658203, + "IoU.canopy": 0.2865999984741211, + "IoU.washer": 0.6934999847412109, + "IoU.plaything": 0.20659999847412108, + "IoU.swimming pool": 0.5938999938964844, + "IoU.stool": 0.276200008392334, + "IoU.barrel": 0.4956999969482422, + "IoU.basket": 0.1809000015258789, + "IoU.waterfall": 0.6272000122070313, + "IoU.tent": 0.875999984741211, + "IoU.bag": 0.13039999961853027, + "IoU.minibike": 0.5836000061035156, + "IoU.cradle": 0.7879000091552735, + "IoU.oven": 0.16100000381469726, + "IoU.ball": 0.5013999938964844, + "IoU.food": 0.524900016784668, + "IoU.step": 0.10529999732971192, + "IoU.tank": 0.4947999954223633, + "IoU.trade name": 0.21239999771118165, + "IoU.microwave": 0.36709999084472655, + "IoU.pot": 0.3616999816894531, + "IoU.animal": 0.5616999816894531, + "IoU.bicycle": 0.5154000091552734, + "IoU.lake": 0.5820999908447265, + "IoU.dishwasher": 0.4613999938964844, + "IoU.screen": 0.5813000106811523, + "IoU.blanket": 0.09720000267028808, + "IoU.sculpture": 0.5458000183105469, + "IoU.hood": 0.4995000076293945, + "IoU.sconce": 0.24920000076293947, + "IoU.vase": 0.29209999084472654, + "IoU.traffic light": 0.24459999084472656, + "IoU.tray": 0.03640000104904175, + "IoU.ashcan": 0.35389999389648436, + "IoU.fan": 0.47209999084472654, + "IoU.pier": 0.21030000686645509, + "IoU.crt screen": 0.03630000114440918, + "IoU.plate": 0.47200000762939454, + "IoU.monitor": 0.11319999694824219, + "IoU.bulletin board": 0.3665999984741211, + "IoU.shower": 0.010299999713897705, + "IoU.radiator": 0.5611999893188476, + "IoU.glass": 0.06440000057220459, + "IoU.clock": 0.26649999618530273, + "IoU.flag": 0.3106999969482422, + "Acc.wall": 0.8672000122070312, + "Acc.building": 0.9191000366210937, + "Acc.sky": 0.9605000305175782, + "Acc.floor": 0.8858999633789062, + "Acc.tree": 0.8845999908447265, + "Acc.ceiling": 0.8856999969482422, + "Acc.road": 0.8701999664306641, + "Acc.bed ": 0.9605000305175782, + "Acc.windowpane": 0.7637000274658203, + "Acc.grass": 0.8219000244140625, + "Acc.cabinet": 0.7101000213623047, + "Acc.sidewalk": 0.8151999664306641, + "Acc.person": 0.9077999877929688, + "Acc.earth": 0.47639999389648435, + "Acc.door": 0.5915999984741211, + "Acc.table": 
0.7081999969482422, + "Acc.mountain": 0.7256999969482422, + "Acc.plant": 0.5959000015258789, + "Acc.curtain": 0.8597000122070313, + "Acc.chair": 0.6505999755859375, + "Acc.car": 0.9175, + "Acc.water": 0.6183000183105469, + "Acc.painting": 0.8313999938964843, + "Acc.sofa": 0.8241999816894531, + "Acc.shelf": 0.6084000015258789, + "Acc.house": 0.6506999969482422, + "Acc.sea": 0.7661000061035156, + "Acc.mirror": 0.7633000183105468, + "Acc.rug": 0.7706999969482422, + "Acc.field": 0.4179999923706055, + "Acc.armchair": 0.4734999847412109, + "Acc.seat": 0.8223999786376953, + "Acc.fence": 0.4633000183105469, + "Acc.desk": 0.7386000061035156, + "Acc.rock": 0.6231000137329101, + "Acc.wardrobe": 0.772300033569336, + "Acc.lamp": 0.7091999816894531, + "Acc.bathtub": 0.8679000091552734, + "Acc.railing": 0.43759998321533206, + "Acc.cushion": 0.6265000152587891, + "Acc.base": 0.5827000045776367, + "Acc.box": 0.276299991607666, + "Acc.column": 0.5522999954223633, + "Acc.signboard": 0.4020999908447266, + "Acc.chest of drawers": 0.6211000061035157, + "Acc.counter": 0.4052999877929688, + "Acc.sand": 0.6043999862670898, + "Acc.sink": 0.7311000061035157, + "Acc.skyscraper": 0.7876999664306641, + "Acc.fireplace": 0.8741000366210937, + "Acc.refrigerator": 0.8231999969482422, + "Acc.grandstand": 0.7052999877929688, + "Acc.path": 0.37340000152587893, + "Acc.stairs": 0.2801000022888184, + "Acc.runway": 0.9387000274658203, + "Acc.case": 0.6508000183105469, + "Acc.pool table": 0.9723000335693359, + "Acc.pillow": 0.590900001525879, + "Acc.screen door": 0.7138999938964844, + "Acc.stairway": 0.3472999954223633, + "Acc.river": 0.3970000076293945, + "Acc.bridge": 0.7704000091552734, + "Acc.bookcase": 0.49509998321533205, + "Acc.blind": 0.4122999954223633, + "Acc.coffee table": 0.8443000030517578, + "Acc.toilet": 0.8945999908447265, + "Acc.flower": 0.513400001525879, + "Acc.book": 0.5934000015258789, + "Acc.hill": 0.11800000190734863, + "Acc.bench": 0.5188999938964843, + "Acc.countertop": 0.6562999725341797, + "Acc.stove": 0.8151000213623046, + "Acc.palm": 0.6969000244140625, + "Acc.kitchen island": 0.7504000091552734, + "Acc.computer": 0.8222000122070312, + "Acc.swivel chair": 0.592599983215332, + "Acc.boat": 0.8380999755859375, + "Acc.bar": 0.6829000091552735, + "Acc.arcade machine": 0.4577000045776367, + "Acc.hovel": 0.519000015258789, + "Acc.bus": 0.9345999908447266, + "Acc.towel": 0.736500015258789, + "Acc.light": 0.49020000457763674, + "Acc.truck": 0.3609999847412109, + "Acc.tower": 0.40299999237060546, + "Acc.chandelier": 0.7427999877929687, + "Acc.awning": 0.3397000122070313, + "Acc.streetlight": 0.23719999313354492, + "Acc.booth": 0.5258000183105469, + "Acc.television receiver": 0.7412000274658204, + "Acc.airplane": 0.6705000305175781, + "Acc.dirt track": 0.1568000030517578, + "Acc.apparel": 0.4265999984741211, + "Acc.pole": 0.1647999954223633, + "Acc.land": 0.1425, + "Acc.bannister": 0.11890000343322754, + "Acc.escalator": 0.3363999938964844, + "Acc.ottoman": 0.5963999938964843, + "Acc.bottle": 0.2843000030517578, + "Acc.buffet": 0.7662999725341797, + "Acc.poster": 0.37340000152587893, + "Acc.stage": 0.4065999984741211, + "Acc.van": 0.4797999954223633, + "Acc.ship": 0.9330000305175781, + "Acc.fountain": 0.11340000152587891, + "Acc.conveyer belt": 0.9241999816894532, + "Acc.canopy": 0.3616999816894531, + "Acc.washer": 0.7208000183105469, + "Acc.plaything": 0.31549999237060544, + "Acc.swimming pool": 0.814800033569336, + "Acc.stool": 0.4147999954223633, + "Acc.barrel": 0.6454000091552734, + "Acc.basket": 
0.24489999771118165, + "Acc.waterfall": 0.8027999877929688, + "Acc.tent": 0.9933999633789062, + "Acc.bag": 0.15600000381469725, + "Acc.minibike": 0.705999984741211, + "Acc.cradle": 0.9744000244140625, + "Acc.oven": 0.44290000915527344, + "Acc.ball": 0.6405000305175781, + "Acc.food": 0.6025, + "Acc.step": 0.13609999656677246, + "Acc.tank": 0.5877000045776367, + "Acc.trade name": 0.22450000762939454, + "Acc.microwave": 0.41, + "Acc.pot": 0.4225, + "Acc.animal": 0.5988999938964844, + "Acc.bicycle": 0.7255000305175782, + "Acc.lake": 0.760199966430664, + "Acc.dishwasher": 0.5652000045776367, + "Acc.screen": 0.89, + "Acc.blanket": 0.10880000114440919, + "Acc.sculpture": 0.6727999877929688, + "Acc.hood": 0.5388000106811524, + "Acc.sconce": 0.3177000045776367, + "Acc.vase": 0.4413000106811523, + "Acc.traffic light": 0.4266999816894531, + "Acc.tray": 0.055300002098083494, + "Acc.ashcan": 0.44799999237060545, + "Acc.fan": 0.6830000305175781, + "Acc.pier": 0.500999984741211, + "Acc.crt screen": 0.11109999656677245, + "Acc.plate": 0.6125, + "Acc.monitor": 0.1356999969482422, + "Acc.bulletin board": 0.5513999938964844, + "Acc.shower": 0.04519999980926514, + "Acc.radiator": 0.6626999664306641, + "Acc.glass": 0.0696999979019165, + "Acc.clock": 0.3031999969482422, + "Acc.flag": 0.3540999984741211 + } + }, + "9": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8147, + "mIoU": 0.45049999999999996, + "mAcc": 0.5851, + "IoU.wall": 0.7548999786376953, + "IoU.building": 0.8201000213623046, + "IoU.sky": 0.9377999877929688, + "IoU.floor": 0.8030999755859375, + "IoU.tree": 0.7331999969482422, + "IoU.ceiling": 0.8238999938964844, + "IoU.road": 0.8201000213623046, + "IoU.bed ": 0.8647000122070313, + "IoU.windowpane": 0.6058000183105469, + "IoU.grass": 0.6405000305175781, + "IoU.cabinet": 0.5897999954223633, + "IoU.sidewalk": 0.624000015258789, + "IoU.person": 0.7687999725341796, + "IoU.earth": 0.34450000762939453, + "IoU.door": 0.4286000061035156, + "IoU.table": 0.5215999984741211, + "IoU.mountain": 0.5756999969482421, + "IoU.plant": 0.502400016784668, + "IoU.curtain": 0.7091999816894531, + "IoU.chair": 0.49840000152587893, + "IoU.car": 0.7966000366210938, + "IoU.water": 0.5338999938964843, + "IoU.painting": 0.658499984741211, + "IoU.sofa": 0.6204999923706055, + "IoU.shelf": 0.4240999984741211, + "IoU.house": 0.49959999084472656, + "IoU.sea": 0.5891999816894531, + "IoU.mirror": 0.6426000213623047, + "IoU.rug": 0.6563999938964844, + "IoU.field": 0.27260000228881837, + "IoU.armchair": 0.38979999542236327, + "IoU.seat": 0.5911000061035157, + "IoU.fence": 0.32349998474121094, + "IoU.desk": 0.419900016784668, + "IoU.rock": 0.4361000061035156, + "IoU.wardrobe": 0.5220000076293946, + "IoU.lamp": 0.5156000137329102, + "IoU.bathtub": 0.7591999816894531, + "IoU.railing": 0.3086000061035156, + "IoU.cushion": 0.5131000137329101, + "IoU.base": 0.27299999237060546, + "IoU.box": 0.21629999160766603, + "IoU.column": 0.43520000457763675, + "IoU.signboard": 0.3313999938964844, + "IoU.chest of drawers": 0.3597999954223633, + "IoU.counter": 0.2504999923706055, + "IoU.sand": 0.3522999954223633, + "IoU.sink": 0.6716999816894531, + "IoU.skyscraper": 0.6113999938964844, + "IoU.fireplace": 0.6887000274658203, + "IoU.refrigerator": 0.6886000061035156, + "IoU.grandstand": 0.4381999969482422, + "IoU.path": 0.2459000015258789, + "IoU.stairs": 0.25799999237060545, + "IoU.runway": 0.702699966430664, + "IoU.case": 0.5325, + "IoU.pool table": 
0.9147000122070312, + "IoU.pillow": 0.5291999816894531, + "IoU.screen door": 0.6297000122070312, + "IoU.stairway": 0.28569999694824216, + "IoU.river": 0.27430000305175783, + "IoU.bridge": 0.6029999923706054, + "IoU.bookcase": 0.32729999542236327, + "IoU.blind": 0.42400001525878905, + "IoU.coffee table": 0.5054999923706055, + "IoU.toilet": 0.7548999786376953, + "IoU.flower": 0.32290000915527345, + "IoU.book": 0.41400001525878904, + "IoU.hill": 0.07070000171661377, + "IoU.bench": 0.40630001068115235, + "IoU.countertop": 0.5270999908447266, + "IoU.stove": 0.6393000030517578, + "IoU.palm": 0.45869998931884765, + "IoU.kitchen island": 0.27040000915527346, + "IoU.computer": 0.6797000122070312, + "IoU.swivel chair": 0.3761999893188477, + "IoU.boat": 0.6481999969482422, + "IoU.bar": 0.44810001373291014, + "IoU.arcade machine": 0.5358000183105469, + "IoU.hovel": 0.46599998474121096, + "IoU.bus": 0.7659999847412109, + "IoU.towel": 0.5386999893188477, + "IoU.light": 0.4008000183105469, + "IoU.truck": 0.18600000381469728, + "IoU.tower": 0.345, + "IoU.chandelier": 0.5754000091552735, + "IoU.awning": 0.32619998931884764, + "IoU.streetlight": 0.17290000915527343, + "IoU.booth": 0.4241999816894531, + "IoU.television receiver": 0.6136999893188476, + "IoU.airplane": 0.5306999969482422, + "IoU.dirt track": 0.3231999969482422, + "IoU.apparel": 0.3268999862670898, + "IoU.pole": 0.135, + "IoU.land": 0.08399999618530274, + "IoU.bannister": 0.09329999923706055, + "IoU.escalator": 0.27760000228881837, + "IoU.ottoman": 0.44779998779296876, + "IoU.bottle": 0.33560001373291015, + "IoU.buffet": 0.6426000213623047, + "IoU.poster": 0.2902000045776367, + "IoU.stage": 0.15060000419616698, + "IoU.van": 0.4015000152587891, + "IoU.ship": 0.7163999938964843, + "IoU.fountain": 0.20829999923706055, + "IoU.conveyer belt": 0.6698000335693359, + "IoU.canopy": 0.2578000068664551, + "IoU.washer": 0.6812999725341797, + "IoU.plaything": 0.23100000381469726, + "IoU.swimming pool": 0.5786000061035156, + "IoU.stool": 0.23760000228881836, + "IoU.barrel": 0.3443000030517578, + "IoU.basket": 0.18629999160766603, + "IoU.waterfall": 0.5431000137329102, + "IoU.tent": 0.8263999938964843, + "IoU.bag": 0.09989999771118165, + "IoU.minibike": 0.514000015258789, + "IoU.cradle": 0.7758999633789062, + "IoU.oven": 0.16229999542236329, + "IoU.ball": 0.3895999908447266, + "IoU.food": 0.47119998931884766, + "IoU.step": 0.11199999809265136, + "IoU.tank": 0.4866999816894531, + "IoU.trade name": 0.26329999923706054, + "IoU.microwave": 0.34560001373291016, + "IoU.pot": 0.3572999954223633, + "IoU.animal": 0.5561999893188476, + "IoU.bicycle": 0.4734999847412109, + "IoU.lake": 0.3265999984741211, + "IoU.dishwasher": 0.45110000610351564, + "IoU.screen": 0.6302999877929687, + "IoU.blanket": 0.08560000419616699, + "IoU.sculpture": 0.47009998321533203, + "IoU.hood": 0.4915999984741211, + "IoU.sconce": 0.2745999908447266, + "IoU.vase": 0.2669000053405762, + "IoU.traffic light": 0.24520000457763672, + "IoU.tray": 0.044800000190734865, + "IoU.ashcan": 0.2730999946594238, + "IoU.fan": 0.4672000122070312, + "IoU.pier": 0.2115999984741211, + "IoU.crt screen": 0.030999999046325683, + "IoU.plate": 0.44, + "IoU.monitor": 0.019299999475479127, + "IoU.bulletin board": 0.3622999954223633, + "IoU.shower": 0.004600000083446503, + "IoU.radiator": 0.5572000122070313, + "IoU.glass": 0.07400000095367432, + "IoU.clock": 0.23629999160766602, + "IoU.flag": 0.3233000183105469, + "Acc.wall": 0.8644000244140625, + "Acc.building": 0.9205999755859375, + "Acc.sky": 0.9594000244140625, + 
"Acc.floor": 0.8829000091552734, + "Acc.tree": 0.8927999877929688, + "Acc.ceiling": 0.8876999664306641, + "Acc.road": 0.8855999755859375, + "Acc.bed ": 0.9570999908447265, + "Acc.windowpane": 0.7541000366210937, + "Acc.grass": 0.8041000366210938, + "Acc.cabinet": 0.7333999633789062, + "Acc.sidewalk": 0.8073999786376953, + "Acc.person": 0.912300033569336, + "Acc.earth": 0.47319999694824216, + "Acc.door": 0.575900001525879, + "Acc.table": 0.6768000030517578, + "Acc.mountain": 0.7411000061035157, + "Acc.plant": 0.5940999984741211, + "Acc.curtain": 0.8476000213623047, + "Acc.chair": 0.6630999755859375, + "Acc.car": 0.8969000244140625, + "Acc.water": 0.6583000183105469, + "Acc.painting": 0.859800033569336, + "Acc.sofa": 0.8163999938964843, + "Acc.shelf": 0.5768999862670898, + "Acc.house": 0.6336999893188476, + "Acc.sea": 0.8391000366210938, + "Acc.mirror": 0.7630999755859375, + "Acc.rug": 0.8015000152587891, + "Acc.field": 0.46130001068115234, + "Acc.armchair": 0.5527999877929688, + "Acc.seat": 0.7977999877929688, + "Acc.fence": 0.4354999923706055, + "Acc.desk": 0.7197000122070313, + "Acc.rock": 0.5845000076293946, + "Acc.wardrobe": 0.7051000213623047, + "Acc.lamp": 0.7141999816894531, + "Acc.bathtub": 0.8112999725341797, + "Acc.railing": 0.42759998321533205, + "Acc.cushion": 0.6422000122070313, + "Acc.base": 0.5147000122070312, + "Acc.box": 0.27600000381469725, + "Acc.column": 0.5443000030517579, + "Acc.signboard": 0.42939998626708986, + "Acc.chest of drawers": 0.5945000076293945, + "Acc.counter": 0.35720001220703124, + "Acc.sand": 0.6056999969482422, + "Acc.sink": 0.7548999786376953, + "Acc.skyscraper": 0.7733000183105468, + "Acc.fireplace": 0.885, + "Acc.refrigerator": 0.8626000213623047, + "Acc.grandstand": 0.6927999877929687, + "Acc.path": 0.3745000076293945, + "Acc.stairs": 0.36900001525878906, + "Acc.runway": 0.8731999969482422, + "Acc.case": 0.6994999694824219, + "Acc.pool table": 0.9733999633789062, + "Acc.pillow": 0.6309000015258789, + "Acc.screen door": 0.7426000213623047, + "Acc.stairway": 0.39720001220703127, + "Acc.river": 0.5465999984741211, + "Acc.bridge": 0.8344999694824219, + "Acc.bookcase": 0.48450000762939455, + "Acc.blind": 0.5027999877929688, + "Acc.coffee table": 0.8195999908447266, + "Acc.toilet": 0.9062000274658203, + "Acc.flower": 0.49080001831054687, + "Acc.book": 0.6170000076293946, + "Acc.hill": 0.12970000267028808, + "Acc.bench": 0.5095999908447265, + "Acc.countertop": 0.667300033569336, + "Acc.stove": 0.7688999938964843, + "Acc.palm": 0.7016000366210937, + "Acc.kitchen island": 0.650199966430664, + "Acc.computer": 0.8530000305175781, + "Acc.swivel chair": 0.4968000030517578, + "Acc.boat": 0.8512000274658204, + "Acc.bar": 0.5993999862670898, + "Acc.arcade machine": 0.7191000366210938, + "Acc.hovel": 0.5281999969482422, + "Acc.bus": 0.8912999725341797, + "Acc.towel": 0.7341999816894531, + "Acc.light": 0.48700000762939455, + "Acc.truck": 0.27430000305175783, + "Acc.tower": 0.5291999816894531, + "Acc.chandelier": 0.7222000122070312, + "Acc.awning": 0.3988999938964844, + "Acc.streetlight": 0.2484000015258789, + "Acc.booth": 0.5265999984741211, + "Acc.television receiver": 0.7602999877929687, + "Acc.airplane": 0.6512999725341797, + "Acc.dirt track": 0.4225, + "Acc.apparel": 0.4675, + "Acc.pole": 0.16920000076293945, + "Acc.land": 0.1418000030517578, + "Acc.bannister": 0.13840000152587892, + "Acc.escalator": 0.33419998168945314, + "Acc.ottoman": 0.6512000274658203, + "Acc.bottle": 0.5220000076293946, + "Acc.buffet": 0.7777999877929688, + "Acc.poster": 
0.34970001220703123, + "Acc.stage": 0.3659000015258789, + "Acc.van": 0.47389999389648435, + "Acc.ship": 0.7316999816894532, + "Acc.fountain": 0.2143000030517578, + "Acc.conveyer belt": 0.9294000244140626, + "Acc.canopy": 0.29989999771118164, + "Acc.washer": 0.7158000183105468, + "Acc.plaything": 0.3491999816894531, + "Acc.swimming pool": 0.8518000030517578, + "Acc.stool": 0.34619998931884766, + "Acc.barrel": 0.6458999633789062, + "Acc.basket": 0.24629999160766602, + "Acc.waterfall": 0.6433999633789063, + "Acc.tent": 0.9933000183105469, + "Acc.bag": 0.11140000343322753, + "Acc.minibike": 0.6265999984741211, + "Acc.cradle": 0.9705999755859375, + "Acc.oven": 0.43459999084472656, + "Acc.ball": 0.4704000091552734, + "Acc.food": 0.5549000167846679, + "Acc.step": 0.1381999969482422, + "Acc.tank": 0.5779999923706055, + "Acc.trade name": 0.3004999923706055, + "Acc.microwave": 0.3965999984741211, + "Acc.pot": 0.42119998931884767, + "Acc.animal": 0.5979000091552734, + "Acc.bicycle": 0.7522000122070313, + "Acc.lake": 0.4054000091552734, + "Acc.dishwasher": 0.6238999938964844, + "Acc.screen": 0.8726000213623046, + "Acc.blanket": 0.09699999809265136, + "Acc.sculpture": 0.6923999786376953, + "Acc.hood": 0.5563000106811523, + "Acc.sconce": 0.3686000061035156, + "Acc.vase": 0.41619998931884766, + "Acc.traffic light": 0.47259998321533203, + "Acc.tray": 0.06610000133514404, + "Acc.ashcan": 0.3611000061035156, + "Acc.fan": 0.726500015258789, + "Acc.pier": 0.5043000030517578, + "Acc.crt screen": 0.10699999809265137, + "Acc.plate": 0.5879999923706055, + "Acc.monitor": 0.020999999046325685, + "Acc.bulletin board": 0.5665999984741211, + "Acc.shower": 0.022899999618530273, + "Acc.radiator": 0.6712999725341797, + "Acc.glass": 0.08149999618530274, + "Acc.clock": 0.2715999984741211, + "Acc.flag": 0.3584000015258789 + } + }, + "10": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8129000000000001, + "mIoU": 0.4393, + "mAcc": 0.5682, + "IoU.wall": 0.7526000213623046, + "IoU.building": 0.8119000244140625, + "IoU.sky": 0.9395999908447266, + "IoU.floor": 0.8044999694824219, + "IoU.tree": 0.7336000061035156, + "IoU.ceiling": 0.8208000183105468, + "IoU.road": 0.8102999877929687, + "IoU.bed ": 0.8670999908447266, + "IoU.windowpane": 0.6063000106811524, + "IoU.grass": 0.6776000213623047, + "IoU.cabinet": 0.5786999893188477, + "IoU.sidewalk": 0.6172000122070312, + "IoU.person": 0.7715000152587891, + "IoU.earth": 0.3509999847412109, + "IoU.door": 0.445099983215332, + "IoU.table": 0.5431999969482422, + "IoU.mountain": 0.5884999847412109, + "IoU.plant": 0.4961000061035156, + "IoU.curtain": 0.7151999664306641, + "IoU.chair": 0.49639999389648437, + "IoU.car": 0.7958000183105469, + "IoU.water": 0.5065999984741211, + "IoU.painting": 0.652300033569336, + "IoU.sofa": 0.6190000152587891, + "IoU.shelf": 0.4191999816894531, + "IoU.house": 0.43490001678466794, + "IoU.sea": 0.5365000152587891, + "IoU.mirror": 0.6322000122070313, + "IoU.rug": 0.6698999786376953, + "IoU.field": 0.3093000030517578, + "IoU.armchair": 0.40560001373291016, + "IoU.seat": 0.6097000122070313, + "IoU.fence": 0.3579999923706055, + "IoU.desk": 0.4079000091552734, + "IoU.rock": 0.39549999237060546, + "IoU.wardrobe": 0.47169998168945315, + "IoU.lamp": 0.512400016784668, + "IoU.bathtub": 0.7726999664306641, + "IoU.railing": 0.3114999961853027, + "IoU.cushion": 0.5072000122070313, + "IoU.base": 0.2695000076293945, + "IoU.box": 0.2125, + "IoU.column": 0.4438999938964844, + 
"IoU.signboard": 0.33560001373291015, + "IoU.chest of drawers": 0.33680000305175783, + "IoU.counter": 0.2567000007629395, + "IoU.sand": 0.3691999816894531, + "IoU.sink": 0.6338000106811523, + "IoU.skyscraper": 0.5081000137329101, + "IoU.fireplace": 0.6906999969482421, + "IoU.refrigerator": 0.7201000213623047, + "IoU.grandstand": 0.417400016784668, + "IoU.path": 0.23600000381469727, + "IoU.stairs": 0.27260000228881837, + "IoU.runway": 0.7137000274658203, + "IoU.case": 0.4886000061035156, + "IoU.pool table": 0.8731999969482422, + "IoU.pillow": 0.5152000045776367, + "IoU.screen door": 0.6359000015258789, + "IoU.stairway": 0.2731999969482422, + "IoU.river": 0.1568000030517578, + "IoU.bridge": 0.592599983215332, + "IoU.bookcase": 0.3209000015258789, + "IoU.blind": 0.37900001525878907, + "IoU.coffee table": 0.5145999908447265, + "IoU.toilet": 0.7705999755859375, + "IoU.flower": 0.34709999084472654, + "IoU.book": 0.41560001373291017, + "IoU.hill": 0.09460000038146972, + "IoU.bench": 0.45610000610351564, + "IoU.countertop": 0.5711000061035156, + "IoU.stove": 0.6481999969482422, + "IoU.palm": 0.4590999984741211, + "IoU.kitchen island": 0.322599983215332, + "IoU.computer": 0.5650999832153321, + "IoU.swivel chair": 0.4168000030517578, + "IoU.boat": 0.6468000030517578, + "IoU.bar": 0.44720001220703126, + "IoU.arcade machine": 0.3352000045776367, + "IoU.hovel": 0.3818999862670898, + "IoU.bus": 0.7819000244140625, + "IoU.towel": 0.5304999923706055, + "IoU.light": 0.4166999816894531, + "IoU.truck": 0.1347000026702881, + "IoU.tower": 0.34110000610351565, + "IoU.chandelier": 0.5668000030517578, + "IoU.awning": 0.3704999923706055, + "IoU.streetlight": 0.17540000915527343, + "IoU.booth": 0.4179000091552734, + "IoU.television receiver": 0.6086999893188476, + "IoU.airplane": 0.5938000106811523, + "IoU.dirt track": 0.39669998168945314, + "IoU.apparel": 0.2996999931335449, + "IoU.pole": 0.18389999389648437, + "IoU.land": 0.05300000190734863, + "IoU.bannister": 0.09829999923706055, + "IoU.escalator": 0.33029998779296876, + "IoU.ottoman": 0.45360000610351564, + "IoU.bottle": 0.13279999732971193, + "IoU.buffet": 0.5797999954223633, + "IoU.poster": 0.26780000686645505, + "IoU.stage": 0.12439999580383301, + "IoU.van": 0.38869998931884764, + "IoU.ship": 0.6794999694824219, + "IoU.fountain": 0.21200000762939453, + "IoU.conveyer belt": 0.5256000137329102, + "IoU.canopy": 0.21780000686645506, + "IoU.washer": 0.6283000183105468, + "IoU.plaything": 0.2306999969482422, + "IoU.swimming pool": 0.6133000183105469, + "IoU.stool": 0.2184000015258789, + "IoU.barrel": 0.07739999771118164, + "IoU.basket": 0.1706999969482422, + "IoU.waterfall": 0.5986000061035156, + "IoU.tent": 0.8276000213623047, + "IoU.bag": 0.08109999656677246, + "IoU.minibike": 0.6075, + "IoU.cradle": 0.772300033569336, + "IoU.oven": 0.17079999923706055, + "IoU.ball": 0.3909000015258789, + "IoU.food": 0.5445999908447265, + "IoU.step": 0.12369999885559083, + "IoU.tank": 0.47830001831054686, + "IoU.trade name": 0.203700008392334, + "IoU.microwave": 0.32540000915527345, + "IoU.pot": 0.3388999938964844, + "IoU.animal": 0.5431000137329102, + "IoU.bicycle": 0.5213999938964844, + "IoU.lake": 0.02109999895095825, + "IoU.dishwasher": 0.4284999847412109, + "IoU.screen": 0.5670999908447265, + "IoU.blanket": 0.08329999923706055, + "IoU.sculpture": 0.4956999969482422, + "IoU.hood": 0.42779998779296874, + "IoU.sconce": 0.2798999977111816, + "IoU.vase": 0.26059999465942385, + "IoU.traffic light": 0.2603000068664551, + "IoU.tray": 0.06550000190734863, + "IoU.ashcan": 
0.2531999969482422, + "IoU.fan": 0.44310001373291014, + "IoU.pier": 0.27299999237060546, + "IoU.crt screen": 0.014099999666213989, + "IoU.plate": 0.4134999847412109, + "IoU.monitor": 0.02190000057220459, + "IoU.bulletin board": 0.29219999313354494, + "IoU.shower": 0.015, + "IoU.radiator": 0.5349000167846679, + "IoU.glass": 0.05809999942779541, + "IoU.clock": 0.18209999084472656, + "IoU.flag": 0.3609999847412109, + "Acc.wall": 0.8586000061035156, + "Acc.building": 0.9229000091552735, + "Acc.sky": 0.9641000366210938, + "Acc.floor": 0.8897000122070312, + "Acc.tree": 0.8838999938964843, + "Acc.ceiling": 0.8786000061035156, + "Acc.road": 0.8783999633789062, + "Acc.bed ": 0.9569999694824218, + "Acc.windowpane": 0.7598000335693359, + "Acc.grass": 0.8454000091552735, + "Acc.cabinet": 0.7102999877929688, + "Acc.sidewalk": 0.8213999938964843, + "Acc.person": 0.910199966430664, + "Acc.earth": 0.4772999954223633, + "Acc.door": 0.6172999954223632, + "Acc.table": 0.6955999755859374, + "Acc.mountain": 0.736500015258789, + "Acc.plant": 0.5891999816894531, + "Acc.curtain": 0.8618000030517579, + "Acc.chair": 0.6513999938964844, + "Acc.car": 0.9158000183105469, + "Acc.water": 0.6497000122070312, + "Acc.painting": 0.8656999969482422, + "Acc.sofa": 0.7837000274658203, + "Acc.shelf": 0.6011000061035157, + "Acc.house": 0.5379000091552735, + "Acc.sea": 0.7933999633789063, + "Acc.mirror": 0.7666000366210938, + "Acc.rug": 0.7915000152587891, + "Acc.field": 0.4865999984741211, + "Acc.armchair": 0.6211999893188477, + "Acc.seat": 0.8147000122070313, + "Acc.fence": 0.5059000015258789, + "Acc.desk": 0.7384999847412109, + "Acc.rock": 0.6022999954223632, + "Acc.wardrobe": 0.6777999877929688, + "Acc.lamp": 0.6919999694824219, + "Acc.bathtub": 0.829800033569336, + "Acc.railing": 0.4518000030517578, + "Acc.cushion": 0.634900016784668, + "Acc.base": 0.5047999954223633, + "Acc.box": 0.27680000305175784, + "Acc.column": 0.548499984741211, + "Acc.signboard": 0.4570999908447266, + "Acc.chest of drawers": 0.5438000106811524, + "Acc.counter": 0.36139999389648436, + "Acc.sand": 0.5695999908447266, + "Acc.sink": 0.7348999786376953, + "Acc.skyscraper": 0.6412999725341797, + "Acc.fireplace": 0.9009999847412109, + "Acc.refrigerator": 0.8491000366210938, + "Acc.grandstand": 0.6597000122070312, + "Acc.path": 0.3240999984741211, + "Acc.stairs": 0.37759998321533206, + "Acc.runway": 0.8622000122070312, + "Acc.case": 0.5954999923706055, + "Acc.pool table": 0.97, + "Acc.pillow": 0.6090000152587891, + "Acc.screen door": 0.7211000061035157, + "Acc.stairway": 0.39369998931884764, + "Acc.river": 0.29219999313354494, + "Acc.bridge": 0.8038999938964844, + "Acc.bookcase": 0.49840000152587893, + "Acc.blind": 0.4322999954223633, + "Acc.coffee table": 0.8462999725341797, + "Acc.toilet": 0.8923999786376953, + "Acc.flower": 0.5327999877929688, + "Acc.book": 0.6234000015258789, + "Acc.hill": 0.17920000076293946, + "Acc.bench": 0.5347000122070312, + "Acc.countertop": 0.7237999725341797, + "Acc.stove": 0.8072000122070313, + "Acc.palm": 0.6944999694824219, + "Acc.kitchen island": 0.7118000030517578, + "Acc.computer": 0.7218000030517578, + "Acc.swivel chair": 0.5916999816894531, + "Acc.boat": 0.8112000274658203, + "Acc.bar": 0.5975, + "Acc.arcade machine": 0.4022000122070313, + "Acc.hovel": 0.4947999954223633, + "Acc.bus": 0.8941999816894531, + "Acc.towel": 0.6933000183105469, + "Acc.light": 0.52, + "Acc.truck": 0.193799991607666, + "Acc.tower": 0.5070000076293946, + "Acc.chandelier": 0.7680000305175781, + "Acc.awning": 0.46939998626708984, + 
"Acc.streetlight": 0.24219999313354493, + "Acc.booth": 0.5636999893188477, + "Acc.television receiver": 0.7833000183105469, + "Acc.airplane": 0.7077999877929687, + "Acc.dirt track": 0.45279998779296876, + "Acc.apparel": 0.42130001068115236, + "Acc.pole": 0.23670000076293946, + "Acc.land": 0.09350000381469727, + "Acc.bannister": 0.13890000343322753, + "Acc.escalator": 0.42009998321533204, + "Acc.ottoman": 0.6556999969482422, + "Acc.bottle": 0.15979999542236328, + "Acc.buffet": 0.7294000244140625, + "Acc.poster": 0.3090999984741211, + "Acc.stage": 0.3196999931335449, + "Acc.van": 0.4622999954223633, + "Acc.ship": 0.7255999755859375, + "Acc.fountain": 0.21729999542236328, + "Acc.conveyer belt": 0.7288999938964844, + "Acc.canopy": 0.2738999938964844, + "Acc.washer": 0.6912000274658203, + "Acc.plaything": 0.3890999984741211, + "Acc.swimming pool": 0.8245999908447266, + "Acc.stool": 0.3111000061035156, + "Acc.barrel": 0.33279998779296877, + "Acc.basket": 0.21540000915527344, + "Acc.waterfall": 0.685199966430664, + "Acc.tent": 0.9941999816894531, + "Acc.bag": 0.09630000114440918, + "Acc.minibike": 0.754000015258789, + "Acc.cradle": 0.9638999938964844, + "Acc.oven": 0.429900016784668, + "Acc.ball": 0.5038999938964843, + "Acc.food": 0.6238999938964844, + "Acc.step": 0.14609999656677247, + "Acc.tank": 0.539099998474121, + "Acc.trade name": 0.22739999771118163, + "Acc.microwave": 0.35889999389648436, + "Acc.pot": 0.40189998626708984, + "Acc.animal": 0.6106000137329102, + "Acc.bicycle": 0.6880999755859375, + "Acc.lake": 0.03150000095367431, + "Acc.dishwasher": 0.5881000137329102, + "Acc.screen": 0.7941000366210937, + "Acc.blanket": 0.0934000015258789, + "Acc.sculpture": 0.668499984741211, + "Acc.hood": 0.48439998626708985, + "Acc.sconce": 0.3534999847412109, + "Acc.vase": 0.40369998931884765, + "Acc.traffic light": 0.43099998474121093, + "Acc.tray": 0.11609999656677246, + "Acc.ashcan": 0.37470001220703125, + "Acc.fan": 0.7088999938964844, + "Acc.pier": 0.48869998931884767, + "Acc.crt screen": 0.04429999828338623, + "Acc.plate": 0.5572999954223633, + "Acc.monitor": 0.03200000047683716, + "Acc.bulletin board": 0.45540000915527346, + "Acc.shower": 0.05239999771118164, + "Acc.radiator": 0.635, + "Acc.glass": 0.06309999942779541, + "Acc.clock": 0.20729999542236327, + "Acc.flag": 0.40759998321533203 + } + }, + "11": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8158, + "mIoU": 0.4539, + "mAcc": 0.5865, + "IoU.wall": 0.7583000183105468, + "IoU.building": 0.816500015258789, + "IoU.sky": 0.9393000030517578, + "IoU.floor": 0.8038999938964844, + "IoU.tree": 0.7383999633789062, + "IoU.ceiling": 0.8273999786376953, + "IoU.road": 0.8151000213623046, + "IoU.bed ": 0.8631999969482422, + "IoU.windowpane": 0.6097999954223633, + "IoU.grass": 0.6658999633789062, + "IoU.cabinet": 0.5718000030517578, + "IoU.sidewalk": 0.6286999893188476, + "IoU.person": 0.760199966430664, + "IoU.earth": 0.3268000030517578, + "IoU.door": 0.460099983215332, + "IoU.table": 0.5402999877929687, + "IoU.mountain": 0.5763000106811523, + "IoU.plant": 0.4940999984741211, + "IoU.curtain": 0.7148999786376953, + "IoU.chair": 0.5047999954223633, + "IoU.car": 0.7976000213623047, + "IoU.water": 0.5304999923706055, + "IoU.painting": 0.663499984741211, + "IoU.sofa": 0.6234000015258789, + "IoU.shelf": 0.4165999984741211, + "IoU.house": 0.49939998626708987, + "IoU.sea": 0.6063000106811524, + "IoU.mirror": 0.6470999908447266, + "IoU.rug": 0.6530000305175782, + 
"IoU.field": 0.3097999954223633, + "IoU.armchair": 0.4265999984741211, + "IoU.seat": 0.5829999923706055, + "IoU.fence": 0.40950000762939454, + "IoU.desk": 0.44009998321533206, + "IoU.rock": 0.38299999237060545, + "IoU.wardrobe": 0.5029000091552734, + "IoU.lamp": 0.5238999938964843, + "IoU.bathtub": 0.7675, + "IoU.railing": 0.30200000762939455, + "IoU.cushion": 0.49650001525878906, + "IoU.base": 0.27510000228881837, + "IoU.box": 0.21209999084472655, + "IoU.column": 0.4363000106811523, + "IoU.signboard": 0.34240001678466797, + "IoU.chest of drawers": 0.35509998321533204, + "IoU.counter": 0.28829999923706057, + "IoU.sand": 0.3743000030517578, + "IoU.sink": 0.6602999877929687, + "IoU.skyscraper": 0.5281000137329102, + "IoU.fireplace": 0.6994999694824219, + "IoU.refrigerator": 0.7038999938964844, + "IoU.grandstand": 0.41720001220703123, + "IoU.path": 0.2246999931335449, + "IoU.stairs": 0.28450000762939454, + "IoU.runway": 0.7281999969482422, + "IoU.case": 0.5006000137329102, + "IoU.pool table": 0.8923999786376953, + "IoU.pillow": 0.54, + "IoU.screen door": 0.6444000244140625, + "IoU.stairway": 0.268799991607666, + "IoU.river": 0.22270000457763672, + "IoU.bridge": 0.6858000183105468, + "IoU.bookcase": 0.3225, + "IoU.blind": 0.3984000015258789, + "IoU.coffee table": 0.5447999954223632, + "IoU.toilet": 0.8079000091552735, + "IoU.flower": 0.3390000152587891, + "IoU.book": 0.41369998931884766, + "IoU.hill": 0.08350000381469727, + "IoU.bench": 0.4247999954223633, + "IoU.countertop": 0.5486000061035157, + "IoU.stove": 0.6770999908447266, + "IoU.palm": 0.48060001373291017, + "IoU.kitchen island": 0.3072999954223633, + "IoU.computer": 0.5995999908447266, + "IoU.swivel chair": 0.44479999542236326, + "IoU.boat": 0.5993000030517578, + "IoU.bar": 0.4588999938964844, + "IoU.arcade machine": 0.37799999237060544, + "IoU.hovel": 0.4358000183105469, + "IoU.bus": 0.6929000091552734, + "IoU.towel": 0.5520000076293945, + "IoU.light": 0.39740001678466796, + "IoU.truck": 0.18069999694824218, + "IoU.tower": 0.24860000610351562, + "IoU.chandelier": 0.5822999954223633, + "IoU.awning": 0.2790999984741211, + "IoU.streetlight": 0.18520000457763672, + "IoU.booth": 0.3370999908447266, + "IoU.television receiver": 0.5884000015258789, + "IoU.airplane": 0.659000015258789, + "IoU.dirt track": 0.21690000534057619, + "IoU.apparel": 0.3057999992370605, + "IoU.pole": 0.25440000534057616, + "IoU.land": 0.0671999979019165, + "IoU.bannister": 0.11909999847412109, + "IoU.escalator": 0.26049999237060545, + "IoU.ottoman": 0.48119998931884767, + "IoU.bottle": 0.2802000045776367, + "IoU.buffet": 0.5441999816894532, + "IoU.poster": 0.225, + "IoU.stage": 0.12279999732971192, + "IoU.van": 0.4068000030517578, + "IoU.ship": 0.7229000091552734, + "IoU.fountain": 0.18629999160766603, + "IoU.conveyer belt": 0.5613999938964844, + "IoU.canopy": 0.19760000228881835, + "IoU.washer": 0.6966999816894531, + "IoU.plaything": 0.22840000152587892, + "IoU.swimming pool": 0.6541999816894531, + "IoU.stool": 0.27829999923706056, + "IoU.barrel": 0.47369998931884766, + "IoU.basket": 0.20969999313354493, + "IoU.waterfall": 0.6665000152587891, + "IoU.tent": 0.8908000183105469, + "IoU.bag": 0.13649999618530273, + "IoU.minibike": 0.5779999923706055, + "IoU.cradle": 0.7579000091552734, + "IoU.oven": 0.15739999771118163, + "IoU.ball": 0.36279998779296874, + "IoU.food": 0.5493000030517579, + "IoU.step": 0.1434000015258789, + "IoU.tank": 0.4697000122070312, + "IoU.trade name": 0.25940000534057617, + "IoU.microwave": 0.33310001373291015, + "IoU.pot": 0.3302000045776367, + 
"IoU.animal": 0.5715999984741211, + "IoU.bicycle": 0.5008000183105469, + "IoU.lake": 0.55, + "IoU.dishwasher": 0.51, + "IoU.screen": 0.5379999923706055, + "IoU.blanket": 0.06050000190734863, + "IoU.sculpture": 0.4356999969482422, + "IoU.hood": 0.48900001525878906, + "IoU.sconce": 0.35520000457763673, + "IoU.vase": 0.25159999847412107, + "IoU.traffic light": 0.25879999160766604, + "IoU.tray": 0.05599999904632568, + "IoU.ashcan": 0.2798999977111816, + "IoU.fan": 0.4822999954223633, + "IoU.pier": 0.3856999969482422, + "IoU.crt screen": 9.999999776482581e-05, + "IoU.plate": 0.41819999694824217, + "IoU.monitor": 0.023399999141693117, + "IoU.bulletin board": 0.29959999084472655, + "IoU.shower": 0.006100000143051148, + "IoU.radiator": 0.5706999969482421, + "IoU.glass": 0.07079999923706054, + "IoU.clock": 0.2745000076293945, + "IoU.flag": 0.31629999160766603, + "Acc.wall": 0.8662000274658204, + "Acc.building": 0.9162000274658203, + "Acc.sky": 0.9648999786376953, + "Acc.floor": 0.8894999694824218, + "Acc.tree": 0.8836000061035156, + "Acc.ceiling": 0.8879000091552735, + "Acc.road": 0.8856999969482422, + "Acc.bed ": 0.9561000061035156, + "Acc.windowpane": 0.7583999633789062, + "Acc.grass": 0.8179000091552734, + "Acc.cabinet": 0.6904000091552734, + "Acc.sidewalk": 0.8105000305175781, + "Acc.person": 0.9254000091552734, + "Acc.earth": 0.43090000152587893, + "Acc.door": 0.6238999938964844, + "Acc.table": 0.7026000213623047, + "Acc.mountain": 0.7297000122070313, + "Acc.plant": 0.590900001525879, + "Acc.curtain": 0.8620999908447265, + "Acc.chair": 0.6611000061035156, + "Acc.car": 0.919000015258789, + "Acc.water": 0.6512999725341797, + "Acc.painting": 0.847699966430664, + "Acc.sofa": 0.797300033569336, + "Acc.shelf": 0.615099983215332, + "Acc.house": 0.6175, + "Acc.sea": 0.8712999725341797, + "Acc.mirror": 0.7829000091552735, + "Acc.rug": 0.7043000030517578, + "Acc.field": 0.535, + "Acc.armchair": 0.6408999633789062, + "Acc.seat": 0.8261000061035156, + "Acc.fence": 0.5863000106811523, + "Acc.desk": 0.7008999633789063, + "Acc.rock": 0.6356000137329102, + "Acc.wardrobe": 0.7034999847412109, + "Acc.lamp": 0.6755999755859375, + "Acc.bathtub": 0.8197000122070313, + "Acc.railing": 0.4506999969482422, + "Acc.cushion": 0.6140000152587891, + "Acc.base": 0.4459000015258789, + "Acc.box": 0.2568000030517578, + "Acc.column": 0.5602000045776367, + "Acc.signboard": 0.4590000152587891, + "Acc.chest of drawers": 0.6145000076293945, + "Acc.counter": 0.3813000106811523, + "Acc.sand": 0.5095999908447265, + "Acc.sink": 0.7555000305175781, + "Acc.skyscraper": 0.6718000030517578, + "Acc.fireplace": 0.909000015258789, + "Acc.refrigerator": 0.8669999694824219, + "Acc.grandstand": 0.6483000183105468, + "Acc.path": 0.3085000038146973, + "Acc.stairs": 0.40099998474121096, + "Acc.runway": 0.9480000305175781, + "Acc.case": 0.7097000122070313, + "Acc.pool table": 0.9716000366210937, + "Acc.pillow": 0.6730999755859375, + "Acc.screen door": 0.7277999877929687, + "Acc.stairway": 0.3715999984741211, + "Acc.river": 0.43970001220703125, + "Acc.bridge": 0.8501000213623047, + "Acc.bookcase": 0.5538999938964844, + "Acc.blind": 0.4647000122070313, + "Acc.coffee table": 0.7966999816894531, + "Acc.toilet": 0.890199966430664, + "Acc.flower": 0.5206000137329102, + "Acc.book": 0.6072000122070312, + "Acc.hill": 0.1815999984741211, + "Acc.bench": 0.5161000061035156, + "Acc.countertop": 0.7029000091552734, + "Acc.stove": 0.8130999755859375, + "Acc.palm": 0.6977999877929687, + "Acc.kitchen island": 0.7280999755859375, + "Acc.computer": 0.7330999755859375, 
+ "Acc.swivel chair": 0.6354000091552734, + "Acc.boat": 0.7188999938964844, + "Acc.bar": 0.6190000152587891, + "Acc.arcade machine": 0.4216999816894531, + "Acc.hovel": 0.5006999969482422, + "Acc.bus": 0.9069999694824219, + "Acc.towel": 0.7108000183105468, + "Acc.light": 0.4640999984741211, + "Acc.truck": 0.2570000076293945, + "Acc.tower": 0.37790000915527344, + "Acc.chandelier": 0.7630000305175781, + "Acc.awning": 0.34889999389648435, + "Acc.streetlight": 0.2331999969482422, + "Acc.booth": 0.562400016784668, + "Acc.television receiver": 0.7769000244140625, + "Acc.airplane": 0.7641999816894531, + "Acc.dirt track": 0.31559999465942384, + "Acc.apparel": 0.4379999923706055, + "Acc.pole": 0.38459999084472657, + "Acc.land": 0.14619999885559082, + "Acc.bannister": 0.17700000762939452, + "Acc.escalator": 0.3497999954223633, + "Acc.ottoman": 0.6393999862670898, + "Acc.bottle": 0.4040999984741211, + "Acc.buffet": 0.6956999969482421, + "Acc.poster": 0.2825, + "Acc.stage": 0.32540000915527345, + "Acc.van": 0.47959999084472654, + "Acc.ship": 0.8818000030517578, + "Acc.fountain": 0.20420000076293945, + "Acc.conveyer belt": 0.8898999786376953, + "Acc.canopy": 0.26860000610351564, + "Acc.washer": 0.705, + "Acc.plaything": 0.35650001525878905, + "Acc.swimming pool": 0.8498999786376953, + "Acc.stool": 0.3933000183105469, + "Acc.barrel": 0.615, + "Acc.basket": 0.28399999618530275, + "Acc.waterfall": 0.7738999938964843, + "Acc.tent": 0.9886000061035156, + "Acc.bag": 0.17420000076293946, + "Acc.minibike": 0.7466999816894532, + "Acc.cradle": 0.9712000274658203, + "Acc.oven": 0.4141999816894531, + "Acc.ball": 0.45689998626708983, + "Acc.food": 0.6423999786376953, + "Acc.step": 0.18129999160766602, + "Acc.tank": 0.5756999969482421, + "Acc.trade name": 0.3060000038146973, + "Acc.microwave": 0.3761999893188477, + "Acc.pot": 0.3915999984741211, + "Acc.animal": 0.6165000152587891, + "Acc.bicycle": 0.7262999725341797, + "Acc.lake": 0.6347999954223633, + "Acc.dishwasher": 0.6366999816894531, + "Acc.screen": 0.8798999786376953, + "Acc.blanket": 0.06539999961853027, + "Acc.sculpture": 0.599900016784668, + "Acc.hood": 0.5408000183105469, + "Acc.sconce": 0.46950000762939453, + "Acc.vase": 0.38349998474121094, + "Acc.traffic light": 0.41439998626708985, + "Acc.tray": 0.10140000343322754, + "Acc.ashcan": 0.40630001068115235, + "Acc.fan": 0.6856999969482422, + "Acc.pier": 0.6333000183105468, + "Acc.crt screen": 0.00029999999329447744, + "Acc.plate": 0.5399000167846679, + "Acc.monitor": 0.03349999904632568, + "Acc.bulletin board": 0.4315999984741211, + "Acc.shower": 0.029000000953674318, + "Acc.radiator": 0.7162000274658203, + "Acc.glass": 0.0784000015258789, + "Acc.clock": 0.32479999542236326, + "Acc.flag": 0.34400001525878904 + } + }, + "12": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8173999999999999, + "mIoU": 0.4481, + "mAcc": 0.5479999999999999, + "IoU.wall": 0.757300033569336, + "IoU.building": 0.8162000274658203, + "IoU.sky": 0.9380000305175781, + "IoU.floor": 0.7969000244140625, + "IoU.tree": 0.7323999786376953, + "IoU.ceiling": 0.8230999755859375, + "IoU.road": 0.8173000335693359, + "IoU.bed ": 0.8605999755859375, + "IoU.windowpane": 0.606500015258789, + "IoU.grass": 0.6543000030517578, + "IoU.cabinet": 0.581500015258789, + "IoU.sidewalk": 0.6284000015258789, + "IoU.person": 0.7731999969482422, + "IoU.earth": 0.3597999954223633, + "IoU.door": 0.4558000183105469, + "IoU.table": 0.5483000183105469, + "IoU.mountain": 
0.5781000137329102, + "IoU.plant": 0.48700000762939455, + "IoU.curtain": 0.7269000244140625, + "IoU.chair": 0.5075, + "IoU.car": 0.8213999938964843, + "IoU.water": 0.5291999816894531, + "IoU.painting": 0.6773999786376953, + "IoU.sofa": 0.6175, + "IoU.shelf": 0.41439998626708985, + "IoU.house": 0.46680000305175784, + "IoU.sea": 0.6129000091552734, + "IoU.mirror": 0.6466000366210938, + "IoU.rug": 0.6306000137329102, + "IoU.field": 0.3, + "IoU.armchair": 0.41049999237060547, + "IoU.seat": 0.5981000137329101, + "IoU.fence": 0.37849998474121094, + "IoU.desk": 0.46669998168945315, + "IoU.rock": 0.4140999984741211, + "IoU.wardrobe": 0.5011999893188477, + "IoU.lamp": 0.5288999938964843, + "IoU.bathtub": 0.7390000152587891, + "IoU.railing": 0.29399999618530276, + "IoU.cushion": 0.5061999893188477, + "IoU.base": 0.2513999938964844, + "IoU.box": 0.22430000305175782, + "IoU.column": 0.43560001373291013, + "IoU.signboard": 0.3283000183105469, + "IoU.chest of drawers": 0.33099998474121095, + "IoU.counter": 0.2520000076293945, + "IoU.sand": 0.39560001373291015, + "IoU.sink": 0.6631999969482422, + "IoU.skyscraper": 0.534099998474121, + "IoU.fireplace": 0.7151999664306641, + "IoU.refrigerator": 0.7562999725341797, + "IoU.grandstand": 0.42450000762939455, + "IoU.path": 0.21200000762939453, + "IoU.stairs": 0.2811000061035156, + "IoU.runway": 0.6469999694824219, + "IoU.case": 0.48889999389648436, + "IoU.pool table": 0.9175, + "IoU.pillow": 0.5088000106811523, + "IoU.screen door": 0.6498999786376953, + "IoU.stairway": 0.3046999931335449, + "IoU.river": 0.2059000015258789, + "IoU.bridge": 0.7137000274658203, + "IoU.bookcase": 0.3516999816894531, + "IoU.blind": 0.38560001373291014, + "IoU.coffee table": 0.5691999816894531, + "IoU.toilet": 0.8166000366210937, + "IoU.flower": 0.3189999961853027, + "IoU.book": 0.41939998626708985, + "IoU.hill": 0.07599999904632568, + "IoU.bench": 0.44369998931884763, + "IoU.countertop": 0.5113999938964844, + "IoU.stove": 0.6905999755859376, + "IoU.palm": 0.42380001068115236, + "IoU.kitchen island": 0.2830999946594238, + "IoU.computer": 0.6486000061035156, + "IoU.swivel chair": 0.41049999237060547, + "IoU.boat": 0.7123000335693359, + "IoU.bar": 0.4402000045776367, + "IoU.arcade machine": 0.3375, + "IoU.hovel": 0.40700000762939453, + "IoU.bus": 0.8234999847412109, + "IoU.towel": 0.5456999969482422, + "IoU.light": 0.2645000076293945, + "IoU.truck": 0.21069999694824218, + "IoU.tower": 0.2955999946594238, + "IoU.chandelier": 0.5697999954223633, + "IoU.awning": 0.2240999984741211, + "IoU.streetlight": 0.14039999961853028, + "IoU.booth": 0.4102999877929687, + "IoU.television receiver": 0.5983000183105469, + "IoU.airplane": 0.6275, + "IoU.dirt track": 0.18420000076293946, + "IoU.apparel": 0.3036000061035156, + "IoU.pole": 0.24030000686645508, + "IoU.land": 0.016699999570846558, + "IoU.bannister": 0.08079999923706055, + "IoU.escalator": 0.20510000228881836, + "IoU.ottoman": 0.48380001068115236, + "IoU.bottle": 0.29319999694824217, + "IoU.buffet": 0.4061000061035156, + "IoU.poster": 0.22430000305175782, + "IoU.stage": 0.19690000534057617, + "IoU.van": 0.38610000610351564, + "IoU.ship": 0.7844000244140625, + "IoU.fountain": 0.20059999465942382, + "IoU.conveyer belt": 0.5933000183105469, + "IoU.canopy": 0.20799999237060546, + "IoU.washer": 0.7173999786376953, + "IoU.plaything": 0.2440999984741211, + "IoU.swimming pool": 0.5940999984741211, + "IoU.stool": 0.24, + "IoU.barrel": 0.5379000091552735, + "IoU.basket": 0.19940000534057617, + "IoU.waterfall": 0.6936000061035156, + "IoU.tent": 
0.9216000366210938, + "IoU.bag": 0.08619999885559082, + "IoU.minibike": 0.5429000091552735, + "IoU.cradle": 0.7402999877929688, + "IoU.oven": 0.20399999618530273, + "IoU.ball": 0.37279998779296875, + "IoU.food": 0.5370000076293945, + "IoU.step": 0.11670000076293946, + "IoU.tank": 0.45, + "IoU.trade name": 0.17389999389648436, + "IoU.microwave": 0.3215000152587891, + "IoU.pot": 0.34080001831054685, + "IoU.animal": 0.5458000183105469, + "IoU.bicycle": 0.4622000122070313, + "IoU.lake": 0.27780000686645506, + "IoU.dishwasher": 0.5320999908447266, + "IoU.screen": 0.6686000061035157, + "IoU.blanket": 0.03859999895095825, + "IoU.sculpture": 0.40599998474121096, + "IoU.hood": 0.3890999984741211, + "IoU.sconce": 0.29989999771118164, + "IoU.vase": 0.2675, + "IoU.traffic light": 0.22280000686645507, + "IoU.tray": 0.020899999141693115, + "IoU.ashcan": 0.3188999938964844, + "IoU.fan": 0.472400016784668, + "IoU.pier": 0.31729999542236326, + "IoU.crt screen": 0.0, + "IoU.plate": 0.4229999923706055, + "IoU.monitor": 0.04320000171661377, + "IoU.bulletin board": 0.34669998168945315, + "IoU.shower": 0.0009000000357627869, + "IoU.radiator": 0.5084000015258789, + "IoU.glass": 0.06050000190734863, + "IoU.clock": 0.2306999969482422, + "IoU.flag": 0.31540000915527344, + "Acc.wall": 0.8948999786376953, + "Acc.building": 0.9269999694824219, + "Acc.sky": 0.9775, + "Acc.floor": 0.9087000274658203, + "Acc.tree": 0.8747000122070312, + "Acc.ceiling": 0.8937999725341796, + "Acc.road": 0.9008999633789062, + "Acc.bed ": 0.9493000030517578, + "Acc.windowpane": 0.7333000183105469, + "Acc.grass": 0.8076000213623047, + "Acc.cabinet": 0.7293000030517578, + "Acc.sidewalk": 0.7880000305175782, + "Acc.person": 0.8948999786376953, + "Acc.earth": 0.5252000045776367, + "Acc.door": 0.6225, + "Acc.table": 0.7155999755859375, + "Acc.mountain": 0.7020999908447265, + "Acc.plant": 0.5731999969482422, + "Acc.curtain": 0.8220999908447265, + "Acc.chair": 0.6412000274658203, + "Acc.car": 0.8983000183105468, + "Acc.water": 0.6612999725341797, + "Acc.painting": 0.8304000091552735, + "Acc.sofa": 0.7833000183105469, + "Acc.shelf": 0.6045999908447266, + "Acc.house": 0.5640999984741211, + "Acc.sea": 0.8308999633789063, + "Acc.mirror": 0.7197000122070313, + "Acc.rug": 0.6675, + "Acc.field": 0.504000015258789, + "Acc.armchair": 0.6231999969482422, + "Acc.seat": 0.7962000274658203, + "Acc.fence": 0.5122999954223633, + "Acc.desk": 0.6744000244140625, + "Acc.rock": 0.615099983215332, + "Acc.wardrobe": 0.6555000305175781, + "Acc.lamp": 0.6320999908447266, + "Acc.bathtub": 0.775199966430664, + "Acc.railing": 0.43560001373291013, + "Acc.cushion": 0.6006000137329102, + "Acc.base": 0.42580001831054687, + "Acc.box": 0.29690000534057615, + "Acc.column": 0.5472999954223633, + "Acc.signboard": 0.4127000045776367, + "Acc.chest of drawers": 0.5804999923706055, + "Acc.counter": 0.337599983215332, + "Acc.sand": 0.4941999816894531, + "Acc.sink": 0.7037999725341797, + "Acc.skyscraper": 0.6234999847412109, + "Acc.fireplace": 0.8858999633789062, + "Acc.refrigerator": 0.8405000305175782, + "Acc.grandstand": 0.6493000030517578, + "Acc.path": 0.28319999694824216, + "Acc.stairs": 0.36889999389648437, + "Acc.runway": 0.8080000305175781, + "Acc.case": 0.6154999923706055, + "Acc.pool table": 0.9529000091552734, + "Acc.pillow": 0.5929999923706055, + "Acc.screen door": 0.6920999908447265, + "Acc.stairway": 0.41470001220703123, + "Acc.river": 0.45680000305175783, + "Acc.bridge": 0.8347000122070313, + "Acc.bookcase": 0.5700999832153321, + "Acc.blind": 0.4284000015258789, + 
"Acc.coffee table": 0.7576000213623046, + "Acc.toilet": 0.875999984741211, + "Acc.flower": 0.4418000030517578, + "Acc.book": 0.542599983215332, + "Acc.hill": 0.14710000038146973, + "Acc.bench": 0.5222999954223633, + "Acc.countertop": 0.675, + "Acc.stove": 0.7530000305175781, + "Acc.palm": 0.547400016784668, + "Acc.kitchen island": 0.47950000762939454, + "Acc.computer": 0.7547000122070312, + "Acc.swivel chair": 0.5058000183105469, + "Acc.boat": 0.8230000305175781, + "Acc.bar": 0.5763000106811523, + "Acc.arcade machine": 0.36950000762939456, + "Acc.hovel": 0.4291999816894531, + "Acc.bus": 0.8976999664306641, + "Acc.towel": 0.6605999755859375, + "Acc.light": 0.27780000686645506, + "Acc.truck": 0.27760000228881837, + "Acc.tower": 0.40119998931884765, + "Acc.chandelier": 0.69, + "Acc.awning": 0.24440000534057618, + "Acc.streetlight": 0.1534000015258789, + "Acc.booth": 0.5056000137329102, + "Acc.television receiver": 0.7211000061035157, + "Acc.airplane": 0.6769999694824219, + "Acc.dirt track": 0.26, + "Acc.apparel": 0.43, + "Acc.pole": 0.3252000045776367, + "Acc.land": 0.025099999904632568, + "Acc.bannister": 0.10140000343322754, + "Acc.escalator": 0.23950000762939452, + "Acc.ottoman": 0.6136000061035156, + "Acc.bottle": 0.4259000015258789, + "Acc.buffet": 0.46740001678466797, + "Acc.poster": 0.3168000030517578, + "Acc.stage": 0.30450000762939455, + "Acc.van": 0.4625, + "Acc.ship": 0.8325, + "Acc.fountain": 0.2068000030517578, + "Acc.conveyer belt": 0.749000015258789, + "Acc.canopy": 0.2928000068664551, + "Acc.washer": 0.7266999816894532, + "Acc.plaything": 0.3940999984741211, + "Acc.swimming pool": 0.7355000305175782, + "Acc.stool": 0.2843000030517578, + "Acc.barrel": 0.6054999923706055, + "Acc.basket": 0.23739999771118164, + "Acc.waterfall": 0.7566999816894531, + "Acc.tent": 0.9883000183105469, + "Acc.bag": 0.09819999694824219, + "Acc.minibike": 0.6340999984741211, + "Acc.cradle": 0.9594999694824219, + "Acc.oven": 0.5677000045776367, + "Acc.ball": 0.4506999969482422, + "Acc.food": 0.6272999954223633, + "Acc.step": 0.13239999771118163, + "Acc.tank": 0.5170999908447266, + "Acc.trade name": 0.18489999771118165, + "Acc.microwave": 0.3359000015258789, + "Acc.pot": 0.38279998779296875, + "Acc.animal": 0.5718999862670898, + "Acc.bicycle": 0.6762000274658203, + "Acc.lake": 0.2902000045776367, + "Acc.dishwasher": 0.6216999816894532, + "Acc.screen": 0.9073999786376953, + "Acc.blanket": 0.04190000057220459, + "Acc.sculpture": 0.5611000061035156, + "Acc.hood": 0.4109000015258789, + "Acc.sconce": 0.3465999984741211, + "Acc.vase": 0.34549999237060547, + "Acc.traffic light": 0.2721999931335449, + "Acc.tray": 0.027100000381469726, + "Acc.ashcan": 0.47630001068115235, + "Acc.fan": 0.5647000122070313, + "Acc.pier": 0.4079000091552734, + "Acc.crt screen": 0.0, + "Acc.plate": 0.5090999984741211, + "Acc.monitor": 0.05320000171661377, + "Acc.bulletin board": 0.419900016784668, + "Acc.shower": 0.001599999964237213, + "Acc.radiator": 0.5706999969482421, + "Acc.glass": 0.06429999828338623, + "Acc.clock": 0.26680000305175783, + "Acc.flag": 0.3320999908447266 + } + }, + "14": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8175, + "mIoU": 0.4535, + "mAcc": 0.5695, + "IoU.wall": 0.759000015258789, + "IoU.building": 0.8169000244140625, + "IoU.sky": 0.9373999786376953, + "IoU.floor": 0.8040000152587891, + "IoU.tree": 0.7370999908447265, + "IoU.ceiling": 0.826500015258789, + "IoU.road": 0.8169999694824219, + "IoU.bed ": 
0.8656999969482422, + "IoU.windowpane": 0.6034000015258789, + "IoU.grass": 0.665199966430664, + "IoU.cabinet": 0.5866999816894531, + "IoU.sidewalk": 0.6304000091552734, + "IoU.person": 0.7693000030517578, + "IoU.earth": 0.3234000015258789, + "IoU.door": 0.4552000045776367, + "IoU.table": 0.5513000106811523, + "IoU.mountain": 0.5659999847412109, + "IoU.plant": 0.49540000915527344, + "IoU.curtain": 0.7262999725341797, + "IoU.chair": 0.5086000061035156, + "IoU.car": 0.816500015258789, + "IoU.water": 0.5233000183105468, + "IoU.painting": 0.6744999694824219, + "IoU.sofa": 0.6116999816894532, + "IoU.shelf": 0.42389999389648436, + "IoU.house": 0.4743000030517578, + "IoU.sea": 0.6518000030517578, + "IoU.mirror": 0.653499984741211, + "IoU.rug": 0.6615000152587891, + "IoU.field": 0.3022999954223633, + "IoU.armchair": 0.39470001220703127, + "IoU.seat": 0.5958000183105469, + "IoU.fence": 0.36759998321533205, + "IoU.desk": 0.46619998931884765, + "IoU.rock": 0.4038999938964844, + "IoU.wardrobe": 0.4520999908447266, + "IoU.lamp": 0.534000015258789, + "IoU.bathtub": 0.7981999969482422, + "IoU.railing": 0.30420000076293946, + "IoU.cushion": 0.5213999938964844, + "IoU.base": 0.26309999465942385, + "IoU.box": 0.21190000534057618, + "IoU.column": 0.42369998931884767, + "IoU.signboard": 0.3356999969482422, + "IoU.chest of drawers": 0.35279998779296873, + "IoU.counter": 0.2929999923706055, + "IoU.sand": 0.38119998931884763, + "IoU.sink": 0.6643000030517578, + "IoU.skyscraper": 0.5216999816894531, + "IoU.fireplace": 0.7122000122070312, + "IoU.refrigerator": 0.7355000305175782, + "IoU.grandstand": 0.4361999893188477, + "IoU.path": 0.22450000762939454, + "IoU.stairs": 0.2528000068664551, + "IoU.runway": 0.6406999969482422, + "IoU.case": 0.5118000030517578, + "IoU.pool table": 0.9105999755859375, + "IoU.pillow": 0.5513000106811523, + "IoU.screen door": 0.665, + "IoU.stairway": 0.30510000228881834, + "IoU.river": 0.20420000076293945, + "IoU.bridge": 0.6819999694824219, + "IoU.bookcase": 0.3447999954223633, + "IoU.blind": 0.3895000076293945, + "IoU.coffee table": 0.5875, + "IoU.toilet": 0.8106999969482422, + "IoU.flower": 0.3277000045776367, + "IoU.book": 0.4356999969482422, + "IoU.hill": 0.07300000190734864, + "IoU.bench": 0.4008000183105469, + "IoU.countertop": 0.5695999908447266, + "IoU.stove": 0.7187999725341797, + "IoU.palm": 0.45720001220703127, + "IoU.kitchen island": 0.34700000762939454, + "IoU.computer": 0.6022000122070312, + "IoU.swivel chair": 0.45279998779296876, + "IoU.boat": 0.6809999847412109, + "IoU.bar": 0.48959999084472655, + "IoU.arcade machine": 0.34939998626708985, + "IoU.hovel": 0.4558000183105469, + "IoU.bus": 0.8327999877929687, + "IoU.towel": 0.5529999923706055, + "IoU.light": 0.32810001373291015, + "IoU.truck": 0.2115999984741211, + "IoU.tower": 0.3277000045776367, + "IoU.chandelier": 0.5884000015258789, + "IoU.awning": 0.3568000030517578, + "IoU.streetlight": 0.146899995803833, + "IoU.booth": 0.3843000030517578, + "IoU.television receiver": 0.6193000030517578, + "IoU.airplane": 0.6188999938964844, + "IoU.dirt track": 0.09310000419616699, + "IoU.apparel": 0.34189998626708984, + "IoU.pole": 0.1768000030517578, + "IoU.land": 0.015199999809265136, + "IoU.bannister": 0.07889999866485596, + "IoU.escalator": 0.2698999977111816, + "IoU.ottoman": 0.47139999389648435, + "IoU.bottle": 0.33, + "IoU.buffet": 0.3743000030517578, + "IoU.poster": 0.21379999160766602, + "IoU.stage": 0.23090000152587892, + "IoU.van": 0.3963999938964844, + "IoU.ship": 0.6423999786376953, + "IoU.fountain": 0.20079999923706054, 
+ "IoU.conveyer belt": 0.7183999633789062, + "IoU.canopy": 0.21329999923706056, + "IoU.washer": 0.7112999725341796, + "IoU.plaything": 0.24969999313354493, + "IoU.swimming pool": 0.5627000045776367, + "IoU.stool": 0.2340999984741211, + "IoU.barrel": 0.5495000076293945, + "IoU.basket": 0.205, + "IoU.waterfall": 0.540099983215332, + "IoU.tent": 0.910199966430664, + "IoU.bag": 0.08960000038146973, + "IoU.minibike": 0.5797999954223633, + "IoU.cradle": 0.7727999877929688, + "IoU.oven": 0.1947999954223633, + "IoU.ball": 0.36459999084472655, + "IoU.food": 0.5586999893188477, + "IoU.step": 0.12510000228881835, + "IoU.tank": 0.48270000457763673, + "IoU.trade name": 0.23530000686645508, + "IoU.microwave": 0.39130001068115233, + "IoU.pot": 0.35200000762939454, + "IoU.animal": 0.5886000061035156, + "IoU.bicycle": 0.4856000137329102, + "IoU.lake": 0.155, + "IoU.dishwasher": 0.5463000106811523, + "IoU.screen": 0.6197999954223633, + "IoU.blanket": 0.12739999771118163, + "IoU.sculpture": 0.43900001525878907, + "IoU.hood": 0.48270000457763673, + "IoU.sconce": 0.34060001373291016, + "IoU.vase": 0.24450000762939453, + "IoU.traffic light": 0.2631999969482422, + "IoU.tray": 0.018600000143051146, + "IoU.ashcan": 0.31829999923706054, + "IoU.fan": 0.4931000137329102, + "IoU.pier": 0.3193000030517578, + "IoU.crt screen": 0.0, + "IoU.plate": 0.39549999237060546, + "IoU.monitor": 0.026800000667572023, + "IoU.bulletin board": 0.31489999771118166, + "IoU.shower": 0.004300000071525574, + "IoU.radiator": 0.5125, + "IoU.glass": 0.06800000190734863, + "IoU.clock": 0.21809999465942384, + "IoU.flag": 0.34369998931884765, + "Acc.wall": 0.8804000091552734, + "Acc.building": 0.9163999938964844, + "Acc.sky": 0.9769000244140625, + "Acc.floor": 0.9062999725341797, + "Acc.tree": 0.8666999816894532, + "Acc.ceiling": 0.9194000244140625, + "Acc.road": 0.8976999664306641, + "Acc.bed ": 0.9447000122070313, + "Acc.windowpane": 0.7551000213623047, + "Acc.grass": 0.8056999969482422, + "Acc.cabinet": 0.6916999816894531, + "Acc.sidewalk": 0.7756999969482422, + "Acc.person": 0.9170999908447266, + "Acc.earth": 0.44159999847412107, + "Acc.door": 0.6179999923706054, + "Acc.table": 0.7138999938964844, + "Acc.mountain": 0.7026000213623047, + "Acc.plant": 0.6174000167846679, + "Acc.curtain": 0.8402999877929688, + "Acc.chair": 0.6334999847412109, + "Acc.car": 0.9101000213623047, + "Acc.water": 0.685199966430664, + "Acc.painting": 0.8498999786376953, + "Acc.sofa": 0.7676999664306641, + "Acc.shelf": 0.6263000106811524, + "Acc.house": 0.6041999816894531, + "Acc.sea": 0.8865000152587891, + "Acc.mirror": 0.7405999755859375, + "Acc.rug": 0.7269999694824218, + "Acc.field": 0.5406999969482422, + "Acc.armchair": 0.6195999908447266, + "Acc.seat": 0.8016000366210938, + "Acc.fence": 0.5081000137329101, + "Acc.desk": 0.6875, + "Acc.rock": 0.6448999786376953, + "Acc.wardrobe": 0.5931000137329101, + "Acc.lamp": 0.6569999694824219, + "Acc.bathtub": 0.8573000335693359, + "Acc.railing": 0.4577000045776367, + "Acc.cushion": 0.6730999755859375, + "Acc.base": 0.42450000762939455, + "Acc.box": 0.29649999618530276, + "Acc.column": 0.5381999969482422, + "Acc.signboard": 0.4588999938964844, + "Acc.chest of drawers": 0.5513000106811523, + "Acc.counter": 0.40180000305175784, + "Acc.sand": 0.5533000183105469, + "Acc.sink": 0.7537000274658203, + "Acc.skyscraper": 0.6125, + "Acc.fireplace": 0.8947000122070312, + "Acc.refrigerator": 0.8356999969482422, + "Acc.grandstand": 0.6968000030517578, + "Acc.path": 0.30840000152587893, + "Acc.stairs": 0.3325, + "Acc.runway": 
0.8227999877929687, + "Acc.case": 0.6505999755859375, + "Acc.pool table": 0.96, + "Acc.pillow": 0.6412999725341797, + "Acc.screen door": 0.7556999969482422, + "Acc.stairway": 0.4175, + "Acc.river": 0.37909999847412107, + "Acc.bridge": 0.8441000366210938, + "Acc.bookcase": 0.543499984741211, + "Acc.blind": 0.46180000305175783, + "Acc.coffee table": 0.7952999877929687, + "Acc.toilet": 0.8823999786376953, + "Acc.flower": 0.5381999969482422, + "Acc.book": 0.5979999923706054, + "Acc.hill": 0.145600004196167, + "Acc.bench": 0.47889999389648436, + "Acc.countertop": 0.7258999633789063, + "Acc.stove": 0.7937000274658204, + "Acc.palm": 0.6630000305175782, + "Acc.kitchen island": 0.6823999786376953, + "Acc.computer": 0.732699966430664, + "Acc.swivel chair": 0.6131000137329101, + "Acc.boat": 0.8437999725341797, + "Acc.bar": 0.6354000091552734, + "Acc.arcade machine": 0.3875, + "Acc.hovel": 0.5161000061035156, + "Acc.bus": 0.905, + "Acc.towel": 0.6980999755859375, + "Acc.light": 0.35509998321533204, + "Acc.truck": 0.28389999389648435, + "Acc.tower": 0.46360000610351565, + "Acc.chandelier": 0.7508000183105469, + "Acc.awning": 0.425, + "Acc.streetlight": 0.16520000457763673, + "Acc.booth": 0.44209999084472656, + "Acc.television receiver": 0.7295999908447266, + "Acc.airplane": 0.6977999877929687, + "Acc.dirt track": 0.17379999160766602, + "Acc.apparel": 0.47470001220703123, + "Acc.pole": 0.22120000839233397, + "Acc.land": 0.020099999904632567, + "Acc.bannister": 0.10369999885559082, + "Acc.escalator": 0.305, + "Acc.ottoman": 0.6243000030517578, + "Acc.bottle": 0.5690999984741211, + "Acc.buffet": 0.42720001220703124, + "Acc.poster": 0.3135000038146973, + "Acc.stage": 0.36770000457763674, + "Acc.van": 0.49689998626708987, + "Acc.ship": 0.7544000244140625, + "Acc.fountain": 0.20879999160766602, + "Acc.conveyer belt": 0.8725, + "Acc.canopy": 0.3160000038146973, + "Acc.washer": 0.7306999969482422, + "Acc.plaything": 0.4179999923706055, + "Acc.swimming pool": 0.7615000152587891, + "Acc.stool": 0.2934000015258789, + "Acc.barrel": 0.6256999969482422, + "Acc.basket": 0.2570000076293945, + "Acc.waterfall": 0.6054000091552735, + "Acc.tent": 0.9904000091552735, + "Acc.bag": 0.1075, + "Acc.minibike": 0.7119999694824218, + "Acc.cradle": 0.9636000061035156, + "Acc.oven": 0.4968000030517578, + "Acc.ball": 0.44009998321533206, + "Acc.food": 0.7045999908447266, + "Acc.step": 0.15170000076293946, + "Acc.tank": 0.6079000091552734, + "Acc.trade name": 0.2790999984741211, + "Acc.microwave": 0.43599998474121093, + "Acc.pot": 0.4065999984741211, + "Acc.animal": 0.6297000122070312, + "Acc.bicycle": 0.7205999755859375, + "Acc.lake": 0.16120000839233398, + "Acc.dishwasher": 0.6309999847412109, + "Acc.screen": 0.9237000274658204, + "Acc.blanket": 0.1397000026702881, + "Acc.sculpture": 0.6034000015258789, + "Acc.hood": 0.5266999816894531, + "Acc.sconce": 0.4191999816894531, + "Acc.vase": 0.332599983215332, + "Acc.traffic light": 0.4456999969482422, + "Acc.tray": 0.025899999141693116, + "Acc.ashcan": 0.4525, + "Acc.fan": 0.6436000061035156, + "Acc.pier": 0.4936999893188477, + "Acc.crt screen": 0.0, + "Acc.plate": 0.5159999847412109, + "Acc.monitor": 0.03789999961853027, + "Acc.bulletin board": 0.42060001373291017, + "Acc.shower": 0.012100000381469727, + "Acc.radiator": 0.586500015258789, + "Acc.glass": 0.07150000095367431, + "Acc.clock": 0.24799999237060547, + "Acc.flag": 0.39990001678466797 + } + }, + "15": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + 
"metric": { + "aAcc": 0.8183, + "mIoU": 0.4549, + "mAcc": 0.5711999999999999, + "IoU.wall": 0.7577999877929688, + "IoU.building": 0.8212999725341796, + "IoU.sky": 0.9373000335693359, + "IoU.floor": 0.8019000244140625, + "IoU.tree": 0.7379000091552734, + "IoU.ceiling": 0.8277999877929687, + "IoU.road": 0.8193000030517578, + "IoU.bed ": 0.8648999786376953, + "IoU.windowpane": 0.6016999816894532, + "IoU.grass": 0.6565000152587891, + "IoU.cabinet": 0.5861000061035156, + "IoU.sidewalk": 0.6286999893188476, + "IoU.person": 0.7711000061035156, + "IoU.earth": 0.33130001068115233, + "IoU.door": 0.460099983215332, + "IoU.table": 0.5536000061035157, + "IoU.mountain": 0.5718000030517578, + "IoU.plant": 0.5022000122070313, + "IoU.curtain": 0.7269000244140625, + "IoU.chair": 0.5115999984741211, + "IoU.car": 0.8187999725341797, + "IoU.water": 0.5302999877929687, + "IoU.painting": 0.6788999938964844, + "IoU.sofa": 0.6265000152587891, + "IoU.shelf": 0.41619998931884766, + "IoU.house": 0.4815999984741211, + "IoU.sea": 0.6416000366210938, + "IoU.mirror": 0.639000015258789, + "IoU.rug": 0.6647000122070312, + "IoU.field": 0.2834000015258789, + "IoU.armchair": 0.4336999893188477, + "IoU.seat": 0.5915999984741211, + "IoU.fence": 0.3675, + "IoU.desk": 0.45619998931884764, + "IoU.rock": 0.40380001068115234, + "IoU.wardrobe": 0.44470001220703126, + "IoU.lamp": 0.5404999923706054, + "IoU.bathtub": 0.7494000244140625, + "IoU.railing": 0.3115999984741211, + "IoU.cushion": 0.5345000076293945, + "IoU.base": 0.2734000015258789, + "IoU.box": 0.23139999389648438, + "IoU.column": 0.4402000045776367, + "IoU.signboard": 0.33299999237060546, + "IoU.chest of drawers": 0.3477000045776367, + "IoU.counter": 0.28280000686645507, + "IoU.sand": 0.38880001068115233, + "IoU.sink": 0.6683000183105469, + "IoU.skyscraper": 0.562400016784668, + "IoU.fireplace": 0.7118000030517578, + "IoU.refrigerator": 0.7473000335693359, + "IoU.grandstand": 0.47169998168945315, + "IoU.path": 0.21319999694824218, + "IoU.stairs": 0.26479999542236327, + "IoU.runway": 0.6593000030517578, + "IoU.case": 0.5043000030517578, + "IoU.pool table": 0.9116000366210938, + "IoU.pillow": 0.5549000167846679, + "IoU.screen door": 0.610999984741211, + "IoU.stairway": 0.28149999618530275, + "IoU.river": 0.2315999984741211, + "IoU.bridge": 0.6766000366210938, + "IoU.bookcase": 0.3370000076293945, + "IoU.blind": 0.4075, + "IoU.coffee table": 0.585, + "IoU.toilet": 0.8137000274658203, + "IoU.flower": 0.35520000457763673, + "IoU.book": 0.4377000045776367, + "IoU.hill": 0.07960000038146972, + "IoU.bench": 0.4040999984741211, + "IoU.countertop": 0.5872999954223633, + "IoU.stove": 0.7002999877929688, + "IoU.palm": 0.46919998168945315, + "IoU.kitchen island": 0.35189998626708985, + "IoU.computer": 0.6074000167846679, + "IoU.swivel chair": 0.4256999969482422, + "IoU.boat": 0.7141999816894531, + "IoU.bar": 0.5115999984741211, + "IoU.arcade machine": 0.3547999954223633, + "IoU.hovel": 0.5404999923706054, + "IoU.bus": 0.7811000061035156, + "IoU.towel": 0.5427000045776367, + "IoU.light": 0.32790000915527345, + "IoU.truck": 0.1881999969482422, + "IoU.tower": 0.29100000381469726, + "IoU.chandelier": 0.5920999908447265, + "IoU.awning": 0.3763999938964844, + "IoU.streetlight": 0.155, + "IoU.booth": 0.3504000091552734, + "IoU.television receiver": 0.6284000015258789, + "IoU.airplane": 0.5988999938964844, + "IoU.dirt track": 0.05449999809265137, + "IoU.apparel": 0.33680000305175783, + "IoU.pole": 0.18770000457763672, + "IoU.land": 0.03640000104904175, + "IoU.bannister": 0.08430000305175782, + 
"IoU.escalator": 0.2628000068664551, + "IoU.ottoman": 0.479900016784668, + "IoU.bottle": 0.34099998474121096, + "IoU.buffet": 0.37810001373291013, + "IoU.poster": 0.20170000076293945, + "IoU.stage": 0.18459999084472656, + "IoU.van": 0.42020000457763673, + "IoU.ship": 0.59, + "IoU.fountain": 0.19969999313354492, + "IoU.conveyer belt": 0.5940000152587891, + "IoU.canopy": 0.22860000610351563, + "IoU.washer": 0.7118000030517578, + "IoU.plaything": 0.23399999618530273, + "IoU.swimming pool": 0.5947999954223633, + "IoU.stool": 0.22020000457763672, + "IoU.barrel": 0.4854000091552734, + "IoU.basket": 0.24459999084472656, + "IoU.waterfall": 0.5625, + "IoU.tent": 0.9166000366210938, + "IoU.bag": 0.10760000228881836, + "IoU.minibike": 0.5477999877929688, + "IoU.cradle": 0.7695999908447265, + "IoU.oven": 0.1706999969482422, + "IoU.ball": 0.36970001220703125, + "IoU.food": 0.5513999938964844, + "IoU.step": 0.1256999969482422, + "IoU.tank": 0.49759998321533205, + "IoU.trade name": 0.23950000762939452, + "IoU.microwave": 0.3579999923706055, + "IoU.pot": 0.34189998626708984, + "IoU.animal": 0.5997000122070313, + "IoU.bicycle": 0.4684000015258789, + "IoU.lake": 0.3958000183105469, + "IoU.dishwasher": 0.5377000045776367, + "IoU.screen": 0.6165999984741211, + "IoU.blanket": 0.12479999542236328, + "IoU.sculpture": 0.4804000091552734, + "IoU.hood": 0.4859999847412109, + "IoU.sconce": 0.3422000122070312, + "IoU.vase": 0.24520000457763672, + "IoU.traffic light": 0.2602000045776367, + "IoU.tray": 0.016100000143051147, + "IoU.ashcan": 0.33049999237060546, + "IoU.fan": 0.49520000457763674, + "IoU.pier": 0.3156999969482422, + "IoU.crt screen": 0.0, + "IoU.plate": 0.39189998626708983, + "IoU.monitor": 0.05289999961853027, + "IoU.bulletin board": 0.31329999923706053, + "IoU.shower": 0.0037000000476837156, + "IoU.radiator": 0.5365000152587891, + "IoU.glass": 0.07329999923706054, + "IoU.clock": 0.2128000068664551, + "IoU.flag": 0.3420999908447266, + "Acc.wall": 0.8776999664306641, + "Acc.building": 0.9194000244140625, + "Acc.sky": 0.9769000244140625, + "Acc.floor": 0.9019000244140625, + "Acc.tree": 0.8676000213623047, + "Acc.ceiling": 0.9195999908447265, + "Acc.road": 0.9019999694824219, + "Acc.bed ": 0.9451000213623046, + "Acc.windowpane": 0.7537000274658203, + "Acc.grass": 0.8055000305175781, + "Acc.cabinet": 0.6941000366210938, + "Acc.sidewalk": 0.7672000122070313, + "Acc.person": 0.9170999908447266, + "Acc.earth": 0.4490999984741211, + "Acc.door": 0.6309999847412109, + "Acc.table": 0.7154000091552735, + "Acc.mountain": 0.7108999633789063, + "Acc.plant": 0.6272000122070313, + "Acc.curtain": 0.836500015258789, + "Acc.chair": 0.6402999877929687, + "Acc.car": 0.9108000183105469, + "Acc.water": 0.6759999847412109, + "Acc.painting": 0.8590000152587891, + "Acc.sofa": 0.7923999786376953, + "Acc.shelf": 0.6215999984741211, + "Acc.house": 0.6197000122070313, + "Acc.sea": 0.8908999633789062, + "Acc.mirror": 0.7175, + "Acc.rug": 0.7341999816894531, + "Acc.field": 0.5054000091552734, + "Acc.armchair": 0.6370000076293946, + "Acc.seat": 0.7952999877929687, + "Acc.fence": 0.4916999816894531, + "Acc.desk": 0.678499984741211, + "Acc.rock": 0.6408000183105469, + "Acc.wardrobe": 0.596500015258789, + "Acc.lamp": 0.6659999847412109, + "Acc.bathtub": 0.8194000244140625, + "Acc.railing": 0.4834999847412109, + "Acc.cushion": 0.6841000366210938, + "Acc.base": 0.43599998474121093, + "Acc.box": 0.30989999771118165, + "Acc.column": 0.5640000152587891, + "Acc.signboard": 0.4509000015258789, + "Acc.chest of drawers": 0.5463000106811523, + 
"Acc.counter": 0.40029998779296877, + "Acc.sand": 0.5583000183105469, + "Acc.sink": 0.7566000366210938, + "Acc.skyscraper": 0.6497000122070312, + "Acc.fireplace": 0.8997000122070312, + "Acc.refrigerator": 0.8325, + "Acc.grandstand": 0.7087999725341797, + "Acc.path": 0.30389999389648437, + "Acc.stairs": 0.34810001373291016, + "Acc.runway": 0.8484999847412109, + "Acc.case": 0.6526000213623047, + "Acc.pool table": 0.9602999877929688, + "Acc.pillow": 0.645199966430664, + "Acc.screen door": 0.7105999755859375, + "Acc.stairway": 0.39169998168945314, + "Acc.river": 0.4290999984741211, + "Acc.bridge": 0.8393000030517578, + "Acc.bookcase": 0.5413000106811523, + "Acc.blind": 0.4725, + "Acc.coffee table": 0.802300033569336, + "Acc.toilet": 0.875, + "Acc.flower": 0.5559999847412109, + "Acc.book": 0.6043999862670898, + "Acc.hill": 0.15470000267028808, + "Acc.bench": 0.4906999969482422, + "Acc.countertop": 0.7337999725341797, + "Acc.stove": 0.7808999633789062, + "Acc.palm": 0.6604000091552734, + "Acc.kitchen island": 0.6920999908447265, + "Acc.computer": 0.7476000213623046, + "Acc.swivel chair": 0.5645000076293946, + "Acc.boat": 0.8391999816894531, + "Acc.bar": 0.6587000274658203, + "Acc.arcade machine": 0.39540000915527346, + "Acc.hovel": 0.6127000045776367, + "Acc.bus": 0.9093000030517578, + "Acc.towel": 0.7118000030517578, + "Acc.light": 0.355, + "Acc.truck": 0.25760000228881835, + "Acc.tower": 0.41, + "Acc.chandelier": 0.7563999938964844, + "Acc.awning": 0.4431999969482422, + "Acc.streetlight": 0.17540000915527343, + "Acc.booth": 0.41619998931884766, + "Acc.television receiver": 0.7466000366210938, + "Acc.airplane": 0.6711000061035156, + "Acc.dirt track": 0.09420000076293945, + "Acc.apparel": 0.47299999237060547, + "Acc.pole": 0.24049999237060546, + "Acc.land": 0.05760000228881836, + "Acc.bannister": 0.108100004196167, + "Acc.escalator": 0.3053000068664551, + "Acc.ottoman": 0.6125, + "Acc.bottle": 0.5890999984741211, + "Acc.buffet": 0.4259000015258789, + "Acc.poster": 0.2645999908447266, + "Acc.stage": 0.3215999984741211, + "Acc.van": 0.5109000015258789, + "Acc.ship": 0.6793000030517579, + "Acc.fountain": 0.20579999923706055, + "Acc.conveyer belt": 0.7919999694824219, + "Acc.canopy": 0.33119998931884764, + "Acc.washer": 0.7233999633789062, + "Acc.plaything": 0.38060001373291014, + "Acc.swimming pool": 0.7880999755859375, + "Acc.stool": 0.288700008392334, + "Acc.barrel": 0.6093999862670898, + "Acc.basket": 0.31420000076293947, + "Acc.waterfall": 0.6455999755859375, + "Acc.tent": 0.9915000152587891, + "Acc.bag": 0.12680000305175781, + "Acc.minibike": 0.6555999755859375, + "Acc.cradle": 0.9648999786376953, + "Acc.oven": 0.46439998626708984, + "Acc.ball": 0.4197999954223633, + "Acc.food": 0.7069000244140625, + "Acc.step": 0.15140000343322754, + "Acc.tank": 0.6283000183105468, + "Acc.trade name": 0.28319999694824216, + "Acc.microwave": 0.3984000015258789, + "Acc.pot": 0.4022999954223633, + "Acc.animal": 0.6423999786376953, + "Acc.bicycle": 0.7280999755859375, + "Acc.lake": 0.4111000061035156, + "Acc.dishwasher": 0.6070999908447265, + "Acc.screen": 0.9195999908447265, + "Acc.blanket": 0.1381999969482422, + "Acc.sculpture": 0.6138999938964844, + "Acc.hood": 0.5347999954223632, + "Acc.sconce": 0.41319999694824217, + "Acc.vase": 0.33430000305175783, + "Acc.traffic light": 0.4306999969482422, + "Acc.tray": 0.022200000286102296, + "Acc.ashcan": 0.47299999237060547, + "Acc.fan": 0.6504000091552734, + "Acc.pier": 0.484900016784668, + "Acc.crt screen": 0.0, + "Acc.plate": 0.5034999847412109, + "Acc.monitor": 
0.07380000114440918, + "Acc.bulletin board": 0.44619998931884763, + "Acc.shower": 0.009300000071525573, + "Acc.radiator": 0.6013000106811524, + "Acc.glass": 0.07760000228881836, + "Acc.clock": 0.2459000015258789, + "Acc.flag": 0.4077000045776367 + } + }, + "16": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8197, + "mIoU": 0.46, + "mAcc": 0.5799, + "IoU.wall": 0.7580999755859374, + "IoU.building": 0.8230000305175781, + "IoU.sky": 0.9377999877929688, + "IoU.floor": 0.8020999908447266, + "IoU.tree": 0.735999984741211, + "IoU.ceiling": 0.8283000183105469, + "IoU.road": 0.8218000030517578, + "IoU.bed ": 0.8712000274658203, + "IoU.windowpane": 0.6058000183105469, + "IoU.grass": 0.6436000061035156, + "IoU.cabinet": 0.5883000183105469, + "IoU.sidewalk": 0.6329000091552734, + "IoU.person": 0.777300033569336, + "IoU.earth": 0.32880001068115233, + "IoU.door": 0.48, + "IoU.table": 0.5620999908447266, + "IoU.mountain": 0.5704000091552734, + "IoU.plant": 0.5011000061035156, + "IoU.curtain": 0.729800033569336, + "IoU.chair": 0.5227999877929688, + "IoU.car": 0.8154000091552734, + "IoU.water": 0.5438999938964844, + "IoU.painting": 0.6780000305175782, + "IoU.sofa": 0.6213999938964844, + "IoU.shelf": 0.43099998474121093, + "IoU.house": 0.4843000030517578, + "IoU.sea": 0.6408999633789062, + "IoU.mirror": 0.6615000152587891, + "IoU.rug": 0.6641000366210937, + "IoU.field": 0.2780999946594238, + "IoU.armchair": 0.43020000457763674, + "IoU.seat": 0.6245000076293945, + "IoU.fence": 0.4022000122070313, + "IoU.desk": 0.44229999542236326, + "IoU.rock": 0.41150001525878904, + "IoU.wardrobe": 0.47779998779296873, + "IoU.lamp": 0.5463000106811523, + "IoU.bathtub": 0.7248999786376953, + "IoU.railing": 0.311299991607666, + "IoU.cushion": 0.5370999908447266, + "IoU.base": 0.265, + "IoU.box": 0.22860000610351563, + "IoU.column": 0.4556999969482422, + "IoU.signboard": 0.33169998168945314, + "IoU.chest of drawers": 0.33939998626708984, + "IoU.counter": 0.24809999465942384, + "IoU.sand": 0.41439998626708985, + "IoU.sink": 0.6681999969482422, + "IoU.skyscraper": 0.4936000061035156, + "IoU.fireplace": 0.7088999938964844, + "IoU.refrigerator": 0.7709999847412109, + "IoU.grandstand": 0.4393000030517578, + "IoU.path": 0.215, + "IoU.stairs": 0.276299991607666, + "IoU.runway": 0.6488999938964843, + "IoU.case": 0.5304999923706055, + "IoU.pool table": 0.9086000061035157, + "IoU.pillow": 0.5777999877929687, + "IoU.screen door": 0.6327999877929688, + "IoU.stairway": 0.32169998168945313, + "IoU.river": 0.22110000610351563, + "IoU.bridge": 0.6790000152587891, + "IoU.bookcase": 0.33810001373291015, + "IoU.blind": 0.41959999084472654, + "IoU.coffee table": 0.5940000152587891, + "IoU.toilet": 0.8225, + "IoU.flower": 0.3358000183105469, + "IoU.book": 0.43389999389648437, + "IoU.hill": 0.10079999923706055, + "IoU.bench": 0.41779998779296873, + "IoU.countertop": 0.5722999954223633, + "IoU.stove": 0.6948999786376953, + "IoU.palm": 0.4736000061035156, + "IoU.kitchen island": 0.3818999862670898, + "IoU.computer": 0.7219999694824218, + "IoU.swivel chair": 0.44459999084472657, + "IoU.boat": 0.7183999633789062, + "IoU.bar": 0.5272999954223633, + "IoU.arcade machine": 0.4075, + "IoU.hovel": 0.4854000091552734, + "IoU.bus": 0.7551999664306641, + "IoU.towel": 0.5733000183105469, + "IoU.light": 0.31209999084472656, + "IoU.truck": 0.2588999938964844, + "IoU.tower": 0.325099983215332, + "IoU.chandelier": 0.6047000122070313, + "IoU.awning": 0.3233000183105469, 
+ "IoU.streetlight": 0.15930000305175782, + "IoU.booth": 0.34119998931884765, + "IoU.television receiver": 0.6083000183105469, + "IoU.airplane": 0.5736999893188477, + "IoU.dirt track": 0.07159999847412109, + "IoU.apparel": 0.32560001373291014, + "IoU.pole": 0.19489999771118163, + "IoU.land": 0.022899999618530273, + "IoU.bannister": 0.09609999656677246, + "IoU.escalator": 0.3265999984741211, + "IoU.ottoman": 0.504000015258789, + "IoU.bottle": 0.3352000045776367, + "IoU.buffet": 0.3833000183105469, + "IoU.poster": 0.22969999313354492, + "IoU.stage": 0.18739999771118165, + "IoU.van": 0.39, + "IoU.ship": 0.6820999908447266, + "IoU.fountain": 0.1981999969482422, + "IoU.conveyer belt": 0.7401000213623047, + "IoU.canopy": 0.19420000076293945, + "IoU.washer": 0.7173000335693359, + "IoU.plaything": 0.26489999771118167, + "IoU.swimming pool": 0.6102999877929688, + "IoU.stool": 0.25360000610351563, + "IoU.barrel": 0.5020000076293946, + "IoU.basket": 0.2514999961853027, + "IoU.waterfall": 0.5477000045776367, + "IoU.tent": 0.9141000366210937, + "IoU.bag": 0.1, + "IoU.minibike": 0.5231999969482422, + "IoU.cradle": 0.7851000213623047, + "IoU.oven": 0.1843000030517578, + "IoU.ball": 0.3920999908447266, + "IoU.food": 0.5709999847412109, + "IoU.step": 0.13350000381469726, + "IoU.tank": 0.5002999877929688, + "IoU.trade name": 0.24870000839233397, + "IoU.microwave": 0.3656000137329102, + "IoU.pot": 0.34619998931884766, + "IoU.animal": 0.596599998474121, + "IoU.bicycle": 0.4727999877929687, + "IoU.lake": 0.422400016784668, + "IoU.dishwasher": 0.5061999893188477, + "IoU.screen": 0.5258000183105469, + "IoU.blanket": 0.1477999973297119, + "IoU.sculpture": 0.49639999389648437, + "IoU.hood": 0.4958000183105469, + "IoU.sconce": 0.35, + "IoU.vase": 0.25459999084472656, + "IoU.traffic light": 0.25459999084472656, + "IoU.tray": 0.011200000047683716, + "IoU.ashcan": 0.3138999938964844, + "IoU.fan": 0.4956999969482422, + "IoU.pier": 0.28670000076293944, + "IoU.crt screen": 9.999999776482581e-05, + "IoU.plate": 0.42650001525878906, + "IoU.monitor": 0.020999999046325685, + "IoU.bulletin board": 0.3064999961853027, + "IoU.shower": 0.003799999952316284, + "IoU.radiator": 0.49150001525878906, + "IoU.glass": 0.07519999980926513, + "IoU.clock": 0.2209000015258789, + "IoU.flag": 0.3759000015258789, + "Acc.wall": 0.8763999938964844, + "Acc.building": 0.9236000061035157, + "Acc.sky": 0.9766000366210937, + "Acc.floor": 0.899000015258789, + "Acc.tree": 0.865, + "Acc.ceiling": 0.9173999786376953, + "Acc.road": 0.8977999877929688, + "Acc.bed ": 0.9484999847412109, + "Acc.windowpane": 0.7584999847412109, + "Acc.grass": 0.7931999969482422, + "Acc.cabinet": 0.6981999969482422, + "Acc.sidewalk": 0.7747000122070312, + "Acc.person": 0.9183999633789063, + "Acc.earth": 0.4525, + "Acc.door": 0.6419999694824219, + "Acc.table": 0.7212999725341797, + "Acc.mountain": 0.7162999725341797, + "Acc.plant": 0.6238999938964844, + "Acc.curtain": 0.8381999969482422, + "Acc.chair": 0.65, + "Acc.car": 0.9055000305175781, + "Acc.water": 0.6973999786376953, + "Acc.painting": 0.8508000183105469, + "Acc.sofa": 0.7648000335693359, + "Acc.shelf": 0.6341999816894531, + "Acc.house": 0.5881000137329102, + "Acc.sea": 0.8708999633789063, + "Acc.mirror": 0.7481999969482422, + "Acc.rug": 0.7416999816894532, + "Acc.field": 0.4797999954223633, + "Acc.armchair": 0.6695999908447265, + "Acc.seat": 0.8019999694824219, + "Acc.fence": 0.5470999908447266, + "Acc.desk": 0.6519999694824219, + "Acc.rock": 0.657300033569336, + "Acc.wardrobe": 0.6340999984741211, + "Acc.lamp": 
0.6711000061035156, + "Acc.bathtub": 0.8013999938964844, + "Acc.railing": 0.45799999237060546, + "Acc.cushion": 0.6904000091552734, + "Acc.base": 0.41580001831054686, + "Acc.box": 0.3120000076293945, + "Acc.column": 0.5861000061035156, + "Acc.signboard": 0.4413000106811523, + "Acc.chest of drawers": 0.5445000076293945, + "Acc.counter": 0.3475, + "Acc.sand": 0.5877000045776367, + "Acc.sink": 0.7529000091552734, + "Acc.skyscraper": 0.5658000183105468, + "Acc.fireplace": 0.8975, + "Acc.refrigerator": 0.8569999694824219, + "Acc.grandstand": 0.7308000183105469, + "Acc.path": 0.3054999923706055, + "Acc.stairs": 0.35560001373291017, + "Acc.runway": 0.8486000061035156, + "Acc.case": 0.6847000122070312, + "Acc.pool table": 0.9623999786376953, + "Acc.pillow": 0.6783000183105469, + "Acc.screen door": 0.7515000152587891, + "Acc.stairway": 0.4718000030517578, + "Acc.river": 0.405, + "Acc.bridge": 0.8290000152587891, + "Acc.bookcase": 0.5593999862670899, + "Acc.blind": 0.4740999984741211, + "Acc.coffee table": 0.8044999694824219, + "Acc.toilet": 0.885199966430664, + "Acc.flower": 0.5597000122070312, + "Acc.book": 0.5979000091552734, + "Acc.hill": 0.20040000915527345, + "Acc.bench": 0.5068999862670899, + "Acc.countertop": 0.7108999633789063, + "Acc.stove": 0.7827999877929688, + "Acc.palm": 0.6677999877929688, + "Acc.kitchen island": 0.6894000244140625, + "Acc.computer": 0.8779000091552734, + "Acc.swivel chair": 0.599900016784668, + "Acc.boat": 0.8472000122070312, + "Acc.bar": 0.7254000091552735, + "Acc.arcade machine": 0.4520999908447266, + "Acc.hovel": 0.5418999862670898, + "Acc.bus": 0.9108999633789062, + "Acc.towel": 0.7373999786376954, + "Acc.light": 0.34180000305175784, + "Acc.truck": 0.36130001068115236, + "Acc.tower": 0.46689998626708984, + "Acc.chandelier": 0.7591999816894531, + "Acc.awning": 0.3806999969482422, + "Acc.streetlight": 0.18100000381469727, + "Acc.booth": 0.3997000122070313, + "Acc.television receiver": 0.7302999877929688, + "Acc.airplane": 0.6487999725341796, + "Acc.dirt track": 0.17540000915527343, + "Acc.apparel": 0.4679000091552734, + "Acc.pole": 0.24260000228881837, + "Acc.land": 0.041399998664855955, + "Acc.bannister": 0.12619999885559083, + "Acc.escalator": 0.39279998779296876, + "Acc.ottoman": 0.6491999816894531, + "Acc.bottle": 0.5675, + "Acc.buffet": 0.43590000152587893, + "Acc.poster": 0.31860000610351563, + "Acc.stage": 0.36959999084472656, + "Acc.van": 0.4933000183105469, + "Acc.ship": 0.8166000366210937, + "Acc.fountain": 0.2038999938964844, + "Acc.conveyer belt": 0.8394000244140625, + "Acc.canopy": 0.3081999969482422, + "Acc.washer": 0.725999984741211, + "Acc.plaything": 0.45689998626708983, + "Acc.swimming pool": 0.7977999877929688, + "Acc.stool": 0.354900016784668, + "Acc.barrel": 0.6302999877929687, + "Acc.basket": 0.3252000045776367, + "Acc.waterfall": 0.6054000091552735, + "Acc.tent": 0.9918000030517579, + "Acc.bag": 0.118100004196167, + "Acc.minibike": 0.6313999938964844, + "Acc.cradle": 0.9738999938964844, + "Acc.oven": 0.5161999893188477, + "Acc.ball": 0.4622999954223633, + "Acc.food": 0.7191999816894531, + "Acc.step": 0.16149999618530272, + "Acc.tank": 0.647300033569336, + "Acc.trade name": 0.2989999961853027, + "Acc.microwave": 0.41049999237060547, + "Acc.pot": 0.41189998626708985, + "Acc.animal": 0.6388999938964843, + "Acc.bicycle": 0.7112000274658203, + "Acc.lake": 0.4688999938964844, + "Acc.dishwasher": 0.5659000015258789, + "Acc.screen": 0.9233000183105469, + "Acc.blanket": 0.16030000686645507, + "Acc.sculpture": 0.6347000122070312, + "Acc.hood": 
0.5434000015258789, + "Acc.sconce": 0.43020000457763674, + "Acc.vase": 0.34900001525878904, + "Acc.traffic light": 0.43599998474121093, + "Acc.tray": 0.014500000476837159, + "Acc.ashcan": 0.4634000015258789, + "Acc.fan": 0.6476000213623047, + "Acc.pier": 0.46549999237060546, + "Acc.crt screen": 0.00019999999552965163, + "Acc.plate": 0.5511000061035156, + "Acc.monitor": 0.022200000286102296, + "Acc.bulletin board": 0.42650001525878906, + "Acc.shower": 0.014099999666213989, + "Acc.radiator": 0.5545000076293946, + "Acc.glass": 0.07989999771118164, + "Acc.clock": 0.26049999237060545, + "Acc.flag": 0.43979999542236325 + } + }, + "17": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8238, + "mIoU": 0.4694, + "mAcc": 0.5878, + "IoU.wall": 0.7612000274658203, + "IoU.building": 0.8276000213623047, + "IoU.sky": 0.9394999694824219, + "IoU.floor": 0.8054000091552734, + "IoU.tree": 0.7416999816894532, + "IoU.ceiling": 0.8356999969482422, + "IoU.road": 0.824800033569336, + "IoU.bed ": 0.87, + "IoU.windowpane": 0.5997000122070313, + "IoU.grass": 0.668499984741211, + "IoU.cabinet": 0.5936000061035156, + "IoU.sidewalk": 0.6436000061035156, + "IoU.person": 0.7838999938964843, + "IoU.earth": 0.34040000915527346, + "IoU.door": 0.48470001220703124, + "IoU.table": 0.5770999908447265, + "IoU.mountain": 0.594900016784668, + "IoU.plant": 0.5109999847412109, + "IoU.curtain": 0.7319999694824219, + "IoU.chair": 0.5286999893188477, + "IoU.car": 0.8262000274658203, + "IoU.water": 0.597599983215332, + "IoU.painting": 0.6922000122070312, + "IoU.sofa": 0.6401000213623047, + "IoU.shelf": 0.43279998779296874, + "IoU.house": 0.48520000457763673, + "IoU.sea": 0.6820999908447266, + "IoU.mirror": 0.6268000030517578, + "IoU.rug": 0.6641999816894532, + "IoU.field": 0.2904999923706055, + "IoU.armchair": 0.4065999984741211, + "IoU.seat": 0.6113000106811524, + "IoU.fence": 0.43189998626708986, + "IoU.desk": 0.4620000076293945, + "IoU.rock": 0.42529998779296874, + "IoU.wardrobe": 0.5243999862670898, + "IoU.lamp": 0.5522000122070313, + "IoU.bathtub": 0.7351999664306641, + "IoU.railing": 0.3243999862670898, + "IoU.cushion": 0.5433000183105469, + "IoU.base": 0.28170000076293944, + "IoU.box": 0.24629999160766602, + "IoU.column": 0.4672000122070312, + "IoU.signboard": 0.33619998931884765, + "IoU.chest of drawers": 0.3213999938964844, + "IoU.counter": 0.28030000686645506, + "IoU.sand": 0.3865000152587891, + "IoU.sink": 0.6790000152587891, + "IoU.skyscraper": 0.48150001525878905, + "IoU.fireplace": 0.6911000061035156, + "IoU.refrigerator": 0.769000015258789, + "IoU.grandstand": 0.45610000610351564, + "IoU.path": 0.24469999313354493, + "IoU.stairs": 0.27860000610351565, + "IoU.runway": 0.6313000106811524, + "IoU.case": 0.5147000122070312, + "IoU.pool table": 0.9108999633789062, + "IoU.pillow": 0.5588999938964844, + "IoU.screen door": 0.5152000045776367, + "IoU.stairway": 0.32720001220703127, + "IoU.river": 0.18870000839233397, + "IoU.bridge": 0.6908000183105468, + "IoU.bookcase": 0.3436000061035156, + "IoU.blind": 0.4063999938964844, + "IoU.coffee table": 0.575999984741211, + "IoU.toilet": 0.8193000030517578, + "IoU.flower": 0.34560001373291016, + "IoU.book": 0.4420000076293945, + "IoU.hill": 0.11359999656677246, + "IoU.bench": 0.4165999984741211, + "IoU.countertop": 0.5302999877929687, + "IoU.stove": 0.7080999755859375, + "IoU.palm": 0.4834999847412109, + "IoU.kitchen island": 0.36900001525878906, + "IoU.computer": 0.7191999816894531, + 
"IoU.swivel chair": 0.5093000030517578, + "IoU.boat": 0.6956999969482421, + "IoU.bar": 0.5420000076293945, + "IoU.arcade machine": 0.5929999923706055, + "IoU.hovel": 0.5736000061035156, + "IoU.bus": 0.8018000030517578, + "IoU.towel": 0.574900016784668, + "IoU.light": 0.30270000457763674, + "IoU.truck": 0.22100000381469725, + "IoU.tower": 0.2861000061035156, + "IoU.chandelier": 0.6031000137329101, + "IoU.awning": 0.41259998321533203, + "IoU.streetlight": 0.1715999984741211, + "IoU.booth": 0.30870000839233397, + "IoU.television receiver": 0.5906999969482422, + "IoU.airplane": 0.5761000061035156, + "IoU.dirt track": 0.040500001907348634, + "IoU.apparel": 0.3970999908447266, + "IoU.pole": 0.18489999771118165, + "IoU.land": 0.01899999976158142, + "IoU.bannister": 0.103100004196167, + "IoU.escalator": 0.38549999237060545, + "IoU.ottoman": 0.4622000122070313, + "IoU.bottle": 0.35080001831054686, + "IoU.buffet": 0.3856999969482422, + "IoU.poster": 0.2652000045776367, + "IoU.stage": 0.15850000381469725, + "IoU.van": 0.39299999237060546, + "IoU.ship": 0.5365999984741211, + "IoU.fountain": 0.20629999160766602, + "IoU.conveyer belt": 0.7113999938964843, + "IoU.canopy": 0.23370000839233399, + "IoU.washer": 0.6869000244140625, + "IoU.plaything": 0.2506999969482422, + "IoU.swimming pool": 0.5843999862670899, + "IoU.stool": 0.2922999954223633, + "IoU.barrel": 0.5663999938964843, + "IoU.basket": 0.2809000015258789, + "IoU.waterfall": 0.5738999938964844, + "IoU.tent": 0.9159999847412109, + "IoU.bag": 0.09430000305175781, + "IoU.minibike": 0.6088999938964844, + "IoU.cradle": 0.8148999786376954, + "IoU.oven": 0.1909000015258789, + "IoU.ball": 0.4006999969482422, + "IoU.food": 0.5531000137329102, + "IoU.step": 0.11949999809265137, + "IoU.tank": 0.48939998626708986, + "IoU.trade name": 0.236200008392334, + "IoU.microwave": 0.3618000030517578, + "IoU.pot": 0.36080001831054687, + "IoU.animal": 0.6576000213623047, + "IoU.bicycle": 0.5059999847412109, + "IoU.lake": 0.5538000106811524, + "IoU.dishwasher": 0.5952000045776367, + "IoU.screen": 0.5877999877929687, + "IoU.blanket": 0.15300000190734864, + "IoU.sculpture": 0.4990999984741211, + "IoU.hood": 0.505099983215332, + "IoU.sconce": 0.36380001068115236, + "IoU.vase": 0.21510000228881837, + "IoU.traffic light": 0.26309999465942385, + "IoU.tray": 0.02490000009536743, + "IoU.ashcan": 0.3590999984741211, + "IoU.fan": 0.5006000137329102, + "IoU.pier": 0.42650001525878906, + "IoU.crt screen": 0.003199999928474426, + "IoU.plate": 0.450099983215332, + "IoU.monitor": 0.03410000085830688, + "IoU.bulletin board": 0.3559999847412109, + "IoU.shower": 0.004799999892711639, + "IoU.radiator": 0.504900016784668, + "IoU.glass": 0.07510000228881836, + "IoU.clock": 0.22989999771118164, + "IoU.flag": 0.40810001373291016, + "Acc.wall": 0.877300033569336, + "Acc.building": 0.9244999694824219, + "Acc.sky": 0.9773000335693359, + "Acc.floor": 0.9008999633789062, + "Acc.tree": 0.8672000122070312, + "Acc.ceiling": 0.9256999969482422, + "Acc.road": 0.9033000183105468, + "Acc.bed ": 0.9502999877929688, + "Acc.windowpane": 0.759000015258789, + "Acc.grass": 0.8127999877929688, + "Acc.cabinet": 0.705, + "Acc.sidewalk": 0.7812999725341797, + "Acc.person": 0.9158999633789062, + "Acc.earth": 0.47759998321533204, + "Acc.door": 0.6483000183105468, + "Acc.table": 0.7280000305175781, + "Acc.mountain": 0.7415000152587891, + "Acc.plant": 0.6368000030517578, + "Acc.curtain": 0.8369000244140625, + "Acc.chair": 0.6591999816894532, + "Acc.car": 0.9125, + "Acc.water": 0.7463999938964844, + "Acc.painting": 
0.8623000335693359, + "Acc.sofa": 0.7941000366210937, + "Acc.shelf": 0.6277000045776367, + "Acc.house": 0.5952999877929688, + "Acc.sea": 0.8481999969482422, + "Acc.mirror": 0.7025, + "Acc.rug": 0.7487000274658203, + "Acc.field": 0.4947999954223633, + "Acc.armchair": 0.6340999984741211, + "Acc.seat": 0.8162999725341797, + "Acc.fence": 0.5788000106811524, + "Acc.desk": 0.6691999816894532, + "Acc.rock": 0.6437999725341796, + "Acc.wardrobe": 0.6483000183105468, + "Acc.lamp": 0.6755000305175781, + "Acc.bathtub": 0.7944999694824219, + "Acc.railing": 0.47080001831054685, + "Acc.cushion": 0.6816999816894531, + "Acc.base": 0.45049999237060545, + "Acc.box": 0.33360000610351564, + "Acc.column": 0.6036000061035156, + "Acc.signboard": 0.4463000106811523, + "Acc.chest of drawers": 0.545, + "Acc.counter": 0.39220001220703127, + "Acc.sand": 0.544099998474121, + "Acc.sink": 0.7604000091552734, + "Acc.skyscraper": 0.5704999923706054, + "Acc.fireplace": 0.8925, + "Acc.refrigerator": 0.8612000274658204, + "Acc.grandstand": 0.7426000213623047, + "Acc.path": 0.322599983215332, + "Acc.stairs": 0.37009998321533205, + "Acc.runway": 0.8363999938964843, + "Acc.case": 0.6794999694824219, + "Acc.pool table": 0.9683999633789062, + "Acc.pillow": 0.6672000122070313, + "Acc.screen door": 0.6580999755859375, + "Acc.stairway": 0.44290000915527344, + "Acc.river": 0.38349998474121094, + "Acc.bridge": 0.8393000030517578, + "Acc.bookcase": 0.5672000122070312, + "Acc.blind": 0.45619998931884764, + "Acc.coffee table": 0.8020999908447266, + "Acc.toilet": 0.8894999694824218, + "Acc.flower": 0.5252000045776367, + "Acc.book": 0.6220000076293946, + "Acc.hill": 0.20049999237060548, + "Acc.bench": 0.4915999984741211, + "Acc.countertop": 0.685199966430664, + "Acc.stove": 0.7958000183105469, + "Acc.palm": 0.6765000152587891, + "Acc.kitchen island": 0.6683999633789063, + "Acc.computer": 0.8641999816894531, + "Acc.swivel chair": 0.6891000366210938, + "Acc.boat": 0.8308999633789063, + "Acc.bar": 0.7423000335693359, + "Acc.arcade machine": 0.6397000122070312, + "Acc.hovel": 0.6444000244140625, + "Acc.bus": 0.912300033569336, + "Acc.towel": 0.7104000091552735, + "Acc.light": 0.3268000030517578, + "Acc.truck": 0.303799991607666, + "Acc.tower": 0.4231000137329102, + "Acc.chandelier": 0.7619000244140625, + "Acc.awning": 0.47959999084472654, + "Acc.streetlight": 0.20100000381469726, + "Acc.booth": 0.39349998474121095, + "Acc.television receiver": 0.7322000122070312, + "Acc.airplane": 0.6494000244140625, + "Acc.dirt track": 0.05909999847412109, + "Acc.apparel": 0.5379999923706055, + "Acc.pole": 0.2325, + "Acc.land": 0.028499999046325684, + "Acc.bannister": 0.1478999996185303, + "Acc.escalator": 0.4677999877929688, + "Acc.ottoman": 0.5761000061035156, + "Acc.bottle": 0.6206999969482422, + "Acc.buffet": 0.44400001525878907, + "Acc.poster": 0.37029998779296874, + "Acc.stage": 0.3409000015258789, + "Acc.van": 0.4847999954223633, + "Acc.ship": 0.6508000183105469, + "Acc.fountain": 0.20829999923706055, + "Acc.conveyer belt": 0.8869000244140625, + "Acc.canopy": 0.31079999923706053, + "Acc.washer": 0.6908000183105468, + "Acc.plaything": 0.3970000076293945, + "Acc.swimming pool": 0.7393000030517578, + "Acc.stool": 0.37860000610351563, + "Acc.barrel": 0.6315999984741211, + "Acc.basket": 0.3427999877929688, + "Acc.waterfall": 0.6506999969482422, + "Acc.tent": 0.9912999725341797, + "Acc.bag": 0.11229999542236328, + "Acc.minibike": 0.7323999786376953, + "Acc.cradle": 0.9697000122070313, + "Acc.oven": 0.5218999862670899, + "Acc.ball": 0.46419998168945314, + 
"Acc.food": 0.6906999969482421, + "Acc.step": 0.14079999923706055, + "Acc.tank": 0.6279000091552734, + "Acc.trade name": 0.2748999977111816, + "Acc.microwave": 0.40380001068115234, + "Acc.pot": 0.42939998626708986, + "Acc.animal": 0.6984999847412109, + "Acc.bicycle": 0.7016000366210937, + "Acc.lake": 0.6325999832153321, + "Acc.dishwasher": 0.6730000305175782, + "Acc.screen": 0.9001000213623047, + "Acc.blanket": 0.16459999084472657, + "Acc.sculpture": 0.6452999877929687, + "Acc.hood": 0.5779999923706055, + "Acc.sconce": 0.44279998779296875, + "Acc.vase": 0.31709999084472656, + "Acc.traffic light": 0.4418999862670898, + "Acc.tray": 0.03390000104904175, + "Acc.ashcan": 0.5091999816894531, + "Acc.fan": 0.6587000274658203, + "Acc.pier": 0.7448999786376953, + "Acc.crt screen": 0.008299999833106995, + "Acc.plate": 0.5659000015258789, + "Acc.monitor": 0.03609999895095825, + "Acc.bulletin board": 0.4840999984741211, + "Acc.shower": 0.02809999942779541, + "Acc.radiator": 0.5561000061035156, + "Acc.glass": 0.07969999790191651, + "Acc.clock": 0.2620000076293945, + "Acc.flag": 0.4809999847412109 + } + }, + "18": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8267, + "mIoU": 0.4768, + "mAcc": 0.5964, + "IoU.wall": 0.7638999938964843, + "IoU.building": 0.8283999633789062, + "IoU.sky": 0.9376000213623047, + "IoU.floor": 0.8062000274658203, + "IoU.tree": 0.7441000366210937, + "IoU.ceiling": 0.8354000091552735, + "IoU.road": 0.8270999908447265, + "IoU.bed ": 0.8754000091552734, + "IoU.windowpane": 0.6116999816894532, + "IoU.grass": 0.6636000061035157, + "IoU.cabinet": 0.6041999816894531, + "IoU.sidewalk": 0.6419000244140625, + "IoU.person": 0.7933000183105469, + "IoU.earth": 0.36369998931884767, + "IoU.door": 0.47759998321533204, + "IoU.table": 0.5991999816894531, + "IoU.mountain": 0.5840999984741211, + "IoU.plant": 0.5279999923706055, + "IoU.curtain": 0.7430000305175781, + "IoU.chair": 0.5420999908447266, + "IoU.car": 0.826500015258789, + "IoU.water": 0.5570999908447266, + "IoU.painting": 0.7168000030517578, + "IoU.sofa": 0.6555000305175781, + "IoU.shelf": 0.4502000045776367, + "IoU.house": 0.4988000106811523, + "IoU.sea": 0.6213999938964844, + "IoU.mirror": 0.6483000183105468, + "IoU.rug": 0.6702999877929687, + "IoU.field": 0.31549999237060544, + "IoU.armchair": 0.4184000015258789, + "IoU.seat": 0.6422000122070313, + "IoU.fence": 0.3983000183105469, + "IoU.desk": 0.4734999847412109, + "IoU.rock": 0.4647999954223633, + "IoU.wardrobe": 0.5297000122070312, + "IoU.lamp": 0.5768000030517578, + "IoU.bathtub": 0.7527999877929688, + "IoU.railing": 0.3661000061035156, + "IoU.cushion": 0.5575, + "IoU.base": 0.3333000183105469, + "IoU.box": 0.25610000610351563, + "IoU.column": 0.47950000762939454, + "IoU.signboard": 0.3434000015258789, + "IoU.chest of drawers": 0.32860000610351564, + "IoU.counter": 0.32290000915527345, + "IoU.sand": 0.4127000045776367, + "IoU.sink": 0.6783999633789063, + "IoU.skyscraper": 0.6104999923706055, + "IoU.fireplace": 0.6933999633789063, + "IoU.refrigerator": 0.7566999816894531, + "IoU.grandstand": 0.4093000030517578, + "IoU.path": 0.22610000610351563, + "IoU.stairs": 0.28579999923706056, + "IoU.runway": 0.6937999725341797, + "IoU.case": 0.5452999877929687, + "IoU.pool table": 0.9176000213623047, + "IoU.pillow": 0.5797999954223633, + "IoU.screen door": 0.4702000045776367, + "IoU.stairway": 0.3856999969482422, + "IoU.river": 0.14800000190734863, + "IoU.bridge": 0.6343000030517578, + 
"IoU.bookcase": 0.3554000091552734, + "IoU.blind": 0.42369998931884767, + "IoU.coffee table": 0.5895999908447266, + "IoU.toilet": 0.8176999664306641, + "IoU.flower": 0.35709999084472654, + "IoU.book": 0.45740001678466796, + "IoU.hill": 0.12850000381469726, + "IoU.bench": 0.4854000091552734, + "IoU.countertop": 0.4625, + "IoU.stove": 0.6980000305175781, + "IoU.palm": 0.5056000137329102, + "IoU.kitchen island": 0.38110000610351563, + "IoU.computer": 0.7308000183105469, + "IoU.swivel chair": 0.49439998626708986, + "IoU.boat": 0.712699966430664, + "IoU.bar": 0.5806999969482421, + "IoU.arcade machine": 0.4056999969482422, + "IoU.hovel": 0.528400001525879, + "IoU.bus": 0.7711000061035156, + "IoU.towel": 0.6131999969482422, + "IoU.light": 0.32369998931884764, + "IoU.truck": 0.322599983215332, + "IoU.tower": 0.35520000457763673, + "IoU.chandelier": 0.6265999984741211, + "IoU.awning": 0.4154999923706055, + "IoU.streetlight": 0.17389999389648436, + "IoU.booth": 0.3125, + "IoU.television receiver": 0.6598000335693359, + "IoU.airplane": 0.5734999847412109, + "IoU.dirt track": 0.081899995803833, + "IoU.apparel": 0.3718000030517578, + "IoU.pole": 0.16459999084472657, + "IoU.land": 0.02859999895095825, + "IoU.bannister": 0.10189999580383301, + "IoU.escalator": 0.3736999893188477, + "IoU.ottoman": 0.4331999969482422, + "IoU.bottle": 0.21110000610351562, + "IoU.buffet": 0.4084000015258789, + "IoU.poster": 0.28889999389648435, + "IoU.stage": 0.18350000381469728, + "IoU.van": 0.44409999847412107, + "IoU.ship": 0.30510000228881834, + "IoU.fountain": 0.20329999923706055, + "IoU.conveyer belt": 0.7126000213623047, + "IoU.canopy": 0.2643000030517578, + "IoU.washer": 0.7180999755859375, + "IoU.plaything": 0.2770999908447266, + "IoU.swimming pool": 0.6455999755859375, + "IoU.stool": 0.2996999931335449, + "IoU.barrel": 0.5175, + "IoU.basket": 0.2545000076293945, + "IoU.waterfall": 0.5866999816894531, + "IoU.tent": 0.9244000244140625, + "IoU.bag": 0.14390000343322754, + "IoU.minibike": 0.6563999938964844, + "IoU.cradle": 0.7872000122070313, + "IoU.oven": 0.17530000686645508, + "IoU.ball": 0.445, + "IoU.food": 0.519900016784668, + "IoU.step": 0.0840999984741211, + "IoU.tank": 0.535, + "IoU.trade name": 0.22709999084472657, + "IoU.microwave": 0.3813999938964844, + "IoU.pot": 0.38630001068115233, + "IoU.animal": 0.6437999725341796, + "IoU.bicycle": 0.5322999954223633, + "IoU.lake": 0.5875, + "IoU.dishwasher": 0.5754999923706055, + "IoU.screen": 0.5916999816894531, + "IoU.blanket": 0.15279999732971192, + "IoU.sculpture": 0.5372000122070313, + "IoU.hood": 0.5622000122070312, + "IoU.sconce": 0.3647999954223633, + "IoU.vase": 0.286200008392334, + "IoU.traffic light": 0.24489999771118165, + "IoU.tray": 0.03069999933242798, + "IoU.ashcan": 0.38729999542236326, + "IoU.fan": 0.5075999832153321, + "IoU.pier": 0.22440000534057616, + "IoU.crt screen": 0.03130000114440918, + "IoU.plate": 0.46919998168945315, + "IoU.monitor": 0.06980000019073486, + "IoU.bulletin board": 0.49189998626708986, + "IoU.shower": 0.004399999976158142, + "IoU.radiator": 0.5690999984741211, + "IoU.glass": 0.08949999809265137, + "IoU.clock": 0.25860000610351563, + "IoU.flag": 0.48009998321533204, + "Acc.wall": 0.8770999908447266, + "Acc.building": 0.9215000152587891, + "Acc.sky": 0.9772000122070312, + "Acc.floor": 0.9043000030517578, + "Acc.tree": 0.8706999969482422, + "Acc.ceiling": 0.9225, + "Acc.road": 0.8991999816894531, + "Acc.bed ": 0.9541999816894531, + "Acc.windowpane": 0.7604000091552734, + "Acc.grass": 0.8040000152587891, + "Acc.cabinet": 
0.7156999969482422, + "Acc.sidewalk": 0.7916000366210938, + "Acc.person": 0.9163999938964844, + "Acc.earth": 0.5063999938964844, + "Acc.door": 0.6466000366210938, + "Acc.table": 0.7380000305175781, + "Acc.mountain": 0.7162999725341797, + "Acc.plant": 0.6331999969482421, + "Acc.curtain": 0.8605000305175782, + "Acc.chair": 0.6655000305175781, + "Acc.car": 0.9076999664306641, + "Acc.water": 0.7144000244140625, + "Acc.painting": 0.8595999908447266, + "Acc.sofa": 0.8016000366210938, + "Acc.shelf": 0.6683000183105469, + "Acc.house": 0.6670999908447266, + "Acc.sea": 0.846500015258789, + "Acc.mirror": 0.73, + "Acc.rug": 0.7323999786376953, + "Acc.field": 0.557599983215332, + "Acc.armchair": 0.6469999694824219, + "Acc.seat": 0.8227999877929687, + "Acc.fence": 0.523400001525879, + "Acc.desk": 0.6794999694824219, + "Acc.rock": 0.7044999694824219, + "Acc.wardrobe": 0.6676999664306641, + "Acc.lamp": 0.7091999816894531, + "Acc.bathtub": 0.8316000366210937, + "Acc.railing": 0.5109000015258789, + "Acc.cushion": 0.6858999633789062, + "Acc.base": 0.48630001068115236, + "Acc.box": 0.34869998931884766, + "Acc.column": 0.6229000091552734, + "Acc.signboard": 0.4506999969482422, + "Acc.chest of drawers": 0.5866999816894531, + "Acc.counter": 0.45029998779296876, + "Acc.sand": 0.555, + "Acc.sink": 0.7576000213623046, + "Acc.skyscraper": 0.6979000091552734, + "Acc.fireplace": 0.9111000061035156, + "Acc.refrigerator": 0.8495999908447266, + "Acc.grandstand": 0.7705999755859375, + "Acc.path": 0.306299991607666, + "Acc.stairs": 0.37560001373291013, + "Acc.runway": 0.8287000274658203, + "Acc.case": 0.7312999725341797, + "Acc.pool table": 0.9701999664306641, + "Acc.pillow": 0.6705000305175781, + "Acc.screen door": 0.5897000122070313, + "Acc.stairway": 0.5283000183105468, + "Acc.river": 0.276200008392334, + "Acc.bridge": 0.7865000152587891, + "Acc.bookcase": 0.5891999816894531, + "Acc.blind": 0.5006000137329102, + "Acc.coffee table": 0.815, + "Acc.toilet": 0.8887000274658203, + "Acc.flower": 0.5297999954223633, + "Acc.book": 0.6331000137329101, + "Acc.hill": 0.24049999237060546, + "Acc.bench": 0.5663000106811523, + "Acc.countertop": 0.6056000137329102, + "Acc.stove": 0.7963999938964844, + "Acc.palm": 0.72, + "Acc.kitchen island": 0.6633999633789063, + "Acc.computer": 0.8751000213623047, + "Acc.swivel chair": 0.668499984741211, + "Acc.boat": 0.8437000274658203, + "Acc.bar": 0.7580000305175781, + "Acc.arcade machine": 0.4311999893188477, + "Acc.hovel": 0.6681999969482422, + "Acc.bus": 0.9368000030517578, + "Acc.towel": 0.7383000183105469, + "Acc.light": 0.3534999847412109, + "Acc.truck": 0.4340999984741211, + "Acc.tower": 0.49990001678466794, + "Acc.chandelier": 0.7805999755859375, + "Acc.awning": 0.49770000457763675, + "Acc.streetlight": 0.2134000015258789, + "Acc.booth": 0.42139999389648436, + "Acc.television receiver": 0.775, + "Acc.airplane": 0.643499984741211, + "Acc.dirt track": 0.19020000457763672, + "Acc.apparel": 0.5054000091552734, + "Acc.pole": 0.20659999847412108, + "Acc.land": 0.043299999237060544, + "Acc.bannister": 0.14510000228881836, + "Acc.escalator": 0.46310001373291015, + "Acc.ottoman": 0.5779999923706055, + "Acc.bottle": 0.2868000030517578, + "Acc.buffet": 0.4627000045776367, + "Acc.poster": 0.3852000045776367, + "Acc.stage": 0.33360000610351564, + "Acc.van": 0.5354000091552734, + "Acc.ship": 0.342599983215332, + "Acc.fountain": 0.20670000076293946, + "Acc.conveyer belt": 0.9266999816894531, + "Acc.canopy": 0.36650001525878906, + "Acc.washer": 0.7258999633789063, + "Acc.plaything": 0.4506999969482422, + 
"Acc.swimming pool": 0.7531999969482421, + "Acc.stool": 0.37959999084472656, + "Acc.barrel": 0.6283000183105468, + "Acc.basket": 0.3331999969482422, + "Acc.waterfall": 0.6538999938964843, + "Acc.tent": 0.9902999877929688, + "Acc.bag": 0.18219999313354493, + "Acc.minibike": 0.7876000213623047, + "Acc.cradle": 0.9691000366210938, + "Acc.oven": 0.4656999969482422, + "Acc.ball": 0.545, + "Acc.food": 0.6291999816894531, + "Acc.step": 0.10739999771118164, + "Acc.tank": 0.6343000030517578, + "Acc.trade name": 0.26040000915527345, + "Acc.microwave": 0.43209999084472656, + "Acc.pot": 0.4622999954223633, + "Acc.animal": 0.6891999816894532, + "Acc.bicycle": 0.7123999786376953, + "Acc.lake": 0.6368000030517578, + "Acc.dishwasher": 0.6677999877929688, + "Acc.screen": 0.8970999908447266, + "Acc.blanket": 0.1640999984741211, + "Acc.sculpture": 0.7255000305175782, + "Acc.hood": 0.6265999984741211, + "Acc.sconce": 0.44310001373291014, + "Acc.vase": 0.40330001831054685, + "Acc.traffic light": 0.43439998626708987, + "Acc.tray": 0.04059999942779541, + "Acc.ashcan": 0.5020999908447266, + "Acc.fan": 0.6594999694824218, + "Acc.pier": 0.45930000305175783, + "Acc.crt screen": 0.07619999885559083, + "Acc.plate": 0.639900016784668, + "Acc.monitor": 0.07769999980926513, + "Acc.bulletin board": 0.6498000335693359, + "Acc.shower": 0.030999999046325683, + "Acc.radiator": 0.6413999938964844, + "Acc.glass": 0.09420000076293945, + "Acc.clock": 0.30270000457763674, + "Acc.flag": 0.537400016784668 + } + }, + "19": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8309000000000001, + "mIoU": 0.4878, + "mAcc": 0.6069, + "IoU.wall": 0.7691000366210937, + "IoU.building": 0.8351999664306641, + "IoU.sky": 0.9383999633789063, + "IoU.floor": 0.8195999908447266, + "IoU.tree": 0.7427999877929687, + "IoU.ceiling": 0.8383000183105469, + "IoU.road": 0.8369999694824218, + "IoU.bed ": 0.8794999694824219, + "IoU.windowpane": 0.6213000106811524, + "IoU.grass": 0.6666000366210938, + "IoU.cabinet": 0.624000015258789, + "IoU.sidewalk": 0.6505000305175781, + "IoU.person": 0.8011000061035156, + "IoU.earth": 0.39439998626708983, + "IoU.door": 0.4891999816894531, + "IoU.table": 0.5804000091552735, + "IoU.mountain": 0.6111999893188477, + "IoU.plant": 0.5154999923706055, + "IoU.curtain": 0.7330999755859375, + "IoU.chair": 0.5441999816894532, + "IoU.car": 0.8437999725341797, + "IoU.water": 0.5297000122070312, + "IoU.painting": 0.706500015258789, + "IoU.sofa": 0.6659999847412109, + "IoU.shelf": 0.4565000152587891, + "IoU.house": 0.4993000030517578, + "IoU.sea": 0.6405000305175781, + "IoU.mirror": 0.6295999908447265, + "IoU.rug": 0.688499984741211, + "IoU.field": 0.31799999237060544, + "IoU.armchair": 0.425, + "IoU.seat": 0.66, + "IoU.fence": 0.4231999969482422, + "IoU.desk": 0.5068999862670899, + "IoU.rock": 0.44490001678466795, + "IoU.wardrobe": 0.5513999938964844, + "IoU.lamp": 0.5743999862670899, + "IoU.bathtub": 0.8026999664306641, + "IoU.railing": 0.3604999923706055, + "IoU.cushion": 0.5590999984741211, + "IoU.base": 0.32380001068115233, + "IoU.box": 0.26899999618530274, + "IoU.column": 0.4956999969482422, + "IoU.signboard": 0.359900016784668, + "IoU.chest of drawers": 0.340099983215332, + "IoU.counter": 0.33299999237060546, + "IoU.sand": 0.5288000106811523, + "IoU.sink": 0.6787999725341797, + "IoU.skyscraper": 0.6193000030517578, + "IoU.fireplace": 0.6831999969482422, + "IoU.refrigerator": 0.7668000030517578, + "IoU.grandstand": 0.46650001525878904, + 
"IoU.path": 0.22180000305175782, + "IoU.stairs": 0.276200008392334, + "IoU.runway": 0.6306999969482422, + "IoU.case": 0.5690000152587891, + "IoU.pool table": 0.9243000030517579, + "IoU.pillow": 0.5797000122070313, + "IoU.screen door": 0.47400001525878904, + "IoU.stairway": 0.37220001220703125, + "IoU.river": 0.12529999732971192, + "IoU.bridge": 0.6654000091552734, + "IoU.bookcase": 0.3320000076293945, + "IoU.blind": 0.41569999694824217, + "IoU.coffee table": 0.5772000122070312, + "IoU.toilet": 0.7738999938964843, + "IoU.flower": 0.3484000015258789, + "IoU.book": 0.46, + "IoU.hill": 0.11880000114440918, + "IoU.bench": 0.472400016784668, + "IoU.countertop": 0.4590000152587891, + "IoU.stove": 0.709800033569336, + "IoU.palm": 0.5004000091552734, + "IoU.kitchen island": 0.364900016784668, + "IoU.computer": 0.7380000305175781, + "IoU.swivel chair": 0.5254999923706055, + "IoU.boat": 0.7156999969482422, + "IoU.bar": 0.4377000045776367, + "IoU.arcade machine": 0.4775, + "IoU.hovel": 0.5566999816894531, + "IoU.bus": 0.8854000091552734, + "IoU.towel": 0.6379999923706055, + "IoU.light": 0.3040999984741211, + "IoU.truck": 0.32970001220703127, + "IoU.tower": 0.3759000015258789, + "IoU.chandelier": 0.6293000030517578, + "IoU.awning": 0.4136000061035156, + "IoU.streetlight": 0.20649999618530274, + "IoU.booth": 0.3071999931335449, + "IoU.television receiver": 0.6694999694824219, + "IoU.airplane": 0.6318000030517578, + "IoU.dirt track": 0.10710000038146973, + "IoU.apparel": 0.3631000137329102, + "IoU.pole": 0.18959999084472656, + "IoU.land": 0.020299999713897704, + "IoU.bannister": 0.12649999618530272, + "IoU.escalator": 0.5031000137329101, + "IoU.ottoman": 0.4631999969482422, + "IoU.bottle": 0.253700008392334, + "IoU.buffet": 0.46450000762939453, + "IoU.poster": 0.3029999923706055, + "IoU.stage": 0.15529999732971192, + "IoU.van": 0.4363000106811523, + "IoU.ship": 0.48389999389648436, + "IoU.fountain": 0.20739999771118164, + "IoU.conveyer belt": 0.7780000305175782, + "IoU.canopy": 0.25739999771118166, + "IoU.washer": 0.7294999694824219, + "IoU.plaything": 0.34, + "IoU.swimming pool": 0.7001999664306641, + "IoU.stool": 0.32459999084472657, + "IoU.barrel": 0.5506999969482422, + "IoU.basket": 0.2359000015258789, + "IoU.waterfall": 0.7541999816894531, + "IoU.tent": 0.952699966430664, + "IoU.bag": 0.13329999923706054, + "IoU.minibike": 0.5854000091552735, + "IoU.cradle": 0.7994999694824219, + "IoU.oven": 0.2347999954223633, + "IoU.ball": 0.5093999862670898, + "IoU.food": 0.48630001068115236, + "IoU.step": 0.07809999942779541, + "IoU.tank": 0.56, + "IoU.trade name": 0.25940000534057617, + "IoU.microwave": 0.46040000915527346, + "IoU.pot": 0.3965000152587891, + "IoU.animal": 0.6529000091552735, + "IoU.bicycle": 0.5034999847412109, + "IoU.lake": 0.5890000152587891, + "IoU.dishwasher": 0.5643999862670899, + "IoU.screen": 0.5933000183105469, + "IoU.blanket": 0.1490999984741211, + "IoU.sculpture": 0.5904999923706055, + "IoU.hood": 0.500099983215332, + "IoU.sconce": 0.3265999984741211, + "IoU.vase": 0.3110000038146973, + "IoU.traffic light": 0.2528000068664551, + "IoU.tray": 0.01600000023841858, + "IoU.ashcan": 0.41939998626708985, + "IoU.fan": 0.5063000106811524, + "IoU.pier": 0.2621999931335449, + "IoU.crt screen": 0.04190000057220459, + "IoU.plate": 0.4577000045776367, + "IoU.monitor": 0.08869999885559082, + "IoU.bulletin board": 0.4911999893188477, + "IoU.shower": 0.0009000000357627869, + "IoU.radiator": 0.5879999923706055, + "IoU.glass": 0.09869999885559082, + "IoU.clock": 0.26299999237060545, + "IoU.flag": 
0.43779998779296875, + "Acc.wall": 0.8808999633789063, + "Acc.building": 0.9294999694824219, + "Acc.sky": 0.9773999786376953, + "Acc.floor": 0.9095999908447265, + "Acc.tree": 0.8705999755859375, + "Acc.ceiling": 0.9198999786376953, + "Acc.road": 0.903499984741211, + "Acc.bed ": 0.9511000061035156, + "Acc.windowpane": 0.7641000366210937, + "Acc.grass": 0.7918000030517578, + "Acc.cabinet": 0.725, + "Acc.sidewalk": 0.7901000213623047, + "Acc.person": 0.9202999877929687, + "Acc.earth": 0.5402999877929687, + "Acc.door": 0.6770999908447266, + "Acc.table": 0.7344999694824219, + "Acc.mountain": 0.7087000274658203, + "Acc.plant": 0.620999984741211, + "Acc.curtain": 0.8458999633789063, + "Acc.chair": 0.6655999755859375, + "Acc.car": 0.9290000152587891, + "Acc.water": 0.6783999633789063, + "Acc.painting": 0.867699966430664, + "Acc.sofa": 0.8095999908447266, + "Acc.shelf": 0.6879000091552734, + "Acc.house": 0.6498000335693359, + "Acc.sea": 0.8991000366210937, + "Acc.mirror": 0.7331999969482422, + "Acc.rug": 0.7452999877929688, + "Acc.field": 0.5706999969482421, + "Acc.armchair": 0.6608999633789062, + "Acc.seat": 0.8431999969482422, + "Acc.fence": 0.5518000030517578, + "Acc.desk": 0.7244000244140625, + "Acc.rock": 0.7069000244140625, + "Acc.wardrobe": 0.6533999633789063, + "Acc.lamp": 0.7073999786376953, + "Acc.bathtub": 0.865, + "Acc.railing": 0.5140999984741211, + "Acc.cushion": 0.6984999847412109, + "Acc.base": 0.4747999954223633, + "Acc.box": 0.35580001831054686, + "Acc.column": 0.6270999908447266, + "Acc.signboard": 0.48220001220703124, + "Acc.chest of drawers": 0.606500015258789, + "Acc.counter": 0.43810001373291013, + "Acc.sand": 0.7129000091552734, + "Acc.sink": 0.7594999694824218, + "Acc.skyscraper": 0.7029000091552734, + "Acc.fireplace": 0.9315000152587891, + "Acc.refrigerator": 0.8680000305175781, + "Acc.grandstand": 0.759000015258789, + "Acc.path": 0.28719999313354494, + "Acc.stairs": 0.3616999816894531, + "Acc.runway": 0.8156999969482421, + "Acc.case": 0.730999984741211, + "Acc.pool table": 0.9712999725341797, + "Acc.pillow": 0.6890000152587891, + "Acc.screen door": 0.5581000137329102, + "Acc.stairway": 0.5384000015258789, + "Acc.river": 0.22190000534057616, + "Acc.bridge": 0.8372000122070312, + "Acc.bookcase": 0.55, + "Acc.blind": 0.4843000030517578, + "Acc.coffee table": 0.8334999847412109, + "Acc.toilet": 0.8916999816894531, + "Acc.flower": 0.5459000015258789, + "Acc.book": 0.637400016784668, + "Acc.hill": 0.23639999389648436, + "Acc.bench": 0.5420000076293945, + "Acc.countertop": 0.5997000122070313, + "Acc.stove": 0.8033999633789063, + "Acc.palm": 0.6880000305175781, + "Acc.kitchen island": 0.6006000137329102, + "Acc.computer": 0.8777999877929688, + "Acc.swivel chair": 0.6881999969482422, + "Acc.boat": 0.8580000305175781, + "Acc.bar": 0.5938000106811523, + "Acc.arcade machine": 0.5129999923706055, + "Acc.hovel": 0.6083000183105469, + "Acc.bus": 0.9318000030517578, + "Acc.towel": 0.7566999816894531, + "Acc.light": 0.332599983215332, + "Acc.truck": 0.46430000305175784, + "Acc.tower": 0.49380001068115237, + "Acc.chandelier": 0.793499984741211, + "Acc.awning": 0.5013999938964844, + "Acc.streetlight": 0.24360000610351562, + "Acc.booth": 0.40180000305175784, + "Acc.television receiver": 0.7955000305175781, + "Acc.airplane": 0.7063999938964843, + "Acc.dirt track": 0.18459999084472656, + "Acc.apparel": 0.5309999847412109, + "Acc.pole": 0.24819999694824219, + "Acc.land": 0.03319999933242798, + "Acc.bannister": 0.1690999984741211, + "Acc.escalator": 0.6315999984741211, + "Acc.ottoman": 
0.6106000137329102, + "Acc.bottle": 0.33689998626708983, + "Acc.buffet": 0.5838999938964844, + "Acc.poster": 0.37599998474121094, + "Acc.stage": 0.246200008392334, + "Acc.van": 0.5329999923706055, + "Acc.ship": 0.5327000045776367, + "Acc.fountain": 0.21420000076293946, + "Acc.conveyer belt": 0.905199966430664, + "Acc.canopy": 0.395, + "Acc.washer": 0.7404000091552735, + "Acc.plaything": 0.5613000106811523, + "Acc.swimming pool": 0.8387999725341797, + "Acc.stool": 0.442599983215332, + "Acc.barrel": 0.6372000122070313, + "Acc.basket": 0.32470001220703126, + "Acc.waterfall": 0.8220999908447265, + "Acc.tent": 0.9851000213623047, + "Acc.bag": 0.15739999771118163, + "Acc.minibike": 0.6938999938964844, + "Acc.cradle": 0.9754000091552735, + "Acc.oven": 0.5825, + "Acc.ball": 0.5511000061035156, + "Acc.food": 0.6034000015258789, + "Acc.step": 0.10600000381469726, + "Acc.tank": 0.635, + "Acc.trade name": 0.3125, + "Acc.microwave": 0.5129000091552735, + "Acc.pot": 0.45299999237060545, + "Acc.animal": 0.6925, + "Acc.bicycle": 0.7138999938964844, + "Acc.lake": 0.6302000045776367, + "Acc.dishwasher": 0.6658999633789062, + "Acc.screen": 0.9173999786376953, + "Acc.blanket": 0.16200000762939454, + "Acc.sculpture": 0.8261000061035156, + "Acc.hood": 0.5893999862670899, + "Acc.sconce": 0.41880001068115236, + "Acc.vase": 0.41389999389648435, + "Acc.traffic light": 0.4084000015258789, + "Acc.tray": 0.020899999141693115, + "Acc.ashcan": 0.548499984741211, + "Acc.fan": 0.645, + "Acc.pier": 0.4925, + "Acc.crt screen": 0.1075, + "Acc.plate": 0.6483000183105468, + "Acc.monitor": 0.11590000152587891, + "Acc.bulletin board": 0.6309000015258789, + "Acc.shower": 0.00699999988079071, + "Acc.radiator": 0.6416999816894531, + "Acc.glass": 0.10510000228881836, + "Acc.clock": 0.3331999969482422, + "Acc.flag": 0.5070000076293946 + } + }, + "20": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8342, + "mIoU": 0.4938, + "mAcc": 0.6083, + "IoU.wall": 0.7738999938964843, + "IoU.building": 0.8302999877929688, + "IoU.sky": 0.9380000305175781, + "IoU.floor": 0.8245999908447266, + "IoU.tree": 0.7451999664306641, + "IoU.ceiling": 0.8391999816894531, + "IoU.road": 0.8362999725341796, + "IoU.bed ": 0.8891000366210937, + "IoU.windowpane": 0.6263000106811524, + "IoU.grass": 0.6952999877929688, + "IoU.cabinet": 0.6206000137329102, + "IoU.sidewalk": 0.6612000274658203, + "IoU.person": 0.8044999694824219, + "IoU.earth": 0.40040000915527346, + "IoU.door": 0.4947999954223633, + "IoU.table": 0.5988000106811523, + "IoU.mountain": 0.5915999984741211, + "IoU.plant": 0.5261999893188477, + "IoU.curtain": 0.7305000305175782, + "IoU.chair": 0.5586000061035157, + "IoU.car": 0.8416999816894531, + "IoU.water": 0.5808000183105468, + "IoU.painting": 0.6908999633789062, + "IoU.sofa": 0.6869000244140625, + "IoU.shelf": 0.4515999984741211, + "IoU.house": 0.5025, + "IoU.sea": 0.6479000091552735, + "IoU.mirror": 0.6777999877929688, + "IoU.rug": 0.6751000213623047, + "IoU.field": 0.3395999908447266, + "IoU.armchair": 0.4759999847412109, + "IoU.seat": 0.6656999969482422, + "IoU.fence": 0.4366999816894531, + "IoU.desk": 0.47389999389648435, + "IoU.rock": 0.5093000030517578, + "IoU.wardrobe": 0.5768999862670898, + "IoU.lamp": 0.5658000183105468, + "IoU.bathtub": 0.7619999694824219, + "IoU.railing": 0.38709999084472657, + "IoU.cushion": 0.574900016784668, + "IoU.base": 0.3220000076293945, + "IoU.box": 0.23959999084472655, + "IoU.column": 0.49290000915527343, + 
"IoU.signboard": 0.35439998626708985, + "IoU.chest of drawers": 0.3640999984741211, + "IoU.counter": 0.43439998626708987, + "IoU.sand": 0.5709000015258789, + "IoU.sink": 0.6730999755859375, + "IoU.skyscraper": 0.5259999847412109, + "IoU.fireplace": 0.7040000152587891, + "IoU.refrigerator": 0.7447000122070313, + "IoU.grandstand": 0.49540000915527344, + "IoU.path": 0.22969999313354492, + "IoU.stairs": 0.31120000839233397, + "IoU.runway": 0.6722000122070313, + "IoU.case": 0.6638999938964844, + "IoU.pool table": 0.9287000274658204, + "IoU.pillow": 0.5868999862670898, + "IoU.screen door": 0.5070000076293946, + "IoU.stairway": 0.36119998931884767, + "IoU.river": 0.18829999923706053, + "IoU.bridge": 0.6537999725341797, + "IoU.bookcase": 0.3390000152587891, + "IoU.blind": 0.39240001678466796, + "IoU.coffee table": 0.6177000045776367, + "IoU.toilet": 0.7879000091552735, + "IoU.flower": 0.38979999542236327, + "IoU.book": 0.45220001220703127, + "IoU.hill": 0.13390000343322753, + "IoU.bench": 0.5108000183105469, + "IoU.countertop": 0.4859999847412109, + "IoU.stove": 0.6863999938964844, + "IoU.palm": 0.475, + "IoU.kitchen island": 0.40630001068115235, + "IoU.computer": 0.6695999908447265, + "IoU.swivel chair": 0.507400016784668, + "IoU.boat": 0.6858999633789062, + "IoU.bar": 0.6295000076293945, + "IoU.arcade machine": 0.36580001831054687, + "IoU.hovel": 0.5265999984741211, + "IoU.bus": 0.9048999786376953, + "IoU.towel": 0.6231000137329101, + "IoU.light": 0.34080001831054685, + "IoU.truck": 0.32290000915527345, + "IoU.tower": 0.3381999969482422, + "IoU.chandelier": 0.635099983215332, + "IoU.awning": 0.31799999237060544, + "IoU.streetlight": 0.16489999771118163, + "IoU.booth": 0.335, + "IoU.television receiver": 0.6876999664306641, + "IoU.airplane": 0.6318999862670899, + "IoU.dirt track": 0.003499999940395355, + "IoU.apparel": 0.31559999465942384, + "IoU.pole": 0.17940000534057618, + "IoU.land": 0.023599998950958254, + "IoU.bannister": 0.11380000114440918, + "IoU.escalator": 0.5840000152587891, + "IoU.ottoman": 0.46169998168945314, + "IoU.bottle": 0.27860000610351565, + "IoU.buffet": 0.32970001220703127, + "IoU.poster": 0.27920000076293944, + "IoU.stage": 0.15229999542236328, + "IoU.van": 0.3509000015258789, + "IoU.ship": 0.7493000030517578, + "IoU.fountain": 0.18149999618530274, + "IoU.conveyer belt": 0.7573999786376953, + "IoU.canopy": 0.22370000839233398, + "IoU.washer": 0.7362999725341797, + "IoU.plaything": 0.28639999389648435, + "IoU.swimming pool": 0.7106999969482422, + "IoU.stool": 0.34060001373291016, + "IoU.barrel": 0.5386999893188477, + "IoU.basket": 0.22479999542236329, + "IoU.waterfall": 0.5618999862670898, + "IoU.tent": 0.9176000213623047, + "IoU.bag": 0.1413000011444092, + "IoU.minibike": 0.6669999694824219, + "IoU.cradle": 0.819800033569336, + "IoU.oven": 0.22700000762939454, + "IoU.ball": 0.49189998626708986, + "IoU.food": 0.5672000122070312, + "IoU.step": 0.08680000305175781, + "IoU.tank": 0.5516999816894531, + "IoU.trade name": 0.26610000610351564, + "IoU.microwave": 0.4563999938964844, + "IoU.pot": 0.4247999954223633, + "IoU.animal": 0.6654000091552734, + "IoU.bicycle": 0.5552000045776367, + "IoU.lake": 0.5750999832153321, + "IoU.dishwasher": 0.5766999816894531, + "IoU.screen": 0.5688999938964844, + "IoU.blanket": 0.1372000026702881, + "IoU.sculpture": 0.6222000122070312, + "IoU.hood": 0.5868000030517578, + "IoU.sconce": 0.322599983215332, + "IoU.vase": 0.31979999542236326, + "IoU.traffic light": 0.22799999237060548, + "IoU.tray": 0.00949999988079071, + "IoU.ashcan": 
0.4008000183105469, + "IoU.fan": 0.4956999969482422, + "IoU.pier": 0.3858000183105469, + "IoU.crt screen": 0.05309999942779541, + "IoU.plate": 0.4897999954223633, + "IoU.monitor": 0.2868000030517578, + "IoU.bulletin board": 0.4886000061035156, + "IoU.shower": 0.0015000000596046448, + "IoU.radiator": 0.607599983215332, + "IoU.glass": 0.09039999961853028, + "IoU.clock": 0.288700008392334, + "IoU.flag": 0.42020000457763673, + "Acc.wall": 0.8802999877929687, + "Acc.building": 0.9268000030517578, + "Acc.sky": 0.977699966430664, + "Acc.floor": 0.9144999694824218, + "Acc.tree": 0.8663999938964844, + "Acc.ceiling": 0.9205999755859375, + "Acc.road": 0.8998999786376953, + "Acc.bed ": 0.9555999755859375, + "Acc.windowpane": 0.7812999725341797, + "Acc.grass": 0.8001000213623047, + "Acc.cabinet": 0.7301999664306641, + "Acc.sidewalk": 0.800199966430664, + "Acc.person": 0.9197000122070312, + "Acc.earth": 0.5893000030517578, + "Acc.door": 0.6822000122070313, + "Acc.table": 0.7491999816894531, + "Acc.mountain": 0.7091000366210938, + "Acc.plant": 0.6408000183105469, + "Acc.curtain": 0.8523999786376953, + "Acc.chair": 0.6677999877929688, + "Acc.car": 0.9269000244140625, + "Acc.water": 0.7691999816894531, + "Acc.painting": 0.8655999755859375, + "Acc.sofa": 0.8327999877929687, + "Acc.shelf": 0.6563999938964844, + "Acc.house": 0.6955999755859374, + "Acc.sea": 0.7912999725341797, + "Acc.mirror": 0.7680000305175781, + "Acc.rug": 0.7441999816894531, + "Acc.field": 0.537400016784668, + "Acc.armchair": 0.7052999877929688, + "Acc.seat": 0.8370999908447265, + "Acc.fence": 0.591500015258789, + "Acc.desk": 0.6711000061035156, + "Acc.rock": 0.7727999877929688, + "Acc.wardrobe": 0.6951999664306641, + "Acc.lamp": 0.6943000030517578, + "Acc.bathtub": 0.8254000091552735, + "Acc.railing": 0.5497999954223632, + "Acc.cushion": 0.7087999725341797, + "Acc.base": 0.48970001220703124, + "Acc.box": 0.3181999969482422, + "Acc.column": 0.6131999969482422, + "Acc.signboard": 0.4640999984741211, + "Acc.chest of drawers": 0.6172999954223632, + "Acc.counter": 0.5356999969482422, + "Acc.sand": 0.7383999633789062, + "Acc.sink": 0.7372000122070312, + "Acc.skyscraper": 0.6338000106811523, + "Acc.fireplace": 0.8930999755859375, + "Acc.refrigerator": 0.7983000183105469, + "Acc.grandstand": 0.7473999786376954, + "Acc.path": 0.3145000076293945, + "Acc.stairs": 0.42020000457763673, + "Acc.runway": 0.8561000061035157, + "Acc.case": 0.827699966430664, + "Acc.pool table": 0.9662999725341797, + "Acc.pillow": 0.6808999633789062, + "Acc.screen door": 0.5943000030517578, + "Acc.stairway": 0.455099983215332, + "Acc.river": 0.32299999237060545, + "Acc.bridge": 0.7801000213623047, + "Acc.bookcase": 0.5584000015258789, + "Acc.blind": 0.44279998779296875, + "Acc.coffee table": 0.8218000030517578, + "Acc.toilet": 0.8879000091552735, + "Acc.flower": 0.5690000152587891, + "Acc.book": 0.6612999725341797, + "Acc.hill": 0.2297999954223633, + "Acc.bench": 0.6022999954223632, + "Acc.countertop": 0.6461000061035156, + "Acc.stove": 0.8016999816894531, + "Acc.palm": 0.6829000091552735, + "Acc.kitchen island": 0.6505000305175781, + "Acc.computer": 0.7981999969482422, + "Acc.swivel chair": 0.7222000122070312, + "Acc.boat": 0.8491000366210938, + "Acc.bar": 0.7841999816894532, + "Acc.arcade machine": 0.39669998168945314, + "Acc.hovel": 0.6093000030517578, + "Acc.bus": 0.9426000213623047, + "Acc.towel": 0.7465000152587891, + "Acc.light": 0.3754999923706055, + "Acc.truck": 0.45880001068115234, + "Acc.tower": 0.44880001068115233, + "Acc.chandelier": 0.7805999755859375, + 
"Acc.awning": 0.3609000015258789, + "Acc.streetlight": 0.19540000915527345, + "Acc.booth": 0.40430000305175784, + "Acc.television receiver": 0.8223999786376953, + "Acc.airplane": 0.6991999816894531, + "Acc.dirt track": 0.014700000286102294, + "Acc.apparel": 0.45970001220703127, + "Acc.pole": 0.2368000030517578, + "Acc.land": 0.034500000476837156, + "Acc.bannister": 0.15699999809265136, + "Acc.escalator": 0.8079000091552735, + "Acc.ottoman": 0.6077999877929687, + "Acc.bottle": 0.38229999542236326, + "Acc.buffet": 0.39349998474121095, + "Acc.poster": 0.3840999984741211, + "Acc.stage": 0.22920000076293945, + "Acc.van": 0.455099983215332, + "Acc.ship": 0.8005000305175781, + "Acc.fountain": 0.21639999389648437, + "Acc.conveyer belt": 0.9175, + "Acc.canopy": 0.3365999984741211, + "Acc.washer": 0.7543000030517578, + "Acc.plaything": 0.38369998931884763, + "Acc.swimming pool": 0.7930999755859375, + "Acc.stool": 0.4006999969482422, + "Acc.barrel": 0.6483999633789063, + "Acc.basket": 0.30540000915527343, + "Acc.waterfall": 0.6733000183105469, + "Acc.tent": 0.9841999816894531, + "Acc.bag": 0.16139999389648438, + "Acc.minibike": 0.784000015258789, + "Acc.cradle": 0.9683000183105469, + "Acc.oven": 0.5731000137329102, + "Acc.ball": 0.519000015258789, + "Acc.food": 0.6883999633789063, + "Acc.step": 0.11529999732971191, + "Acc.tank": 0.6473999786376953, + "Acc.trade name": 0.3060000038146973, + "Acc.microwave": 0.5115000152587891, + "Acc.pot": 0.49639999389648437, + "Acc.animal": 0.7133999633789062, + "Acc.bicycle": 0.7258999633789063, + "Acc.lake": 0.6047999954223633, + "Acc.dishwasher": 0.6644000244140625, + "Acc.screen": 0.7238999938964844, + "Acc.blanket": 0.14649999618530274, + "Acc.sculpture": 0.7804000091552734, + "Acc.hood": 0.632400016784668, + "Acc.sconce": 0.42069999694824217, + "Acc.vase": 0.41650001525878905, + "Acc.traffic light": 0.3390999984741211, + "Acc.tray": 0.012000000476837159, + "Acc.ashcan": 0.5263999938964844, + "Acc.fan": 0.6536000061035157, + "Acc.pier": 0.5729999923706055, + "Acc.crt screen": 0.10399999618530273, + "Acc.plate": 0.652300033569336, + "Acc.monitor": 0.4856000137329102, + "Acc.bulletin board": 0.6366999816894531, + "Acc.shower": 0.012699999809265138, + "Acc.radiator": 0.6933000183105469, + "Acc.glass": 0.09310000419616699, + "Acc.clock": 0.35900001525878905, + "Acc.flag": 0.4690999984741211 + } + }, + "21": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8359000000000001, + "mIoU": 0.4925, + "mAcc": 0.6056, + "IoU.wall": 0.7786000061035157, + "IoU.building": 0.8312999725341796, + "IoU.sky": 0.94, + "IoU.floor": 0.8225, + "IoU.tree": 0.7563999938964844, + "IoU.ceiling": 0.8412000274658203, + "IoU.road": 0.8338999938964844, + "IoU.bed ": 0.8966999816894531, + "IoU.windowpane": 0.6224000167846679, + "IoU.grass": 0.7029000091552734, + "IoU.cabinet": 0.6286999893188476, + "IoU.sidewalk": 0.6593000030517578, + "IoU.person": 0.8079000091552735, + "IoU.earth": 0.3843999862670898, + "IoU.door": 0.5179000091552735, + "IoU.table": 0.6022000122070312, + "IoU.mountain": 0.5793999862670899, + "IoU.plant": 0.5447999954223632, + "IoU.curtain": 0.7390000152587891, + "IoU.chair": 0.565, + "IoU.car": 0.8463999938964843, + "IoU.water": 0.5802999877929688, + "IoU.painting": 0.7013999938964843, + "IoU.sofa": 0.6756999969482422, + "IoU.shelf": 0.4443000030517578, + "IoU.house": 0.495, + "IoU.sea": 0.6795999908447266, + "IoU.mirror": 0.6791999816894532, + "IoU.rug": 0.6426999664306641, + 
"IoU.field": 0.39630001068115234, + "IoU.armchair": 0.45419998168945314, + "IoU.seat": 0.6688999938964844, + "IoU.fence": 0.47720001220703123, + "IoU.desk": 0.4988000106811523, + "IoU.rock": 0.49689998626708987, + "IoU.wardrobe": 0.5690000152587891, + "IoU.lamp": 0.5683000183105469, + "IoU.bathtub": 0.8301999664306641, + "IoU.railing": 0.39110000610351564, + "IoU.cushion": 0.5838000106811524, + "IoU.base": 0.34950000762939454, + "IoU.box": 0.2384000015258789, + "IoU.column": 0.47720001220703123, + "IoU.signboard": 0.36400001525878906, + "IoU.chest of drawers": 0.35220001220703123, + "IoU.counter": 0.3881999969482422, + "IoU.sand": 0.45689998626708983, + "IoU.sink": 0.6716999816894531, + "IoU.skyscraper": 0.49459999084472656, + "IoU.fireplace": 0.6879000091552734, + "IoU.refrigerator": 0.7523999786376954, + "IoU.grandstand": 0.5270000076293946, + "IoU.path": 0.1940999984741211, + "IoU.stairs": 0.2794000053405762, + "IoU.runway": 0.6111000061035157, + "IoU.case": 0.6224000167846679, + "IoU.pool table": 0.9322000122070313, + "IoU.pillow": 0.5840000152587891, + "IoU.screen door": 0.6043000030517578, + "IoU.stairway": 0.3681999969482422, + "IoU.river": 0.1713999938964844, + "IoU.bridge": 0.5272000122070313, + "IoU.bookcase": 0.33049999237060546, + "IoU.blind": 0.41139999389648435, + "IoU.coffee table": 0.6208000183105469, + "IoU.toilet": 0.7755000305175781, + "IoU.flower": 0.3902000045776367, + "IoU.book": 0.4520999908447266, + "IoU.hill": 0.14489999771118164, + "IoU.bench": 0.4520000076293945, + "IoU.countertop": 0.5402000045776367, + "IoU.stove": 0.7388999938964844, + "IoU.palm": 0.4925, + "IoU.kitchen island": 0.47139999389648435, + "IoU.computer": 0.7523999786376954, + "IoU.swivel chair": 0.538499984741211, + "IoU.boat": 0.6819999694824219, + "IoU.bar": 0.525099983215332, + "IoU.arcade machine": 0.7880999755859375, + "IoU.hovel": 0.5320000076293945, + "IoU.bus": 0.8806999969482422, + "IoU.towel": 0.6366999816894531, + "IoU.light": 0.34099998474121096, + "IoU.truck": 0.2829000091552734, + "IoU.tower": 0.26629999160766604, + "IoU.chandelier": 0.643499984741211, + "IoU.awning": 0.30540000915527343, + "IoU.streetlight": 0.16950000762939454, + "IoU.booth": 0.39599998474121095, + "IoU.television receiver": 0.7229000091552734, + "IoU.airplane": 0.6347000122070312, + "IoU.dirt track": 0.0030000001192092896, + "IoU.apparel": 0.3509999847412109, + "IoU.pole": 0.2031999969482422, + "IoU.land": 0.021300001144409178, + "IoU.bannister": 0.11569999694824219, + "IoU.escalator": 0.55, + "IoU.ottoman": 0.49340000152587893, + "IoU.bottle": 0.3631999969482422, + "IoU.buffet": 0.38110000610351563, + "IoU.poster": 0.30840000152587893, + "IoU.stage": 0.12390000343322755, + "IoU.van": 0.3516999816894531, + "IoU.ship": 0.11729999542236329, + "IoU.fountain": 0.19549999237060547, + "IoU.conveyer belt": 0.762300033569336, + "IoU.canopy": 0.22030000686645507, + "IoU.washer": 0.735, + "IoU.plaything": 0.3927000045776367, + "IoU.swimming pool": 0.750999984741211, + "IoU.stool": 0.3, + "IoU.barrel": 0.37810001373291013, + "IoU.basket": 0.2752000045776367, + "IoU.waterfall": 0.485, + "IoU.tent": 0.9454000091552734, + "IoU.bag": 0.12430000305175781, + "IoU.minibike": 0.6143999862670898, + "IoU.cradle": 0.8388999938964844, + "IoU.oven": 0.49279998779296874, + "IoU.ball": 0.3233000183105469, + "IoU.food": 0.5697000122070313, + "IoU.step": 0.08380000114440918, + "IoU.tank": 0.5704999923706054, + "IoU.trade name": 0.28030000686645506, + "IoU.microwave": 0.8351999664306641, + "IoU.pot": 0.41959999084472654, + "IoU.animal": 
0.6297999954223633, + "IoU.bicycle": 0.5161999893188477, + "IoU.lake": 0.1427000045776367, + "IoU.dishwasher": 0.5068999862670899, + "IoU.screen": 0.5279999923706055, + "IoU.blanket": 0.14979999542236327, + "IoU.sculpture": 0.62, + "IoU.hood": 0.591500015258789, + "IoU.sconce": 0.33360000610351564, + "IoU.vase": 0.31670000076293947, + "IoU.traffic light": 0.26110000610351564, + "IoU.tray": 0.016699999570846558, + "IoU.ashcan": 0.385099983215332, + "IoU.fan": 0.5022000122070313, + "IoU.pier": 0.4225, + "IoU.crt screen": 0.038499999046325686, + "IoU.plate": 0.4779000091552734, + "IoU.monitor": 0.28, + "IoU.bulletin board": 0.35700000762939454, + "IoU.shower": 0.009800000190734863, + "IoU.radiator": 0.5552000045776367, + "IoU.glass": 0.10489999771118164, + "IoU.clock": 0.27510000228881837, + "IoU.flag": 0.5745000076293946, + "Acc.wall": 0.8833000183105468, + "Acc.building": 0.9248999786376954, + "Acc.sky": 0.9773000335693359, + "Acc.floor": 0.9147000122070312, + "Acc.tree": 0.867699966430664, + "Acc.ceiling": 0.9283000183105469, + "Acc.road": 0.9031999969482422, + "Acc.bed ": 0.9552999877929688, + "Acc.windowpane": 0.7712000274658203, + "Acc.grass": 0.8205999755859374, + "Acc.cabinet": 0.735, + "Acc.sidewalk": 0.7981999969482422, + "Acc.person": 0.9212999725341797, + "Acc.earth": 0.5718000030517578, + "Acc.door": 0.7086000061035156, + "Acc.table": 0.7609999847412109, + "Acc.mountain": 0.7076000213623047, + "Acc.plant": 0.6626000213623047, + "Acc.curtain": 0.8562999725341797, + "Acc.chair": 0.6869000244140625, + "Acc.car": 0.9294000244140626, + "Acc.water": 0.7669000244140625, + "Acc.painting": 0.8626000213623047, + "Acc.sofa": 0.8062000274658203, + "Acc.shelf": 0.6127999877929687, + "Acc.house": 0.7330000305175781, + "Acc.sea": 0.865, + "Acc.mirror": 0.7725, + "Acc.rug": 0.725, + "Acc.field": 0.5779999923706055, + "Acc.armchair": 0.7055000305175781, + "Acc.seat": 0.8290000152587891, + "Acc.fence": 0.6513999938964844, + "Acc.desk": 0.6873000335693359, + "Acc.rock": 0.7231999969482422, + "Acc.wardrobe": 0.6925, + "Acc.lamp": 0.7027999877929687, + "Acc.bathtub": 0.8858999633789062, + "Acc.railing": 0.535, + "Acc.cushion": 0.7331999969482422, + "Acc.base": 0.5645000076293946, + "Acc.box": 0.3295000076293945, + "Acc.column": 0.6136999893188476, + "Acc.signboard": 0.4727000045776367, + "Acc.chest of drawers": 0.5761000061035156, + "Acc.counter": 0.49599998474121093, + "Acc.sand": 0.6338000106811523, + "Acc.sink": 0.7394999694824219, + "Acc.skyscraper": 0.5968000030517578, + "Acc.fireplace": 0.8931999969482421, + "Acc.refrigerator": 0.8466999816894532, + "Acc.grandstand": 0.7795999908447265, + "Acc.path": 0.255, + "Acc.stairs": 0.37470001220703125, + "Acc.runway": 0.7823999786376953, + "Acc.case": 0.7887999725341797, + "Acc.pool table": 0.9669000244140625, + "Acc.pillow": 0.6769000244140625, + "Acc.screen door": 0.6983000183105469, + "Acc.stairway": 0.4625, + "Acc.river": 0.276200008392334, + "Acc.bridge": 0.6288000106811523, + "Acc.bookcase": 0.5756999969482421, + "Acc.blind": 0.4518999862670898, + "Acc.coffee table": 0.8145999908447266, + "Acc.toilet": 0.8983000183105468, + "Acc.flower": 0.5679999923706055, + "Acc.book": 0.6320000076293946, + "Acc.hill": 0.2570000076293945, + "Acc.bench": 0.5236000061035156, + "Acc.countertop": 0.6858999633789062, + "Acc.stove": 0.8231999969482422, + "Acc.palm": 0.7136000061035156, + "Acc.kitchen island": 0.6973999786376953, + "Acc.computer": 0.8926000213623047, + "Acc.swivel chair": 0.6991999816894531, + "Acc.boat": 0.8384999847412109, + "Acc.bar": 
0.667300033569336, + "Acc.arcade machine": 0.8477999877929687, + "Acc.hovel": 0.5884000015258789, + "Acc.bus": 0.9301000213623047, + "Acc.towel": 0.7644999694824218, + "Acc.light": 0.36580001831054687, + "Acc.truck": 0.39310001373291015, + "Acc.tower": 0.3690999984741211, + "Acc.chandelier": 0.7841999816894532, + "Acc.awning": 0.36520000457763674, + "Acc.streetlight": 0.20020000457763673, + "Acc.booth": 0.47139999389648435, + "Acc.television receiver": 0.8254000091552735, + "Acc.airplane": 0.7080999755859375, + "Acc.dirt track": 0.013300000429153443, + "Acc.apparel": 0.48270000457763673, + "Acc.pole": 0.2709000015258789, + "Acc.land": 0.03819999933242798, + "Acc.bannister": 0.16530000686645507, + "Acc.escalator": 0.8311000061035156, + "Acc.ottoman": 0.6641999816894532, + "Acc.bottle": 0.6354999923706055, + "Acc.buffet": 0.4613999938964844, + "Acc.poster": 0.39560001373291015, + "Acc.stage": 0.2368000030517578, + "Acc.van": 0.445099983215332, + "Acc.ship": 0.12420000076293945, + "Acc.fountain": 0.21690000534057619, + "Acc.conveyer belt": 0.9213999938964844, + "Acc.canopy": 0.2830999946594238, + "Acc.washer": 0.7598000335693359, + "Acc.plaything": 0.5822000122070312, + "Acc.swimming pool": 0.8141999816894532, + "Acc.stool": 0.37029998779296874, + "Acc.barrel": 0.6459999847412109, + "Acc.basket": 0.3347999954223633, + "Acc.waterfall": 0.5963000106811523, + "Acc.tent": 0.9831999969482422, + "Acc.bag": 0.1390999984741211, + "Acc.minibike": 0.6713999938964844, + "Acc.cradle": 0.9662999725341797, + "Acc.oven": 0.6397999954223633, + "Acc.ball": 0.3375, + "Acc.food": 0.654000015258789, + "Acc.step": 0.11229999542236328, + "Acc.tank": 0.6576999664306641, + "Acc.trade name": 0.3177000045776367, + "Acc.microwave": 0.9027999877929688, + "Acc.pot": 0.4911000061035156, + "Acc.animal": 0.67, + "Acc.bicycle": 0.7222000122070312, + "Acc.lake": 0.15140000343322754, + "Acc.dishwasher": 0.6136999893188476, + "Acc.screen": 0.7191999816894531, + "Acc.blanket": 0.16350000381469726, + "Acc.sculpture": 0.7891000366210937, + "Acc.hood": 0.6551000213623047, + "Acc.sconce": 0.4141999816894531, + "Acc.vase": 0.41759998321533204, + "Acc.traffic light": 0.36509998321533205, + "Acc.tray": 0.01850000023841858, + "Acc.ashcan": 0.5013000106811524, + "Acc.fan": 0.5956999969482422, + "Acc.pier": 0.5670999908447265, + "Acc.crt screen": 0.08960000038146973, + "Acc.plate": 0.6454000091552734, + "Acc.monitor": 0.34450000762939453, + "Acc.bulletin board": 0.4829999923706055, + "Acc.shower": 0.05, + "Acc.radiator": 0.6190000152587891, + "Acc.glass": 0.10920000076293945, + "Acc.clock": 0.34330001831054685, + "Acc.flag": 0.6662000274658203 + } + }, + "22": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8367, + "mIoU": 0.4982, + "mAcc": 0.6082, + "IoU.wall": 0.7798999786376953, + "IoU.building": 0.8305999755859375, + "IoU.sky": 0.9383999633789063, + "IoU.floor": 0.82, + "IoU.tree": 0.7466000366210938, + "IoU.ceiling": 0.8405000305175782, + "IoU.road": 0.8295999908447266, + "IoU.bed ": 0.9001000213623047, + "IoU.windowpane": 0.6165000152587891, + "IoU.grass": 0.6990000152587891, + "IoU.cabinet": 0.6388999938964843, + "IoU.sidewalk": 0.653499984741211, + "IoU.person": 0.8113999938964844, + "IoU.earth": 0.40830001831054685, + "IoU.door": 0.519000015258789, + "IoU.table": 0.6138000106811523, + "IoU.mountain": 0.5990999984741211, + "IoU.plant": 0.5320999908447266, + "IoU.curtain": 0.7380000305175781, + "IoU.chair": 0.5738000106811524, + 
"IoU.car": 0.8462000274658203, + "IoU.water": 0.5641999816894532, + "IoU.painting": 0.7081999969482422, + "IoU.sofa": 0.6798000335693359, + "IoU.shelf": 0.43040000915527343, + "IoU.house": 0.4897999954223633, + "IoU.sea": 0.6694000244140625, + "IoU.mirror": 0.6944999694824219, + "IoU.rug": 0.6379999923706055, + "IoU.field": 0.3940999984741211, + "IoU.armchair": 0.4484000015258789, + "IoU.seat": 0.6729000091552735, + "IoU.fence": 0.47259998321533203, + "IoU.desk": 0.49200000762939455, + "IoU.rock": 0.5202000045776367, + "IoU.wardrobe": 0.5652999877929688, + "IoU.lamp": 0.5641999816894532, + "IoU.bathtub": 0.8462999725341797, + "IoU.railing": 0.3965000152587891, + "IoU.cushion": 0.6006000137329102, + "IoU.base": 0.31290000915527344, + "IoU.box": 0.24159999847412109, + "IoU.column": 0.4822999954223633, + "IoU.signboard": 0.3565999984741211, + "IoU.chest of drawers": 0.35189998626708985, + "IoU.counter": 0.38159999847412107, + "IoU.sand": 0.5227999877929688, + "IoU.sink": 0.7041999816894531, + "IoU.skyscraper": 0.49220001220703125, + "IoU.fireplace": 0.711500015258789, + "IoU.refrigerator": 0.7491999816894531, + "IoU.grandstand": 0.48900001525878906, + "IoU.path": 0.20059999465942382, + "IoU.stairs": 0.2627000045776367, + "IoU.runway": 0.6365000152587891, + "IoU.case": 0.5872999954223633, + "IoU.pool table": 0.9343000030517579, + "IoU.pillow": 0.5733000183105469, + "IoU.screen door": 0.5752999877929688, + "IoU.stairway": 0.40560001373291016, + "IoU.river": 0.15310000419616698, + "IoU.bridge": 0.5683000183105469, + "IoU.bookcase": 0.3479000091552734, + "IoU.blind": 0.4015000152587891, + "IoU.coffee table": 0.64, + "IoU.toilet": 0.845999984741211, + "IoU.flower": 0.43450000762939456, + "IoU.book": 0.43590000152587893, + "IoU.hill": 0.13109999656677246, + "IoU.bench": 0.45380001068115233, + "IoU.countertop": 0.5659999847412109, + "IoU.stove": 0.7638999938964843, + "IoU.palm": 0.48380001068115236, + "IoU.kitchen island": 0.43220001220703125, + "IoU.computer": 0.7108999633789063, + "IoU.swivel chair": 0.55, + "IoU.boat": 0.6570999908447266, + "IoU.bar": 0.5145999908447265, + "IoU.arcade machine": 0.617599983215332, + "IoU.hovel": 0.27469999313354493, + "IoU.bus": 0.9041000366210937, + "IoU.towel": 0.6206999969482422, + "IoU.light": 0.2861000061035156, + "IoU.truck": 0.322599983215332, + "IoU.tower": 0.26, + "IoU.chandelier": 0.6448999786376953, + "IoU.awning": 0.25809999465942385, + "IoU.streetlight": 0.17379999160766602, + "IoU.booth": 0.3265000152587891, + "IoU.television receiver": 0.709800033569336, + "IoU.airplane": 0.6241999816894531, + "IoU.dirt track": 0.030199999809265136, + "IoU.apparel": 0.3181999969482422, + "IoU.pole": 0.17920000076293946, + "IoU.land": 0.055, + "IoU.bannister": 0.06449999809265136, + "IoU.escalator": 0.5856999969482422, + "IoU.ottoman": 0.5088000106811523, + "IoU.bottle": 0.38029998779296875, + "IoU.buffet": 0.4520999908447266, + "IoU.poster": 0.35380001068115235, + "IoU.stage": 0.14260000228881836, + "IoU.van": 0.2979999923706055, + "IoU.ship": 0.7502999877929688, + "IoU.fountain": 0.24569999694824218, + "IoU.conveyer belt": 0.7276000213623047, + "IoU.canopy": 0.2752000045776367, + "IoU.washer": 0.6783000183105469, + "IoU.plaything": 0.354900016784668, + "IoU.swimming pool": 0.77, + "IoU.stool": 0.3763999938964844, + "IoU.barrel": 0.37740001678466795, + "IoU.basket": 0.27790000915527346, + "IoU.waterfall": 0.524000015258789, + "IoU.tent": 0.9543000030517578, + "IoU.bag": 0.15720000267028808, + "IoU.minibike": 0.7208999633789063, + "IoU.cradle": 0.7816999816894531, + 
"IoU.oven": 0.44459999084472657, + "IoU.ball": 0.44810001373291014, + "IoU.food": 0.5454999923706054, + "IoU.step": 0.08399999618530274, + "IoU.tank": 0.5681000137329102, + "IoU.trade name": 0.2528000068664551, + "IoU.microwave": 0.8205000305175781, + "IoU.pot": 0.494900016784668, + "IoU.animal": 0.6611000061035156, + "IoU.bicycle": 0.566500015258789, + "IoU.lake": 0.6468000030517578, + "IoU.dishwasher": 0.6202000045776367, + "IoU.screen": 0.5177999877929688, + "IoU.blanket": 0.17709999084472655, + "IoU.sculpture": 0.6523999786376953, + "IoU.hood": 0.5059000015258789, + "IoU.sconce": 0.27719999313354493, + "IoU.vase": 0.31709999084472656, + "IoU.traffic light": 0.2546999931335449, + "IoU.tray": 0.023599998950958254, + "IoU.ashcan": 0.3502999877929687, + "IoU.fan": 0.42689998626708986, + "IoU.pier": 0.3072999954223633, + "IoU.crt screen": 0.011799999475479127, + "IoU.plate": 0.4986999893188477, + "IoU.monitor": 0.17969999313354493, + "IoU.bulletin board": 0.41009998321533203, + "IoU.shower": 0.008600000143051147, + "IoU.radiator": 0.5541999816894532, + "IoU.glass": 0.10729999542236328, + "IoU.clock": 0.3159000015258789, + "IoU.flag": 0.47880001068115235, + "Acc.wall": 0.89, + "Acc.building": 0.927300033569336, + "Acc.sky": 0.9780999755859375, + "Acc.floor": 0.9081999969482422, + "Acc.tree": 0.8708000183105469, + "Acc.ceiling": 0.9305999755859375, + "Acc.road": 0.9030000305175782, + "Acc.bed ": 0.9588999938964844, + "Acc.windowpane": 0.7725, + "Acc.grass": 0.8009999847412109, + "Acc.cabinet": 0.7502999877929688, + "Acc.sidewalk": 0.7908999633789062, + "Acc.person": 0.9215000152587891, + "Acc.earth": 0.5834999847412109, + "Acc.door": 0.6962999725341796, + "Acc.table": 0.7702999877929687, + "Acc.mountain": 0.7301999664306641, + "Acc.plant": 0.6652999877929687, + "Acc.curtain": 0.847699966430664, + "Acc.chair": 0.6937000274658203, + "Acc.car": 0.9333000183105469, + "Acc.water": 0.7188999938964844, + "Acc.painting": 0.8363999938964843, + "Acc.sofa": 0.826500015258789, + "Acc.shelf": 0.6091999816894531, + "Acc.house": 0.6956999969482421, + "Acc.sea": 0.8808999633789063, + "Acc.mirror": 0.7759999847412109, + "Acc.rug": 0.7233000183105469, + "Acc.field": 0.5888000106811524, + "Acc.armchair": 0.6501000213623047, + "Acc.seat": 0.8427999877929687, + "Acc.fence": 0.6361000061035156, + "Acc.desk": 0.6741000366210937, + "Acc.rock": 0.7277999877929687, + "Acc.wardrobe": 0.6791000366210938, + "Acc.lamp": 0.6731999969482422, + "Acc.bathtub": 0.888499984741211, + "Acc.railing": 0.5218000030517578, + "Acc.cushion": 0.7531999969482421, + "Acc.base": 0.5238000106811523, + "Acc.box": 0.30190000534057615, + "Acc.column": 0.6002999877929688, + "Acc.signboard": 0.4725, + "Acc.chest of drawers": 0.5702999877929688, + "Acc.counter": 0.5175, + "Acc.sand": 0.6825, + "Acc.sink": 0.772300033569336, + "Acc.skyscraper": 0.615, + "Acc.fireplace": 0.9056999969482422, + "Acc.refrigerator": 0.8137999725341797, + "Acc.grandstand": 0.7455999755859375, + "Acc.path": 0.26930000305175783, + "Acc.stairs": 0.3534000015258789, + "Acc.runway": 0.8122000122070312, + "Acc.case": 0.7276000213623047, + "Acc.pool table": 0.9631999969482422, + "Acc.pillow": 0.6536000061035157, + "Acc.screen door": 0.6518000030517578, + "Acc.stairway": 0.5325, + "Acc.river": 0.28969999313354494, + "Acc.bridge": 0.6688999938964844, + "Acc.bookcase": 0.5934999847412109, + "Acc.blind": 0.445, + "Acc.coffee table": 0.7894999694824218, + "Acc.toilet": 0.8958000183105469, + "Acc.flower": 0.5979999923706054, + "Acc.book": 0.6004999923706055, + "Acc.hill": 
0.24139999389648437, + "Acc.bench": 0.5475, + "Acc.countertop": 0.7166999816894531, + "Acc.stove": 0.8329000091552734, + "Acc.palm": 0.6698000335693359, + "Acc.kitchen island": 0.5684000015258789, + "Acc.computer": 0.845999984741211, + "Acc.swivel chair": 0.6980000305175781, + "Acc.boat": 0.8311000061035156, + "Acc.bar": 0.6213000106811524, + "Acc.arcade machine": 0.6588999938964843, + "Acc.hovel": 0.30120000839233396, + "Acc.bus": 0.9433999633789063, + "Acc.towel": 0.7679000091552735, + "Acc.light": 0.29920000076293946, + "Acc.truck": 0.45439998626708983, + "Acc.tower": 0.37189998626708987, + "Acc.chandelier": 0.7698000335693359, + "Acc.awning": 0.29600000381469727, + "Acc.streetlight": 0.205, + "Acc.booth": 0.4279999923706055, + "Acc.television receiver": 0.797300033569336, + "Acc.airplane": 0.6919999694824219, + "Acc.dirt track": 0.13489999771118164, + "Acc.apparel": 0.4386999893188477, + "Acc.pole": 0.23049999237060548, + "Acc.land": 0.07409999847412109, + "Acc.bannister": 0.07869999885559081, + "Acc.escalator": 0.7741999816894531, + "Acc.ottoman": 0.6712000274658203, + "Acc.bottle": 0.5983000183105469, + "Acc.buffet": 0.5309000015258789, + "Acc.poster": 0.5377000045776367, + "Acc.stage": 0.23290000915527342, + "Acc.van": 0.3584999847412109, + "Acc.ship": 0.7926000213623047, + "Acc.fountain": 0.25059999465942384, + "Acc.conveyer belt": 0.9204000091552734, + "Acc.canopy": 0.32880001068115233, + "Acc.washer": 0.7533000183105468, + "Acc.plaything": 0.6140999984741211, + "Acc.swimming pool": 0.8773999786376954, + "Acc.stool": 0.44290000915527344, + "Acc.barrel": 0.6451000213623047, + "Acc.basket": 0.35400001525878905, + "Acc.waterfall": 0.6322999954223633, + "Acc.tent": 0.9780000305175781, + "Acc.bag": 0.18299999237060546, + "Acc.minibike": 0.8076000213623047, + "Acc.cradle": 0.9583999633789062, + "Acc.oven": 0.5279000091552735, + "Acc.ball": 0.4995000076293945, + "Acc.food": 0.6302000045776367, + "Acc.step": 0.10939999580383301, + "Acc.tank": 0.6530000305175782, + "Acc.trade name": 0.27950000762939453, + "Acc.microwave": 0.9038999938964843, + "Acc.pot": 0.5797000122070313, + "Acc.animal": 0.7044000244140625, + "Acc.bicycle": 0.685, + "Acc.lake": 0.6890000152587891, + "Acc.dishwasher": 0.7258999633789063, + "Acc.screen": 0.7241000366210938, + "Acc.blanket": 0.19290000915527344, + "Acc.sculpture": 0.8108000183105468, + "Acc.hood": 0.6325999832153321, + "Acc.sconce": 0.34939998626708985, + "Acc.vase": 0.4391999816894531, + "Acc.traffic light": 0.36959999084472656, + "Acc.tray": 0.02619999885559082, + "Acc.ashcan": 0.5081999969482421, + "Acc.fan": 0.4759000015258789, + "Acc.pier": 0.4259999847412109, + "Acc.crt screen": 0.02940000057220459, + "Acc.plate": 0.6704000091552734, + "Acc.monitor": 0.24709999084472656, + "Acc.bulletin board": 0.5361999893188476, + "Acc.shower": 0.04929999828338623, + "Acc.radiator": 0.6352000045776367, + "Acc.glass": 0.11069999694824219, + "Acc.clock": 0.35119998931884766, + "Acc.flag": 0.5125999832153321 + } + }, + "23": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8362999999999999, + "mIoU": 0.4938, + "mAcc": 0.5938, + "IoU.wall": 0.7758000183105469, + "IoU.building": 0.8319999694824218, + "IoU.sky": 0.9363999938964844, + "IoU.floor": 0.8180999755859375, + "IoU.tree": 0.7427999877929687, + "IoU.ceiling": 0.8373000335693359, + "IoU.road": 0.8290000152587891, + "IoU.bed ": 0.897300033569336, + "IoU.windowpane": 0.6190000152587891, + "IoU.grass": 0.6806999969482422, + 
"IoU.cabinet": 0.6383000183105468, + "IoU.sidewalk": 0.6561000061035156, + "IoU.person": 0.8126999664306641, + "IoU.earth": 0.40110000610351565, + "IoU.door": 0.5172000122070313, + "IoU.table": 0.619900016784668, + "IoU.mountain": 0.5981000137329101, + "IoU.plant": 0.5377000045776367, + "IoU.curtain": 0.747699966430664, + "IoU.chair": 0.5638999938964844, + "IoU.car": 0.8466000366210937, + "IoU.water": 0.5668000030517578, + "IoU.painting": 0.7273999786376953, + "IoU.sofa": 0.6862000274658203, + "IoU.shelf": 0.43720001220703125, + "IoU.house": 0.4797999954223633, + "IoU.sea": 0.6698000335693359, + "IoU.mirror": 0.7116999816894531, + "IoU.rug": 0.6519999694824219, + "IoU.field": 0.3656999969482422, + "IoU.armchair": 0.44729999542236326, + "IoU.seat": 0.6725, + "IoU.fence": 0.41770000457763673, + "IoU.desk": 0.5118999862670899, + "IoU.rock": 0.5133000183105468, + "IoU.wardrobe": 0.5393000030517578, + "IoU.lamp": 0.5702999877929688, + "IoU.bathtub": 0.8529000091552734, + "IoU.railing": 0.4022000122070313, + "IoU.cushion": 0.6061999893188477, + "IoU.base": 0.3295000076293945, + "IoU.box": 0.253700008392334, + "IoU.column": 0.49040000915527343, + "IoU.signboard": 0.37599998474121094, + "IoU.chest of drawers": 0.35630001068115236, + "IoU.counter": 0.38159999847412107, + "IoU.sand": 0.527400016784668, + "IoU.sink": 0.7091000366210938, + "IoU.skyscraper": 0.4940999984741211, + "IoU.fireplace": 0.7122000122070312, + "IoU.refrigerator": 0.7513999938964844, + "IoU.grandstand": 0.49779998779296875, + "IoU.path": 0.20280000686645508, + "IoU.stairs": 0.22149999618530272, + "IoU.runway": 0.6537999725341797, + "IoU.case": 0.5606999969482422, + "IoU.pool table": 0.9337999725341797, + "IoU.pillow": 0.5597999954223633, + "IoU.screen door": 0.7301000213623047, + "IoU.stairway": 0.32, + "IoU.river": 0.15869999885559083, + "IoU.bridge": 0.4697999954223633, + "IoU.bookcase": 0.3431999969482422, + "IoU.blind": 0.39599998474121095, + "IoU.coffee table": 0.6508999633789062, + "IoU.toilet": 0.8520999908447265, + "IoU.flower": 0.37029998779296874, + "IoU.book": 0.4633000183105469, + "IoU.hill": 0.14010000228881836, + "IoU.bench": 0.43560001373291013, + "IoU.countertop": 0.5700999832153321, + "IoU.stove": 0.731500015258789, + "IoU.palm": 0.5088000106811523, + "IoU.kitchen island": 0.445099983215332, + "IoU.computer": 0.6918000030517578, + "IoU.swivel chair": 0.5270000076293946, + "IoU.boat": 0.7412999725341797, + "IoU.bar": 0.5906000137329102, + "IoU.arcade machine": 0.545099983215332, + "IoU.hovel": 0.4166999816894531, + "IoU.bus": 0.9023999786376953, + "IoU.towel": 0.6612999725341797, + "IoU.light": 0.30309999465942383, + "IoU.truck": 0.21829999923706056, + "IoU.tower": 0.18610000610351562, + "IoU.chandelier": 0.6443000030517578, + "IoU.awning": 0.2468000030517578, + "IoU.streetlight": 0.16739999771118164, + "IoU.booth": 0.35139999389648435, + "IoU.television receiver": 0.6962000274658203, + "IoU.airplane": 0.5915999984741211, + "IoU.dirt track": 0.07289999961853028, + "IoU.apparel": 0.34450000762939453, + "IoU.pole": 0.151899995803833, + "IoU.land": 0.02490000009536743, + "IoU.bannister": 0.090600004196167, + "IoU.escalator": 0.6122999954223632, + "IoU.ottoman": 0.49529998779296874, + "IoU.bottle": 0.3193000030517578, + "IoU.buffet": 0.3454000091552734, + "IoU.poster": 0.2539999961853027, + "IoU.stage": 0.16290000915527345, + "IoU.van": 0.3715999984741211, + "IoU.ship": 0.12930000305175782, + "IoU.fountain": 0.15479999542236328, + "IoU.conveyer belt": 0.7291000366210938, + "IoU.canopy": 0.2256999969482422, + 
"IoU.washer": 0.7491999816894531, + "IoU.plaything": 0.37270000457763675, + "IoU.swimming pool": 0.8019000244140625, + "IoU.stool": 0.392599983215332, + "IoU.barrel": 0.5731999969482422, + "IoU.basket": 0.34099998474121096, + "IoU.waterfall": 0.4577000045776367, + "IoU.tent": 0.9580000305175781, + "IoU.bag": 0.1734000015258789, + "IoU.minibike": 0.7120999908447265, + "IoU.cradle": 0.8123000335693359, + "IoU.oven": 0.23639999389648436, + "IoU.ball": 0.5797999954223633, + "IoU.food": 0.5302999877929687, + "IoU.step": 0.08850000381469726, + "IoU.tank": 0.5638000106811524, + "IoU.trade name": 0.2677000045776367, + "IoU.microwave": 0.5277000045776368, + "IoU.pot": 0.48069999694824217, + "IoU.animal": 0.5934000015258789, + "IoU.bicycle": 0.5758000183105468, + "IoU.lake": 0.46029998779296877, + "IoU.dishwasher": 0.6869999694824219, + "IoU.screen": 0.5877999877929687, + "IoU.blanket": 0.14859999656677247, + "IoU.sculpture": 0.6962999725341796, + "IoU.hood": 0.5122999954223633, + "IoU.sconce": 0.2947999954223633, + "IoU.vase": 0.33810001373291015, + "IoU.traffic light": 0.266200008392334, + "IoU.tray": 0.04070000171661377, + "IoU.ashcan": 0.39099998474121095, + "IoU.fan": 0.4102999877929687, + "IoU.pier": 0.2805999946594238, + "IoU.crt screen": 0.0705999994277954, + "IoU.plate": 0.5315000152587891, + "IoU.monitor": 0.5202999877929687, + "IoU.bulletin board": 0.42130001068115236, + "IoU.shower": 0.03059999942779541, + "IoU.radiator": 0.5693999862670899, + "IoU.glass": 0.061100001335144045, + "IoU.clock": 0.29739999771118164, + "IoU.flag": 0.3591999816894531, + "Acc.wall": 0.9013999938964844, + "Acc.building": 0.9325, + "Acc.sky": 0.9777999877929687, + "Acc.floor": 0.9168000030517578, + "Acc.tree": 0.8708000183105469, + "Acc.ceiling": 0.9319999694824219, + "Acc.road": 0.9108000183105469, + "Acc.bed ": 0.9566999816894531, + "Acc.windowpane": 0.7686000061035156, + "Acc.grass": 0.7954000091552734, + "Acc.cabinet": 0.7447000122070313, + "Acc.sidewalk": 0.7816999816894531, + "Acc.person": 0.9180999755859375, + "Acc.earth": 0.5638999938964844, + "Acc.door": 0.668499984741211, + "Acc.table": 0.7733000183105468, + "Acc.mountain": 0.7308999633789063, + "Acc.plant": 0.6579000091552735, + "Acc.curtain": 0.8501999664306641, + "Acc.chair": 0.6651000213623047, + "Acc.car": 0.9277999877929688, + "Acc.water": 0.7277999877929687, + "Acc.painting": 0.845, + "Acc.sofa": 0.8684999847412109, + "Acc.shelf": 0.6252000045776367, + "Acc.house": 0.6322000122070313, + "Acc.sea": 0.8631999969482422, + "Acc.mirror": 0.7801000213623047, + "Acc.rug": 0.7377999877929687, + "Acc.field": 0.5836999893188477, + "Acc.armchair": 0.6295999908447265, + "Acc.seat": 0.8363999938964843, + "Acc.fence": 0.5547000122070312, + "Acc.desk": 0.6929000091552734, + "Acc.rock": 0.6912999725341797, + "Acc.wardrobe": 0.6401000213623047, + "Acc.lamp": 0.6647000122070312, + "Acc.bathtub": 0.8954000091552734, + "Acc.railing": 0.5320000076293945, + "Acc.cushion": 0.734800033569336, + "Acc.base": 0.5125, + "Acc.box": 0.3218000030517578, + "Acc.column": 0.5963999938964843, + "Acc.signboard": 0.4679000091552734, + "Acc.chest of drawers": 0.549099998474121, + "Acc.counter": 0.4866999816894531, + "Acc.sand": 0.7076000213623047, + "Acc.sink": 0.7716000366210938, + "Acc.skyscraper": 0.595, + "Acc.fireplace": 0.8668000030517579, + "Acc.refrigerator": 0.8079000091552735, + "Acc.grandstand": 0.7576999664306641, + "Acc.path": 0.2725, + "Acc.stairs": 0.2922999954223633, + "Acc.runway": 0.8398999786376953, + "Acc.case": 0.7423999786376954, + "Acc.pool table": 
0.9670999908447265, + "Acc.pillow": 0.6313000106811524, + "Acc.screen door": 0.7977999877929688, + "Acc.stairway": 0.46450000762939453, + "Acc.river": 0.3057999992370605, + "Acc.bridge": 0.5361999893188476, + "Acc.bookcase": 0.5734999847412109, + "Acc.blind": 0.43290000915527344, + "Acc.coffee table": 0.7998999786376954, + "Acc.toilet": 0.8883000183105468, + "Acc.flower": 0.49380001068115237, + "Acc.book": 0.6158000183105469, + "Acc.hill": 0.23540000915527343, + "Acc.bench": 0.5004999923706055, + "Acc.countertop": 0.7, + "Acc.stove": 0.8158999633789062, + "Acc.palm": 0.6798999786376954, + "Acc.kitchen island": 0.5781000137329102, + "Acc.computer": 0.7919000244140625, + "Acc.swivel chair": 0.6675, + "Acc.boat": 0.8112999725341797, + "Acc.bar": 0.7073000335693359, + "Acc.arcade machine": 0.575999984741211, + "Acc.hovel": 0.44209999084472656, + "Acc.bus": 0.9263999938964844, + "Acc.towel": 0.7534999847412109, + "Acc.light": 0.3209000015258789, + "Acc.truck": 0.288700008392334, + "Acc.tower": 0.2748999977111816, + "Acc.chandelier": 0.7787000274658203, + "Acc.awning": 0.2727000045776367, + "Acc.streetlight": 0.2034000015258789, + "Acc.booth": 0.3756999969482422, + "Acc.television receiver": 0.7727999877929688, + "Acc.airplane": 0.6415000152587891, + "Acc.dirt track": 0.3060000038146973, + "Acc.apparel": 0.47720001220703123, + "Acc.pole": 0.18670000076293947, + "Acc.land": 0.032699999809265134, + "Acc.bannister": 0.10779999732971192, + "Acc.escalator": 0.8016000366210938, + "Acc.ottoman": 0.6433999633789063, + "Acc.bottle": 0.43380001068115237, + "Acc.buffet": 0.4025, + "Acc.poster": 0.48330001831054686, + "Acc.stage": 0.22280000686645507, + "Acc.van": 0.44459999084472657, + "Acc.ship": 0.1356999969482422, + "Acc.fountain": 0.15640000343322755, + "Acc.conveyer belt": 0.9230999755859375, + "Acc.canopy": 0.24200000762939453, + "Acc.washer": 0.7712999725341797, + "Acc.plaything": 0.46189998626708983, + "Acc.swimming pool": 0.9070999908447266, + "Acc.stool": 0.4829999923706055, + "Acc.barrel": 0.6459999847412109, + "Acc.basket": 0.41639999389648436, + "Acc.waterfall": 0.5731999969482422, + "Acc.tent": 0.9733999633789062, + "Acc.bag": 0.19959999084472657, + "Acc.minibike": 0.7855000305175781, + "Acc.cradle": 0.9533999633789062, + "Acc.oven": 0.5349000167846679, + "Acc.ball": 0.6537999725341797, + "Acc.food": 0.6027999877929687, + "Acc.step": 0.1125, + "Acc.tank": 0.6512999725341797, + "Acc.trade name": 0.2919000053405762, + "Acc.microwave": 0.5727999877929687, + "Acc.pot": 0.5411999893188476, + "Acc.animal": 0.6181000137329101, + "Acc.bicycle": 0.6744999694824219, + "Acc.lake": 0.611599998474121, + "Acc.dishwasher": 0.7258000183105469, + "Acc.screen": 0.705999984741211, + "Acc.blanket": 0.15930000305175782, + "Acc.sculpture": 0.7804000091552734, + "Acc.hood": 0.6356999969482422, + "Acc.sconce": 0.35709999084472654, + "Acc.vase": 0.4186000061035156, + "Acc.traffic light": 0.36080001831054687, + "Acc.tray": 0.04550000190734863, + "Acc.ashcan": 0.5568000030517578, + "Acc.fan": 0.47819999694824217, + "Acc.pier": 0.40759998321533203, + "Acc.crt screen": 0.1018000030517578, + "Acc.plate": 0.6729000091552735, + "Acc.monitor": 0.7155000305175782, + "Acc.bulletin board": 0.4916999816894531, + "Acc.shower": 0.0425, + "Acc.radiator": 0.6120999908447265, + "Acc.glass": 0.06219999790191651, + "Acc.clock": 0.31690000534057616, + "Acc.flag": 0.39919998168945314 + } + }, + "24": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + 
"aAcc": 0.8341, + "mIoU": 0.4844, + "mAcc": 0.5743, + "IoU.wall": 0.7744000244140625, + "IoU.building": 0.8269999694824218, + "IoU.sky": 0.9280999755859375, + "IoU.floor": 0.8123000335693359, + "IoU.tree": 0.7237999725341797, + "IoU.ceiling": 0.8369000244140625, + "IoU.road": 0.8251999664306641, + "IoU.bed ": 0.8906999969482422, + "IoU.windowpane": 0.6165999984741211, + "IoU.grass": 0.6813999938964844, + "IoU.cabinet": 0.6497000122070312, + "IoU.sidewalk": 0.653499984741211, + "IoU.person": 0.8129000091552734, + "IoU.earth": 0.3983000183105469, + "IoU.door": 0.5058000183105469, + "IoU.table": 0.6190999984741211, + "IoU.mountain": 0.5913999938964843, + "IoU.plant": 0.5308000183105469, + "IoU.curtain": 0.7411000061035157, + "IoU.chair": 0.5593000030517579, + "IoU.car": 0.8398999786376953, + "IoU.water": 0.589900016784668, + "IoU.painting": 0.7116999816894531, + "IoU.sofa": 0.6794000244140626, + "IoU.shelf": 0.43400001525878906, + "IoU.house": 0.4840999984741211, + "IoU.sea": 0.6765000152587891, + "IoU.mirror": 0.6848000335693359, + "IoU.rug": 0.6343999862670898, + "IoU.field": 0.3818000030517578, + "IoU.armchair": 0.41400001525878904, + "IoU.seat": 0.6744999694824219, + "IoU.fence": 0.40369998931884765, + "IoU.desk": 0.5054000091552734, + "IoU.rock": 0.5333000183105469, + "IoU.wardrobe": 0.5545000076293946, + "IoU.lamp": 0.5488000106811524, + "IoU.bathtub": 0.8369000244140625, + "IoU.railing": 0.4008000183105469, + "IoU.cushion": 0.5977999877929687, + "IoU.base": 0.2951000022888184, + "IoU.box": 0.25760000228881835, + "IoU.column": 0.4631999969482422, + "IoU.signboard": 0.34720001220703123, + "IoU.chest of drawers": 0.37130001068115237, + "IoU.counter": 0.38119998931884763, + "IoU.sand": 0.47630001068115235, + "IoU.sink": 0.7026000213623047, + "IoU.skyscraper": 0.47560001373291017, + "IoU.fireplace": 0.7194999694824219, + "IoU.refrigerator": 0.7566999816894531, + "IoU.grandstand": 0.5018999862670899, + "IoU.path": 0.21610000610351562, + "IoU.stairs": 0.1581999969482422, + "IoU.runway": 0.6451000213623047, + "IoU.case": 0.5243000030517578, + "IoU.pool table": 0.9305000305175781, + "IoU.pillow": 0.49590000152587893, + "IoU.screen door": 0.7337000274658203, + "IoU.stairway": 0.2984000015258789, + "IoU.river": 0.17440000534057618, + "IoU.bridge": 0.5336000061035157, + "IoU.bookcase": 0.3097999954223633, + "IoU.blind": 0.37810001373291013, + "IoU.coffee table": 0.6468000030517578, + "IoU.toilet": 0.8380999755859375, + "IoU.flower": 0.38299999237060545, + "IoU.book": 0.43189998626708986, + "IoU.hill": 0.13430000305175782, + "IoU.bench": 0.47299999237060547, + "IoU.countertop": 0.5556000137329101, + "IoU.stove": 0.7397000122070313, + "IoU.palm": 0.4508000183105469, + "IoU.kitchen island": 0.46610000610351565, + "IoU.computer": 0.7566999816894531, + "IoU.swivel chair": 0.5275, + "IoU.boat": 0.6265999984741211, + "IoU.bar": 0.575999984741211, + "IoU.arcade machine": 0.41049999237060547, + "IoU.hovel": 0.2827000045776367, + "IoU.bus": 0.9080999755859375, + "IoU.towel": 0.6431999969482421, + "IoU.light": 0.29690000534057615, + "IoU.truck": 0.22430000305175782, + "IoU.tower": 0.16959999084472657, + "IoU.chandelier": 0.6204999923706055, + "IoU.awning": 0.22870000839233398, + "IoU.streetlight": 0.17469999313354492, + "IoU.booth": 0.35630001068115236, + "IoU.television receiver": 0.6894000244140625, + "IoU.airplane": 0.5852999877929688, + "IoU.dirt track": 0.056500000953674315, + "IoU.apparel": 0.3295000076293945, + "IoU.pole": 0.11939999580383301, + "IoU.land": 0.055399999618530274, + "IoU.bannister": 
0.10579999923706054, + "IoU.escalator": 0.5983000183105469, + "IoU.ottoman": 0.4659000015258789, + "IoU.bottle": 0.1875, + "IoU.buffet": 0.35619998931884767, + "IoU.poster": 0.1606999969482422, + "IoU.stage": 0.1352999973297119, + "IoU.van": 0.3025, + "IoU.ship": 0.11069999694824219, + "IoU.fountain": 0.22870000839233398, + "IoU.conveyer belt": 0.7294999694824219, + "IoU.canopy": 0.18690000534057616, + "IoU.washer": 0.7354000091552735, + "IoU.plaything": 0.4433000183105469, + "IoU.swimming pool": 0.7876000213623047, + "IoU.stool": 0.38040000915527344, + "IoU.barrel": 0.5852000045776368, + "IoU.basket": 0.36700000762939455, + "IoU.waterfall": 0.4590000152587891, + "IoU.tent": 0.9508999633789063, + "IoU.bag": 0.16110000610351563, + "IoU.minibike": 0.644000015258789, + "IoU.cradle": 0.7987999725341797, + "IoU.oven": 0.2595000076293945, + "IoU.ball": 0.544000015258789, + "IoU.food": 0.450099983215332, + "IoU.step": 0.09829999923706055, + "IoU.tank": 0.5372000122070313, + "IoU.trade name": 0.2015999984741211, + "IoU.microwave": 0.7662000274658203, + "IoU.pot": 0.47169998168945315, + "IoU.animal": 0.6022000122070312, + "IoU.bicycle": 0.5286000061035157, + "IoU.lake": 0.5143999862670898, + "IoU.dishwasher": 0.6458999633789062, + "IoU.screen": 0.590999984741211, + "IoU.blanket": 0.11779999732971191, + "IoU.sculpture": 0.7045999908447266, + "IoU.hood": 0.5054999923706055, + "IoU.sconce": 0.24940000534057616, + "IoU.vase": 0.3356999969482422, + "IoU.traffic light": 0.24350000381469727, + "IoU.tray": 0.05699999809265137, + "IoU.ashcan": 0.35880001068115236, + "IoU.fan": 0.46619998931884765, + "IoU.pier": 0.28170000076293944, + "IoU.crt screen": 0.03059999942779541, + "IoU.plate": 0.5034000015258789, + "IoU.monitor": 0.5456000137329101, + "IoU.bulletin board": 0.26059999465942385, + "IoU.shower": 0.024800000190734865, + "IoU.radiator": 0.6090999984741211, + "IoU.glass": 0.11649999618530274, + "IoU.clock": 0.30489999771118165, + "IoU.flag": 0.38860000610351564, + "Acc.wall": 0.9008000183105469, + "Acc.building": 0.9470999908447265, + "Acc.sky": 0.9847000122070313, + "Acc.floor": 0.9266000366210938, + "Acc.tree": 0.8202999877929688, + "Acc.ceiling": 0.9372000122070312, + "Acc.road": 0.915199966430664, + "Acc.bed ": 0.9584999847412109, + "Acc.windowpane": 0.7955999755859375, + "Acc.grass": 0.8045999908447266, + "Acc.cabinet": 0.7701000213623047, + "Acc.sidewalk": 0.7865000152587891, + "Acc.person": 0.8906999969482422, + "Acc.earth": 0.5704999923706054, + "Acc.door": 0.6252000045776367, + "Acc.table": 0.7923999786376953, + "Acc.mountain": 0.7288999938964844, + "Acc.plant": 0.6236000061035156, + "Acc.curtain": 0.8466000366210937, + "Acc.chair": 0.6516999816894531, + "Acc.car": 0.9052999877929687, + "Acc.water": 0.7770999908447266, + "Acc.painting": 0.8272000122070312, + "Acc.sofa": 0.8812000274658203, + "Acc.shelf": 0.6263999938964844, + "Acc.house": 0.599900016784668, + "Acc.sea": 0.8719999694824219, + "Acc.mirror": 0.7498999786376953, + "Acc.rug": 0.7187000274658203, + "Acc.field": 0.6118999862670899, + "Acc.armchair": 0.5111999893188477, + "Acc.seat": 0.8452999877929688, + "Acc.fence": 0.5352000045776367, + "Acc.desk": 0.6412999725341797, + "Acc.rock": 0.7012000274658203, + "Acc.wardrobe": 0.6726999664306641, + "Acc.lamp": 0.6202999877929688, + "Acc.bathtub": 0.8716999816894532, + "Acc.railing": 0.5468000030517578, + "Acc.cushion": 0.7041000366210938, + "Acc.base": 0.395099983215332, + "Acc.box": 0.311299991607666, + "Acc.column": 0.5393000030517578, + "Acc.signboard": 0.45549999237060546, + "Acc.chest 
of drawers": 0.5397000122070312, + "Acc.counter": 0.5036999893188476, + "Acc.sand": 0.6136000061035156, + "Acc.sink": 0.7558999633789063, + "Acc.skyscraper": 0.5531999969482422, + "Acc.fireplace": 0.8266999816894531, + "Acc.refrigerator": 0.7880999755859375, + "Acc.grandstand": 0.7455000305175781, + "Acc.path": 0.28569999694824216, + "Acc.stairs": 0.19950000762939454, + "Acc.runway": 0.8483000183105469, + "Acc.case": 0.7208000183105469, + "Acc.pool table": 0.9637999725341797, + "Acc.pillow": 0.5581999969482422, + "Acc.screen door": 0.7841999816894532, + "Acc.stairway": 0.5004000091552734, + "Acc.river": 0.281299991607666, + "Acc.bridge": 0.6097000122070313, + "Acc.bookcase": 0.5018000030517578, + "Acc.blind": 0.40810001373291016, + "Acc.coffee table": 0.7805999755859375, + "Acc.toilet": 0.8719999694824219, + "Acc.flower": 0.5268000030517578, + "Acc.book": 0.6054000091552735, + "Acc.hill": 0.21379999160766602, + "Acc.bench": 0.5136000061035156, + "Acc.countertop": 0.7081999969482422, + "Acc.stove": 0.8098999786376954, + "Acc.palm": 0.5611000061035156, + "Acc.kitchen island": 0.6586000061035157, + "Acc.computer": 0.8595999908447266, + "Acc.swivel chair": 0.635099983215332, + "Acc.boat": 0.6794000244140626, + "Acc.bar": 0.6630999755859375, + "Acc.arcade machine": 0.42790000915527343, + "Acc.hovel": 0.2981999969482422, + "Acc.bus": 0.9316999816894531, + "Acc.towel": 0.7865000152587891, + "Acc.light": 0.3270999908447266, + "Acc.truck": 0.27579999923706056, + "Acc.tower": 0.2143000030517578, + "Acc.chandelier": 0.7612999725341797, + "Acc.awning": 0.24479999542236328, + "Acc.streetlight": 0.221299991607666, + "Acc.booth": 0.3718000030517578, + "Acc.television receiver": 0.7405000305175782, + "Acc.airplane": 0.6275999832153321, + "Acc.dirt track": 0.17079999923706055, + "Acc.apparel": 0.42150001525878905, + "Acc.pole": 0.14239999771118164, + "Acc.land": 0.08229999542236328, + "Acc.bannister": 0.13, + "Acc.escalator": 0.7663999938964844, + "Acc.ottoman": 0.609900016784668, + "Acc.bottle": 0.22239999771118163, + "Acc.buffet": 0.4234000015258789, + "Acc.poster": 0.22649999618530273, + "Acc.stage": 0.18450000762939453, + "Acc.van": 0.3504999923706055, + "Acc.ship": 0.12600000381469725, + "Acc.fountain": 0.23139999389648438, + "Acc.conveyer belt": 0.9247000122070312, + "Acc.canopy": 0.19360000610351563, + "Acc.washer": 0.7616000366210938, + "Acc.plaything": 0.5761000061035156, + "Acc.swimming pool": 0.8937999725341796, + "Acc.stool": 0.4625, + "Acc.barrel": 0.6433999633789063, + "Acc.basket": 0.4686000061035156, + "Acc.waterfall": 0.595, + "Acc.tent": 0.9586000061035156, + "Acc.bag": 0.1868000030517578, + "Acc.minibike": 0.674000015258789, + "Acc.cradle": 0.9456999969482421, + "Acc.oven": 0.34330001831054685, + "Acc.ball": 0.6683999633789063, + "Acc.food": 0.5013999938964844, + "Acc.step": 0.12220000267028809, + "Acc.tank": 0.6118000030517579, + "Acc.trade name": 0.21229999542236327, + "Acc.microwave": 0.8455000305175782, + "Acc.pot": 0.5197999954223633, + "Acc.animal": 0.6204999923706055, + "Acc.bicycle": 0.6129999923706054, + "Acc.lake": 0.5777000045776367, + "Acc.dishwasher": 0.7283000183105469, + "Acc.screen": 0.74, + "Acc.blanket": 0.14260000228881836, + "Acc.sculpture": 0.7427999877929687, + "Acc.hood": 0.5929000091552734, + "Acc.sconce": 0.291299991607666, + "Acc.vase": 0.4115999984741211, + "Acc.traffic light": 0.3128000068664551, + "Acc.tray": 0.07389999866485596, + "Acc.ashcan": 0.48630001068115236, + "Acc.fan": 0.5222999954223633, + "Acc.pier": 0.3770000076293945, + "Acc.crt screen": 
0.04539999961853027, + "Acc.plate": 0.6211000061035157, + "Acc.monitor": 0.6459999847412109, + "Acc.bulletin board": 0.3135000038146973, + "Acc.shower": 0.043400001525878903, + "Acc.radiator": 0.6694999694824219, + "Acc.glass": 0.12390000343322755, + "Acc.clock": 0.3340999984741211, + "Acc.flag": 0.4161000061035156 + } + }, + "25": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8361, + "mIoU": 0.5074000000000001, + "mAcc": 0.6362, + "IoU.wall": 0.7805000305175781, + "IoU.building": 0.8291000366210938, + "IoU.sky": 0.938499984741211, + "IoU.floor": 0.8169999694824219, + "IoU.tree": 0.7427999877929687, + "IoU.ceiling": 0.837699966430664, + "IoU.road": 0.8354000091552735, + "IoU.bed ": 0.902300033569336, + "IoU.windowpane": 0.6245999908447266, + "IoU.grass": 0.6679000091552735, + "IoU.cabinet": 0.6293000030517578, + "IoU.sidewalk": 0.6604000091552734, + "IoU.person": 0.8106999969482422, + "IoU.earth": 0.3843000030517578, + "IoU.door": 0.518400001525879, + "IoU.table": 0.6093999862670898, + "IoU.mountain": 0.5929000091552734, + "IoU.plant": 0.49590000152587893, + "IoU.curtain": 0.7531999969482421, + "IoU.chair": 0.572400016784668, + "IoU.car": 0.8455000305175782, + "IoU.water": 0.6293999862670898, + "IoU.painting": 0.7180000305175781, + "IoU.sofa": 0.7187999725341797, + "IoU.shelf": 0.4168000030517578, + "IoU.house": 0.49900001525878906, + "IoU.sea": 0.7145999908447266, + "IoU.mirror": 0.7023999786376953, + "IoU.rug": 0.6547000122070312, + "IoU.field": 0.37259998321533205, + "IoU.armchair": 0.49080001831054687, + "IoU.seat": 0.6486000061035156, + "IoU.fence": 0.4725, + "IoU.desk": 0.49740001678466794, + "IoU.rock": 0.5241999816894531, + "IoU.wardrobe": 0.5913000106811523, + "IoU.lamp": 0.5820000076293945, + "IoU.bathtub": 0.8625, + "IoU.railing": 0.38790000915527345, + "IoU.cushion": 0.6013000106811524, + "IoU.base": 0.3406999969482422, + "IoU.box": 0.26129999160766604, + "IoU.column": 0.4909000015258789, + "IoU.signboard": 0.35639999389648436, + "IoU.chest of drawers": 0.3468000030517578, + "IoU.counter": 0.3536000061035156, + "IoU.sand": 0.5272000122070313, + "IoU.sink": 0.720999984741211, + "IoU.skyscraper": 0.5125, + "IoU.fireplace": 0.7405000305175782, + "IoU.refrigerator": 0.7247000122070313, + "IoU.grandstand": 0.5193000030517578, + "IoU.path": 0.2511000061035156, + "IoU.stairs": 0.2575, + "IoU.runway": 0.6854000091552734, + "IoU.case": 0.5472999954223633, + "IoU.pool table": 0.9151000213623047, + "IoU.pillow": 0.5804000091552735, + "IoU.screen door": 0.6716000366210938, + "IoU.stairway": 0.32240001678466795, + "IoU.river": 0.1834000015258789, + "IoU.bridge": 0.5977999877929687, + "IoU.bookcase": 0.3143000030517578, + "IoU.blind": 0.41639999389648436, + "IoU.coffee table": 0.5829999923706055, + "IoU.toilet": 0.8068000030517578, + "IoU.flower": 0.3940999984741211, + "IoU.book": 0.45970001220703127, + "IoU.hill": 0.135, + "IoU.bench": 0.5152999877929687, + "IoU.countertop": 0.6009000015258789, + "IoU.stove": 0.7362999725341797, + "IoU.palm": 0.5097999954223633, + "IoU.kitchen island": 0.44740001678466795, + "IoU.computer": 0.757300033569336, + "IoU.swivel chair": 0.5425, + "IoU.boat": 0.6306000137329102, + "IoU.bar": 0.5609000015258789, + "IoU.arcade machine": 0.7512999725341797, + "IoU.hovel": 0.5372999954223633, + "IoU.bus": 0.9068000030517578, + "IoU.towel": 0.6011999893188477, + "IoU.light": 0.4506999969482422, + "IoU.truck": 0.2876000022888184, + "IoU.tower": 0.29940000534057615, + 
"IoU.chandelier": 0.650199966430664, + "IoU.awning": 0.2965999984741211, + "IoU.streetlight": 0.20879999160766602, + "IoU.booth": 0.3997999954223633, + "IoU.television receiver": 0.7151000213623047, + "IoU.airplane": 0.6541999816894531, + "IoU.dirt track": 0.013400000333786011, + "IoU.apparel": 0.3461000061035156, + "IoU.pole": 0.16200000762939454, + "IoU.land": 0.03609999895095825, + "IoU.bannister": 0.138100004196167, + "IoU.escalator": 0.5368000030517578, + "IoU.ottoman": 0.4784000015258789, + "IoU.bottle": 0.32930000305175783, + "IoU.buffet": 0.49599998474121093, + "IoU.poster": 0.2989999961853027, + "IoU.stage": 0.17969999313354493, + "IoU.van": 0.3908000183105469, + "IoU.ship": 0.23729999542236327, + "IoU.fountain": 0.25879999160766604, + "IoU.conveyer belt": 0.7202999877929688, + "IoU.canopy": 0.24729999542236328, + "IoU.washer": 0.7188999938964844, + "IoU.plaything": 0.33299999237060546, + "IoU.swimming pool": 0.6755999755859375, + "IoU.stool": 0.39110000610351564, + "IoU.barrel": 0.5111000061035156, + "IoU.basket": 0.357400016784668, + "IoU.waterfall": 0.4747999954223633, + "IoU.tent": 0.8447000122070313, + "IoU.bag": 0.16639999389648438, + "IoU.minibike": 0.7229000091552734, + "IoU.cradle": 0.8030000305175782, + "IoU.oven": 0.3184000015258789, + "IoU.ball": 0.5358000183105469, + "IoU.food": 0.5454000091552734, + "IoU.step": 0.09329999923706055, + "IoU.tank": 0.5558000183105469, + "IoU.trade name": 0.20280000686645508, + "IoU.microwave": 0.7618000030517578, + "IoU.pot": 0.5052999877929687, + "IoU.animal": 0.6573999786376953, + "IoU.bicycle": 0.5833000183105469, + "IoU.lake": 0.6118999862670899, + "IoU.dishwasher": 0.6841999816894532, + "IoU.screen": 0.5602000045776367, + "IoU.blanket": 0.16870000839233398, + "IoU.sculpture": 0.7256999969482422, + "IoU.hood": 0.5343999862670898, + "IoU.sconce": 0.35630001068115236, + "IoU.vase": 0.3697999954223633, + "IoU.traffic light": 0.29399999618530276, + "IoU.tray": 0.10880000114440919, + "IoU.ashcan": 0.3509000015258789, + "IoU.fan": 0.552599983215332, + "IoU.pier": 0.22389999389648438, + "IoU.crt screen": 0.06739999771118164, + "IoU.plate": 0.5125999832153321, + "IoU.monitor": 0.2625, + "IoU.bulletin board": 0.49639999389648437, + "IoU.shower": 0.012599999904632569, + "IoU.radiator": 0.6058000183105469, + "IoU.glass": 0.16649999618530273, + "IoU.clock": 0.3806999969482422, + "IoU.flag": 0.4256000137329102, + "Acc.wall": 0.8777999877929688, + "Acc.building": 0.9326000213623047, + "Acc.sky": 0.9638999938964844, + "Acc.floor": 0.8948999786376953, + "Acc.tree": 0.8873999786376953, + "Acc.ceiling": 0.905, + "Acc.road": 0.8994999694824218, + "Acc.bed ": 0.9686000061035156, + "Acc.windowpane": 0.7819999694824219, + "Acc.grass": 0.8041000366210938, + "Acc.cabinet": 0.7480999755859375, + "Acc.sidewalk": 0.8070999908447266, + "Acc.person": 0.9280999755859375, + "Acc.earth": 0.533499984741211, + "Acc.door": 0.6694999694824219, + "Acc.table": 0.7605000305175781, + "Acc.mountain": 0.7220999908447265, + "Acc.plant": 0.5781999969482422, + "Acc.curtain": 0.8737000274658203, + "Acc.chair": 0.7116999816894531, + "Acc.car": 0.9283000183105469, + "Acc.water": 0.7630999755859375, + "Acc.painting": 0.8705000305175781, + "Acc.sofa": 0.8498999786376953, + "Acc.shelf": 0.5506000137329101, + "Acc.house": 0.645199966430664, + "Acc.sea": 0.8704000091552735, + "Acc.mirror": 0.8141999816894532, + "Acc.rug": 0.7863999938964844, + "Acc.field": 0.6293999862670898, + "Acc.armchair": 0.6797000122070312, + "Acc.seat": 0.8555999755859375, + "Acc.fence": 0.638400001525879, + 
"Acc.desk": 0.7604000091552734, + "Acc.rock": 0.6990000152587891, + "Acc.wardrobe": 0.7605000305175781, + "Acc.lamp": 0.7537999725341797, + "Acc.bathtub": 0.9055000305175781, + "Acc.railing": 0.48720001220703124, + "Acc.cushion": 0.7237000274658203, + "Acc.base": 0.629900016784668, + "Acc.box": 0.335099983215332, + "Acc.column": 0.5897000122070313, + "Acc.signboard": 0.4490000152587891, + "Acc.chest of drawers": 0.6620999908447266, + "Acc.counter": 0.45479999542236327, + "Acc.sand": 0.7491000366210937, + "Acc.sink": 0.7870999908447266, + "Acc.skyscraper": 0.6245999908447266, + "Acc.fireplace": 0.9280999755859375, + "Acc.refrigerator": 0.831500015258789, + "Acc.grandstand": 0.719800033569336, + "Acc.path": 0.37240001678466794, + "Acc.stairs": 0.3597999954223633, + "Acc.runway": 0.9098999786376953, + "Acc.case": 0.6973999786376953, + "Acc.pool table": 0.9787000274658203, + "Acc.pillow": 0.6770999908447266, + "Acc.screen door": 0.7594000244140625, + "Acc.stairway": 0.49840000152587893, + "Acc.river": 0.36540000915527343, + "Acc.bridge": 0.7120999908447265, + "Acc.bookcase": 0.5245000076293945, + "Acc.blind": 0.46130001068115234, + "Acc.coffee table": 0.8566000366210937, + "Acc.toilet": 0.9086000061035157, + "Acc.flower": 0.5738000106811524, + "Acc.book": 0.6612000274658203, + "Acc.hill": 0.24790000915527344, + "Acc.bench": 0.6108000183105469, + "Acc.countertop": 0.7376000213623047, + "Acc.stove": 0.8608000183105469, + "Acc.palm": 0.7087999725341797, + "Acc.kitchen island": 0.7605999755859375, + "Acc.computer": 0.899000015258789, + "Acc.swivel chair": 0.7462000274658203, + "Acc.boat": 0.8605999755859375, + "Acc.bar": 0.7280000305175781, + "Acc.arcade machine": 0.824800033569336, + "Acc.hovel": 0.5829999923706055, + "Acc.bus": 0.9544999694824219, + "Acc.towel": 0.7579000091552734, + "Acc.light": 0.5718999862670898, + "Acc.truck": 0.39060001373291015, + "Acc.tower": 0.5086999893188476, + "Acc.chandelier": 0.8225, + "Acc.awning": 0.3395999908447266, + "Acc.streetlight": 0.32040000915527345, + "Acc.booth": 0.42439998626708986, + "Acc.television receiver": 0.8433999633789062, + "Acc.airplane": 0.7116999816894531, + "Acc.dirt track": 0.06929999828338623, + "Acc.apparel": 0.4458000183105469, + "Acc.pole": 0.20610000610351562, + "Acc.land": 0.07829999923706055, + "Acc.bannister": 0.18870000839233397, + "Acc.escalator": 0.7512999725341797, + "Acc.ottoman": 0.6941999816894531, + "Acc.bottle": 0.5063000106811524, + "Acc.buffet": 0.6633999633789063, + "Acc.poster": 0.36220001220703124, + "Acc.stage": 0.4343000030517578, + "Acc.van": 0.47869998931884766, + "Acc.ship": 0.24629999160766602, + "Acc.fountain": 0.2670000076293945, + "Acc.conveyer belt": 0.9531999969482422, + "Acc.canopy": 0.26309999465942385, + "Acc.washer": 0.7658000183105469, + "Acc.plaything": 0.4595000076293945, + "Acc.swimming pool": 0.8916999816894531, + "Acc.stool": 0.5588000106811524, + "Acc.barrel": 0.6508000183105469, + "Acc.basket": 0.5140999984741211, + "Acc.waterfall": 0.7129000091552734, + "Acc.tent": 0.9901000213623047, + "Acc.bag": 0.18489999771118165, + "Acc.minibike": 0.8155000305175781, + "Acc.cradle": 0.972699966430664, + "Acc.oven": 0.4622000122070313, + "Acc.ball": 0.6154000091552735, + "Acc.food": 0.6022000122070312, + "Acc.step": 0.12949999809265136, + "Acc.tank": 0.6554000091552734, + "Acc.trade name": 0.2165999984741211, + "Acc.microwave": 0.8491999816894531, + "Acc.pot": 0.6122000122070312, + "Acc.animal": 0.6901000213623046, + "Acc.bicycle": 0.7698999786376953, + "Acc.lake": 0.78, + "Acc.dishwasher": 
0.788499984741211, + "Acc.screen": 0.7284999847412109, + "Acc.blanket": 0.21030000686645509, + "Acc.sculpture": 0.8130999755859375, + "Acc.hood": 0.6804000091552734, + "Acc.sconce": 0.46299999237060546, + "Acc.vase": 0.55, + "Acc.traffic light": 0.5009000015258789, + "Acc.tray": 0.16329999923706054, + "Acc.ashcan": 0.48869998931884767, + "Acc.fan": 0.7663999938964844, + "Acc.pier": 0.4463999938964844, + "Acc.crt screen": 0.17260000228881836, + "Acc.plate": 0.7404000091552735, + "Acc.monitor": 0.3202000045776367, + "Acc.bulletin board": 0.7025, + "Acc.shower": 0.05, + "Acc.radiator": 0.7168000030517578, + "Acc.glass": 0.18110000610351562, + "Acc.clock": 0.44599998474121094, + "Acc.flag": 0.4856999969482422 + } + }, + "26": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8342, + "mIoU": 0.4989, + "mAcc": 0.6272, + "IoU.wall": 0.7780999755859375, + "IoU.building": 0.829800033569336, + "IoU.sky": 0.9376999664306641, + "IoU.floor": 0.8172000122070312, + "IoU.tree": 0.7431999969482422, + "IoU.ceiling": 0.8338999938964844, + "IoU.road": 0.8338999938964844, + "IoU.bed ": 0.8995999908447265, + "IoU.windowpane": 0.6227000045776367, + "IoU.grass": 0.6813999938964844, + "IoU.cabinet": 0.6261000061035156, + "IoU.sidewalk": 0.6483999633789063, + "IoU.person": 0.8066000366210937, + "IoU.earth": 0.3834000015258789, + "IoU.door": 0.5179999923706055, + "IoU.table": 0.6006999969482422, + "IoU.mountain": 0.5831999969482422, + "IoU.plant": 0.4997999954223633, + "IoU.curtain": 0.7516999816894532, + "IoU.chair": 0.5659999847412109, + "IoU.car": 0.8444000244140625, + "IoU.water": 0.6291999816894531, + "IoU.painting": 0.7169999694824218, + "IoU.sofa": 0.7019000244140625, + "IoU.shelf": 0.4093000030517578, + "IoU.house": 0.47150001525878904, + "IoU.sea": 0.7081999969482422, + "IoU.mirror": 0.6944000244140625, + "IoU.rug": 0.6647000122070312, + "IoU.field": 0.35139999389648435, + "IoU.armchair": 0.469900016784668, + "IoU.seat": 0.6370000076293946, + "IoU.fence": 0.44409999847412107, + "IoU.desk": 0.504900016784668, + "IoU.rock": 0.46130001068115234, + "IoU.wardrobe": 0.5802999877929688, + "IoU.lamp": 0.5647999954223633, + "IoU.bathtub": 0.8644999694824219, + "IoU.railing": 0.3827000045776367, + "IoU.cushion": 0.5888999938964844, + "IoU.base": 0.32240001678466795, + "IoU.box": 0.2518000030517578, + "IoU.column": 0.4840999984741211, + "IoU.signboard": 0.3445999908447266, + "IoU.chest of drawers": 0.3345999908447266, + "IoU.counter": 0.3736999893188477, + "IoU.sand": 0.5220999908447266, + "IoU.sink": 0.7125, + "IoU.skyscraper": 0.5354999923706054, + "IoU.fireplace": 0.7302999877929688, + "IoU.refrigerator": 0.7379000091552734, + "IoU.grandstand": 0.510099983215332, + "IoU.path": 0.2409000015258789, + "IoU.stairs": 0.2681999969482422, + "IoU.runway": 0.6952999877929688, + "IoU.case": 0.5650999832153321, + "IoU.pool table": 0.9129000091552735, + "IoU.pillow": 0.5902999877929688, + "IoU.screen door": 0.6022999954223632, + "IoU.stairway": 0.3240000152587891, + "IoU.river": 0.19059999465942382, + "IoU.bridge": 0.5797999954223633, + "IoU.bookcase": 0.32360000610351564, + "IoU.blind": 0.4318000030517578, + "IoU.coffee table": 0.575900001525879, + "IoU.toilet": 0.7963999938964844, + "IoU.flower": 0.38029998779296875, + "IoU.book": 0.4534000015258789, + "IoU.hill": 0.14489999771118164, + "IoU.bench": 0.47689998626708985, + "IoU.countertop": 0.6120999908447265, + "IoU.stove": 0.7455000305175781, + "IoU.palm": 0.5065000152587891, + 
"IoU.kitchen island": 0.4490000152587891, + "IoU.computer": 0.7602999877929687, + "IoU.swivel chair": 0.5290999984741211, + "IoU.boat": 0.6365999984741211, + "IoU.bar": 0.5277000045776368, + "IoU.arcade machine": 0.7404000091552735, + "IoU.hovel": 0.44869998931884764, + "IoU.bus": 0.9104000091552734, + "IoU.towel": 0.6272999954223633, + "IoU.light": 0.447599983215332, + "IoU.truck": 0.22709999084472657, + "IoU.tower": 0.3188999938964844, + "IoU.chandelier": 0.6422000122070313, + "IoU.awning": 0.29700000762939455, + "IoU.streetlight": 0.19649999618530273, + "IoU.booth": 0.40290000915527346, + "IoU.television receiver": 0.6715000152587891, + "IoU.airplane": 0.6181000137329101, + "IoU.dirt track": 0.03, + "IoU.apparel": 0.3461000061035156, + "IoU.pole": 0.15229999542236328, + "IoU.land": 0.032200000286102294, + "IoU.bannister": 0.09130000114440918, + "IoU.escalator": 0.5256999969482422, + "IoU.ottoman": 0.4804999923706055, + "IoU.bottle": 0.3231999969482422, + "IoU.buffet": 0.4518000030517578, + "IoU.poster": 0.21559999465942384, + "IoU.stage": 0.16760000228881836, + "IoU.van": 0.4056999969482422, + "IoU.ship": 0.24700000762939453, + "IoU.fountain": 0.26209999084472657, + "IoU.conveyer belt": 0.6886000061035156, + "IoU.canopy": 0.2738999938964844, + "IoU.washer": 0.7112000274658203, + "IoU.plaything": 0.255, + "IoU.swimming pool": 0.6770999908447266, + "IoU.stool": 0.3763000106811523, + "IoU.barrel": 0.5040999984741211, + "IoU.basket": 0.33529998779296877, + "IoU.waterfall": 0.45810001373291015, + "IoU.tent": 0.8623999786376954, + "IoU.bag": 0.16959999084472657, + "IoU.minibike": 0.7045999908447266, + "IoU.cradle": 0.8130000305175781, + "IoU.oven": 0.34369998931884765, + "IoU.ball": 0.48950000762939455, + "IoU.food": 0.5409000015258789, + "IoU.step": 0.09840000152587891, + "IoU.tank": 0.5566999816894531, + "IoU.trade name": 0.19709999084472657, + "IoU.microwave": 0.7077999877929687, + "IoU.pot": 0.46619998931884765, + "IoU.animal": 0.6466000366210938, + "IoU.bicycle": 0.5695999908447266, + "IoU.lake": 0.5959000015258789, + "IoU.dishwasher": 0.6430000305175781, + "IoU.screen": 0.5365999984741211, + "IoU.blanket": 0.14609999656677247, + "IoU.sculpture": 0.7168000030517578, + "IoU.hood": 0.5554000091552734, + "IoU.sconce": 0.36209999084472655, + "IoU.vase": 0.352400016784668, + "IoU.traffic light": 0.28889999389648435, + "IoU.tray": 0.09680000305175782, + "IoU.ashcan": 0.39110000610351564, + "IoU.fan": 0.524900016784668, + "IoU.pier": 0.23829999923706055, + "IoU.crt screen": 0.06880000114440918, + "IoU.plate": 0.5016999816894532, + "IoU.monitor": 0.208700008392334, + "IoU.bulletin board": 0.49130001068115237, + "IoU.shower": 0.009200000166893006, + "IoU.radiator": 0.5834999847412109, + "IoU.glass": 0.16239999771118163, + "IoU.clock": 0.3334000015258789, + "IoU.flag": 0.4711000061035156, + "Acc.wall": 0.8773999786376954, + "Acc.building": 0.9347000122070312, + "Acc.sky": 0.962699966430664, + "Acc.floor": 0.8955999755859375, + "Acc.tree": 0.8922000122070313, + "Acc.ceiling": 0.9036000061035157, + "Acc.road": 0.8952999877929687, + "Acc.bed ": 0.9680000305175781, + "Acc.windowpane": 0.7769000244140625, + "Acc.grass": 0.8184999847412109, + "Acc.cabinet": 0.7469000244140624, + "Acc.sidewalk": 0.8098000335693359, + "Acc.person": 0.9261000061035156, + "Acc.earth": 0.5363000106811523, + "Acc.door": 0.6598000335693359, + "Acc.table": 0.7561000061035156, + "Acc.mountain": 0.7238999938964844, + "Acc.plant": 0.5870999908447265, + "Acc.curtain": 0.8723000335693359, + "Acc.chair": 0.7104000091552735, + 
"Acc.car": 0.9280999755859375, + "Acc.water": 0.7693000030517578, + "Acc.painting": 0.865999984741211, + "Acc.sofa": 0.8379000091552734, + "Acc.shelf": 0.5379999923706055, + "Acc.house": 0.5968000030517578, + "Acc.sea": 0.8572000122070312, + "Acc.mirror": 0.8037000274658204, + "Acc.rug": 0.7941000366210937, + "Acc.field": 0.5549000167846679, + "Acc.armchair": 0.6637000274658204, + "Acc.seat": 0.842699966430664, + "Acc.fence": 0.6043000030517578, + "Acc.desk": 0.7584999847412109, + "Acc.rock": 0.627400016784668, + "Acc.wardrobe": 0.7559999847412109, + "Acc.lamp": 0.7337999725341797, + "Acc.bathtub": 0.9048999786376953, + "Acc.railing": 0.48150001525878905, + "Acc.cushion": 0.7106999969482422, + "Acc.base": 0.6161000061035157, + "Acc.box": 0.32529998779296876, + "Acc.column": 0.5902000045776368, + "Acc.signboard": 0.43509998321533205, + "Acc.chest of drawers": 0.6386000061035156, + "Acc.counter": 0.479900016784668, + "Acc.sand": 0.7541000366210937, + "Acc.sink": 0.7775, + "Acc.skyscraper": 0.6379000091552735, + "Acc.fireplace": 0.9194000244140625, + "Acc.refrigerator": 0.8522000122070312, + "Acc.grandstand": 0.7102999877929688, + "Acc.path": 0.3529999923706055, + "Acc.stairs": 0.37200000762939456, + "Acc.runway": 0.9130999755859375, + "Acc.case": 0.7216999816894532, + "Acc.pool table": 0.9729000091552734, + "Acc.pillow": 0.6916000366210937, + "Acc.screen door": 0.7034999847412109, + "Acc.stairway": 0.4861000061035156, + "Acc.river": 0.385099983215332, + "Acc.bridge": 0.6816000366210937, + "Acc.bookcase": 0.5413999938964844, + "Acc.blind": 0.48759998321533204, + "Acc.coffee table": 0.8508000183105469, + "Acc.toilet": 0.9037999725341797, + "Acc.flower": 0.5297000122070312, + "Acc.book": 0.6519000244140625, + "Acc.hill": 0.2570000076293945, + "Acc.bench": 0.5720999908447265, + "Acc.countertop": 0.7412000274658204, + "Acc.stove": 0.8633999633789062, + "Acc.palm": 0.7179000091552734, + "Acc.kitchen island": 0.7330000305175781, + "Acc.computer": 0.9145999908447265, + "Acc.swivel chair": 0.7316000366210937, + "Acc.boat": 0.852699966430664, + "Acc.bar": 0.701500015258789, + "Acc.arcade machine": 0.8319000244140625, + "Acc.hovel": 0.4861000061035156, + "Acc.bus": 0.9594999694824219, + "Acc.towel": 0.7758999633789062, + "Acc.light": 0.5668000030517578, + "Acc.truck": 0.3085000038146973, + "Acc.tower": 0.48869998931884767, + "Acc.chandelier": 0.8258000183105468, + "Acc.awning": 0.34569999694824216, + "Acc.streetlight": 0.29399999618530276, + "Acc.booth": 0.44290000915527344, + "Acc.television receiver": 0.8116000366210937, + "Acc.airplane": 0.6783999633789063, + "Acc.dirt track": 0.1090999984741211, + "Acc.apparel": 0.4420000076293945, + "Acc.pole": 0.19610000610351563, + "Acc.land": 0.06880000114440918, + "Acc.bannister": 0.12529999732971192, + "Acc.escalator": 0.7805999755859375, + "Acc.ottoman": 0.6952999877929688, + "Acc.bottle": 0.5081000137329101, + "Acc.buffet": 0.612599983215332, + "Acc.poster": 0.2577000045776367, + "Acc.stage": 0.4141999816894531, + "Acc.van": 0.5054999923706055, + "Acc.ship": 0.25809999465942385, + "Acc.fountain": 0.2696999931335449, + "Acc.conveyer belt": 0.9395999908447266, + "Acc.canopy": 0.2940999984741211, + "Acc.washer": 0.7447000122070313, + "Acc.plaything": 0.3665999984741211, + "Acc.swimming pool": 0.8851000213623047, + "Acc.stool": 0.5481000137329102, + "Acc.barrel": 0.6480000305175782, + "Acc.basket": 0.47150001525878904, + "Acc.waterfall": 0.6752999877929687, + "Acc.tent": 0.9891000366210938, + "Acc.bag": 0.18729999542236328, + "Acc.minibike": 0.8112999725341797, 
+ "Acc.cradle": 0.9730999755859375, + "Acc.oven": 0.5406000137329101, + "Acc.ball": 0.5736999893188477, + "Acc.food": 0.6011999893188477, + "Acc.step": 0.13359999656677246, + "Acc.tank": 0.6562999725341797, + "Acc.trade name": 0.20920000076293946, + "Acc.microwave": 0.7919000244140625, + "Acc.pot": 0.5688000106811524, + "Acc.animal": 0.6788999938964844, + "Acc.bicycle": 0.7677999877929688, + "Acc.lake": 0.7530999755859376, + "Acc.dishwasher": 0.7468000030517579, + "Acc.screen": 0.7323999786376953, + "Acc.blanket": 0.18389999389648437, + "Acc.sculpture": 0.8040000152587891, + "Acc.hood": 0.6972000122070312, + "Acc.sconce": 0.46680000305175784, + "Acc.vase": 0.5443999862670899, + "Acc.traffic light": 0.5036999893188476, + "Acc.tray": 0.13680000305175782, + "Acc.ashcan": 0.5456999969482422, + "Acc.fan": 0.7605999755859375, + "Acc.pier": 0.44360000610351563, + "Acc.crt screen": 0.1759000015258789, + "Acc.plate": 0.7336000061035156, + "Acc.monitor": 0.25809999465942385, + "Acc.bulletin board": 0.6805000305175781, + "Acc.shower": 0.05, + "Acc.radiator": 0.7061000061035156, + "Acc.glass": 0.17690000534057618, + "Acc.clock": 0.38490001678466795, + "Acc.flag": 0.5315999984741211 + } + }, + "27": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8323, + "mIoU": 0.49079999999999996, + "mAcc": 0.6213000000000001, + "IoU.wall": 0.7733000183105468, + "IoU.building": 0.8319000244140625, + "IoU.sky": 0.9380999755859375, + "IoU.floor": 0.8133000183105469, + "IoU.tree": 0.7448000335693359, + "IoU.ceiling": 0.8316000366210937, + "IoU.road": 0.8276000213623047, + "IoU.bed ": 0.8931999969482421, + "IoU.windowpane": 0.6181999969482422, + "IoU.grass": 0.6916000366210937, + "IoU.cabinet": 0.6238000106811523, + "IoU.sidewalk": 0.6416000366210938, + "IoU.person": 0.7987999725341797, + "IoU.earth": 0.3927000045776367, + "IoU.door": 0.49709999084472656, + "IoU.table": 0.5933000183105469, + "IoU.mountain": 0.6040000152587891, + "IoU.plant": 0.5002000045776367, + "IoU.curtain": 0.7231999969482422, + "IoU.chair": 0.5515999984741211, + "IoU.car": 0.8380999755859375, + "IoU.water": 0.6040000152587891, + "IoU.painting": 0.7151999664306641, + "IoU.sofa": 0.7151999664306641, + "IoU.shelf": 0.4109999847412109, + "IoU.house": 0.5579999923706055, + "IoU.sea": 0.6947000122070313, + "IoU.mirror": 0.7001000213623046, + "IoU.rug": 0.6611000061035156, + "IoU.field": 0.38040000915527344, + "IoU.armchair": 0.47580001831054686, + "IoU.seat": 0.6383000183105468, + "IoU.fence": 0.43529998779296875, + "IoU.desk": 0.48720001220703124, + "IoU.rock": 0.48950000762939455, + "IoU.wardrobe": 0.5549000167846679, + "IoU.lamp": 0.5536000061035157, + "IoU.bathtub": 0.8680999755859375, + "IoU.railing": 0.3675, + "IoU.cushion": 0.5920000076293945, + "IoU.base": 0.32119998931884763, + "IoU.box": 0.23049999237060548, + "IoU.column": 0.46360000610351565, + "IoU.signboard": 0.34380001068115235, + "IoU.chest of drawers": 0.35279998779296873, + "IoU.counter": 0.35759998321533204, + "IoU.sand": 0.4465000152587891, + "IoU.sink": 0.6979000091552734, + "IoU.skyscraper": 0.5283000183105468, + "IoU.fireplace": 0.7191999816894531, + "IoU.refrigerator": 0.7626000213623046, + "IoU.grandstand": 0.5188999938964843, + "IoU.path": 0.24610000610351562, + "IoU.stairs": 0.25229999542236325, + "IoU.runway": 0.7087999725341797, + "IoU.case": 0.5363000106811523, + "IoU.pool table": 0.9123999786376953, + "IoU.pillow": 0.5638000106811524, + "IoU.screen door": 0.6531999969482422, + 
"IoU.stairway": 0.31709999084472656, + "IoU.river": 0.19510000228881835, + "IoU.bridge": 0.550099983215332, + "IoU.bookcase": 0.34669998168945315, + "IoU.blind": 0.4381999969482422, + "IoU.coffee table": 0.582400016784668, + "IoU.toilet": 0.7787999725341797, + "IoU.flower": 0.3240000152587891, + "IoU.book": 0.44959999084472657, + "IoU.hill": 0.10949999809265137, + "IoU.bench": 0.47869998931884766, + "IoU.countertop": 0.5863000106811523, + "IoU.stove": 0.7565000152587891, + "IoU.palm": 0.5265999984741211, + "IoU.kitchen island": 0.44310001373291014, + "IoU.computer": 0.7637999725341796, + "IoU.swivel chair": 0.485, + "IoU.boat": 0.6798000335693359, + "IoU.bar": 0.49709999084472656, + "IoU.arcade machine": 0.6381999969482421, + "IoU.hovel": 0.46439998626708984, + "IoU.bus": 0.9018000030517578, + "IoU.towel": 0.6354999923706055, + "IoU.light": 0.43150001525878906, + "IoU.truck": 0.2680999946594238, + "IoU.tower": 0.30670000076293946, + "IoU.chandelier": 0.6268999862670899, + "IoU.awning": 0.28399999618530275, + "IoU.streetlight": 0.19200000762939454, + "IoU.booth": 0.42880001068115237, + "IoU.television receiver": 0.6302999877929687, + "IoU.airplane": 0.5943000030517578, + "IoU.dirt track": 0.03210000038146973, + "IoU.apparel": 0.33869998931884765, + "IoU.pole": 0.13899999618530273, + "IoU.land": 0.04429999828338623, + "IoU.bannister": 0.08789999961853028, + "IoU.escalator": 0.5059000015258789, + "IoU.ottoman": 0.45099998474121095, + "IoU.bottle": 0.34619998931884766, + "IoU.buffet": 0.47080001831054685, + "IoU.poster": 0.2240999984741211, + "IoU.stage": 0.16559999465942382, + "IoU.van": 0.41310001373291017, + "IoU.ship": 0.16799999237060548, + "IoU.fountain": 0.22190000534057616, + "IoU.conveyer belt": 0.6543000030517578, + "IoU.canopy": 0.22059999465942381, + "IoU.washer": 0.7291999816894531, + "IoU.plaything": 0.23299999237060548, + "IoU.swimming pool": 0.5956999969482422, + "IoU.stool": 0.34889999389648435, + "IoU.barrel": 0.49840000152587893, + "IoU.basket": 0.28420000076293944, + "IoU.waterfall": 0.505, + "IoU.tent": 0.8180000305175781, + "IoU.bag": 0.15670000076293944, + "IoU.minibike": 0.6904000091552734, + "IoU.cradle": 0.8048000335693359, + "IoU.oven": 0.39169998168945314, + "IoU.ball": 0.4804999923706055, + "IoU.food": 0.5152000045776367, + "IoU.step": 0.042699999809265136, + "IoU.tank": 0.5354999923706054, + "IoU.trade name": 0.2125, + "IoU.microwave": 0.7781999969482422, + "IoU.pot": 0.3990000152587891, + "IoU.animal": 0.6108000183105469, + "IoU.bicycle": 0.5738999938964844, + "IoU.lake": 0.6656999969482422, + "IoU.dishwasher": 0.6444000244140625, + "IoU.screen": 0.4736000061035156, + "IoU.blanket": 0.17879999160766602, + "IoU.sculpture": 0.6483999633789063, + "IoU.hood": 0.5145999908447265, + "IoU.sconce": 0.3706999969482422, + "IoU.vase": 0.35650001525878905, + "IoU.traffic light": 0.2818000030517578, + "IoU.tray": 0.08289999961853027, + "IoU.ashcan": 0.3793000030517578, + "IoU.fan": 0.5304000091552734, + "IoU.pier": 0.19459999084472657, + "IoU.crt screen": 0.052199997901916505, + "IoU.plate": 0.48689998626708986, + "IoU.monitor": 0.1906999969482422, + "IoU.bulletin board": 0.49150001525878906, + "IoU.shower": 0.01100000023841858, + "IoU.radiator": 0.5704999923706054, + "IoU.glass": 0.12890000343322755, + "IoU.clock": 0.2928000068664551, + "IoU.flag": 0.4347999954223633, + "Acc.wall": 0.8734999847412109, + "Acc.building": 0.9379000091552734, + "Acc.sky": 0.9613999938964843, + "Acc.floor": 0.8906999969482422, + "Acc.tree": 0.8965000152587891, + "Acc.ceiling": 0.9008999633789062, 
+ "Acc.road": 0.887300033569336, + "Acc.bed ": 0.9637000274658203, + "Acc.windowpane": 0.7654000091552734, + "Acc.grass": 0.8172000122070312, + "Acc.cabinet": 0.7454000091552735, + "Acc.sidewalk": 0.8076999664306641, + "Acc.person": 0.9213999938964844, + "Acc.earth": 0.5527000045776367, + "Acc.door": 0.6480000305175782, + "Acc.table": 0.7522000122070313, + "Acc.mountain": 0.734800033569336, + "Acc.plant": 0.579900016784668, + "Acc.curtain": 0.8644999694824219, + "Acc.chair": 0.6938999938964844, + "Acc.car": 0.9298000335693359, + "Acc.water": 0.7416000366210938, + "Acc.painting": 0.8619999694824219, + "Acc.sofa": 0.849800033569336, + "Acc.shelf": 0.5497000122070312, + "Acc.house": 0.6613999938964844, + "Acc.sea": 0.8762999725341797, + "Acc.mirror": 0.8076999664306641, + "Acc.rug": 0.8002999877929687, + "Acc.field": 0.5663000106811523, + "Acc.armchair": 0.673499984741211, + "Acc.seat": 0.8419999694824218, + "Acc.fence": 0.5990999984741211, + "Acc.desk": 0.7613999938964844, + "Acc.rock": 0.6409999847412109, + "Acc.wardrobe": 0.7587999725341796, + "Acc.lamp": 0.7330999755859375, + "Acc.bathtub": 0.9169000244140625, + "Acc.railing": 0.47950000762939454, + "Acc.cushion": 0.7119000244140625, + "Acc.base": 0.6345999908447265, + "Acc.box": 0.29620000839233396, + "Acc.column": 0.5572999954223633, + "Acc.signboard": 0.43740001678466794, + "Acc.chest of drawers": 0.6306000137329102, + "Acc.counter": 0.44990001678466796, + "Acc.sand": 0.7166000366210937, + "Acc.sink": 0.768499984741211, + "Acc.skyscraper": 0.6234999847412109, + "Acc.fireplace": 0.9291000366210938, + "Acc.refrigerator": 0.8669999694824219, + "Acc.grandstand": 0.7086000061035156, + "Acc.path": 0.3761999893188477, + "Acc.stairs": 0.3518000030517578, + "Acc.runway": 0.9308999633789062, + "Acc.case": 0.6679000091552735, + "Acc.pool table": 0.9738999938964844, + "Acc.pillow": 0.6677999877929688, + "Acc.screen door": 0.7473999786376954, + "Acc.stairway": 0.472400016784668, + "Acc.river": 0.36509998321533205, + "Acc.bridge": 0.66, + "Acc.bookcase": 0.5156000137329102, + "Acc.blind": 0.515, + "Acc.coffee table": 0.8548999786376953, + "Acc.toilet": 0.907300033569336, + "Acc.flower": 0.49720001220703125, + "Acc.book": 0.6429000091552735, + "Acc.hill": 0.21719999313354493, + "Acc.bench": 0.5795999908447266, + "Acc.countertop": 0.712699966430664, + "Acc.stove": 0.8751999664306641, + "Acc.palm": 0.749000015258789, + "Acc.kitchen island": 0.7254000091552735, + "Acc.computer": 0.9155999755859375, + "Acc.swivel chair": 0.7202999877929688, + "Acc.boat": 0.8584999847412109, + "Acc.bar": 0.6615000152587891, + "Acc.arcade machine": 0.726500015258789, + "Acc.hovel": 0.5066999816894531, + "Acc.bus": 0.9572000122070312, + "Acc.towel": 0.7783000183105468, + "Acc.light": 0.544000015258789, + "Acc.truck": 0.3681000137329102, + "Acc.tower": 0.5104999923706055, + "Acc.chandelier": 0.8073999786376953, + "Acc.awning": 0.34, + "Acc.streetlight": 0.28600000381469726, + "Acc.booth": 0.45279998779296876, + "Acc.television receiver": 0.7987999725341797, + "Acc.airplane": 0.6569999694824219, + "Acc.dirt track": 0.145, + "Acc.apparel": 0.4366999816894531, + "Acc.pole": 0.17309999465942383, + "Acc.land": 0.08880000114440918, + "Acc.bannister": 0.11609999656677246, + "Acc.escalator": 0.7588999938964843, + "Acc.ottoman": 0.6562999725341797, + "Acc.bottle": 0.5545000076293946, + "Acc.buffet": 0.6372999954223633, + "Acc.poster": 0.27610000610351565, + "Acc.stage": 0.4229000091552734, + "Acc.van": 0.5043999862670898, + "Acc.ship": 0.17780000686645508, + "Acc.fountain": 
0.2297999954223633, + "Acc.conveyer belt": 0.9276999664306641, + "Acc.canopy": 0.2645000076293945, + "Acc.washer": 0.7501999664306641, + "Acc.plaything": 0.3516999816894531, + "Acc.swimming pool": 0.8887999725341796, + "Acc.stool": 0.5070999908447266, + "Acc.barrel": 0.6498999786376953, + "Acc.basket": 0.43189998626708986, + "Acc.waterfall": 0.675, + "Acc.tent": 0.9905999755859375, + "Acc.bag": 0.17299999237060548, + "Acc.minibike": 0.8041999816894532, + "Acc.cradle": 0.9716999816894532, + "Acc.oven": 0.5341999816894532, + "Acc.ball": 0.5681000137329102, + "Acc.food": 0.5695000076293946, + "Acc.step": 0.05889999866485596, + "Acc.tank": 0.654000015258789, + "Acc.trade name": 0.22870000839233398, + "Acc.microwave": 0.8666000366210938, + "Acc.pot": 0.48060001373291017, + "Acc.animal": 0.6409999847412109, + "Acc.bicycle": 0.7405999755859375, + "Acc.lake": 0.7638999938964843, + "Acc.dishwasher": 0.73, + "Acc.screen": 0.7494999694824219, + "Acc.blanket": 0.21850000381469725, + "Acc.sculpture": 0.7911000061035156, + "Acc.hood": 0.668499984741211, + "Acc.sconce": 0.46049999237060546, + "Acc.vase": 0.5525, + "Acc.traffic light": 0.5018999862670899, + "Acc.tray": 0.15239999771118165, + "Acc.ashcan": 0.5227000045776368, + "Acc.fan": 0.7383000183105469, + "Acc.pier": 0.4575, + "Acc.crt screen": 0.15279999732971192, + "Acc.plate": 0.7123999786376953, + "Acc.monitor": 0.22700000762939454, + "Acc.bulletin board": 0.6926000213623047, + "Acc.shower": 0.05, + "Acc.radiator": 0.6877999877929688, + "Acc.glass": 0.13829999923706054, + "Acc.clock": 0.34869998931884766, + "Acc.flag": 0.504000015258789 + } + }, + "28": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8261, + "mIoU": 0.47740000000000005, + "mAcc": 0.6079, + "IoU.wall": 0.7720999908447266, + "IoU.building": 0.8225, + "IoU.sky": 0.9383000183105469, + "IoU.floor": 0.8020999908447266, + "IoU.tree": 0.7423999786376954, + "IoU.ceiling": 0.8284999847412109, + "IoU.road": 0.8266000366210937, + "IoU.bed ": 0.8801000213623047, + "IoU.windowpane": 0.6127000045776367, + "IoU.grass": 0.6829000091552735, + "IoU.cabinet": 0.5959000015258789, + "IoU.sidewalk": 0.6495999908447265, + "IoU.person": 0.7955999755859375, + "IoU.earth": 0.37259998321533205, + "IoU.door": 0.48689998626708986, + "IoU.table": 0.5716999816894531, + "IoU.mountain": 0.5816999816894531, + "IoU.plant": 0.4981999969482422, + "IoU.curtain": 0.7145999908447266, + "IoU.chair": 0.537599983215332, + "IoU.car": 0.8348999786376953, + "IoU.water": 0.5515000152587891, + "IoU.painting": 0.7023000335693359, + "IoU.sofa": 0.674000015258789, + "IoU.shelf": 0.4168000030517578, + "IoU.house": 0.46799999237060547, + "IoU.sea": 0.6470999908447266, + "IoU.mirror": 0.6566999816894531, + "IoU.rug": 0.6541999816894531, + "IoU.field": 0.3233000183105469, + "IoU.armchair": 0.43939998626708987, + "IoU.seat": 0.6238000106811523, + "IoU.fence": 0.4047000122070312, + "IoU.desk": 0.4709000015258789, + "IoU.rock": 0.4558000183105469, + "IoU.wardrobe": 0.514000015258789, + "IoU.lamp": 0.5527999877929688, + "IoU.bathtub": 0.8631999969482422, + "IoU.railing": 0.34169998168945315, + "IoU.cushion": 0.5595999908447266, + "IoU.base": 0.31409999847412107, + "IoU.box": 0.233799991607666, + "IoU.column": 0.46689998626708984, + "IoU.signboard": 0.33560001373291015, + "IoU.chest of drawers": 0.3547999954223633, + "IoU.counter": 0.39299999237060546, + "IoU.sand": 0.41509998321533204, + "IoU.sink": 0.685199966430664, + "IoU.skyscraper": 
0.5313000106811523, + "IoU.fireplace": 0.7466000366210938, + "IoU.refrigerator": 0.7418000030517579, + "IoU.grandstand": 0.512400016784668, + "IoU.path": 0.2571999931335449, + "IoU.stairs": 0.2168000030517578, + "IoU.runway": 0.6968000030517578, + "IoU.case": 0.5261000061035156, + "IoU.pool table": 0.9126000213623047, + "IoU.pillow": 0.530099983215332, + "IoU.screen door": 0.6680999755859375, + "IoU.stairway": 0.29149999618530276, + "IoU.river": 0.18760000228881835, + "IoU.bridge": 0.6848000335693359, + "IoU.bookcase": 0.340099983215332, + "IoU.blind": 0.425, + "IoU.coffee table": 0.5590000152587891, + "IoU.toilet": 0.770199966430664, + "IoU.flower": 0.3028000068664551, + "IoU.book": 0.44610000610351563, + "IoU.hill": 0.12710000038146974, + "IoU.bench": 0.4216999816894531, + "IoU.countertop": 0.596599998474121, + "IoU.stove": 0.6920999908447265, + "IoU.palm": 0.500999984741211, + "IoU.kitchen island": 0.4540999984741211, + "IoU.computer": 0.7411000061035157, + "IoU.swivel chair": 0.4743000030517578, + "IoU.boat": 0.6425, + "IoU.bar": 0.47139999389648435, + "IoU.arcade machine": 0.5916999816894531, + "IoU.hovel": 0.32549999237060545, + "IoU.bus": 0.8580000305175781, + "IoU.towel": 0.6358000183105469, + "IoU.light": 0.40759998321533203, + "IoU.truck": 0.21489999771118165, + "IoU.tower": 0.2953000068664551, + "IoU.chandelier": 0.6291999816894531, + "IoU.awning": 0.2640999984741211, + "IoU.streetlight": 0.19180000305175782, + "IoU.booth": 0.42029998779296873, + "IoU.television receiver": 0.6283000183105468, + "IoU.airplane": 0.5804000091552735, + "IoU.dirt track": 0.0, + "IoU.apparel": 0.34580001831054685, + "IoU.pole": 0.13899999618530273, + "IoU.land": 0.03430000066757202, + "IoU.bannister": 0.09369999885559083, + "IoU.escalator": 0.49509998321533205, + "IoU.ottoman": 0.41819999694824217, + "IoU.bottle": 0.349900016784668, + "IoU.buffet": 0.538499984741211, + "IoU.poster": 0.18319999694824218, + "IoU.stage": 0.20309999465942383, + "IoU.van": 0.4340999984741211, + "IoU.ship": 0.20020000457763673, + "IoU.fountain": 0.21350000381469728, + "IoU.conveyer belt": 0.6708999633789062, + "IoU.canopy": 0.21389999389648437, + "IoU.washer": 0.7469000244140624, + "IoU.plaything": 0.2506999969482422, + "IoU.swimming pool": 0.6168000030517579, + "IoU.stool": 0.3763999938964844, + "IoU.barrel": 0.44240001678466795, + "IoU.basket": 0.291299991607666, + "IoU.waterfall": 0.6118000030517579, + "IoU.tent": 0.7706999969482422, + "IoU.bag": 0.13210000038146974, + "IoU.minibike": 0.6622000122070313, + "IoU.cradle": 0.7898999786376953, + "IoU.oven": 0.3085000038146973, + "IoU.ball": 0.5002999877929688, + "IoU.food": 0.5252000045776367, + "IoU.step": 0.07440000057220458, + "IoU.tank": 0.5384000015258789, + "IoU.trade name": 0.211200008392334, + "IoU.microwave": 0.7645999908447265, + "IoU.pot": 0.4075, + "IoU.animal": 0.6081000137329101, + "IoU.bicycle": 0.5570999908447266, + "IoU.lake": 0.1256999969482422, + "IoU.dishwasher": 0.544099998474121, + "IoU.screen": 0.5311000061035156, + "IoU.blanket": 0.10350000381469726, + "IoU.sculpture": 0.5595999908447266, + "IoU.hood": 0.5204999923706055, + "IoU.sconce": 0.35810001373291017, + "IoU.vase": 0.3356999969482422, + "IoU.traffic light": 0.27579999923706056, + "IoU.tray": 0.07539999961853028, + "IoU.ashcan": 0.39740001678466796, + "IoU.fan": 0.5193999862670898, + "IoU.pier": 0.18190000534057618, + "IoU.crt screen": 0.0496999979019165, + "IoU.plate": 0.4879999923706055, + "IoU.monitor": 0.24489999771118165, + "IoU.bulletin board": 0.505099983215332, + "IoU.shower": 
0.014800000190734863, + "IoU.radiator": 0.5336999893188477, + "IoU.glass": 0.12130000114440918, + "IoU.clock": 0.27790000915527346, + "IoU.flag": 0.5508000183105469, + "Acc.wall": 0.8780999755859376, + "Acc.building": 0.9291999816894532, + "Acc.sky": 0.9619999694824218, + "Acc.floor": 0.8870999908447266, + "Acc.tree": 0.8952999877929687, + "Acc.ceiling": 0.8930000305175781, + "Acc.road": 0.8888999938964843, + "Acc.bed ": 0.9637999725341797, + "Acc.windowpane": 0.7623999786376953, + "Acc.grass": 0.8206999969482421, + "Acc.cabinet": 0.7233999633789062, + "Acc.sidewalk": 0.8095999908447266, + "Acc.person": 0.9179000091552735, + "Acc.earth": 0.5222000122070313, + "Acc.door": 0.6184000015258789, + "Acc.table": 0.7333999633789062, + "Acc.mountain": 0.7227999877929687, + "Acc.plant": 0.575900001525879, + "Acc.curtain": 0.8590000152587891, + "Acc.chair": 0.6798000335693359, + "Acc.car": 0.9279000091552735, + "Acc.water": 0.6855999755859375, + "Acc.painting": 0.8516999816894532, + "Acc.sofa": 0.8294999694824219, + "Acc.shelf": 0.5654999923706054, + "Acc.house": 0.5843000030517578, + "Acc.sea": 0.8262999725341796, + "Acc.mirror": 0.7798999786376953, + "Acc.rug": 0.7755999755859375, + "Acc.field": 0.5213999938964844, + "Acc.armchair": 0.6311000061035156, + "Acc.seat": 0.851500015258789, + "Acc.fence": 0.5586000061035157, + "Acc.desk": 0.7505000305175781, + "Acc.rock": 0.5986000061035156, + "Acc.wardrobe": 0.7244000244140625, + "Acc.lamp": 0.7429000091552734, + "Acc.bathtub": 0.9230000305175782, + "Acc.railing": 0.45490001678466796, + "Acc.cushion": 0.7095999908447266, + "Acc.base": 0.6036999893188476, + "Acc.box": 0.2997999954223633, + "Acc.column": 0.5740999984741211, + "Acc.signboard": 0.42369998931884767, + "Acc.chest of drawers": 0.6290999984741211, + "Acc.counter": 0.4961999893188477, + "Acc.sand": 0.6476000213623047, + "Acc.sink": 0.7668000030517578, + "Acc.skyscraper": 0.6408000183105469, + "Acc.fireplace": 0.9030000305175782, + "Acc.refrigerator": 0.8558000183105469, + "Acc.grandstand": 0.7288999938964844, + "Acc.path": 0.38360000610351563, + "Acc.stairs": 0.31760000228881835, + "Acc.runway": 0.9270999908447266, + "Acc.case": 0.667300033569336, + "Acc.pool table": 0.9744000244140625, + "Acc.pillow": 0.6261999893188477, + "Acc.screen door": 0.7837999725341797, + "Acc.stairway": 0.4527000045776367, + "Acc.river": 0.4556999969482422, + "Acc.bridge": 0.8187999725341797, + "Acc.bookcase": 0.5404000091552734, + "Acc.blind": 0.5118000030517578, + "Acc.coffee table": 0.865, + "Acc.toilet": 0.8944000244140625, + "Acc.flower": 0.48779998779296874, + "Acc.book": 0.6275, + "Acc.hill": 0.21760000228881837, + "Acc.bench": 0.5106000137329102, + "Acc.countertop": 0.7461000061035157, + "Acc.stove": 0.8248999786376953, + "Acc.palm": 0.7058000183105468, + "Acc.kitchen island": 0.7936000061035157, + "Acc.computer": 0.8962999725341797, + "Acc.swivel chair": 0.697699966430664, + "Acc.boat": 0.8434999847412109, + "Acc.bar": 0.5870000076293945, + "Acc.arcade machine": 0.667300033569336, + "Acc.hovel": 0.3536000061035156, + "Acc.bus": 0.9369000244140625, + "Acc.towel": 0.765, + "Acc.light": 0.5045999908447265, + "Acc.truck": 0.2894000053405762, + "Acc.tower": 0.49990001678466794, + "Acc.chandelier": 0.8055999755859375, + "Acc.awning": 0.332599983215332, + "Acc.streetlight": 0.27610000610351565, + "Acc.booth": 0.44669998168945313, + "Acc.television receiver": 0.7666999816894531, + "Acc.airplane": 0.6680999755859375, + "Acc.dirt track": 0.0, + "Acc.apparel": 0.43979999542236325, + "Acc.pole": 0.17600000381469727, + 
"Acc.land": 0.05630000114440918, + "Acc.bannister": 0.13489999771118164, + "Acc.escalator": 0.7011000061035156, + "Acc.ottoman": 0.64, + "Acc.bottle": 0.5784000015258789, + "Acc.buffet": 0.7331999969482422, + "Acc.poster": 0.22930000305175782, + "Acc.stage": 0.44880001068115233, + "Acc.van": 0.5245999908447265, + "Acc.ship": 0.21040000915527343, + "Acc.fountain": 0.22280000686645507, + "Acc.conveyer belt": 0.9526000213623047, + "Acc.canopy": 0.2990999984741211, + "Acc.washer": 0.7730000305175782, + "Acc.plaything": 0.37090000152587893, + "Acc.swimming pool": 0.8637000274658203, + "Acc.stool": 0.4931000137329102, + "Acc.barrel": 0.6509999847412109, + "Acc.basket": 0.3995000076293945, + "Acc.waterfall": 0.8673999786376954, + "Acc.tent": 0.9912000274658204, + "Acc.bag": 0.14569999694824218, + "Acc.minibike": 0.7795999908447265, + "Acc.cradle": 0.9680999755859375, + "Acc.oven": 0.4063999938964844, + "Acc.ball": 0.5545000076293946, + "Acc.food": 0.5747999954223633, + "Acc.step": 0.09880000114440918, + "Acc.tank": 0.6443000030517578, + "Acc.trade name": 0.2265999984741211, + "Acc.microwave": 0.8663999938964844, + "Acc.pot": 0.48700000762939455, + "Acc.animal": 0.6397999954223633, + "Acc.bicycle": 0.717699966430664, + "Acc.lake": 0.14100000381469727, + "Acc.dishwasher": 0.6751000213623047, + "Acc.screen": 0.7863999938964844, + "Acc.blanket": 0.11760000228881835, + "Acc.sculpture": 0.6577999877929688, + "Acc.hood": 0.6951000213623046, + "Acc.sconce": 0.43990001678466795, + "Acc.vase": 0.522400016784668, + "Acc.traffic light": 0.5015999984741211, + "Acc.tray": 0.1361999988555908, + "Acc.ashcan": 0.539099998474121, + "Acc.fan": 0.7083000183105469, + "Acc.pier": 0.43979999542236325, + "Acc.crt screen": 0.13050000190734865, + "Acc.plate": 0.6948999786376953, + "Acc.monitor": 0.28399999618530275, + "Acc.bulletin board": 0.695, + "Acc.shower": 0.05, + "Acc.radiator": 0.6494000244140625, + "Acc.glass": 0.13050000190734865, + "Acc.clock": 0.33029998779296876, + "Acc.flag": 0.6166999816894532 + } + }, + "29": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8201, + "mIoU": 0.46380000000000005, + "mAcc": 0.5957, + "IoU.wall": 0.7594000244140625, + "IoU.building": 0.8216999816894531, + "IoU.sky": 0.9369000244140625, + "IoU.floor": 0.8030999755859375, + "IoU.tree": 0.7384999847412109, + "IoU.ceiling": 0.8243000030517578, + "IoU.road": 0.8262000274658203, + "IoU.bed ": 0.8687999725341797, + "IoU.windowpane": 0.6086999893188476, + "IoU.grass": 0.7016000366210937, + "IoU.cabinet": 0.5740000152587891, + "IoU.sidewalk": 0.6437000274658203, + "IoU.person": 0.7841999816894532, + "IoU.earth": 0.37, + "IoU.door": 0.43209999084472656, + "IoU.table": 0.5513000106811523, + "IoU.mountain": 0.5847000122070313, + "IoU.plant": 0.4909999847412109, + "IoU.curtain": 0.7066000366210937, + "IoU.chair": 0.5183000183105468, + "IoU.car": 0.8161000061035156, + "IoU.water": 0.5427000045776367, + "IoU.painting": 0.6908000183105468, + "IoU.sofa": 0.6437999725341796, + "IoU.shelf": 0.41330001831054686, + "IoU.house": 0.5154000091552734, + "IoU.sea": 0.6436000061035156, + "IoU.mirror": 0.6508000183105469, + "IoU.rug": 0.6666999816894531, + "IoU.field": 0.28889999389648435, + "IoU.armchair": 0.42540000915527343, + "IoU.seat": 0.6081000137329101, + "IoU.fence": 0.38099998474121094, + "IoU.desk": 0.48450000762939455, + "IoU.rock": 0.4602000045776367, + "IoU.wardrobe": 0.5156999969482422, + "IoU.lamp": 0.542400016784668, + "IoU.bathtub": 
0.842699966430664, + "IoU.railing": 0.3425, + "IoU.cushion": 0.5466999816894531, + "IoU.base": 0.30309999465942383, + "IoU.box": 0.22840000152587892, + "IoU.column": 0.4477000045776367, + "IoU.signboard": 0.33799999237060546, + "IoU.chest of drawers": 0.3213999938964844, + "IoU.counter": 0.34900001525878904, + "IoU.sand": 0.35810001373291017, + "IoU.sink": 0.6665000152587891, + "IoU.skyscraper": 0.5325, + "IoU.fireplace": 0.7284999847412109, + "IoU.refrigerator": 0.6673999786376953, + "IoU.grandstand": 0.5106999969482422, + "IoU.path": 0.2168000030517578, + "IoU.stairs": 0.22299999237060547, + "IoU.runway": 0.7243000030517578, + "IoU.case": 0.4954999923706055, + "IoU.pool table": 0.9130000305175782, + "IoU.pillow": 0.5202999877929687, + "IoU.screen door": 0.5811000061035156, + "IoU.stairway": 0.2903000068664551, + "IoU.river": 0.18270000457763672, + "IoU.bridge": 0.6520999908447266, + "IoU.bookcase": 0.30139999389648436, + "IoU.blind": 0.3920000076293945, + "IoU.coffee table": 0.5418999862670898, + "IoU.toilet": 0.8122000122070312, + "IoU.flower": 0.36709999084472655, + "IoU.book": 0.42520000457763674, + "IoU.hill": 0.07309999942779541, + "IoU.bench": 0.3868000030517578, + "IoU.countertop": 0.5711000061035156, + "IoU.stove": 0.6940000152587891, + "IoU.palm": 0.46799999237060547, + "IoU.kitchen island": 0.3468000030517578, + "IoU.computer": 0.6338000106811523, + "IoU.swivel chair": 0.47400001525878904, + "IoU.boat": 0.6281000137329101, + "IoU.bar": 0.48389999389648436, + "IoU.arcade machine": 0.4581999969482422, + "IoU.hovel": 0.25659999847412107, + "IoU.bus": 0.8601000213623047, + "IoU.towel": 0.5829000091552734, + "IoU.light": 0.43490001678466794, + "IoU.truck": 0.16579999923706054, + "IoU.tower": 0.32040000915527345, + "IoU.chandelier": 0.6031999969482422, + "IoU.awning": 0.2996999931335449, + "IoU.streetlight": 0.18450000762939453, + "IoU.booth": 0.46669998168945315, + "IoU.television receiver": 0.6066999816894532, + "IoU.airplane": 0.5583000183105469, + "IoU.dirt track": 0.10659999847412109, + "IoU.apparel": 0.32060001373291014, + "IoU.pole": 0.13199999809265137, + "IoU.land": 0.06550000190734863, + "IoU.bannister": 0.09199999809265137, + "IoU.escalator": 0.32209999084472657, + "IoU.ottoman": 0.3938999938964844, + "IoU.bottle": 0.1534000015258789, + "IoU.buffet": 0.4779000091552734, + "IoU.poster": 0.2564999961853027, + "IoU.stage": 0.1881999969482422, + "IoU.van": 0.39720001220703127, + "IoU.ship": 0.6709999847412109, + "IoU.fountain": 0.208700008392334, + "IoU.conveyer belt": 0.575999984741211, + "IoU.canopy": 0.21610000610351562, + "IoU.washer": 0.7419000244140626, + "IoU.plaything": 0.2352000045776367, + "IoU.swimming pool": 0.5908000183105468, + "IoU.stool": 0.33610000610351565, + "IoU.barrel": 0.33779998779296877, + "IoU.basket": 0.23030000686645508, + "IoU.waterfall": 0.6733999633789063, + "IoU.tent": 0.8562000274658204, + "IoU.bag": 0.09590000152587891, + "IoU.minibike": 0.674000015258789, + "IoU.cradle": 0.8036000061035157, + "IoU.oven": 0.2797999954223633, + "IoU.ball": 0.3745000076293945, + "IoU.food": 0.5170000076293946, + "IoU.step": 0.09319999694824219, + "IoU.tank": 0.47200000762939454, + "IoU.trade name": 0.22030000686645507, + "IoU.microwave": 0.6872000122070312, + "IoU.pot": 0.33060001373291015, + "IoU.animal": 0.6184999847412109, + "IoU.bicycle": 0.5695000076293946, + "IoU.lake": 0.15550000190734864, + "IoU.dishwasher": 0.5011999893188477, + "IoU.screen": 0.5379999923706055, + "IoU.blanket": 0.14869999885559082, + "IoU.sculpture": 0.5215999984741211, + "IoU.hood": 
0.47709999084472654, + "IoU.sconce": 0.3240999984741211, + "IoU.vase": 0.32029998779296875, + "IoU.traffic light": 0.2727000045776367, + "IoU.tray": 0.057699999809265136, + "IoU.ashcan": 0.38360000610351563, + "IoU.fan": 0.5118999862670899, + "IoU.pier": 0.19860000610351564, + "IoU.crt screen": 0.02569999933242798, + "IoU.plate": 0.47009998321533203, + "IoU.monitor": 0.18559999465942384, + "IoU.bulletin board": 0.4772999954223633, + "IoU.shower": 0.009599999785423278, + "IoU.radiator": 0.5141999816894531, + "IoU.glass": 0.09229999542236328, + "IoU.clock": 0.2528000068664551, + "IoU.flag": 0.605099983215332, + "Acc.wall": 0.8684999847412109, + "Acc.building": 0.9280000305175782, + "Acc.sky": 0.9595999908447266, + "Acc.floor": 0.8858000183105469, + "Acc.tree": 0.892300033569336, + "Acc.ceiling": 0.8912999725341797, + "Acc.road": 0.8833000183105468, + "Acc.bed ": 0.9611000061035156, + "Acc.windowpane": 0.7738999938964843, + "Acc.grass": 0.8338999938964844, + "Acc.cabinet": 0.6976000213623047, + "Acc.sidewalk": 0.8180999755859375, + "Acc.person": 0.9152999877929687, + "Acc.earth": 0.5231000137329102, + "Acc.door": 0.5645999908447266, + "Acc.table": 0.7223000335693359, + "Acc.mountain": 0.7351999664306641, + "Acc.plant": 0.5843000030517578, + "Acc.curtain": 0.8462000274658203, + "Acc.chair": 0.6644000244140625, + "Acc.car": 0.9213999938964844, + "Acc.water": 0.6762999725341797, + "Acc.painting": 0.8491000366210938, + "Acc.sofa": 0.8180000305175781, + "Acc.shelf": 0.5754999923706055, + "Acc.house": 0.6473999786376953, + "Acc.sea": 0.7912999725341797, + "Acc.mirror": 0.763499984741211, + "Acc.rug": 0.7844000244140625, + "Acc.field": 0.41580001831054686, + "Acc.armchair": 0.6081999969482422, + "Acc.seat": 0.8218000030517578, + "Acc.fence": 0.5570999908447266, + "Acc.desk": 0.7837999725341797, + "Acc.rock": 0.6052999877929688, + "Acc.wardrobe": 0.6954000091552734, + "Acc.lamp": 0.7279000091552734, + "Acc.bathtub": 0.8929000091552735, + "Acc.railing": 0.44540000915527345, + "Acc.cushion": 0.6894999694824219, + "Acc.base": 0.5609000015258789, + "Acc.box": 0.3039999961853027, + "Acc.column": 0.569900016784668, + "Acc.signboard": 0.42520000457763674, + "Acc.chest of drawers": 0.638400001525879, + "Acc.counter": 0.44729999542236326, + "Acc.sand": 0.6147999954223633, + "Acc.sink": 0.7487000274658203, + "Acc.skyscraper": 0.6225, + "Acc.fireplace": 0.8880000305175781, + "Acc.refrigerator": 0.7963999938964844, + "Acc.grandstand": 0.7369999694824219, + "Acc.path": 0.3164999961853027, + "Acc.stairs": 0.35509998321533204, + "Acc.runway": 0.9701000213623047, + "Acc.case": 0.6683000183105469, + "Acc.pool table": 0.9712000274658203, + "Acc.pillow": 0.5979999923706054, + "Acc.screen door": 0.6577999877929688, + "Acc.stairway": 0.43939998626708987, + "Acc.river": 0.47580001831054686, + "Acc.bridge": 0.8630000305175781, + "Acc.bookcase": 0.4825, + "Acc.blind": 0.44229999542236326, + "Acc.coffee table": 0.8633999633789062, + "Acc.toilet": 0.8962999725341797, + "Acc.flower": 0.5534000015258789, + "Acc.book": 0.6034999847412109, + "Acc.hill": 0.1386999988555908, + "Acc.bench": 0.48330001831054686, + "Acc.countertop": 0.712699966430664, + "Acc.stove": 0.8109999847412109, + "Acc.palm": 0.729800033569336, + "Acc.kitchen island": 0.7965000152587891, + "Acc.computer": 0.7665000152587891, + "Acc.swivel chair": 0.6769000244140625, + "Acc.boat": 0.8556999969482422, + "Acc.bar": 0.6691000366210937, + "Acc.arcade machine": 0.5186000061035156, + "Acc.hovel": 0.26059999465942385, + "Acc.bus": 0.9266999816894531, + "Acc.towel": 
0.7263999938964844, + "Acc.light": 0.5256999969482422, + "Acc.truck": 0.24069999694824218, + "Acc.tower": 0.520099983215332, + "Acc.chandelier": 0.7883999633789063, + "Acc.awning": 0.37799999237060544, + "Acc.streetlight": 0.255, + "Acc.booth": 0.5159000015258789, + "Acc.television receiver": 0.7654000091552734, + "Acc.airplane": 0.6508999633789062, + "Acc.dirt track": 0.19209999084472656, + "Acc.apparel": 0.4218000030517578, + "Acc.pole": 0.16870000839233398, + "Acc.land": 0.10710000038146973, + "Acc.bannister": 0.1452000045776367, + "Acc.escalator": 0.402599983215332, + "Acc.ottoman": 0.6145000076293945, + "Acc.bottle": 0.17739999771118165, + "Acc.buffet": 0.6702999877929687, + "Acc.poster": 0.3153000068664551, + "Acc.stage": 0.47639999389648435, + "Acc.van": 0.47299999237060547, + "Acc.ship": 0.6959999847412109, + "Acc.fountain": 0.22030000686645507, + "Acc.conveyer belt": 0.9298999786376954, + "Acc.canopy": 0.3104999923706055, + "Acc.washer": 0.7641999816894531, + "Acc.plaything": 0.35439998626708985, + "Acc.swimming pool": 0.8487000274658203, + "Acc.stool": 0.49630001068115237, + "Acc.barrel": 0.6566000366210938, + "Acc.basket": 0.33740001678466797, + "Acc.waterfall": 0.8691000366210937, + "Acc.tent": 0.9944000244140625, + "Acc.bag": 0.10649999618530273, + "Acc.minibike": 0.7927999877929688, + "Acc.cradle": 0.972699966430664, + "Acc.oven": 0.4609000015258789, + "Acc.ball": 0.4215999984741211, + "Acc.food": 0.5786999893188477, + "Acc.step": 0.12520000457763672, + "Acc.tank": 0.5690000152587891, + "Acc.trade name": 0.2409000015258789, + "Acc.microwave": 0.7805999755859375, + "Acc.pot": 0.3845000076293945, + "Acc.animal": 0.6559999847412109, + "Acc.bicycle": 0.7436000061035156, + "Acc.lake": 0.20110000610351564, + "Acc.dishwasher": 0.6113000106811524, + "Acc.screen": 0.7518000030517578, + "Acc.blanket": 0.1725, + "Acc.sculpture": 0.6225, + "Acc.hood": 0.6124000167846679, + "Acc.sconce": 0.3990000152587891, + "Acc.vase": 0.5033000183105468, + "Acc.traffic light": 0.4972999954223633, + "Acc.tray": 0.10100000381469726, + "Acc.ashcan": 0.5409999847412109, + "Acc.fan": 0.7283999633789062, + "Acc.pier": 0.46169998168945314, + "Acc.crt screen": 0.09220000267028809, + "Acc.plate": 0.6358000183105469, + "Acc.monitor": 0.21989999771118163, + "Acc.bulletin board": 0.7086000061035156, + "Acc.shower": 0.05309999942779541, + "Acc.radiator": 0.6511000061035156, + "Acc.glass": 0.09770000457763672, + "Acc.clock": 0.30809999465942384, + "Acc.flag": 0.6694000244140625 + } + }, + "30": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8148000000000001, + "mIoU": 0.4551, + "mAcc": 0.5852, + "IoU.wall": 0.7554000091552734, + "IoU.building": 0.8176999664306641, + "IoU.sky": 0.9373000335693359, + "IoU.floor": 0.8026999664306641, + "IoU.tree": 0.7258999633789063, + "IoU.ceiling": 0.8241000366210938, + "IoU.road": 0.8091999816894532, + "IoU.bed ": 0.8634999847412109, + "IoU.windowpane": 0.6088000106811523, + "IoU.grass": 0.6691000366210937, + "IoU.cabinet": 0.5738000106811524, + "IoU.sidewalk": 0.6275999832153321, + "IoU.person": 0.7719999694824219, + "IoU.earth": 0.3429000091552734, + "IoU.door": 0.4291999816894531, + "IoU.table": 0.5595999908447266, + "IoU.mountain": 0.5768999862670898, + "IoU.plant": 0.49520000457763674, + "IoU.curtain": 0.7172000122070312, + "IoU.chair": 0.5004000091552734, + "IoU.car": 0.8105000305175781, + "IoU.water": 0.5309999847412109, + "IoU.painting": 0.6818000030517578, + "IoU.sofa": 0.6175, 
+ "IoU.shelf": 0.4268000030517578, + "IoU.house": 0.5091999816894531, + "IoU.sea": 0.5497000122070312, + "IoU.mirror": 0.64, + "IoU.rug": 0.6669000244140625, + "IoU.field": 0.2596999931335449, + "IoU.armchair": 0.3504000091552734, + "IoU.seat": 0.5959000015258789, + "IoU.fence": 0.32880001068115233, + "IoU.desk": 0.43740001678466794, + "IoU.rock": 0.4686000061035156, + "IoU.wardrobe": 0.520099983215332, + "IoU.lamp": 0.5152000045776367, + "IoU.bathtub": 0.8283999633789062, + "IoU.railing": 0.31989999771118166, + "IoU.cushion": 0.5052000045776367, + "IoU.base": 0.3089999961853027, + "IoU.box": 0.22639999389648438, + "IoU.column": 0.4475, + "IoU.signboard": 0.3243999862670898, + "IoU.chest of drawers": 0.31540000915527344, + "IoU.counter": 0.2902000045776367, + "IoU.sand": 0.38029998779296875, + "IoU.sink": 0.6508999633789062, + "IoU.skyscraper": 0.6304000091552734, + "IoU.fireplace": 0.6912000274658203, + "IoU.refrigerator": 0.6858000183105468, + "IoU.grandstand": 0.42880001068115237, + "IoU.path": 0.2359000015258789, + "IoU.stairs": 0.18170000076293946, + "IoU.runway": 0.6976000213623047, + "IoU.case": 0.4784999847412109, + "IoU.pool table": 0.9091999816894532, + "IoU.pillow": 0.489900016784668, + "IoU.screen door": 0.6279000091552734, + "IoU.stairway": 0.23190000534057617, + "IoU.river": 0.15270000457763672, + "IoU.bridge": 0.6370999908447266, + "IoU.bookcase": 0.3047999954223633, + "IoU.blind": 0.36400001525878906, + "IoU.coffee table": 0.5129999923706055, + "IoU.toilet": 0.8080000305175781, + "IoU.flower": 0.3559000015258789, + "IoU.book": 0.412400016784668, + "IoU.hill": 0.0653000020980835, + "IoU.bench": 0.3802000045776367, + "IoU.countertop": 0.539099998474121, + "IoU.stove": 0.6837999725341797, + "IoU.palm": 0.45040000915527345, + "IoU.kitchen island": 0.35439998626708985, + "IoU.computer": 0.6437999725341796, + "IoU.swivel chair": 0.4308000183105469, + "IoU.boat": 0.581500015258789, + "IoU.bar": 0.5097000122070312, + "IoU.arcade machine": 0.4438000106811523, + "IoU.hovel": 0.44040000915527344, + "IoU.bus": 0.8072000122070313, + "IoU.towel": 0.5752000045776368, + "IoU.light": 0.40810001373291016, + "IoU.truck": 0.2560000038146973, + "IoU.tower": 0.3315000152587891, + "IoU.chandelier": 0.5786999893188477, + "IoU.awning": 0.276299991607666, + "IoU.streetlight": 0.17639999389648436, + "IoU.booth": 0.4325, + "IoU.television receiver": 0.6111999893188477, + "IoU.airplane": 0.5790999984741211, + "IoU.dirt track": 0.105600004196167, + "IoU.apparel": 0.3168000030517578, + "IoU.pole": 0.13350000381469726, + "IoU.land": 0.07199999809265137, + "IoU.bannister": 0.05429999828338623, + "IoU.escalator": 0.2627000045776367, + "IoU.ottoman": 0.40279998779296877, + "IoU.bottle": 0.24829999923706056, + "IoU.buffet": 0.5861000061035156, + "IoU.poster": 0.30989999771118165, + "IoU.stage": 0.13149999618530273, + "IoU.van": 0.3893000030517578, + "IoU.ship": 0.8920999908447266, + "IoU.fountain": 0.1386999988555908, + "IoU.conveyer belt": 0.6587000274658203, + "IoU.canopy": 0.24239999771118165, + "IoU.washer": 0.6983000183105469, + "IoU.plaything": 0.205, + "IoU.swimming pool": 0.5540000152587891, + "IoU.stool": 0.2965999984741211, + "IoU.barrel": 0.5356999969482422, + "IoU.basket": 0.17879999160766602, + "IoU.waterfall": 0.5438000106811524, + "IoU.tent": 0.8794000244140625, + "IoU.bag": 0.13829999923706054, + "IoU.minibike": 0.5968999862670898, + "IoU.cradle": 0.7909999847412109, + "IoU.oven": 0.1634000015258789, + "IoU.ball": 0.4966999816894531, + "IoU.food": 0.5256999969482422, + "IoU.step": 
0.11270000457763672, + "IoU.tank": 0.5109999847412109, + "IoU.trade name": 0.19799999237060548, + "IoU.microwave": 0.3654999923706055, + "IoU.pot": 0.3502999877929687, + "IoU.animal": 0.5418999862670898, + "IoU.bicycle": 0.5243999862670898, + "IoU.lake": 0.5663999938964843, + "IoU.dishwasher": 0.42270000457763673, + "IoU.screen": 0.5793999862670899, + "IoU.blanket": 0.09819999694824219, + "IoU.sculpture": 0.49529998779296874, + "IoU.hood": 0.5045000076293945, + "IoU.sconce": 0.26540000915527345, + "IoU.vase": 0.2919000053405762, + "IoU.traffic light": 0.2525, + "IoU.tray": 0.03319999933242798, + "IoU.ashcan": 0.35279998779296873, + "IoU.fan": 0.47139999389648435, + "IoU.pier": 0.23280000686645508, + "IoU.crt screen": 0.027200000286102297, + "IoU.plate": 0.4659000015258789, + "IoU.monitor": 0.1477999973297119, + "IoU.bulletin board": 0.39279998779296876, + "IoU.shower": 0.016299999952316284, + "IoU.radiator": 0.5479000091552735, + "IoU.glass": 0.06739999771118164, + "IoU.clock": 0.2475, + "IoU.flag": 0.32599998474121095, + "Acc.wall": 0.8687999725341797, + "Acc.building": 0.9211000061035156, + "Acc.sky": 0.9604000091552735, + "Acc.floor": 0.889000015258789, + "Acc.tree": 0.884000015258789, + "Acc.ceiling": 0.8854000091552734, + "Acc.road": 0.8729000091552734, + "Acc.bed ": 0.9597000122070313, + "Acc.windowpane": 0.767300033569336, + "Acc.grass": 0.8190000152587891, + "Acc.cabinet": 0.7108999633789063, + "Acc.sidewalk": 0.8112999725341797, + "Acc.person": 0.91, + "Acc.earth": 0.4743000030517578, + "Acc.door": 0.581500015258789, + "Acc.table": 0.7140000152587891, + "Acc.mountain": 0.7279000091552734, + "Acc.plant": 0.594900016784668, + "Acc.curtain": 0.8569999694824219, + "Acc.chair": 0.6437999725341796, + "Acc.car": 0.9187000274658204, + "Acc.water": 0.6454000091552734, + "Acc.painting": 0.8294999694824219, + "Acc.sofa": 0.8219999694824218, + "Acc.shelf": 0.5943000030517578, + "Acc.house": 0.639900016784668, + "Acc.sea": 0.7445999908447266, + "Acc.mirror": 0.7602999877929687, + "Acc.rug": 0.7641999816894531, + "Acc.field": 0.4143000030517578, + "Acc.armchair": 0.4804000091552734, + "Acc.seat": 0.8233000183105469, + "Acc.fence": 0.4518000030517578, + "Acc.desk": 0.7433999633789062, + "Acc.rock": 0.6206999969482422, + "Acc.wardrobe": 0.7448999786376953, + "Acc.lamp": 0.7040000152587891, + "Acc.bathtub": 0.8997000122070312, + "Acc.railing": 0.42889999389648437, + "Acc.cushion": 0.6345999908447265, + "Acc.base": 0.5831999969482422, + "Acc.box": 0.281200008392334, + "Acc.column": 0.5493999862670899, + "Acc.signboard": 0.41119998931884766, + "Acc.chest of drawers": 0.6229000091552734, + "Acc.counter": 0.3990999984741211, + "Acc.sand": 0.6002999877929688, + "Acc.sink": 0.7323999786376953, + "Acc.skyscraper": 0.795, + "Acc.fireplace": 0.8693000030517578, + "Acc.refrigerator": 0.8205000305175781, + "Acc.grandstand": 0.7343000030517578, + "Acc.path": 0.37020000457763674, + "Acc.stairs": 0.2781999969482422, + "Acc.runway": 0.9083999633789063, + "Acc.case": 0.6595999908447265, + "Acc.pool table": 0.9716999816894532, + "Acc.pillow": 0.5743999862670899, + "Acc.screen door": 0.7362999725341797, + "Acc.stairway": 0.35380001068115235, + "Acc.river": 0.40450000762939453, + "Acc.bridge": 0.7965000152587891, + "Acc.bookcase": 0.4647000122070313, + "Acc.blind": 0.4097999954223633, + "Acc.coffee table": 0.8570999908447265, + "Acc.toilet": 0.8920999908447266, + "Acc.flower": 0.5190999984741211, + "Acc.book": 0.5834000015258789, + "Acc.hill": 0.11569999694824219, + "Acc.bench": 0.4906000137329102, + 
"Acc.countertop": 0.699800033569336, + "Acc.stove": 0.8172000122070312, + "Acc.palm": 0.7, + "Acc.kitchen island": 0.7555999755859375, + "Acc.computer": 0.8004000091552734, + "Acc.swivel chair": 0.5943999862670899, + "Acc.boat": 0.8411000061035157, + "Acc.bar": 0.715, + "Acc.arcade machine": 0.5052000045776367, + "Acc.hovel": 0.5036000061035156, + "Acc.bus": 0.9169000244140625, + "Acc.towel": 0.7323000335693359, + "Acc.light": 0.47869998931884766, + "Acc.truck": 0.34889999389648435, + "Acc.tower": 0.43439998626708987, + "Acc.chandelier": 0.7525, + "Acc.awning": 0.3327000045776367, + "Acc.streetlight": 0.2338999938964844, + "Acc.booth": 0.5315999984741211, + "Acc.television receiver": 0.7541999816894531, + "Acc.airplane": 0.6766000366210938, + "Acc.dirt track": 0.1515999984741211, + "Acc.apparel": 0.4402000045776367, + "Acc.pole": 0.15729999542236328, + "Acc.land": 0.13300000190734862, + "Acc.bannister": 0.09310000419616699, + "Acc.escalator": 0.3014999961853027, + "Acc.ottoman": 0.6459999847412109, + "Acc.bottle": 0.32209999084472657, + "Acc.buffet": 0.7705999755859375, + "Acc.poster": 0.37659999847412107, + "Acc.stage": 0.42770000457763674, + "Acc.van": 0.467599983215332, + "Acc.ship": 0.9408000183105468, + "Acc.fountain": 0.1438000011444092, + "Acc.conveyer belt": 0.915199966430664, + "Acc.canopy": 0.31670000076293947, + "Acc.washer": 0.7272000122070312, + "Acc.plaything": 0.31860000610351563, + "Acc.swimming pool": 0.8016999816894531, + "Acc.stool": 0.44040000915527344, + "Acc.barrel": 0.6483000183105468, + "Acc.basket": 0.25190000534057616, + "Acc.waterfall": 0.6981999969482422, + "Acc.tent": 0.9937000274658203, + "Acc.bag": 0.16139999389648438, + "Acc.minibike": 0.7455000305175781, + "Acc.cradle": 0.9673000335693359, + "Acc.oven": 0.4372999954223633, + "Acc.ball": 0.6109000015258789, + "Acc.food": 0.597599983215332, + "Acc.step": 0.1477000045776367, + "Acc.tank": 0.6109000015258789, + "Acc.trade name": 0.20899999618530274, + "Acc.microwave": 0.40849998474121096, + "Acc.pot": 0.4047999954223633, + "Acc.animal": 0.5829000091552734, + "Acc.bicycle": 0.7291000366210938, + "Acc.lake": 0.7577999877929688, + "Acc.dishwasher": 0.5308000183105469, + "Acc.screen": 0.8801999664306641, + "Acc.blanket": 0.10930000305175781, + "Acc.sculpture": 0.5991999816894531, + "Acc.hood": 0.5402999877929687, + "Acc.sconce": 0.33060001373291015, + "Acc.vase": 0.4375, + "Acc.traffic light": 0.45229999542236327, + "Acc.tray": 0.05, + "Acc.ashcan": 0.4622999954223633, + "Acc.fan": 0.6930000305175781, + "Acc.pier": 0.5518999862670898, + "Acc.crt screen": 0.08789999961853028, + "Acc.plate": 0.6027000045776367, + "Acc.monitor": 0.17920000076293946, + "Acc.bulletin board": 0.5656000137329101, + "Acc.shower": 0.044499998092651365, + "Acc.radiator": 0.6531999969482422, + "Acc.glass": 0.07309999942779541, + "Acc.clock": 0.28329999923706056, + "Acc.flag": 0.36520000457763674 + } + }, + "31": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8149, + "mIoU": 0.45030000000000003, + "mAcc": 0.5847, + "IoU.wall": 0.7551999664306641, + "IoU.building": 0.8175, + "IoU.sky": 0.9376000213623047, + "IoU.floor": 0.8047000122070312, + "IoU.tree": 0.7319000244140625, + "IoU.ceiling": 0.8261000061035156, + "IoU.road": 0.8141999816894532, + "IoU.bed ": 0.8669000244140626, + "IoU.windowpane": 0.6013999938964844, + "IoU.grass": 0.6459999847412109, + "IoU.cabinet": 0.599900016784668, + "IoU.sidewalk": 0.6236999893188476, + "IoU.person": 
0.7709999847412109, + "IoU.earth": 0.3447000122070312, + "IoU.door": 0.42790000915527343, + "IoU.table": 0.522400016784668, + "IoU.mountain": 0.5768999862670898, + "IoU.plant": 0.49740001678466794, + "IoU.curtain": 0.7123000335693359, + "IoU.chair": 0.49779998779296875, + "IoU.car": 0.8070999908447266, + "IoU.water": 0.5433000183105469, + "IoU.painting": 0.6633999633789063, + "IoU.sofa": 0.6129999923706054, + "IoU.shelf": 0.41619998931884766, + "IoU.house": 0.48270000457763673, + "IoU.sea": 0.6040000152587891, + "IoU.mirror": 0.6466000366210938, + "IoU.rug": 0.6487999725341796, + "IoU.field": 0.2755999946594238, + "IoU.armchair": 0.3736000061035156, + "IoU.seat": 0.5943999862670899, + "IoU.fence": 0.3175, + "IoU.desk": 0.41619998931884766, + "IoU.rock": 0.44110000610351563, + "IoU.wardrobe": 0.5165000152587891, + "IoU.lamp": 0.5145000076293945, + "IoU.bathtub": 0.7901000213623047, + "IoU.railing": 0.30840000152587893, + "IoU.cushion": 0.5211999893188477, + "IoU.base": 0.29209999084472654, + "IoU.box": 0.21549999237060546, + "IoU.column": 0.4322999954223633, + "IoU.signboard": 0.3325, + "IoU.chest of drawers": 0.39290000915527346, + "IoU.counter": 0.2593000030517578, + "IoU.sand": 0.3493000030517578, + "IoU.sink": 0.6669999694824219, + "IoU.skyscraper": 0.614900016784668, + "IoU.fireplace": 0.6969999694824218, + "IoU.refrigerator": 0.6945999908447266, + "IoU.grandstand": 0.44110000610351563, + "IoU.path": 0.24159999847412109, + "IoU.stairs": 0.25420000076293947, + "IoU.runway": 0.6494999694824218, + "IoU.case": 0.5072000122070313, + "IoU.pool table": 0.9138999938964844, + "IoU.pillow": 0.5231000137329102, + "IoU.screen door": 0.6583999633789063, + "IoU.stairway": 0.2709000015258789, + "IoU.river": 0.25299999237060544, + "IoU.bridge": 0.6243000030517578, + "IoU.bookcase": 0.31979999542236326, + "IoU.blind": 0.39689998626708983, + "IoU.coffee table": 0.5277000045776368, + "IoU.toilet": 0.7219999694824218, + "IoU.flower": 0.327599983215332, + "IoU.book": 0.4204000091552734, + "IoU.hill": 0.055300002098083494, + "IoU.bench": 0.38029998779296875, + "IoU.countertop": 0.5722000122070312, + "IoU.stove": 0.6562999725341797, + "IoU.palm": 0.4527000045776367, + "IoU.kitchen island": 0.2940999984741211, + "IoU.computer": 0.6745999908447265, + "IoU.swivel chair": 0.37909999847412107, + "IoU.boat": 0.6415000152587891, + "IoU.bar": 0.4754999923706055, + "IoU.arcade machine": 0.5761000061035156, + "IoU.hovel": 0.46310001373291015, + "IoU.bus": 0.7687999725341796, + "IoU.towel": 0.5604000091552734, + "IoU.light": 0.4006999969482422, + "IoU.truck": 0.19829999923706054, + "IoU.tower": 0.36009998321533204, + "IoU.chandelier": 0.5934999847412109, + "IoU.awning": 0.33099998474121095, + "IoU.streetlight": 0.1777000045776367, + "IoU.booth": 0.41650001525878905, + "IoU.television receiver": 0.6131000137329101, + "IoU.airplane": 0.5452000045776367, + "IoU.dirt track": 0.2059000015258789, + "IoU.apparel": 0.3359000015258789, + "IoU.pole": 0.1402999973297119, + "IoU.land": 0.0990999984741211, + "IoU.bannister": 0.09130000114440918, + "IoU.escalator": 0.2520000076293945, + "IoU.ottoman": 0.46119998931884765, + "IoU.bottle": 0.335, + "IoU.buffet": 0.6487000274658203, + "IoU.poster": 0.2875, + "IoU.stage": 0.1463000011444092, + "IoU.van": 0.3975, + "IoU.ship": 0.7358999633789063, + "IoU.fountain": 0.211200008392334, + "IoU.conveyer belt": 0.6720999908447266, + "IoU.canopy": 0.25, + "IoU.washer": 0.6912999725341797, + "IoU.plaything": 0.23340000152587892, + "IoU.swimming pool": 0.5286000061035157, + "IoU.stool": 
0.2306999969482422, + "IoU.barrel": 0.27860000610351565, + "IoU.basket": 0.17420000076293946, + "IoU.waterfall": 0.535, + "IoU.tent": 0.822699966430664, + "IoU.bag": 0.08640000343322754, + "IoU.minibike": 0.5245000076293945, + "IoU.cradle": 0.768499984741211, + "IoU.oven": 0.14550000190734863, + "IoU.ball": 0.3915000152587891, + "IoU.food": 0.504000015258789, + "IoU.step": 0.11699999809265137, + "IoU.tank": 0.5004999923706055, + "IoU.trade name": 0.25489999771118166, + "IoU.microwave": 0.3440999984741211, + "IoU.pot": 0.3540999984741211, + "IoU.animal": 0.5481000137329102, + "IoU.bicycle": 0.4640000152587891, + "IoU.lake": 0.4077999877929688, + "IoU.dishwasher": 0.4834999847412109, + "IoU.screen": 0.6334000015258789, + "IoU.blanket": 0.08, + "IoU.sculpture": 0.4084000015258789, + "IoU.hood": 0.49540000915527344, + "IoU.sconce": 0.2781999969482422, + "IoU.vase": 0.2590999984741211, + "IoU.traffic light": 0.2520000076293945, + "IoU.tray": 0.03960000038146973, + "IoU.ashcan": 0.2734000015258789, + "IoU.fan": 0.45430000305175783, + "IoU.pier": 0.22989999771118164, + "IoU.crt screen": 0.03140000104904175, + "IoU.plate": 0.43909999847412107, + "IoU.monitor": 0.025399999618530275, + "IoU.bulletin board": 0.38729999542236326, + "IoU.shower": 0.0064999997615814206, + "IoU.radiator": 0.5443000030517579, + "IoU.glass": 0.07369999885559082, + "IoU.clock": 0.20329999923706055, + "IoU.flag": 0.3208000183105469, + "Acc.wall": 0.8666000366210938, + "Acc.building": 0.9187000274658204, + "Acc.sky": 0.9594000244140625, + "Acc.floor": 0.8869999694824219, + "Acc.tree": 0.8908999633789062, + "Acc.ceiling": 0.889000015258789, + "Acc.road": 0.8854000091552734, + "Acc.bed ": 0.9566999816894531, + "Acc.windowpane": 0.7565000152587891, + "Acc.grass": 0.8026000213623047, + "Acc.cabinet": 0.739800033569336, + "Acc.sidewalk": 0.8022000122070313, + "Acc.person": 0.9138999938964844, + "Acc.earth": 0.46490001678466797, + "Acc.door": 0.5700999832153321, + "Acc.table": 0.6770999908447266, + "Acc.mountain": 0.7456999969482422, + "Acc.plant": 0.5915999984741211, + "Acc.curtain": 0.8520999908447265, + "Acc.chair": 0.6537000274658203, + "Acc.car": 0.9094999694824218, + "Acc.water": 0.6693000030517579, + "Acc.painting": 0.8563999938964844, + "Acc.sofa": 0.8073999786376953, + "Acc.shelf": 0.5629000091552734, + "Acc.house": 0.6093000030517578, + "Acc.sea": 0.8238999938964844, + "Acc.mirror": 0.7562999725341797, + "Acc.rug": 0.7787000274658203, + "Acc.field": 0.4834000015258789, + "Acc.armchair": 0.5441999816894532, + "Acc.seat": 0.8179000091552734, + "Acc.fence": 0.4413000106811523, + "Acc.desk": 0.7169999694824218, + "Acc.rock": 0.59, + "Acc.wardrobe": 0.6888999938964844, + "Acc.lamp": 0.7047000122070313, + "Acc.bathtub": 0.8440000152587891, + "Acc.railing": 0.42450000762939455, + "Acc.cushion": 0.6522000122070313, + "Acc.base": 0.5559000015258789, + "Acc.box": 0.27610000610351565, + "Acc.column": 0.5436999893188477, + "Acc.signboard": 0.43470001220703125, + "Acc.chest of drawers": 0.5947000122070313, + "Acc.counter": 0.36939998626708986, + "Acc.sand": 0.6161000061035157, + "Acc.sink": 0.7468000030517579, + "Acc.skyscraper": 0.7737000274658203, + "Acc.fireplace": 0.8747000122070312, + "Acc.refrigerator": 0.8590000152587891, + "Acc.grandstand": 0.712699966430664, + "Acc.path": 0.3675, + "Acc.stairs": 0.36459999084472655, + "Acc.runway": 0.8479000091552734, + "Acc.case": 0.6795999908447266, + "Acc.pool table": 0.9720999908447265, + "Acc.pillow": 0.6118000030517579, + "Acc.screen door": 0.7987999725341797, + "Acc.stairway": 
0.39669998168945314, + "Acc.river": 0.5209000015258789, + "Acc.bridge": 0.8287000274658203, + "Acc.bookcase": 0.4697000122070312, + "Acc.blind": 0.47130001068115235, + "Acc.coffee table": 0.8394999694824219, + "Acc.toilet": 0.9, + "Acc.flower": 0.5070000076293946, + "Acc.book": 0.6145000076293945, + "Acc.hill": 0.10420000076293945, + "Acc.bench": 0.46380001068115234, + "Acc.countertop": 0.7293000030517578, + "Acc.stove": 0.8033999633789063, + "Acc.palm": 0.69, + "Acc.kitchen island": 0.6797000122070312, + "Acc.computer": 0.8488999938964844, + "Acc.swivel chair": 0.5084999847412109, + "Acc.boat": 0.8548999786376953, + "Acc.bar": 0.6161000061035157, + "Acc.arcade machine": 0.6848000335693359, + "Acc.hovel": 0.5202000045776367, + "Acc.bus": 0.8880000305175781, + "Acc.towel": 0.7241000366210938, + "Acc.light": 0.48720001220703124, + "Acc.truck": 0.2861000061035156, + "Acc.tower": 0.5461000061035156, + "Acc.chandelier": 0.7463999938964844, + "Acc.awning": 0.4122999954223633, + "Acc.streetlight": 0.2459000015258789, + "Acc.booth": 0.535, + "Acc.television receiver": 0.7594000244140625, + "Acc.airplane": 0.6493000030517578, + "Acc.dirt track": 0.2720999908447266, + "Acc.apparel": 0.47700000762939454, + "Acc.pole": 0.18489999771118165, + "Acc.land": 0.16940000534057617, + "Acc.bannister": 0.13189999580383302, + "Acc.escalator": 0.283799991607666, + "Acc.ottoman": 0.6737999725341797, + "Acc.bottle": 0.5299000167846679, + "Acc.buffet": 0.774000015258789, + "Acc.poster": 0.3452000045776367, + "Acc.stage": 0.37490001678466794, + "Acc.van": 0.46, + "Acc.ship": 0.7501000213623047, + "Acc.fountain": 0.21760000228881837, + "Acc.conveyer belt": 0.9248000335693359, + "Acc.canopy": 0.3009000015258789, + "Acc.washer": 0.7229000091552734, + "Acc.plaything": 0.3486000061035156, + "Acc.swimming pool": 0.8234999847412109, + "Acc.stool": 0.34299999237060547, + "Acc.barrel": 0.6481999969482422, + "Acc.basket": 0.23969999313354493, + "Acc.waterfall": 0.6293000030517578, + "Acc.tent": 0.9937999725341797, + "Acc.bag": 0.09569999694824219, + "Acc.minibike": 0.6526000213623047, + "Acc.cradle": 0.9687999725341797, + "Acc.oven": 0.38119998931884763, + "Acc.ball": 0.46580001831054685, + "Acc.food": 0.582599983215332, + "Acc.step": 0.14359999656677247, + "Acc.tank": 0.5974000167846679, + "Acc.trade name": 0.29069999694824217, + "Acc.microwave": 0.39310001373291015, + "Acc.pot": 0.4127999877929687, + "Acc.animal": 0.6036000061035156, + "Acc.bicycle": 0.7387000274658203, + "Acc.lake": 0.517400016784668, + "Acc.dishwasher": 0.6297999954223633, + "Acc.screen": 0.9022000122070313, + "Acc.blanket": 0.090600004196167, + "Acc.sculpture": 0.6029999923706054, + "Acc.hood": 0.5688999938964844, + "Acc.sconce": 0.37470001220703125, + "Acc.vase": 0.40490001678466797, + "Acc.traffic light": 0.49540000915527344, + "Acc.tray": 0.058699998855590824, + "Acc.ashcan": 0.3681999969482422, + "Acc.fan": 0.7287000274658203, + "Acc.pier": 0.5265000152587891, + "Acc.crt screen": 0.10689999580383301, + "Acc.plate": 0.5845999908447266, + "Acc.monitor": 0.02700000047683716, + "Acc.bulletin board": 0.5747000122070313, + "Acc.shower": 0.017699999809265135, + "Acc.radiator": 0.6631999969482422, + "Acc.glass": 0.08090000152587891, + "Acc.clock": 0.23579999923706055, + "Acc.flag": 0.35770000457763673 + } + }, + "32": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8131, + "mIoU": 0.4382, + "mAcc": 0.5659000000000001, + "IoU.wall": 0.7545999908447265, + 
"IoU.building": 0.8108000183105468, + "IoU.sky": 0.939800033569336, + "IoU.floor": 0.8059999847412109, + "IoU.tree": 0.7344000244140625, + "IoU.ceiling": 0.8252999877929688, + "IoU.road": 0.8077999877929688, + "IoU.bed ": 0.8683999633789062, + "IoU.windowpane": 0.6047000122070313, + "IoU.grass": 0.6693000030517579, + "IoU.cabinet": 0.5908000183105468, + "IoU.sidewalk": 0.624000015258789, + "IoU.person": 0.7748999786376953, + "IoU.earth": 0.3534999847412109, + "IoU.door": 0.4509000015258789, + "IoU.table": 0.5415999984741211, + "IoU.mountain": 0.585999984741211, + "IoU.plant": 0.49270000457763674, + "IoU.curtain": 0.7144000244140625, + "IoU.chair": 0.49389999389648437, + "IoU.car": 0.7941999816894532, + "IoU.water": 0.4961999893188477, + "IoU.painting": 0.6644999694824218, + "IoU.sofa": 0.6245000076293945, + "IoU.shelf": 0.4154999923706055, + "IoU.house": 0.42069999694824217, + "IoU.sea": 0.5415999984741211, + "IoU.mirror": 0.640199966430664, + "IoU.rug": 0.6641000366210937, + "IoU.field": 0.29850000381469727, + "IoU.armchair": 0.3902000045776367, + "IoU.seat": 0.6074000167846679, + "IoU.fence": 0.3372000122070313, + "IoU.desk": 0.3990000152587891, + "IoU.rock": 0.3895999908447266, + "IoU.wardrobe": 0.46349998474121096, + "IoU.lamp": 0.5086999893188476, + "IoU.bathtub": 0.8076999664306641, + "IoU.railing": 0.31659999847412107, + "IoU.cushion": 0.5084000015258789, + "IoU.base": 0.2713999938964844, + "IoU.box": 0.21979999542236328, + "IoU.column": 0.43150001525878906, + "IoU.signboard": 0.33599998474121096, + "IoU.chest of drawers": 0.38220001220703126, + "IoU.counter": 0.2665999984741211, + "IoU.sand": 0.3593000030517578, + "IoU.sink": 0.6295999908447265, + "IoU.skyscraper": 0.5252000045776367, + "IoU.fireplace": 0.7068000030517578, + "IoU.refrigerator": 0.7112999725341796, + "IoU.grandstand": 0.4061000061035156, + "IoU.path": 0.2478000068664551, + "IoU.stairs": 0.26799999237060546, + "IoU.runway": 0.6922000122070312, + "IoU.case": 0.4834000015258789, + "IoU.pool table": 0.8875, + "IoU.pillow": 0.5179000091552735, + "IoU.screen door": 0.66, + "IoU.stairway": 0.28530000686645507, + "IoU.river": 0.14109999656677247, + "IoU.bridge": 0.6118999862670899, + "IoU.bookcase": 0.29850000381469727, + "IoU.blind": 0.3397000122070313, + "IoU.coffee table": 0.5327000045776367, + "IoU.toilet": 0.7573999786376953, + "IoU.flower": 0.335099983215332, + "IoU.book": 0.42150001525878905, + "IoU.hill": 0.09100000381469726, + "IoU.bench": 0.46169998168945314, + "IoU.countertop": 0.574900016784668, + "IoU.stove": 0.6231000137329101, + "IoU.palm": 0.46299999237060546, + "IoU.kitchen island": 0.3372000122070313, + "IoU.computer": 0.5608000183105468, + "IoU.swivel chair": 0.42060001373291017, + "IoU.boat": 0.642300033569336, + "IoU.bar": 0.46169998168945314, + "IoU.arcade machine": 0.38729999542236326, + "IoU.hovel": 0.27260000228881837, + "IoU.bus": 0.7697000122070312, + "IoU.towel": 0.5425, + "IoU.light": 0.42279998779296873, + "IoU.truck": 0.14899999618530274, + "IoU.tower": 0.36529998779296874, + "IoU.chandelier": 0.575, + "IoU.awning": 0.37689998626708987, + "IoU.streetlight": 0.17940000534057618, + "IoU.booth": 0.5113000106811524, + "IoU.television receiver": 0.6216999816894532, + "IoU.airplane": 0.599900016784668, + "IoU.dirt track": 0.332599983215332, + "IoU.apparel": 0.2813999938964844, + "IoU.pole": 0.176200008392334, + "IoU.land": 0.026099998950958252, + "IoU.bannister": 0.11079999923706055, + "IoU.escalator": 0.303799991607666, + "IoU.ottoman": 0.45729999542236327, + "IoU.bottle": 0.13960000038146972, + 
"IoU.buffet": 0.5720999908447265, + "IoU.poster": 0.26780000686645505, + "IoU.stage": 0.12710000038146974, + "IoU.van": 0.3733000183105469, + "IoU.ship": 0.6383000183105468, + "IoU.fountain": 0.16860000610351564, + "IoU.conveyer belt": 0.5761000061035156, + "IoU.canopy": 0.2193000030517578, + "IoU.washer": 0.6341999816894531, + "IoU.plaything": 0.26059999465942385, + "IoU.swimming pool": 0.5463999938964844, + "IoU.stool": 0.2209000015258789, + "IoU.barrel": 0.05760000228881836, + "IoU.basket": 0.17489999771118164, + "IoU.waterfall": 0.5558000183105469, + "IoU.tent": 0.8172000122070312, + "IoU.bag": 0.07920000076293945, + "IoU.minibike": 0.6134000015258789, + "IoU.cradle": 0.7520999908447266, + "IoU.oven": 0.15539999961853027, + "IoU.ball": 0.3775, + "IoU.food": 0.5447999954223632, + "IoU.step": 0.14170000076293945, + "IoU.tank": 0.47299999237060547, + "IoU.trade name": 0.19360000610351563, + "IoU.microwave": 0.3240000152587891, + "IoU.pot": 0.335099983215332, + "IoU.animal": 0.539000015258789, + "IoU.bicycle": 0.5068999862670899, + "IoU.lake": 0.0856999969482422, + "IoU.dishwasher": 0.42270000457763673, + "IoU.screen": 0.6197999954223633, + "IoU.blanket": 0.05400000095367432, + "IoU.sculpture": 0.4634000015258789, + "IoU.hood": 0.4377000045776367, + "IoU.sconce": 0.2695000076293945, + "IoU.vase": 0.24159999847412109, + "IoU.traffic light": 0.27, + "IoU.tray": 0.05369999885559082, + "IoU.ashcan": 0.2418000030517578, + "IoU.fan": 0.43470001220703125, + "IoU.pier": 0.30639999389648437, + "IoU.crt screen": 0.014199999570846557, + "IoU.plate": 0.41220001220703123, + "IoU.monitor": 0.024600000381469728, + "IoU.bulletin board": 0.2902000045776367, + "IoU.shower": 0.027799999713897704, + "IoU.radiator": 0.5365000152587891, + "IoU.glass": 0.06400000095367432, + "IoU.clock": 0.16229999542236329, + "IoU.flag": 0.35380001068115235, + "Acc.wall": 0.8627999877929687, + "Acc.building": 0.9187000274658204, + "Acc.sky": 0.9638999938964844, + "Acc.floor": 0.8931999969482421, + "Acc.tree": 0.8837000274658203, + "Acc.ceiling": 0.8816999816894531, + "Acc.road": 0.8798000335693359, + "Acc.bed ": 0.9575, + "Acc.windowpane": 0.7655999755859375, + "Acc.grass": 0.8262000274658203, + "Acc.cabinet": 0.7220999908447265, + "Acc.sidewalk": 0.8219000244140625, + "Acc.person": 0.9126000213623047, + "Acc.earth": 0.4779999923706055, + "Acc.door": 0.6177000045776367, + "Acc.table": 0.6987000274658203, + "Acc.mountain": 0.7295999908447266, + "Acc.plant": 0.5834999847412109, + "Acc.curtain": 0.8601000213623047, + "Acc.chair": 0.6433999633789063, + "Acc.car": 0.9151000213623047, + "Acc.water": 0.6419000244140625, + "Acc.painting": 0.865, + "Acc.sofa": 0.7879000091552735, + "Acc.shelf": 0.590900001525879, + "Acc.house": 0.5502999877929687, + "Acc.sea": 0.783499984741211, + "Acc.mirror": 0.7704000091552734, + "Acc.rug": 0.7758000183105469, + "Acc.field": 0.5018999862670899, + "Acc.armchair": 0.6090000152587891, + "Acc.seat": 0.8144000244140625, + "Acc.fence": 0.4868000030517578, + "Acc.desk": 0.7306999969482422, + "Acc.rock": 0.5968000030517578, + "Acc.wardrobe": 0.6648000335693359, + "Acc.lamp": 0.6775, + "Acc.bathtub": 0.8673000335693359, + "Acc.railing": 0.44540000915527345, + "Acc.cushion": 0.6286999893188476, + "Acc.base": 0.5183000183105468, + "Acc.box": 0.28100000381469725, + "Acc.column": 0.5356999969482422, + "Acc.signboard": 0.4645999908447266, + "Acc.chest of drawers": 0.5377999877929688, + "Acc.counter": 0.37849998474121094, + "Acc.sand": 0.5690000152587891, + "Acc.sink": 0.7286000061035156, + "Acc.skyscraper": 
0.664000015258789, + "Acc.fireplace": 0.8855999755859375, + "Acc.refrigerator": 0.8401000213623047, + "Acc.grandstand": 0.6873999786376953, + "Acc.path": 0.3375, + "Acc.stairs": 0.37849998474121094, + "Acc.runway": 0.8630000305175781, + "Acc.case": 0.6020999908447265, + "Acc.pool table": 0.9694000244140625, + "Acc.pillow": 0.6072000122070312, + "Acc.screen door": 0.765199966430664, + "Acc.stairway": 0.40099998474121096, + "Acc.river": 0.2642000007629395, + "Acc.bridge": 0.8252999877929688, + "Acc.bookcase": 0.4752000045776367, + "Acc.blind": 0.39130001068115233, + "Acc.coffee table": 0.8505000305175782, + "Acc.toilet": 0.888499984741211, + "Acc.flower": 0.5479000091552735, + "Acc.book": 0.6122999954223632, + "Acc.hill": 0.1775, + "Acc.bench": 0.5336999893188477, + "Acc.countertop": 0.7370999908447265, + "Acc.stove": 0.808499984741211, + "Acc.palm": 0.6966999816894531, + "Acc.kitchen island": 0.6993000030517578, + "Acc.computer": 0.7158999633789063, + "Acc.swivel chair": 0.6020999908447265, + "Acc.boat": 0.8297000122070313, + "Acc.bar": 0.5991999816894531, + "Acc.arcade machine": 0.45549999237060546, + "Acc.hovel": 0.3691999816894531, + "Acc.bus": 0.8954000091552734, + "Acc.towel": 0.6894999694824219, + "Acc.light": 0.5265000152587891, + "Acc.truck": 0.21319999694824218, + "Acc.tower": 0.5470000076293945, + "Acc.chandelier": 0.7855999755859375, + "Acc.awning": 0.472400016784668, + "Acc.streetlight": 0.24309999465942383, + "Acc.booth": 0.5702000045776368, + "Acc.television receiver": 0.7865000152587891, + "Acc.airplane": 0.7113999938964843, + "Acc.dirt track": 0.402599983215332, + "Acc.apparel": 0.40029998779296877, + "Acc.pole": 0.22620000839233398, + "Acc.land": 0.04710000038146973, + "Acc.bannister": 0.15760000228881835, + "Acc.escalator": 0.3754999923706055, + "Acc.ottoman": 0.668499984741211, + "Acc.bottle": 0.16719999313354492, + "Acc.buffet": 0.7055000305175781, + "Acc.poster": 0.3057999992370605, + "Acc.stage": 0.34639999389648435, + "Acc.van": 0.43340000152587893, + "Acc.ship": 0.6744999694824219, + "Acc.fountain": 0.17309999465942383, + "Acc.conveyer belt": 0.8011000061035156, + "Acc.canopy": 0.28399999618530275, + "Acc.washer": 0.6920999908447265, + "Acc.plaything": 0.41700000762939454, + "Acc.swimming pool": 0.7837000274658203, + "Acc.stool": 0.31059999465942384, + "Acc.barrel": 0.2384000015258789, + "Acc.basket": 0.21969999313354494, + "Acc.waterfall": 0.6329999923706054, + "Acc.tent": 0.9944999694824219, + "Acc.bag": 0.09329999923706055, + "Acc.minibike": 0.7820999908447266, + "Acc.cradle": 0.9587000274658203, + "Acc.oven": 0.3818000030517578, + "Acc.ball": 0.4815999984741211, + "Acc.food": 0.6170000076293946, + "Acc.step": 0.16209999084472657, + "Acc.tank": 0.5377999877929688, + "Acc.trade name": 0.21600000381469728, + "Acc.microwave": 0.3579000091552734, + "Acc.pot": 0.39360000610351564, + "Acc.animal": 0.6052999877929688, + "Acc.bicycle": 0.6823000335693359, + "Acc.lake": 0.13789999961853028, + "Acc.dishwasher": 0.5697999954223633, + "Acc.screen": 0.8455999755859375, + "Acc.blanket": 0.06119999885559082, + "Acc.sculpture": 0.6363999938964844, + "Acc.hood": 0.48389999389648436, + "Acc.sconce": 0.344900016784668, + "Acc.vase": 0.3818000030517578, + "Acc.traffic light": 0.47299999237060547, + "Acc.tray": 0.09039999961853028, + "Acc.ashcan": 0.3716999816894531, + "Acc.fan": 0.7137000274658203, + "Acc.pier": 0.48319999694824217, + "Acc.crt screen": 0.04389999866485596, + "Acc.plate": 0.5654999923706054, + "Acc.monitor": 0.03559999942779541, + "Acc.bulletin board": 
0.4336999893188477, + "Acc.shower": 0.05079999923706055, + "Acc.radiator": 0.642300033569336, + "Acc.glass": 0.06980000019073486, + "Acc.clock": 0.18209999084472656, + "Acc.flag": 0.40759998321533203 + } + }, + "33": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.816, + "mIoU": 0.4526, + "mAcc": 0.5832999999999999, + "IoU.wall": 0.7594000244140625, + "IoU.building": 0.8156999969482421, + "IoU.sky": 0.9393000030517578, + "IoU.floor": 0.8043000030517579, + "IoU.tree": 0.7387999725341797, + "IoU.ceiling": 0.8308999633789063, + "IoU.road": 0.8108999633789062, + "IoU.bed ": 0.8634999847412109, + "IoU.windowpane": 0.6070999908447265, + "IoU.grass": 0.6686000061035157, + "IoU.cabinet": 0.5813999938964843, + "IoU.sidewalk": 0.6308000183105469, + "IoU.person": 0.7616999816894531, + "IoU.earth": 0.3309000015258789, + "IoU.door": 0.46369998931884765, + "IoU.table": 0.548499984741211, + "IoU.mountain": 0.574900016784668, + "IoU.plant": 0.49130001068115237, + "IoU.curtain": 0.7144999694824219, + "IoU.chair": 0.5027999877929688, + "IoU.car": 0.7945999908447265, + "IoU.water": 0.5211999893188477, + "IoU.painting": 0.6726999664306641, + "IoU.sofa": 0.6211999893188477, + "IoU.shelf": 0.42759998321533205, + "IoU.house": 0.47369998931884766, + "IoU.sea": 0.6311000061035156, + "IoU.mirror": 0.6488999938964843, + "IoU.rug": 0.660199966430664, + "IoU.field": 0.30190000534057615, + "IoU.armchair": 0.40369998931884765, + "IoU.seat": 0.579900016784668, + "IoU.fence": 0.41119998931884766, + "IoU.desk": 0.43, + "IoU.rock": 0.38990001678466796, + "IoU.wardrobe": 0.48279998779296873, + "IoU.lamp": 0.5181000137329101, + "IoU.bathtub": 0.8, + "IoU.railing": 0.3014999961853027, + "IoU.cushion": 0.4990999984741211, + "IoU.base": 0.271200008392334, + "IoU.box": 0.2128000068664551, + "IoU.column": 0.4286000061035156, + "IoU.signboard": 0.34299999237060547, + "IoU.chest of drawers": 0.3927000045776367, + "IoU.counter": 0.31459999084472656, + "IoU.sand": 0.36029998779296873, + "IoU.sink": 0.6472000122070313, + "IoU.skyscraper": 0.5381000137329102, + "IoU.fireplace": 0.7169999694824218, + "IoU.refrigerator": 0.7030000305175781, + "IoU.grandstand": 0.4068000030517578, + "IoU.path": 0.23110000610351564, + "IoU.stairs": 0.26549999237060545, + "IoU.runway": 0.686500015258789, + "IoU.case": 0.49990001678466794, + "IoU.pool table": 0.8866999816894531, + "IoU.pillow": 0.5456000137329101, + "IoU.screen door": 0.6725, + "IoU.stairway": 0.2788999938964844, + "IoU.river": 0.20059999465942382, + "IoU.bridge": 0.6880000305175781, + "IoU.bookcase": 0.31510000228881835, + "IoU.blind": 0.3584000015258789, + "IoU.coffee table": 0.5609000015258789, + "IoU.toilet": 0.798499984741211, + "IoU.flower": 0.33040000915527346, + "IoU.book": 0.42209999084472655, + "IoU.hill": 0.0734000015258789, + "IoU.bench": 0.40299999237060546, + "IoU.countertop": 0.5629999923706055, + "IoU.stove": 0.6608999633789062, + "IoU.palm": 0.4804000091552734, + "IoU.kitchen island": 0.3459000015258789, + "IoU.computer": 0.5836000061035156, + "IoU.swivel chair": 0.4531999969482422, + "IoU.boat": 0.6354000091552734, + "IoU.bar": 0.5156999969482422, + "IoU.arcade machine": 0.3977000045776367, + "IoU.hovel": 0.38110000610351563, + "IoU.bus": 0.7427999877929687, + "IoU.towel": 0.555, + "IoU.light": 0.39849998474121096, + "IoU.truck": 0.20290000915527343, + "IoU.tower": 0.286299991607666, + "IoU.chandelier": 0.5861999893188476, + "IoU.awning": 0.29770000457763673, + "IoU.streetlight": 
0.19329999923706054, + "IoU.booth": 0.3561000061035156, + "IoU.television receiver": 0.6097000122070313, + "IoU.airplane": 0.683499984741211, + "IoU.dirt track": 0.1325, + "IoU.apparel": 0.3004999923706055, + "IoU.pole": 0.24790000915527344, + "IoU.land": 0.052100000381469724, + "IoU.bannister": 0.10229999542236329, + "IoU.escalator": 0.29219999313354494, + "IoU.ottoman": 0.46919998168945315, + "IoU.bottle": 0.3060000038146973, + "IoU.buffet": 0.5693000030517578, + "IoU.poster": 0.22899999618530273, + "IoU.stage": 0.12140000343322754, + "IoU.van": 0.3915000152587891, + "IoU.ship": 0.7216999816894532, + "IoU.fountain": 0.19190000534057616, + "IoU.conveyer belt": 0.6218000030517579, + "IoU.canopy": 0.20569999694824218, + "IoU.washer": 0.6920999908447265, + "IoU.plaything": 0.21979999542236328, + "IoU.swimming pool": 0.6004000091552735, + "IoU.stool": 0.28, + "IoU.barrel": 0.5029999923706054, + "IoU.basket": 0.19959999084472657, + "IoU.waterfall": 0.5595999908447266, + "IoU.tent": 0.8798999786376953, + "IoU.bag": 0.1361999988555908, + "IoU.minibike": 0.5925, + "IoU.cradle": 0.7516999816894532, + "IoU.oven": 0.143100004196167, + "IoU.ball": 0.34349998474121096, + "IoU.food": 0.5545999908447266, + "IoU.step": 0.14819999694824218, + "IoU.tank": 0.4791999816894531, + "IoU.trade name": 0.23420000076293945, + "IoU.microwave": 0.32729999542236327, + "IoU.pot": 0.33310001373291015, + "IoU.animal": 0.5781000137329102, + "IoU.bicycle": 0.5102999877929687, + "IoU.lake": 0.5572999954223633, + "IoU.dishwasher": 0.4922999954223633, + "IoU.screen": 0.5543999862670899, + "IoU.blanket": 0.06309999942779541, + "IoU.sculpture": 0.44349998474121094, + "IoU.hood": 0.4468000030517578, + "IoU.sconce": 0.35209999084472654, + "IoU.vase": 0.2425, + "IoU.traffic light": 0.2425, + "IoU.tray": 0.05170000076293945, + "IoU.ashcan": 0.26600000381469724, + "IoU.fan": 0.46830001831054685, + "IoU.pier": 0.32779998779296876, + "IoU.crt screen": 0.0005000000074505806, + "IoU.plate": 0.3788000106811523, + "IoU.monitor": 0.026500000953674316, + "IoU.bulletin board": 0.29620000839233396, + "IoU.shower": 0.008999999761581421, + "IoU.radiator": 0.5377000045776367, + "IoU.glass": 0.07849999904632568, + "IoU.clock": 0.23549999237060548, + "IoU.flag": 0.31209999084472656, + "Acc.wall": 0.8694000244140625, + "Acc.building": 0.9148999786376953, + "Acc.sky": 0.9644000244140625, + "Acc.floor": 0.8912000274658203, + "Acc.tree": 0.8825, + "Acc.ceiling": 0.8908000183105469, + "Acc.road": 0.8873999786376953, + "Acc.bed ": 0.9576000213623047, + "Acc.windowpane": 0.7613999938964844, + "Acc.grass": 0.8072000122070313, + "Acc.cabinet": 0.6962999725341796, + "Acc.sidewalk": 0.8070999908447266, + "Acc.person": 0.9263999938964844, + "Acc.earth": 0.43380001068115237, + "Acc.door": 0.620099983215332, + "Acc.table": 0.7136000061035156, + "Acc.mountain": 0.7291999816894531, + "Acc.plant": 0.5836999893188477, + "Acc.curtain": 0.8608000183105469, + "Acc.chair": 0.6536000061035157, + "Acc.car": 0.9172000122070313, + "Acc.water": 0.6498000335693359, + "Acc.painting": 0.8480999755859375, + "Acc.sofa": 0.7913999938964844, + "Acc.shelf": 0.6204999923706055, + "Acc.house": 0.5983000183105469, + "Acc.sea": 0.8719999694824219, + "Acc.mirror": 0.7793000030517578, + "Acc.rug": 0.7069000244140625, + "Acc.field": 0.5415000152587891, + "Acc.armchair": 0.6227999877929687, + "Acc.seat": 0.8312999725341796, + "Acc.fence": 0.6058000183105469, + "Acc.desk": 0.6980000305175781, + "Acc.rock": 0.6313999938964844, + "Acc.wardrobe": 0.6938999938964844, + "Acc.lamp": 
0.6623999786376953, + "Acc.bathtub": 0.8529000091552734, + "Acc.railing": 0.4443999862670898, + "Acc.cushion": 0.6118999862670899, + "Acc.base": 0.45380001068115233, + "Acc.box": 0.26229999542236326, + "Acc.column": 0.5511999893188476, + "Acc.signboard": 0.46900001525878904, + "Acc.chest of drawers": 0.5884999847412109, + "Acc.counter": 0.4093000030517578, + "Acc.sand": 0.5131999969482421, + "Acc.sink": 0.7523999786376954, + "Acc.skyscraper": 0.6886000061035156, + "Acc.fireplace": 0.8983999633789063, + "Acc.refrigerator": 0.8601999664306641, + "Acc.grandstand": 0.6573999786376953, + "Acc.path": 0.3188999938964844, + "Acc.stairs": 0.38229999542236326, + "Acc.runway": 0.9173999786376953, + "Acc.case": 0.7001000213623046, + "Acc.pool table": 0.9708000183105469, + "Acc.pillow": 0.6730999755859375, + "Acc.screen door": 0.7605000305175781, + "Acc.stairway": 0.3741999816894531, + "Acc.river": 0.41459999084472654, + "Acc.bridge": 0.8523000335693359, + "Acc.bookcase": 0.5484000015258789, + "Acc.blind": 0.4272999954223633, + "Acc.coffee table": 0.8072000122070313, + "Acc.toilet": 0.889000015258789, + "Acc.flower": 0.5368000030517578, + "Acc.book": 0.6022999954223632, + "Acc.hill": 0.16370000839233398, + "Acc.bench": 0.492400016784668, + "Acc.countertop": 0.7220999908447265, + "Acc.stove": 0.8169999694824219, + "Acc.palm": 0.6827999877929688, + "Acc.kitchen island": 0.7418000030517579, + "Acc.computer": 0.7169999694824218, + "Acc.swivel chair": 0.6580999755859375, + "Acc.boat": 0.7756999969482422, + "Acc.bar": 0.6812000274658203, + "Acc.arcade machine": 0.44290000915527344, + "Acc.hovel": 0.445, + "Acc.bus": 0.9080999755859375, + "Acc.towel": 0.6894000244140625, + "Acc.light": 0.4638999938964844, + "Acc.truck": 0.28889999389648435, + "Acc.tower": 0.44119998931884763, + "Acc.chandelier": 0.7743000030517578, + "Acc.awning": 0.3711999893188477, + "Acc.streetlight": 0.2384000015258789, + "Acc.booth": 0.5411000061035156, + "Acc.television receiver": 0.7773999786376953, + "Acc.airplane": 0.7866000366210938, + "Acc.dirt track": 0.2680999946594238, + "Acc.apparel": 0.4406999969482422, + "Acc.pole": 0.3754999923706055, + "Acc.land": 0.11430000305175782, + "Acc.bannister": 0.16540000915527345, + "Acc.escalator": 0.37790000915527344, + "Acc.ottoman": 0.6338000106811523, + "Acc.bottle": 0.42880001068115237, + "Acc.buffet": 0.707699966430664, + "Acc.poster": 0.288700008392334, + "Acc.stage": 0.35, + "Acc.van": 0.4584000015258789, + "Acc.ship": 0.8473000335693359, + "Acc.fountain": 0.20760000228881836, + "Acc.conveyer belt": 0.8893000030517578, + "Acc.canopy": 0.285, + "Acc.washer": 0.6990000152587891, + "Acc.plaything": 0.33919998168945314, + "Acc.swimming pool": 0.8280000305175781, + "Acc.stool": 0.3858000183105469, + "Acc.barrel": 0.6245999908447266, + "Acc.basket": 0.2777000045776367, + "Acc.waterfall": 0.6561000061035156, + "Acc.tent": 0.9883999633789062, + "Acc.bag": 0.17239999771118164, + "Acc.minibike": 0.7902999877929687, + "Acc.cradle": 0.9673999786376953, + "Acc.oven": 0.3722999954223633, + "Acc.ball": 0.42209999084472655, + "Acc.food": 0.6363000106811524, + "Acc.step": 0.18920000076293944, + "Acc.tank": 0.5884000015258789, + "Acc.trade name": 0.27420000076293943, + "Acc.microwave": 0.36720001220703125, + "Acc.pot": 0.38790000915527345, + "Acc.animal": 0.6197000122070313, + "Acc.bicycle": 0.7173999786376953, + "Acc.lake": 0.6286999893188476, + "Acc.dishwasher": 0.6127999877929687, + "Acc.screen": 0.8788999938964843, + "Acc.blanket": 0.0680999994277954, + "Acc.sculpture": 0.5972999954223632, + 
"Acc.hood": 0.5056000137329102, + "Acc.sconce": 0.472400016784668, + "Acc.vase": 0.3716999816894531, + "Acc.traffic light": 0.412400016784668, + "Acc.tray": 0.09310000419616699, + "Acc.ashcan": 0.4102000045776367, + "Acc.fan": 0.6791999816894532, + "Acc.pier": 0.5320000076293945, + "Acc.crt screen": 0.001599999964237213, + "Acc.plate": 0.5011000061035156, + "Acc.monitor": 0.03789999961853027, + "Acc.bulletin board": 0.40110000610351565, + "Acc.shower": 0.03069999933242798, + "Acc.radiator": 0.6894000244140625, + "Acc.glass": 0.08760000228881835, + "Acc.clock": 0.2752000045776367, + "Acc.flag": 0.33689998626708983 + } + }, + "34": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8167, + "mIoU": 0.4445, + "mAcc": 0.5434, + "IoU.wall": 0.7563999938964844, + "IoU.building": 0.8151000213623046, + "IoU.sky": 0.9381999969482422, + "IoU.floor": 0.7981999969482422, + "IoU.tree": 0.7322000122070312, + "IoU.ceiling": 0.8244000244140625, + "IoU.road": 0.8126000213623047, + "IoU.bed ": 0.8605000305175782, + "IoU.windowpane": 0.6036999893188476, + "IoU.grass": 0.6647000122070312, + "IoU.cabinet": 0.5856999969482422, + "IoU.sidewalk": 0.6247000122070312, + "IoU.person": 0.7733000183105468, + "IoU.earth": 0.35970001220703124, + "IoU.door": 0.45860000610351564, + "IoU.table": 0.5504999923706054, + "IoU.mountain": 0.5695000076293946, + "IoU.plant": 0.4866999816894531, + "IoU.curtain": 0.7258999633789063, + "IoU.chair": 0.5063999938964844, + "IoU.car": 0.8163999938964843, + "IoU.water": 0.5402999877929687, + "IoU.painting": 0.6790000152587891, + "IoU.sofa": 0.6165000152587891, + "IoU.shelf": 0.42069999694824217, + "IoU.house": 0.4595000076293945, + "IoU.sea": 0.6343000030517578, + "IoU.mirror": 0.6512000274658203, + "IoU.rug": 0.6327999877929688, + "IoU.field": 0.2940999984741211, + "IoU.armchair": 0.38779998779296876, + "IoU.seat": 0.5920000076293945, + "IoU.fence": 0.38369998931884763, + "IoU.desk": 0.4463999938964844, + "IoU.rock": 0.4179999923706055, + "IoU.wardrobe": 0.48060001373291017, + "IoU.lamp": 0.5243999862670898, + "IoU.bathtub": 0.7637000274658203, + "IoU.railing": 0.29870000839233396, + "IoU.cushion": 0.5036000061035156, + "IoU.base": 0.24159999847412109, + "IoU.box": 0.21950000762939453, + "IoU.column": 0.4215999984741211, + "IoU.signboard": 0.33029998779296876, + "IoU.chest of drawers": 0.36080001831054687, + "IoU.counter": 0.2621999931335449, + "IoU.sand": 0.3625, + "IoU.sink": 0.6609999847412109, + "IoU.skyscraper": 0.529000015258789, + "IoU.fireplace": 0.7233999633789062, + "IoU.refrigerator": 0.7373000335693359, + "IoU.grandstand": 0.41900001525878905, + "IoU.path": 0.21799999237060547, + "IoU.stairs": 0.26440000534057617, + "IoU.runway": 0.6195000076293945, + "IoU.case": 0.4940999984741211, + "IoU.pool table": 0.9170999908447266, + "IoU.pillow": 0.5038999938964843, + "IoU.screen door": 0.6427999877929688, + "IoU.stairway": 0.3121999931335449, + "IoU.river": 0.18520000457763672, + "IoU.bridge": 0.711500015258789, + "IoU.bookcase": 0.3397999954223633, + "IoU.blind": 0.33430000305175783, + "IoU.coffee table": 0.5872000122070312, + "IoU.toilet": 0.8080999755859375, + "IoU.flower": 0.3131999969482422, + "IoU.book": 0.4186000061035156, + "IoU.hill": 0.0734000015258789, + "IoU.bench": 0.43720001220703125, + "IoU.countertop": 0.5336999893188477, + "IoU.stove": 0.7034999847412109, + "IoU.palm": 0.4197999954223633, + "IoU.kitchen island": 0.3193000030517578, + "IoU.computer": 0.6018000030517578, + 
"IoU.swivel chair": 0.4056999969482422, + "IoU.boat": 0.7034999847412109, + "IoU.bar": 0.4583000183105469, + "IoU.arcade machine": 0.3579999923706055, + "IoU.hovel": 0.3415999984741211, + "IoU.bus": 0.8169000244140625, + "IoU.towel": 0.5481999969482422, + "IoU.light": 0.26399999618530273, + "IoU.truck": 0.2075, + "IoU.tower": 0.3263999938964844, + "IoU.chandelier": 0.5806000137329101, + "IoU.awning": 0.225, + "IoU.streetlight": 0.1452999973297119, + "IoU.booth": 0.42029998779296873, + "IoU.television receiver": 0.6097999954223633, + "IoU.airplane": 0.6311999893188477, + "IoU.dirt track": 0.1559000015258789, + "IoU.apparel": 0.31010000228881834, + "IoU.pole": 0.24219999313354493, + "IoU.land": 0.00800000011920929, + "IoU.bannister": 0.06480000019073487, + "IoU.escalator": 0.1940999984741211, + "IoU.ottoman": 0.4659000015258789, + "IoU.bottle": 0.3308000183105469, + "IoU.buffet": 0.382599983215332, + "IoU.poster": 0.17899999618530274, + "IoU.stage": 0.1859000015258789, + "IoU.van": 0.3838000106811523, + "IoU.ship": 0.6679000091552735, + "IoU.fountain": 0.20120000839233398, + "IoU.conveyer belt": 0.7040000152587891, + "IoU.canopy": 0.21219999313354493, + "IoU.washer": 0.7163999938964843, + "IoU.plaything": 0.24739999771118165, + "IoU.swimming pool": 0.5288999938964843, + "IoU.stool": 0.23030000686645508, + "IoU.barrel": 0.5709000015258789, + "IoU.basket": 0.17780000686645508, + "IoU.waterfall": 0.5736999893188477, + "IoU.tent": 0.9144000244140625, + "IoU.bag": 0.090600004196167, + "IoU.minibike": 0.5629000091552734, + "IoU.cradle": 0.7390000152587891, + "IoU.oven": 0.183700008392334, + "IoU.ball": 0.35970001220703124, + "IoU.food": 0.5377000045776367, + "IoU.step": 0.11850000381469726, + "IoU.tank": 0.4841999816894531, + "IoU.trade name": 0.15489999771118165, + "IoU.microwave": 0.3215000152587891, + "IoU.pot": 0.33790000915527346, + "IoU.animal": 0.5586999893188477, + "IoU.bicycle": 0.4729000091552734, + "IoU.lake": 0.3320000076293945, + "IoU.dishwasher": 0.5152000045776367, + "IoU.screen": 0.663499984741211, + "IoU.blanket": 0.044600000381469725, + "IoU.sculpture": 0.3813999938964844, + "IoU.hood": 0.3777000045776367, + "IoU.sconce": 0.2936000061035156, + "IoU.vase": 0.25559999465942385, + "IoU.traffic light": 0.215, + "IoU.tray": 0.009200000166893006, + "IoU.ashcan": 0.3156999969482422, + "IoU.fan": 0.47380001068115235, + "IoU.pier": 0.2993000030517578, + "IoU.crt screen": 0.0, + "IoU.plate": 0.38729999542236326, + "IoU.monitor": 0.05400000095367432, + "IoU.bulletin board": 0.34560001373291016, + "IoU.shower": 0.00019999999552965163, + "IoU.radiator": 0.5038000106811523, + "IoU.glass": 0.06389999866485596, + "IoU.clock": 0.19889999389648438, + "IoU.flag": 0.31020000457763675, + "Acc.wall": 0.8968000030517578, + "Acc.building": 0.9238999938964844, + "Acc.sky": 0.9773000335693359, + "Acc.floor": 0.9109999847412109, + "Acc.tree": 0.8744000244140625, + "Acc.ceiling": 0.895199966430664, + "Acc.road": 0.9011000061035156, + "Acc.bed ": 0.950999984741211, + "Acc.windowpane": 0.7354000091552735, + "Acc.grass": 0.8043000030517579, + "Acc.cabinet": 0.7308999633789063, + "Acc.sidewalk": 0.7794999694824218, + "Acc.person": 0.8962000274658203, + "Acc.earth": 0.5215000152587891, + "Acc.door": 0.6190000152587891, + "Acc.table": 0.7152999877929688, + "Acc.mountain": 0.6962000274658203, + "Acc.plant": 0.5679999923706055, + "Acc.curtain": 0.8255999755859375, + "Acc.chair": 0.6375, + "Acc.car": 0.8954000091552734, + "Acc.water": 0.68, + "Acc.painting": 0.8263999938964843, + "Acc.sofa": 0.7755999755859375, + 
"Acc.shelf": 0.6013000106811524, + "Acc.house": 0.5709999847412109, + "Acc.sea": 0.8202999877929688, + "Acc.mirror": 0.7212999725341797, + "Acc.rug": 0.6669999694824219, + "Acc.field": 0.5097999954223633, + "Acc.armchair": 0.6075, + "Acc.seat": 0.7954000091552734, + "Acc.fence": 0.5377999877929688, + "Acc.desk": 0.6663999938964844, + "Acc.rock": 0.6104000091552735, + "Acc.wardrobe": 0.650199966430664, + "Acc.lamp": 0.6218000030517579, + "Acc.bathtub": 0.8008000183105469, + "Acc.railing": 0.4402000045776367, + "Acc.cushion": 0.595999984741211, + "Acc.base": 0.42529998779296874, + "Acc.box": 0.29389999389648436, + "Acc.column": 0.5322000122070313, + "Acc.signboard": 0.42279998779296873, + "Acc.chest of drawers": 0.5520999908447266, + "Acc.counter": 0.3491999816894531, + "Acc.sand": 0.49470001220703125, + "Acc.sink": 0.704800033569336, + "Acc.skyscraper": 0.620999984741211, + "Acc.fireplace": 0.8701999664306641, + "Acc.refrigerator": 0.82, + "Acc.grandstand": 0.6772000122070313, + "Acc.path": 0.2854000091552734, + "Acc.stairs": 0.35259998321533204, + "Acc.runway": 0.8079000091552735, + "Acc.case": 0.6288999938964843, + "Acc.pool table": 0.9518000030517578, + "Acc.pillow": 0.5804000091552735, + "Acc.screen door": 0.6805000305175781, + "Acc.stairway": 0.422400016784668, + "Acc.river": 0.42330001831054687, + "Acc.bridge": 0.8430999755859375, + "Acc.bookcase": 0.5545999908447266, + "Acc.blind": 0.37310001373291013, + "Acc.coffee table": 0.7758000183105469, + "Acc.toilet": 0.8679000091552734, + "Acc.flower": 0.45349998474121095, + "Acc.book": 0.5427000045776367, + "Acc.hill": 0.14229999542236327, + "Acc.bench": 0.5054000091552734, + "Acc.countertop": 0.7131999969482422, + "Acc.stove": 0.7716999816894531, + "Acc.palm": 0.5356999969482422, + "Acc.kitchen island": 0.5191999816894531, + "Acc.computer": 0.6972000122070312, + "Acc.swivel chair": 0.505099983215332, + "Acc.boat": 0.826500015258789, + "Acc.bar": 0.584900016784668, + "Acc.arcade machine": 0.390099983215332, + "Acc.hovel": 0.37349998474121093, + "Acc.bus": 0.8979000091552735, + "Acc.towel": 0.6508999633789062, + "Acc.light": 0.2770999908447266, + "Acc.truck": 0.27350000381469725, + "Acc.tower": 0.44779998779296876, + "Acc.chandelier": 0.7001000213623046, + "Acc.awning": 0.24510000228881837, + "Acc.streetlight": 0.15520000457763672, + "Acc.booth": 0.4804000091552734, + "Acc.television receiver": 0.7173000335693359, + "Acc.airplane": 0.6818000030517578, + "Acc.dirt track": 0.2281999969482422, + "Acc.apparel": 0.43020000457763674, + "Acc.pole": 0.32630001068115233, + "Acc.land": 0.01190000057220459, + "Acc.bannister": 0.08289999961853027, + "Acc.escalator": 0.2181999969482422, + "Acc.ottoman": 0.5961999893188477, + "Acc.bottle": 0.47700000762939454, + "Acc.buffet": 0.4452000045776367, + "Acc.poster": 0.25190000534057616, + "Acc.stage": 0.3068000030517578, + "Acc.van": 0.44599998474121094, + "Acc.ship": 0.7326000213623047, + "Acc.fountain": 0.20690000534057618, + "Acc.conveyer belt": 0.8073999786376953, + "Acc.canopy": 0.3086000061035156, + "Acc.washer": 0.7255000305175782, + "Acc.plaything": 0.3877000045776367, + "Acc.swimming pool": 0.6733999633789063, + "Acc.stool": 0.2744000053405762, + "Acc.barrel": 0.6172000122070312, + "Acc.basket": 0.21450000762939453, + "Acc.waterfall": 0.6222000122070312, + "Acc.tent": 0.9883000183105469, + "Acc.bag": 0.10079999923706055, + "Acc.minibike": 0.6661000061035156, + "Acc.cradle": 0.9545999908447266, + "Acc.oven": 0.505099983215332, + "Acc.ball": 0.43209999084472656, + "Acc.food": 0.6202000045776367, + 
"Acc.step": 0.13210000038146974, + "Acc.tank": 0.5584000015258789, + "Acc.trade name": 0.16420000076293945, + "Acc.microwave": 0.335099983215332, + "Acc.pot": 0.37650001525878907, + "Acc.animal": 0.5866999816894531, + "Acc.bicycle": 0.6761000061035156, + "Acc.lake": 0.347400016784668, + "Acc.dishwasher": 0.6045000076293945, + "Acc.screen": 0.9009999847412109, + "Acc.blanket": 0.04840000152587891, + "Acc.sculpture": 0.5470000076293945, + "Acc.hood": 0.39419998168945314, + "Acc.sconce": 0.34509998321533203, + "Acc.vase": 0.33130001068115233, + "Acc.traffic light": 0.2755999946594238, + "Acc.tray": 0.012200000286102295, + "Acc.ashcan": 0.45939998626708983, + "Acc.fan": 0.5645999908447266, + "Acc.pier": 0.3959000015258789, + "Acc.crt screen": 0.0, + "Acc.plate": 0.47009998321533203, + "Acc.monitor": 0.07110000133514405, + "Acc.bulletin board": 0.40599998474121096, + "Acc.shower": 0.00019999999552965163, + "Acc.radiator": 0.5679000091552734, + "Acc.glass": 0.06820000171661376, + "Acc.clock": 0.23139999389648438, + "Acc.flag": 0.32810001373291015 + } + }, + "35": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8349, + "mIoU": 0.5002, + "mAcc": 0.6271, + "IoU.wall": 0.7783000183105468, + "IoU.building": 0.8311000061035156, + "IoU.sky": 0.9370999908447266, + "IoU.floor": 0.8191999816894531, + "IoU.tree": 0.7434999847412109, + "IoU.ceiling": 0.8372000122070312, + "IoU.road": 0.8358999633789063, + "IoU.bed ": 0.8966999816894531, + "IoU.windowpane": 0.6231999969482422, + "IoU.grass": 0.6920999908447265, + "IoU.cabinet": 0.6252000045776367, + "IoU.sidewalk": 0.6544999694824218, + "IoU.person": 0.8072000122070313, + "IoU.earth": 0.3756999969482422, + "IoU.door": 0.5166999816894531, + "IoU.table": 0.5986999893188476, + "IoU.mountain": 0.5770999908447265, + "IoU.plant": 0.5054999923706055, + "IoU.curtain": 0.7536000061035156, + "IoU.chair": 0.5640999984741211, + "IoU.car": 0.8458000183105469, + "IoU.water": 0.6104999923706055, + "IoU.painting": 0.7119000244140625, + "IoU.sofa": 0.6969000244140625, + "IoU.shelf": 0.4134000015258789, + "IoU.house": 0.4991999816894531, + "IoU.sea": 0.6852999877929687, + "IoU.mirror": 0.6866999816894531, + "IoU.rug": 0.6795999908447266, + "IoU.field": 0.3716999816894531, + "IoU.armchair": 0.44919998168945313, + "IoU.seat": 0.638499984741211, + "IoU.fence": 0.4433000183105469, + "IoU.desk": 0.5018999862670899, + "IoU.rock": 0.49270000457763674, + "IoU.wardrobe": 0.5754000091552735, + "IoU.lamp": 0.5675, + "IoU.bathtub": 0.8623000335693359, + "IoU.railing": 0.3893999862670898, + "IoU.cushion": 0.5979999923706054, + "IoU.base": 0.3372999954223633, + "IoU.box": 0.26129999160766604, + "IoU.column": 0.4843000030517578, + "IoU.signboard": 0.33919998168945314, + "IoU.chest of drawers": 0.33189998626708983, + "IoU.counter": 0.34689998626708984, + "IoU.sand": 0.5329000091552735, + "IoU.sink": 0.7119999694824218, + "IoU.skyscraper": 0.5416999816894531, + "IoU.fireplace": 0.7248999786376953, + "IoU.refrigerator": 0.7311000061035157, + "IoU.grandstand": 0.5115000152587891, + "IoU.path": 0.2531999969482422, + "IoU.stairs": 0.26850000381469724, + "IoU.runway": 0.7312999725341797, + "IoU.case": 0.5684999847412109, + "IoU.pool table": 0.9148999786376953, + "IoU.pillow": 0.5702999877929688, + "IoU.screen door": 0.6138999938964844, + "IoU.stairway": 0.3089999961853027, + "IoU.river": 0.195, + "IoU.bridge": 0.5506000137329101, + "IoU.bookcase": 0.3206999969482422, + "IoU.blind": 0.42139999389648436, 
+ "IoU.coffee table": 0.5706999969482421, + "IoU.toilet": 0.800199966430664, + "IoU.flower": 0.37279998779296875, + "IoU.book": 0.46279998779296877, + "IoU.hill": 0.1425, + "IoU.bench": 0.48330001831054686, + "IoU.countertop": 0.6029000091552734, + "IoU.stove": 0.734800033569336, + "IoU.palm": 0.4961000061035156, + "IoU.kitchen island": 0.46360000610351565, + "IoU.computer": 0.760199966430664, + "IoU.swivel chair": 0.5268000030517578, + "IoU.boat": 0.6462999725341797, + "IoU.bar": 0.5220999908447266, + "IoU.arcade machine": 0.7427999877929687, + "IoU.hovel": 0.46869998931884765, + "IoU.bus": 0.9080999755859375, + "IoU.towel": 0.6422000122070313, + "IoU.light": 0.447599983215332, + "IoU.truck": 0.21850000381469725, + "IoU.tower": 0.31209999084472656, + "IoU.chandelier": 0.6406999969482422, + "IoU.awning": 0.2944000053405762, + "IoU.streetlight": 0.19770000457763673, + "IoU.booth": 0.38419998168945313, + "IoU.television receiver": 0.6961000061035156, + "IoU.airplane": 0.6283000183105468, + "IoU.dirt track": 0.015399999618530273, + "IoU.apparel": 0.3454000091552734, + "IoU.pole": 0.16149999618530272, + "IoU.land": 0.02619999885559082, + "IoU.bannister": 0.1090999984741211, + "IoU.escalator": 0.5306999969482422, + "IoU.ottoman": 0.48650001525878905, + "IoU.bottle": 0.3192000007629395, + "IoU.buffet": 0.48700000762939455, + "IoU.poster": 0.1452999973297119, + "IoU.stage": 0.17610000610351562, + "IoU.van": 0.4093000030517578, + "IoU.ship": 0.15460000038146973, + "IoU.fountain": 0.2944000053405762, + "IoU.conveyer belt": 0.6863999938964844, + "IoU.canopy": 0.2809000015258789, + "IoU.washer": 0.6988999938964844, + "IoU.plaything": 0.27969999313354493, + "IoU.swimming pool": 0.7018000030517578, + "IoU.stool": 0.36950000762939456, + "IoU.barrel": 0.49599998474121093, + "IoU.basket": 0.3472999954223633, + "IoU.waterfall": 0.46299999237060546, + "IoU.tent": 0.8797000122070312, + "IoU.bag": 0.171299991607666, + "IoU.minibike": 0.7187999725341797, + "IoU.cradle": 0.814800033569336, + "IoU.oven": 0.34310001373291016, + "IoU.ball": 0.5, + "IoU.food": 0.5052000045776367, + "IoU.step": 0.09310000419616699, + "IoU.tank": 0.5647000122070313, + "IoU.trade name": 0.19360000610351563, + "IoU.microwave": 0.7198999786376953, + "IoU.pot": 0.49970001220703125, + "IoU.animal": 0.6476999664306641, + "IoU.bicycle": 0.5831999969482422, + "IoU.lake": 0.5608000183105468, + "IoU.dishwasher": 0.6541999816894531, + "IoU.screen": 0.5443999862670899, + "IoU.blanket": 0.15899999618530272, + "IoU.sculpture": 0.7094000244140625, + "IoU.hood": 0.5275999832153321, + "IoU.sconce": 0.36209999084472655, + "IoU.vase": 0.3579999923706055, + "IoU.traffic light": 0.28600000381469726, + "IoU.tray": 0.10069999694824219, + "IoU.ashcan": 0.38709999084472657, + "IoU.fan": 0.5213000106811524, + "IoU.pier": 0.22969999313354492, + "IoU.crt screen": 0.0525, + "IoU.plate": 0.5095000076293945, + "IoU.monitor": 0.2453000068664551, + "IoU.bulletin board": 0.49349998474121093, + "IoU.shower": 0.009399999976158141, + "IoU.radiator": 0.6033000183105469, + "IoU.glass": 0.1621999931335449, + "IoU.clock": 0.35569999694824217, + "IoU.flag": 0.5502000045776367, + "Acc.wall": 0.8776999664306641, + "Acc.building": 0.9351000213623046, + "Acc.sky": 0.9612999725341796, + "Acc.floor": 0.8980000305175782, + "Acc.tree": 0.8933999633789063, + "Acc.ceiling": 0.9062999725341797, + "Acc.road": 0.8991000366210937, + "Acc.bed ": 0.9680000305175781, + "Acc.windowpane": 0.7811000061035156, + "Acc.grass": 0.8337000274658203, + "Acc.cabinet": 0.7498999786376953, + 
"Acc.sidewalk": 0.8112000274658203, + "Acc.person": 0.9252999877929687, + "Acc.earth": 0.5229999923706055, + "Acc.door": 0.6577999877929688, + "Acc.table": 0.7559999847412109, + "Acc.mountain": 0.7051000213623047, + "Acc.plant": 0.5974000167846679, + "Acc.curtain": 0.8702999877929688, + "Acc.chair": 0.702699966430664, + "Acc.car": 0.9270999908447266, + "Acc.water": 0.7461000061035157, + "Acc.painting": 0.8663999938964844, + "Acc.sofa": 0.8523000335693359, + "Acc.shelf": 0.5443000030517579, + "Acc.house": 0.6473999786376953, + "Acc.sea": 0.8637000274658203, + "Acc.mirror": 0.7813999938964844, + "Acc.rug": 0.8027999877929688, + "Acc.field": 0.5583000183105469, + "Acc.armchair": 0.6256000137329102, + "Acc.seat": 0.8420999908447265, + "Acc.fence": 0.6, + "Acc.desk": 0.7595999908447265, + "Acc.rock": 0.6830999755859375, + "Acc.wardrobe": 0.7533000183105468, + "Acc.lamp": 0.7305000305175782, + "Acc.bathtub": 0.9016999816894531, + "Acc.railing": 0.4884000015258789, + "Acc.cushion": 0.7158000183105468, + "Acc.base": 0.6263000106811524, + "Acc.box": 0.33360000610351564, + "Acc.column": 0.5922999954223633, + "Acc.signboard": 0.42790000915527343, + "Acc.chest of drawers": 0.639000015258789, + "Acc.counter": 0.47959999084472654, + "Acc.sand": 0.7554000091552734, + "Acc.sink": 0.7773999786376953, + "Acc.skyscraper": 0.6325999832153321, + "Acc.fireplace": 0.9188999938964844, + "Acc.refrigerator": 0.826500015258789, + "Acc.grandstand": 0.7186000061035156, + "Acc.path": 0.37, + "Acc.stairs": 0.3668000030517578, + "Acc.runway": 0.9643000030517578, + "Acc.case": 0.7206999969482422, + "Acc.pool table": 0.9725, + "Acc.pillow": 0.6647000122070312, + "Acc.screen door": 0.7098999786376953, + "Acc.stairway": 0.47139999389648435, + "Acc.river": 0.37900001525878907, + "Acc.bridge": 0.6390999984741211, + "Acc.bookcase": 0.5465000152587891, + "Acc.blind": 0.47119998931884766, + "Acc.coffee table": 0.8501999664306641, + "Acc.toilet": 0.9033000183105468, + "Acc.flower": 0.5386999893188477, + "Acc.book": 0.6648999786376953, + "Acc.hill": 0.26329999923706054, + "Acc.bench": 0.5804000091552735, + "Acc.countertop": 0.7413999938964844, + "Acc.stove": 0.8583000183105469, + "Acc.palm": 0.7027999877929687, + "Acc.kitchen island": 0.7502999877929688, + "Acc.computer": 0.9154000091552734, + "Acc.swivel chair": 0.7188999938964844, + "Acc.boat": 0.8608999633789063, + "Acc.bar": 0.6919999694824219, + "Acc.arcade machine": 0.8456999969482422, + "Acc.hovel": 0.507400016784668, + "Acc.bus": 0.9584999847412109, + "Acc.towel": 0.7787999725341797, + "Acc.light": 0.5572999954223633, + "Acc.truck": 0.29639999389648436, + "Acc.tower": 0.4513999938964844, + "Acc.chandelier": 0.8179000091552734, + "Acc.awning": 0.34330001831054685, + "Acc.streetlight": 0.29139999389648436, + "Acc.booth": 0.43709999084472656, + "Acc.television receiver": 0.8213999938964843, + "Acc.airplane": 0.6869000244140625, + "Acc.dirt track": 0.05050000190734863, + "Acc.apparel": 0.44599998474121094, + "Acc.pole": 0.20860000610351562, + "Acc.land": 0.05440000057220459, + "Acc.bannister": 0.14829999923706055, + "Acc.escalator": 0.7820999908447266, + "Acc.ottoman": 0.7041999816894531, + "Acc.bottle": 0.5113000106811524, + "Acc.buffet": 0.6386000061035156, + "Acc.poster": 0.17299999237060548, + "Acc.stage": 0.42619998931884767, + "Acc.van": 0.5081999969482421, + "Acc.ship": 0.15850000381469725, + "Acc.fountain": 0.3022999954223633, + "Acc.conveyer belt": 0.9451000213623046, + "Acc.canopy": 0.30170000076293946, + "Acc.washer": 0.7416000366210938, + "Acc.plaything": 
0.3865999984741211, + "Acc.swimming pool": 0.8937999725341796, + "Acc.stool": 0.5420000076293945, + "Acc.barrel": 0.6479000091552735, + "Acc.basket": 0.47400001525878904, + "Acc.waterfall": 0.6729000091552735, + "Acc.tent": 0.9887000274658203, + "Acc.bag": 0.18860000610351563, + "Acc.minibike": 0.805, + "Acc.cradle": 0.9737999725341797, + "Acc.oven": 0.537400016784668, + "Acc.ball": 0.595, + "Acc.food": 0.5529999923706055, + "Acc.step": 0.12710000038146974, + "Acc.tank": 0.6547000122070312, + "Acc.trade name": 0.20440000534057617, + "Acc.microwave": 0.8016000366210938, + "Acc.pot": 0.6086999893188476, + "Acc.animal": 0.6805000305175781, + "Acc.bicycle": 0.7625, + "Acc.lake": 0.7393000030517578, + "Acc.dishwasher": 0.7458000183105469, + "Acc.screen": 0.7287999725341797, + "Acc.blanket": 0.19799999237060548, + "Acc.sculpture": 0.8030999755859375, + "Acc.hood": 0.6719000244140625, + "Acc.sconce": 0.469900016784668, + "Acc.vase": 0.5543000030517579, + "Acc.traffic light": 0.4859999847412109, + "Acc.tray": 0.1402999973297119, + "Acc.ashcan": 0.5488999938964844, + "Acc.fan": 0.757300033569336, + "Acc.pier": 0.4431999969482422, + "Acc.crt screen": 0.12739999771118163, + "Acc.plate": 0.7269999694824218, + "Acc.monitor": 0.30639999389648437, + "Acc.bulletin board": 0.6698000335693359, + "Acc.shower": 0.05, + "Acc.radiator": 0.7195999908447266, + "Acc.glass": 0.1775, + "Acc.clock": 0.39930000305175783, + "Acc.flag": 0.6252999877929688 + } + }, + "36": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8331000000000001, + "mIoU": 0.4964, + "mAcc": 0.6256, + "IoU.wall": 0.774000015258789, + "IoU.building": 0.8338999938964844, + "IoU.sky": 0.9375, + "IoU.floor": 0.8144999694824219, + "IoU.tree": 0.7408999633789063, + "IoU.ceiling": 0.8325, + "IoU.road": 0.8323000335693359, + "IoU.bed ": 0.8968000030517578, + "IoU.windowpane": 0.6186999893188476, + "IoU.grass": 0.6944999694824219, + "IoU.cabinet": 0.6129000091552734, + "IoU.sidewalk": 0.6452999877929687, + "IoU.person": 0.800199966430664, + "IoU.earth": 0.38540000915527345, + "IoU.door": 0.5027999877929688, + "IoU.table": 0.5913999938964843, + "IoU.mountain": 0.5881999969482422, + "IoU.plant": 0.5065000152587891, + "IoU.curtain": 0.7245999908447266, + "IoU.chair": 0.5540999984741211, + "IoU.car": 0.8391000366210938, + "IoU.water": 0.6045999908447266, + "IoU.painting": 0.7033999633789062, + "IoU.sofa": 0.7091000366210938, + "IoU.shelf": 0.40799999237060547, + "IoU.house": 0.5611999893188476, + "IoU.sea": 0.6918000030517578, + "IoU.mirror": 0.6941999816894531, + "IoU.rug": 0.6604000091552734, + "IoU.field": 0.3820000076293945, + "IoU.armchair": 0.47650001525878904, + "IoU.seat": 0.6418000030517578, + "IoU.fence": 0.4425, + "IoU.desk": 0.4988999938964844, + "IoU.rock": 0.4595000076293945, + "IoU.wardrobe": 0.5615999984741211, + "IoU.lamp": 0.5568000030517578, + "IoU.bathtub": 0.8572000122070312, + "IoU.railing": 0.3840999984741211, + "IoU.cushion": 0.5961999893188477, + "IoU.base": 0.3370999908447266, + "IoU.box": 0.2459000015258789, + "IoU.column": 0.4588999938964844, + "IoU.signboard": 0.342599983215332, + "IoU.chest of drawers": 0.34240001678466797, + "IoU.counter": 0.3275, + "IoU.sand": 0.4836000061035156, + "IoU.sink": 0.6883999633789063, + "IoU.skyscraper": 0.5602999877929687, + "IoU.fireplace": 0.7156999969482422, + "IoU.refrigerator": 0.7608999633789062, + "IoU.grandstand": 0.5461999893188476, + "IoU.path": 0.24540000915527344, + "IoU.stairs": 
0.253700008392334, + "IoU.runway": 0.7391999816894531, + "IoU.case": 0.5493000030517579, + "IoU.pool table": 0.9129000091552735, + "IoU.pillow": 0.576500015258789, + "IoU.screen door": 0.648499984741211, + "IoU.stairway": 0.31610000610351563, + "IoU.river": 0.1975, + "IoU.bridge": 0.590900001525879, + "IoU.bookcase": 0.3609999847412109, + "IoU.blind": 0.43450000762939456, + "IoU.coffee table": 0.5770999908447265, + "IoU.toilet": 0.7776000213623047, + "IoU.flower": 0.31760000228881835, + "IoU.book": 0.4672000122070312, + "IoU.hill": 0.12899999618530272, + "IoU.bench": 0.4881999969482422, + "IoU.countertop": 0.5934000015258789, + "IoU.stove": 0.7445999908447266, + "IoU.palm": 0.5056999969482422, + "IoU.kitchen island": 0.43939998626708987, + "IoU.computer": 0.7663999938964844, + "IoU.swivel chair": 0.4786000061035156, + "IoU.boat": 0.6936000061035156, + "IoU.bar": 0.49529998779296874, + "IoU.arcade machine": 0.6848999786376954, + "IoU.hovel": 0.4881999969482422, + "IoU.bus": 0.9012999725341797, + "IoU.towel": 0.6286000061035156, + "IoU.light": 0.4361000061035156, + "IoU.truck": 0.2877000045776367, + "IoU.tower": 0.2954000091552734, + "IoU.chandelier": 0.6254000091552734, + "IoU.awning": 0.29920000076293946, + "IoU.streetlight": 0.1946999931335449, + "IoU.booth": 0.40180000305175784, + "IoU.television receiver": 0.6551000213623047, + "IoU.airplane": 0.5913999938964843, + "IoU.dirt track": 0.015, + "IoU.apparel": 0.37189998626708987, + "IoU.pole": 0.13789999961853028, + "IoU.land": 0.029100000858306885, + "IoU.bannister": 0.10510000228881836, + "IoU.escalator": 0.4916999816894531, + "IoU.ottoman": 0.48630001068115236, + "IoU.bottle": 0.34330001831054685, + "IoU.buffet": 0.4627000045776367, + "IoU.poster": 0.17780000686645508, + "IoU.stage": 0.175, + "IoU.van": 0.41150001525878904, + "IoU.ship": 0.22430000305175782, + "IoU.fountain": 0.22680000305175782, + "IoU.conveyer belt": 0.6566999816894531, + "IoU.canopy": 0.24290000915527343, + "IoU.washer": 0.7366999816894532, + "IoU.plaything": 0.25760000228881835, + "IoU.swimming pool": 0.643499984741211, + "IoU.stool": 0.35650001525878905, + "IoU.barrel": 0.462599983215332, + "IoU.basket": 0.28889999389648435, + "IoU.waterfall": 0.5597999954223633, + "IoU.tent": 0.8644999694824219, + "IoU.bag": 0.15489999771118165, + "IoU.minibike": 0.7034999847412109, + "IoU.cradle": 0.8112999725341797, + "IoU.oven": 0.38310001373291014, + "IoU.ball": 0.48970001220703124, + "IoU.food": 0.5079000091552734, + "IoU.step": 0.05389999866485596, + "IoU.tank": 0.5359999847412109, + "IoU.trade name": 0.21059999465942383, + "IoU.microwave": 0.7908000183105469, + "IoU.pot": 0.44099998474121094, + "IoU.animal": 0.6419999694824219, + "IoU.bicycle": 0.5822000122070312, + "IoU.lake": 0.648499984741211, + "IoU.dishwasher": 0.6315000152587891, + "IoU.screen": 0.5215000152587891, + "IoU.blanket": 0.1818000030517578, + "IoU.sculpture": 0.6920999908447265, + "IoU.hood": 0.504900016784668, + "IoU.sconce": 0.37970001220703126, + "IoU.vase": 0.35819999694824217, + "IoU.traffic light": 0.2677000045776367, + "IoU.tray": 0.08270000457763672, + "IoU.ashcan": 0.38919998168945313, + "IoU.fan": 0.5291999816894531, + "IoU.pier": 0.18479999542236328, + "IoU.crt screen": 0.0421999979019165, + "IoU.plate": 0.48509998321533204, + "IoU.monitor": 0.22549999237060547, + "IoU.bulletin board": 0.49970001220703125, + "IoU.shower": 0.01100000023841858, + "IoU.radiator": 0.5683000183105469, + "IoU.glass": 0.13510000228881835, + "IoU.clock": 0.32880001068115233, + "IoU.flag": 0.5433000183105469, + 
"Acc.wall": 0.8730999755859375, + "Acc.building": 0.9380000305175781, + "Acc.sky": 0.9606999969482422, + "Acc.floor": 0.8908999633789062, + "Acc.tree": 0.8968000030517578, + "Acc.ceiling": 0.9026999664306641, + "Acc.road": 0.8913999938964844, + "Acc.bed ": 0.9662999725341797, + "Acc.windowpane": 0.7705000305175781, + "Acc.grass": 0.8258999633789063, + "Acc.cabinet": 0.74, + "Acc.sidewalk": 0.8098999786376954, + "Acc.person": 0.9219999694824219, + "Acc.earth": 0.543499984741211, + "Acc.door": 0.652300033569336, + "Acc.table": 0.7513999938964844, + "Acc.mountain": 0.7161000061035157, + "Acc.plant": 0.590900001525879, + "Acc.curtain": 0.8637000274658203, + "Acc.chair": 0.6947000122070313, + "Acc.car": 0.9298000335693359, + "Acc.water": 0.7441999816894531, + "Acc.painting": 0.8668000030517579, + "Acc.sofa": 0.850999984741211, + "Acc.shelf": 0.5429999923706055, + "Acc.house": 0.6762999725341797, + "Acc.sea": 0.8798999786376953, + "Acc.mirror": 0.7931999969482422, + "Acc.rug": 0.8019000244140625, + "Acc.field": 0.545, + "Acc.armchair": 0.6576000213623047, + "Acc.seat": 0.8463999938964843, + "Acc.fence": 0.6079999923706054, + "Acc.desk": 0.7643000030517578, + "Acc.rock": 0.6252999877929688, + "Acc.wardrobe": 0.7593000030517578, + "Acc.lamp": 0.7369000244140625, + "Acc.bathtub": 0.9069999694824219, + "Acc.railing": 0.49759998321533205, + "Acc.cushion": 0.7172000122070312, + "Acc.base": 0.6561000061035156, + "Acc.box": 0.31079999923706053, + "Acc.column": 0.5545000076293946, + "Acc.signboard": 0.42950000762939455, + "Acc.chest of drawers": 0.6379999923706055, + "Acc.counter": 0.4243000030517578, + "Acc.sand": 0.74, + "Acc.sink": 0.7665000152587891, + "Acc.skyscraper": 0.66, + "Acc.fireplace": 0.927300033569336, + "Acc.refrigerator": 0.8581999969482422, + "Acc.grandstand": 0.7055999755859375, + "Acc.path": 0.36439998626708986, + "Acc.stairs": 0.3436000061035156, + "Acc.runway": 0.967699966430664, + "Acc.case": 0.6893000030517578, + "Acc.pool table": 0.9751000213623047, + "Acc.pillow": 0.6726000213623047, + "Acc.screen door": 0.7526999664306641, + "Acc.stairway": 0.47189998626708984, + "Acc.river": 0.35720001220703124, + "Acc.bridge": 0.7098999786376953, + "Acc.bookcase": 0.5304000091552734, + "Acc.blind": 0.505099983215332, + "Acc.coffee table": 0.8522000122070312, + "Acc.toilet": 0.9058000183105469, + "Acc.flower": 0.49740001678466794, + "Acc.book": 0.6680000305175782, + "Acc.hill": 0.2538999938964844, + "Acc.bench": 0.5884999847412109, + "Acc.countertop": 0.722699966430664, + "Acc.stove": 0.8677999877929687, + "Acc.palm": 0.7055000305175781, + "Acc.kitchen island": 0.7144999694824219, + "Acc.computer": 0.917300033569336, + "Acc.swivel chair": 0.7194000244140625, + "Acc.boat": 0.855, + "Acc.bar": 0.6612000274658203, + "Acc.arcade machine": 0.7762000274658203, + "Acc.hovel": 0.533499984741211, + "Acc.bus": 0.9623999786376953, + "Acc.towel": 0.7715000152587891, + "Acc.light": 0.548499984741211, + "Acc.truck": 0.38919998168945313, + "Acc.tower": 0.4711000061035156, + "Acc.chandelier": 0.8027999877929688, + "Acc.awning": 0.3536000061035156, + "Acc.streetlight": 0.29389999389648436, + "Acc.booth": 0.4445000076293945, + "Acc.television receiver": 0.811500015258789, + "Acc.airplane": 0.6563999938964844, + "Acc.dirt track": 0.060900001525878905, + "Acc.apparel": 0.4797999954223633, + "Acc.pole": 0.173700008392334, + "Acc.land": 0.05610000133514404, + "Acc.bannister": 0.1390999984741211, + "Acc.escalator": 0.7498000335693359, + "Acc.ottoman": 0.7001000213623046, + "Acc.bottle": 0.5572000122070313, + 
"Acc.buffet": 0.632400016784668, + "Acc.poster": 0.20989999771118165, + "Acc.stage": 0.4502000045776367, + "Acc.van": 0.5047000122070312, + "Acc.ship": 0.22950000762939454, + "Acc.fountain": 0.233799991607666, + "Acc.conveyer belt": 0.9269999694824219, + "Acc.canopy": 0.29329999923706057, + "Acc.washer": 0.7570999908447266, + "Acc.plaything": 0.3945000076293945, + "Acc.swimming pool": 0.8906999969482422, + "Acc.stool": 0.505099983215332, + "Acc.barrel": 0.6508999633789062, + "Acc.basket": 0.4209999847412109, + "Acc.waterfall": 0.7415000152587891, + "Acc.tent": 0.9879000091552734, + "Acc.bag": 0.17399999618530274, + "Acc.minibike": 0.7993000030517579, + "Acc.cradle": 0.9737000274658203, + "Acc.oven": 0.5031000137329101, + "Acc.ball": 0.5886999893188477, + "Acc.food": 0.56, + "Acc.step": 0.07449999809265137, + "Acc.tank": 0.6522000122070313, + "Acc.trade name": 0.226299991607666, + "Acc.microwave": 0.8806999969482422, + "Acc.pot": 0.5306999969482422, + "Acc.animal": 0.6737999725341797, + "Acc.bicycle": 0.7501999664306641, + "Acc.lake": 0.7473000335693359, + "Acc.dishwasher": 0.7268000030517578, + "Acc.screen": 0.7551000213623047, + "Acc.blanket": 0.21870000839233397, + "Acc.sculpture": 0.8137000274658203, + "Acc.hood": 0.6679000091552735, + "Acc.sconce": 0.48009998321533204, + "Acc.vase": 0.5579999923706055, + "Acc.traffic light": 0.47869998931884766, + "Acc.tray": 0.141899995803833, + "Acc.ashcan": 0.5327000045776367, + "Acc.fan": 0.7391000366210938, + "Acc.pier": 0.44860000610351564, + "Acc.crt screen": 0.11329999923706055, + "Acc.plate": 0.7030000305175781, + "Acc.monitor": 0.2722999954223633, + "Acc.bulletin board": 0.7008999633789063, + "Acc.shower": 0.05, + "Acc.radiator": 0.6927999877929687, + "Acc.glass": 0.14810000419616698, + "Acc.clock": 0.38459999084472657, + "Acc.flag": 0.6295000076293945 + } + }, + "37": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8281000000000001, + "mIoU": 0.4827, + "mAcc": 0.6137, + "IoU.wall": 0.7733000183105468, + "IoU.building": 0.8316999816894531, + "IoU.sky": 0.9362999725341797, + "IoU.floor": 0.8055000305175781, + "IoU.tree": 0.7393000030517578, + "IoU.ceiling": 0.8291000366210938, + "IoU.road": 0.8316000366210937, + "IoU.bed ": 0.8788999938964843, + "IoU.windowpane": 0.6163999938964844, + "IoU.grass": 0.6933000183105469, + "IoU.cabinet": 0.5927000045776367, + "IoU.sidewalk": 0.6445999908447265, + "IoU.person": 0.7966999816894531, + "IoU.earth": 0.36029998779296873, + "IoU.door": 0.500099983215332, + "IoU.table": 0.5772999954223633, + "IoU.mountain": 0.5654999923706054, + "IoU.plant": 0.5011000061035156, + "IoU.curtain": 0.7231999969482422, + "IoU.chair": 0.5415999984741211, + "IoU.car": 0.8381999969482422, + "IoU.water": 0.5561999893188476, + "IoU.painting": 0.7005000305175781, + "IoU.sofa": 0.6805999755859375, + "IoU.shelf": 0.41880001068115236, + "IoU.house": 0.5397000122070312, + "IoU.sea": 0.6461000061035156, + "IoU.mirror": 0.6522000122070313, + "IoU.rug": 0.6562000274658203, + "IoU.field": 0.3315000152587891, + "IoU.armchair": 0.4438999938964844, + "IoU.seat": 0.6043999862670898, + "IoU.fence": 0.400099983215332, + "IoU.desk": 0.485, + "IoU.rock": 0.447599983215332, + "IoU.wardrobe": 0.5318999862670899, + "IoU.lamp": 0.5547999954223632, + "IoU.bathtub": 0.8476000213623047, + "IoU.railing": 0.3384000015258789, + "IoU.cushion": 0.5606999969482422, + "IoU.base": 0.32349998474121094, + "IoU.box": 0.24600000381469728, + "IoU.column": 
0.46900001525878904, + "IoU.signboard": 0.33240001678466796, + "IoU.chest of drawers": 0.37970001220703126, + "IoU.counter": 0.3825, + "IoU.sand": 0.45110000610351564, + "IoU.sink": 0.6826999664306641, + "IoU.skyscraper": 0.5779000091552734, + "IoU.fireplace": 0.7355000305175782, + "IoU.refrigerator": 0.7327999877929687, + "IoU.grandstand": 0.5122999954223633, + "IoU.path": 0.2484000015258789, + "IoU.stairs": 0.21219999313354493, + "IoU.runway": 0.7269999694824218, + "IoU.case": 0.5179999923706055, + "IoU.pool table": 0.9063999938964844, + "IoU.pillow": 0.5254999923706055, + "IoU.screen door": 0.6322999954223633, + "IoU.stairway": 0.28079999923706056, + "IoU.river": 0.193799991607666, + "IoU.bridge": 0.6755999755859375, + "IoU.bookcase": 0.34880001068115235, + "IoU.blind": 0.4377000045776367, + "IoU.coffee table": 0.5529999923706055, + "IoU.toilet": 0.7952999877929687, + "IoU.flower": 0.30840000152587893, + "IoU.book": 0.45720001220703127, + "IoU.hill": 0.12199999809265137, + "IoU.bench": 0.42080001831054686, + "IoU.countertop": 0.6033000183105469, + "IoU.stove": 0.6970999908447265, + "IoU.palm": 0.49770000457763675, + "IoU.kitchen island": 0.4565999984741211, + "IoU.computer": 0.7502999877929688, + "IoU.swivel chair": 0.46630001068115234, + "IoU.boat": 0.6672000122070313, + "IoU.bar": 0.5127999877929688, + "IoU.arcade machine": 0.7412000274658204, + "IoU.hovel": 0.36119998931884767, + "IoU.bus": 0.8595999908447266, + "IoU.towel": 0.663499984741211, + "IoU.light": 0.4129000091552734, + "IoU.truck": 0.2627000045776367, + "IoU.tower": 0.2901000022888184, + "IoU.chandelier": 0.6268999862670899, + "IoU.awning": 0.27610000610351565, + "IoU.streetlight": 0.18959999084472656, + "IoU.booth": 0.4166999816894531, + "IoU.television receiver": 0.6541000366210937, + "IoU.airplane": 0.5663999938964843, + "IoU.dirt track": 0.0, + "IoU.apparel": 0.34349998474121096, + "IoU.pole": 0.16030000686645507, + "IoU.land": 0.022799999713897706, + "IoU.bannister": 0.108100004196167, + "IoU.escalator": 0.4931000137329102, + "IoU.ottoman": 0.44290000915527344, + "IoU.bottle": 0.3434000015258789, + "IoU.buffet": 0.5331999969482422, + "IoU.poster": 0.19010000228881835, + "IoU.stage": 0.2068000030517578, + "IoU.van": 0.44290000915527344, + "IoU.ship": 0.18350000381469728, + "IoU.fountain": 0.21549999237060546, + "IoU.conveyer belt": 0.6687000274658204, + "IoU.canopy": 0.2234000015258789, + "IoU.washer": 0.7319000244140625, + "IoU.plaything": 0.24049999237060546, + "IoU.swimming pool": 0.605099983215332, + "IoU.stool": 0.3809000015258789, + "IoU.barrel": 0.39040000915527345, + "IoU.basket": 0.2972999954223633, + "IoU.waterfall": 0.6512000274658203, + "IoU.tent": 0.7808999633789062, + "IoU.bag": 0.14489999771118164, + "IoU.minibike": 0.6794999694824219, + "IoU.cradle": 0.7848000335693359, + "IoU.oven": 0.293700008392334, + "IoU.ball": 0.5045999908447265, + "IoU.food": 0.5345999908447265, + "IoU.step": 0.07050000190734863, + "IoU.tank": 0.5409999847412109, + "IoU.trade name": 0.20579999923706055, + "IoU.microwave": 0.752300033569336, + "IoU.pot": 0.4229000091552734, + "IoU.animal": 0.6179999923706054, + "IoU.bicycle": 0.5756000137329101, + "IoU.lake": 0.11979999542236328, + "IoU.dishwasher": 0.5595000076293946, + "IoU.screen": 0.5311999893188477, + "IoU.blanket": 0.13600000381469726, + "IoU.sculpture": 0.6648999786376953, + "IoU.hood": 0.5259999847412109, + "IoU.sconce": 0.36, + "IoU.vase": 0.33799999237060546, + "IoU.traffic light": 0.2625, + "IoU.tray": 0.08100000381469727, + "IoU.ashcan": 0.39919998168945314, + 
"IoU.fan": 0.522400016784668, + "IoU.pier": 0.17739999771118165, + "IoU.crt screen": 0.04059999942779541, + "IoU.plate": 0.4975, + "IoU.monitor": 0.24159999847412109, + "IoU.bulletin board": 0.48200000762939454, + "IoU.shower": 0.012200000286102295, + "IoU.radiator": 0.5265999984741211, + "IoU.glass": 0.11909999847412109, + "IoU.clock": 0.3078000068664551, + "IoU.flag": 0.5872000122070312, + "Acc.wall": 0.8772000122070313, + "Acc.building": 0.933499984741211, + "Acc.sky": 0.959800033569336, + "Acc.floor": 0.8862000274658203, + "Acc.tree": 0.8952999877929687, + "Acc.ceiling": 0.8948000335693359, + "Acc.road": 0.895, + "Acc.bed ": 0.961500015258789, + "Acc.windowpane": 0.7644999694824218, + "Acc.grass": 0.8370999908447265, + "Acc.cabinet": 0.7213999938964843, + "Acc.sidewalk": 0.8030000305175782, + "Acc.person": 0.9194000244140625, + "Acc.earth": 0.5084000015258789, + "Acc.door": 0.6352000045776367, + "Acc.table": 0.737699966430664, + "Acc.mountain": 0.7093000030517578, + "Acc.plant": 0.5840000152587891, + "Acc.curtain": 0.8597000122070313, + "Acc.chair": 0.6854000091552734, + "Acc.car": 0.9266000366210938, + "Acc.water": 0.6930000305175781, + "Acc.painting": 0.8541999816894531, + "Acc.sofa": 0.832699966430664, + "Acc.shelf": 0.5720000076293945, + "Acc.house": 0.6748999786376954, + "Acc.sea": 0.8344999694824219, + "Acc.mirror": 0.7681999969482421, + "Acc.rug": 0.7844000244140625, + "Acc.field": 0.5016999816894532, + "Acc.armchair": 0.6220000076293946, + "Acc.seat": 0.8441000366210938, + "Acc.fence": 0.5593000030517579, + "Acc.desk": 0.7688999938964843, + "Acc.rock": 0.5979999923706054, + "Acc.wardrobe": 0.7641999816894531, + "Acc.lamp": 0.7426000213623047, + "Acc.bathtub": 0.9201000213623047, + "Acc.railing": 0.45669998168945314, + "Acc.cushion": 0.7075, + "Acc.base": 0.6131000137329101, + "Acc.box": 0.31379999160766603, + "Acc.column": 0.5818000030517578, + "Acc.signboard": 0.42220001220703124, + "Acc.chest of drawers": 0.6422000122070313, + "Acc.counter": 0.4906000137329102, + "Acc.sand": 0.6658000183105469, + "Acc.sink": 0.7647000122070312, + "Acc.skyscraper": 0.6781999969482422, + "Acc.fireplace": 0.9026999664306641, + "Acc.refrigerator": 0.8301000213623047, + "Acc.grandstand": 0.7175, + "Acc.path": 0.3711999893188477, + "Acc.stairs": 0.31010000228881834, + "Acc.runway": 0.9645999908447266, + "Acc.case": 0.6676999664306641, + "Acc.pool table": 0.9733000183105469, + "Acc.pillow": 0.6172999954223632, + "Acc.screen door": 0.7554000091552734, + "Acc.stairway": 0.4413999938964844, + "Acc.river": 0.43979999542236325, + "Acc.bridge": 0.8187000274658203, + "Acc.bookcase": 0.5590999984741211, + "Acc.blind": 0.524000015258789, + "Acc.coffee table": 0.8602999877929688, + "Acc.toilet": 0.9077999877929688, + "Acc.flower": 0.48709999084472655, + "Acc.book": 0.6475, + "Acc.hill": 0.20440000534057617, + "Acc.bench": 0.5066999816894531, + "Acc.countertop": 0.7491999816894531, + "Acc.stove": 0.825, + "Acc.palm": 0.6881999969482422, + "Acc.kitchen island": 0.8055999755859375, + "Acc.computer": 0.8987999725341796, + "Acc.swivel chair": 0.6918000030517578, + "Acc.boat": 0.8502999877929688, + "Acc.bar": 0.6320000076293946, + "Acc.arcade machine": 0.8361000061035156, + "Acc.hovel": 0.392599983215332, + "Acc.bus": 0.94, + "Acc.towel": 0.7886000061035157, + "Acc.light": 0.5129000091552735, + "Acc.truck": 0.35650001525878905, + "Acc.tower": 0.4534000015258789, + "Acc.chandelier": 0.8037999725341797, + "Acc.awning": 0.3352000045776367, + "Acc.streetlight": 0.27049999237060546, + "Acc.booth": 0.4366999816894531, + 
"Acc.television receiver": 0.7751000213623047, + "Acc.airplane": 0.6586000061035157, + "Acc.dirt track": 0.0, + "Acc.apparel": 0.4366999816894531, + "Acc.pole": 0.20309999465942383, + "Acc.land": 0.03900000095367431, + "Acc.bannister": 0.15560000419616699, + "Acc.escalator": 0.6919999694824219, + "Acc.ottoman": 0.6776000213623047, + "Acc.bottle": 0.5688999938964844, + "Acc.buffet": 0.7266000366210937, + "Acc.poster": 0.23079999923706054, + "Acc.stage": 0.45919998168945314, + "Acc.van": 0.5404000091552734, + "Acc.ship": 0.18959999084472656, + "Acc.fountain": 0.2206999969482422, + "Acc.conveyer belt": 0.9523000335693359, + "Acc.canopy": 0.2978000068664551, + "Acc.washer": 0.7559999847412109, + "Acc.plaything": 0.36889999389648437, + "Acc.swimming pool": 0.8704000091552735, + "Acc.stool": 0.4995000076293945, + "Acc.barrel": 0.6509999847412109, + "Acc.basket": 0.4097999954223633, + "Acc.waterfall": 0.8912000274658203, + "Acc.tent": 0.9915000152587891, + "Acc.bag": 0.16620000839233398, + "Acc.minibike": 0.7801000213623047, + "Acc.cradle": 0.9697000122070313, + "Acc.oven": 0.4143000030517578, + "Acc.ball": 0.5522999954223633, + "Acc.food": 0.5825, + "Acc.step": 0.09430000305175781, + "Acc.tank": 0.6416000366210938, + "Acc.trade name": 0.22010000228881835, + "Acc.microwave": 0.8494999694824219, + "Acc.pot": 0.5036999893188476, + "Acc.animal": 0.6491999816894531, + "Acc.bicycle": 0.7288999938964844, + "Acc.lake": 0.13949999809265137, + "Acc.dishwasher": 0.6801000213623047, + "Acc.screen": 0.7773999786376953, + "Acc.blanket": 0.15630000114440917, + "Acc.sculpture": 0.7875, + "Acc.hood": 0.697699966430664, + "Acc.sconce": 0.44490001678466795, + "Acc.vase": 0.5409000015258789, + "Acc.traffic light": 0.4815999984741211, + "Acc.tray": 0.13880000114440919, + "Acc.ashcan": 0.5484000015258789, + "Acc.fan": 0.7213999938964843, + "Acc.pier": 0.447599983215332, + "Acc.crt screen": 0.11060000419616699, + "Acc.plate": 0.7041999816894531, + "Acc.monitor": 0.2827000045776367, + "Acc.bulletin board": 0.6931999969482422, + "Acc.shower": 0.048899998664855955, + "Acc.radiator": 0.6502999877929687, + "Acc.glass": 0.12920000076293944, + "Acc.clock": 0.3518000030517578, + "Acc.flag": 0.670199966430664 + } + }, + "38": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8215, + "mIoU": 0.4683, + "mAcc": 0.6009, + "IoU.wall": 0.7618000030517578, + "IoU.building": 0.8269999694824218, + "IoU.sky": 0.9366999816894531, + "IoU.floor": 0.8037999725341797, + "IoU.tree": 0.7361000061035157, + "IoU.ceiling": 0.8248999786376953, + "IoU.road": 0.8272000122070312, + "IoU.bed ": 0.8688999938964844, + "IoU.windowpane": 0.61, + "IoU.grass": 0.7030999755859375, + "IoU.cabinet": 0.5738000106811524, + "IoU.sidewalk": 0.6404000091552734, + "IoU.person": 0.7877999877929688, + "IoU.earth": 0.36900001525878906, + "IoU.door": 0.43509998321533205, + "IoU.table": 0.5525, + "IoU.mountain": 0.585999984741211, + "IoU.plant": 0.5004000091552734, + "IoU.curtain": 0.7123999786376953, + "IoU.chair": 0.5195999908447265, + "IoU.car": 0.8275, + "IoU.water": 0.5397000122070312, + "IoU.painting": 0.6883999633789063, + "IoU.sofa": 0.6533000183105468, + "IoU.shelf": 0.41880001068115236, + "IoU.house": 0.5109999847412109, + "IoU.sea": 0.6216999816894532, + "IoU.mirror": 0.6444000244140625, + "IoU.rug": 0.6727999877929688, + "IoU.field": 0.29950000762939455, + "IoU.armchair": 0.4284999847412109, + "IoU.seat": 0.6118000030517579, + "IoU.fence": 0.37779998779296875, + 
"IoU.desk": 0.485, + "IoU.rock": 0.46630001068115234, + "IoU.wardrobe": 0.5031999969482421, + "IoU.lamp": 0.5441999816894532, + "IoU.bathtub": 0.8069999694824219, + "IoU.railing": 0.3461000061035156, + "IoU.cushion": 0.5459000015258789, + "IoU.base": 0.3034000015258789, + "IoU.box": 0.22690000534057617, + "IoU.column": 0.45529998779296876, + "IoU.signboard": 0.33180000305175783, + "IoU.chest of drawers": 0.34330001831054685, + "IoU.counter": 0.3445999908447266, + "IoU.sand": 0.37439998626708987, + "IoU.sink": 0.6691000366210937, + "IoU.skyscraper": 0.5933000183105469, + "IoU.fireplace": 0.7161000061035157, + "IoU.refrigerator": 0.6658999633789062, + "IoU.grandstand": 0.5172999954223633, + "IoU.path": 0.21440000534057618, + "IoU.stairs": 0.24219999313354493, + "IoU.runway": 0.72, + "IoU.case": 0.489900016784668, + "IoU.pool table": 0.9052999877929687, + "IoU.pillow": 0.5209999847412109, + "IoU.screen door": 0.5609999847412109, + "IoU.stairway": 0.3007999992370605, + "IoU.river": 0.1784000015258789, + "IoU.bridge": 0.6462000274658203, + "IoU.bookcase": 0.3196999931335449, + "IoU.blind": 0.40819999694824216, + "IoU.coffee table": 0.5436000061035157, + "IoU.toilet": 0.8179000091552734, + "IoU.flower": 0.3693000030517578, + "IoU.book": 0.4341999816894531, + "IoU.hill": 0.09649999618530274, + "IoU.bench": 0.39380001068115233, + "IoU.countertop": 0.576500015258789, + "IoU.stove": 0.6943000030517578, + "IoU.palm": 0.47580001831054686, + "IoU.kitchen island": 0.3547999954223633, + "IoU.computer": 0.6544000244140625, + "IoU.swivel chair": 0.4675, + "IoU.boat": 0.6794000244140626, + "IoU.bar": 0.4722999954223633, + "IoU.arcade machine": 0.5836000061035156, + "IoU.hovel": 0.24389999389648437, + "IoU.bus": 0.8631999969482422, + "IoU.towel": 0.620099983215332, + "IoU.light": 0.43290000915527344, + "IoU.truck": 0.2901000022888184, + "IoU.tower": 0.3315999984741211, + "IoU.chandelier": 0.6043999862670898, + "IoU.awning": 0.2894000053405762, + "IoU.streetlight": 0.18620000839233397, + "IoU.booth": 0.42400001525878905, + "IoU.television receiver": 0.6004999923706055, + "IoU.airplane": 0.5620999908447266, + "IoU.dirt track": 0.06340000152587891, + "IoU.apparel": 0.3306999969482422, + "IoU.pole": 0.13930000305175783, + "IoU.land": 0.056599998474121095, + "IoU.bannister": 0.10729999542236328, + "IoU.escalator": 0.33029998779296876, + "IoU.ottoman": 0.4395000076293945, + "IoU.bottle": 0.16110000610351563, + "IoU.buffet": 0.4875, + "IoU.poster": 0.22360000610351563, + "IoU.stage": 0.19059999465942382, + "IoU.van": 0.3990999984741211, + "IoU.ship": 0.5808000183105468, + "IoU.fountain": 0.21270000457763671, + "IoU.conveyer belt": 0.5638000106811524, + "IoU.canopy": 0.21260000228881837, + "IoU.washer": 0.7326000213623047, + "IoU.plaything": 0.24870000839233397, + "IoU.swimming pool": 0.5556000137329101, + "IoU.stool": 0.3356999969482422, + "IoU.barrel": 0.37209999084472656, + "IoU.basket": 0.24879999160766603, + "IoU.waterfall": 0.6801000213623047, + "IoU.tent": 0.8555000305175782, + "IoU.bag": 0.10930000305175781, + "IoU.minibike": 0.6802999877929687, + "IoU.cradle": 0.7938999938964844, + "IoU.oven": 0.31120000839233397, + "IoU.ball": 0.38549999237060545, + "IoU.food": 0.5175, + "IoU.step": 0.10359999656677246, + "IoU.tank": 0.4715999984741211, + "IoU.trade name": 0.21670000076293947, + "IoU.microwave": 0.7330000305175781, + "IoU.pot": 0.33049999237060546, + "IoU.animal": 0.6161000061035157, + "IoU.bicycle": 0.575999984741211, + "IoU.lake": 0.13180000305175782, + "IoU.dishwasher": 0.49770000457763675, + 
"IoU.screen": 0.5318000030517578, + "IoU.blanket": 0.151899995803833, + "IoU.sculpture": 0.5556999969482422, + "IoU.hood": 0.47009998321533203, + "IoU.sconce": 0.33529998779296877, + "IoU.vase": 0.3288999938964844, + "IoU.traffic light": 0.2475, + "IoU.tray": 0.05610000133514404, + "IoU.ashcan": 0.3972999954223633, + "IoU.fan": 0.5115000152587891, + "IoU.pier": 0.23879999160766602, + "IoU.crt screen": 0.03140000104904175, + "IoU.plate": 0.4777000045776367, + "IoU.monitor": 0.22459999084472657, + "IoU.bulletin board": 0.46799999237060547, + "IoU.shower": 0.009300000071525573, + "IoU.radiator": 0.555, + "IoU.glass": 0.09079999923706054, + "IoU.clock": 0.27920000076293944, + "IoU.flag": 0.6043000030517578, + "Acc.wall": 0.8687000274658203, + "Acc.building": 0.9287999725341797, + "Acc.sky": 0.9593000030517578, + "Acc.floor": 0.8843000030517578, + "Acc.tree": 0.8933000183105468, + "Acc.ceiling": 0.8937999725341796, + "Acc.road": 0.8876000213623046, + "Acc.bed ": 0.9605000305175782, + "Acc.windowpane": 0.772300033569336, + "Acc.grass": 0.8434999847412109, + "Acc.cabinet": 0.7026000213623047, + "Acc.sidewalk": 0.8091000366210938, + "Acc.person": 0.9169999694824219, + "Acc.earth": 0.5208000183105469, + "Acc.door": 0.5783000183105469, + "Acc.table": 0.7201999664306641, + "Acc.mountain": 0.7333999633789062, + "Acc.plant": 0.5906000137329102, + "Acc.curtain": 0.8468000030517578, + "Acc.chair": 0.669000015258789, + "Acc.car": 0.9236000061035157, + "Acc.water": 0.6794000244140626, + "Acc.painting": 0.8526000213623047, + "Acc.sofa": 0.835999984741211, + "Acc.shelf": 0.5872999954223633, + "Acc.house": 0.6480999755859375, + "Acc.sea": 0.7769000244140625, + "Acc.mirror": 0.7530999755859376, + "Acc.rug": 0.7962000274658203, + "Acc.field": 0.41200000762939454, + "Acc.armchair": 0.5904999923706055, + "Acc.seat": 0.8280000305175781, + "Acc.fence": 0.5561000061035156, + "Acc.desk": 0.7894000244140625, + "Acc.rock": 0.6220000076293946, + "Acc.wardrobe": 0.7040000152587891, + "Acc.lamp": 0.729800033569336, + "Acc.bathtub": 0.8666000366210938, + "Acc.railing": 0.452599983215332, + "Acc.cushion": 0.69, + "Acc.base": 0.5695999908447266, + "Acc.box": 0.3021999931335449, + "Acc.column": 0.5836000061035156, + "Acc.signboard": 0.42080001831054686, + "Acc.chest of drawers": 0.6426000213623047, + "Acc.counter": 0.45790000915527346, + "Acc.sand": 0.611500015258789, + "Acc.sink": 0.7519000244140625, + "Acc.skyscraper": 0.6862999725341797, + "Acc.fireplace": 0.8966000366210938, + "Acc.refrigerator": 0.7806999969482422, + "Acc.grandstand": 0.7308999633789063, + "Acc.path": 0.31209999084472656, + "Acc.stairs": 0.3840000152587891, + "Acc.runway": 0.9701000213623047, + "Acc.case": 0.6537999725341797, + "Acc.pool table": 0.9704000091552735, + "Acc.pillow": 0.5970000076293945, + "Acc.screen door": 0.6379000091552735, + "Acc.stairway": 0.4375, + "Acc.river": 0.4483000183105469, + "Acc.bridge": 0.8601000213623047, + "Acc.bookcase": 0.492400016784668, + "Acc.blind": 0.45669998168945314, + "Acc.coffee table": 0.8579000091552734, + "Acc.toilet": 0.8991999816894531, + "Acc.flower": 0.5515999984741211, + "Acc.book": 0.6231999969482422, + "Acc.hill": 0.1896999931335449, + "Acc.bench": 0.49840000152587893, + "Acc.countertop": 0.7208999633789063, + "Acc.stove": 0.7959999847412109, + "Acc.palm": 0.6954000091552734, + "Acc.kitchen island": 0.8073000335693359, + "Acc.computer": 0.7736000061035156, + "Acc.swivel chair": 0.665, + "Acc.boat": 0.8543000030517578, + "Acc.bar": 0.6527999877929688, + "Acc.arcade machine": 0.6618000030517578, + 
"Acc.hovel": 0.24709999084472656, + "Acc.bus": 0.9301000213623047, + "Acc.towel": 0.7551000213623047, + "Acc.light": 0.5247999954223633, + "Acc.truck": 0.4059000015258789, + "Acc.tower": 0.49139999389648437, + "Acc.chandelier": 0.7865000152587891, + "Acc.awning": 0.36470001220703124, + "Acc.streetlight": 0.2564999961853027, + "Acc.booth": 0.46560001373291016, + "Acc.television receiver": 0.7665000152587891, + "Acc.airplane": 0.6527999877929688, + "Acc.dirt track": 0.120600004196167, + "Acc.apparel": 0.4361999893188477, + "Acc.pole": 0.18059999465942383, + "Acc.land": 0.09369999885559083, + "Acc.bannister": 0.163799991607666, + "Acc.escalator": 0.4247999954223633, + "Acc.ottoman": 0.6611000061035156, + "Acc.bottle": 0.18829999923706053, + "Acc.buffet": 0.6940000152587891, + "Acc.poster": 0.2671999931335449, + "Acc.stage": 0.46279998779296877, + "Acc.van": 0.47119998931884766, + "Acc.ship": 0.5988999938964844, + "Acc.fountain": 0.22170000076293944, + "Acc.conveyer belt": 0.9326999664306641, + "Acc.canopy": 0.31629999160766603, + "Acc.washer": 0.7511000061035156, + "Acc.plaything": 0.3683000183105469, + "Acc.swimming pool": 0.8573000335693359, + "Acc.stool": 0.4897999954223633, + "Acc.barrel": 0.7516999816894532, + "Acc.basket": 0.36709999084472655, + "Acc.waterfall": 0.8718000030517579, + "Acc.tent": 0.995, + "Acc.bag": 0.12609999656677245, + "Acc.minibike": 0.7768000030517578, + "Acc.cradle": 0.9754000091552735, + "Acc.oven": 0.48520000457763673, + "Acc.ball": 0.4184000015258789, + "Acc.food": 0.5841999816894531, + "Acc.step": 0.1402000045776367, + "Acc.tank": 0.5709999847412109, + "Acc.trade name": 0.23440000534057617, + "Acc.microwave": 0.8237000274658203, + "Acc.pot": 0.38610000610351564, + "Acc.animal": 0.6516999816894531, + "Acc.bicycle": 0.7569999694824219, + "Acc.lake": 0.17030000686645508, + "Acc.dishwasher": 0.6075, + "Acc.screen": 0.742699966430664, + "Acc.blanket": 0.18069999694824218, + "Acc.sculpture": 0.6554000091552734, + "Acc.hood": 0.617599983215332, + "Acc.sconce": 0.41119998931884766, + "Acc.vase": 0.5109000015258789, + "Acc.traffic light": 0.457599983215332, + "Acc.tray": 0.09779999732971191, + "Acc.ashcan": 0.5584000015258789, + "Acc.fan": 0.7380999755859375, + "Acc.pier": 0.5688999938964844, + "Acc.crt screen": 0.105600004196167, + "Acc.plate": 0.6437000274658203, + "Acc.monitor": 0.2769000053405762, + "Acc.bulletin board": 0.7141000366210938, + "Acc.shower": 0.052300000190734865, + "Acc.radiator": 0.691500015258789, + "Acc.glass": 0.09720000267028808, + "Acc.clock": 0.32729999542236327, + "Acc.flag": 0.6708000183105469 + } + }, + "39": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8165, + "mIoU": 0.4594, + "mAcc": 0.5912, + "IoU.wall": 0.7572000122070313, + "IoU.building": 0.8218000030517578, + "IoU.sky": 0.9376999664306641, + "IoU.floor": 0.8033000183105469, + "IoU.tree": 0.7237000274658203, + "IoU.ceiling": 0.8256999969482421, + "IoU.road": 0.8108999633789062, + "IoU.bed ": 0.8643000030517578, + "IoU.windowpane": 0.6068000030517579, + "IoU.grass": 0.6716999816894531, + "IoU.cabinet": 0.5738999938964844, + "IoU.sidewalk": 0.6290999984741211, + "IoU.person": 0.7770999908447266, + "IoU.earth": 0.34689998626708984, + "IoU.door": 0.4325, + "IoU.table": 0.5520000076293945, + "IoU.mountain": 0.5831000137329102, + "IoU.plant": 0.49650001525878906, + "IoU.curtain": 0.7252999877929688, + "IoU.chair": 0.5075, + "IoU.car": 0.8176999664306641, + "IoU.water": 0.5425, + 
"IoU.painting": 0.6831999969482422, + "IoU.sofa": 0.625099983215332, + "IoU.shelf": 0.44290000915527344, + "IoU.house": 0.5215999984741211, + "IoU.sea": 0.5283000183105468, + "IoU.mirror": 0.64, + "IoU.rug": 0.6768000030517578, + "IoU.field": 0.26920000076293943, + "IoU.armchair": 0.3718000030517578, + "IoU.seat": 0.5863000106811523, + "IoU.fence": 0.32939998626708983, + "IoU.desk": 0.43540000915527344, + "IoU.rock": 0.4434000015258789, + "IoU.wardrobe": 0.5327999877929688, + "IoU.lamp": 0.519000015258789, + "IoU.bathtub": 0.8022000122070313, + "IoU.railing": 0.3163999938964844, + "IoU.cushion": 0.5115000152587891, + "IoU.base": 0.3138999938964844, + "IoU.box": 0.2315999984741211, + "IoU.column": 0.45549999237060546, + "IoU.signboard": 0.33, + "IoU.chest of drawers": 0.31729999542236326, + "IoU.counter": 0.29149999618530276, + "IoU.sand": 0.395099983215332, + "IoU.sink": 0.6554000091552734, + "IoU.skyscraper": 0.6509999847412109, + "IoU.fireplace": 0.6894000244140625, + "IoU.refrigerator": 0.6861000061035156, + "IoU.grandstand": 0.4090999984741211, + "IoU.path": 0.2375, + "IoU.stairs": 0.19690000534057617, + "IoU.runway": 0.715, + "IoU.case": 0.4656999969482422, + "IoU.pool table": 0.9091999816894532, + "IoU.pillow": 0.49720001220703125, + "IoU.screen door": 0.5750999832153321, + "IoU.stairway": 0.23530000686645508, + "IoU.river": 0.16299999237060547, + "IoU.bridge": 0.6662000274658203, + "IoU.bookcase": 0.3178000068664551, + "IoU.blind": 0.37259998321533205, + "IoU.coffee table": 0.500099983215332, + "IoU.toilet": 0.8094999694824219, + "IoU.flower": 0.37040000915527344, + "IoU.book": 0.425, + "IoU.hill": 0.07619999885559083, + "IoU.bench": 0.38150001525878907, + "IoU.countertop": 0.5466999816894531, + "IoU.stove": 0.6862999725341797, + "IoU.palm": 0.467599983215332, + "IoU.kitchen island": 0.3484000015258789, + "IoU.computer": 0.6823999786376953, + "IoU.swivel chair": 0.41900001525878905, + "IoU.boat": 0.5988000106811523, + "IoU.bar": 0.5025999832153321, + "IoU.arcade machine": 0.625999984741211, + "IoU.hovel": 0.487400016784668, + "IoU.bus": 0.7808000183105469, + "IoU.towel": 0.5613999938964844, + "IoU.light": 0.40419998168945315, + "IoU.truck": 0.27850000381469725, + "IoU.tower": 0.29870000839233396, + "IoU.chandelier": 0.5795000076293946, + "IoU.awning": 0.28670000076293944, + "IoU.streetlight": 0.17799999237060546, + "IoU.booth": 0.42369998931884767, + "IoU.television receiver": 0.6145000076293945, + "IoU.airplane": 0.5731000137329102, + "IoU.dirt track": 0.037300000190734865, + "IoU.apparel": 0.311299991607666, + "IoU.pole": 0.14, + "IoU.land": 0.07449999809265137, + "IoU.bannister": 0.06400000095367432, + "IoU.escalator": 0.29600000381469727, + "IoU.ottoman": 0.41889999389648436, + "IoU.bottle": 0.22229999542236328, + "IoU.buffet": 0.5797000122070313, + "IoU.poster": 0.2822999954223633, + "IoU.stage": 0.136899995803833, + "IoU.van": 0.40889999389648435, + "IoU.ship": 0.8968000030517578, + "IoU.fountain": 0.14409999847412108, + "IoU.conveyer belt": 0.6537999725341797, + "IoU.canopy": 0.27040000915527346, + "IoU.washer": 0.6815000152587891, + "IoU.plaything": 0.21579999923706056, + "IoU.swimming pool": 0.5604000091552734, + "IoU.stool": 0.28829999923706057, + "IoU.barrel": 0.5499000167846679, + "IoU.basket": 0.21610000610351562, + "IoU.waterfall": 0.5431000137329102, + "IoU.tent": 0.8726000213623046, + "IoU.bag": 0.13840000152587892, + "IoU.minibike": 0.5893999862670899, + "IoU.cradle": 0.7895999908447265, + "IoU.oven": 0.17969999313354493, + "IoU.ball": 0.5104999923706055, + 
"IoU.food": 0.524900016784668, + "IoU.step": 0.10779999732971192, + "IoU.tank": 0.5061000061035156, + "IoU.trade name": 0.20040000915527345, + "IoU.microwave": 0.36400001525878906, + "IoU.pot": 0.3654999923706055, + "IoU.animal": 0.5599000167846679, + "IoU.bicycle": 0.5197000122070312, + "IoU.lake": 0.584900016784668, + "IoU.dishwasher": 0.4222999954223633, + "IoU.screen": 0.6206000137329102, + "IoU.blanket": 0.09319999694824219, + "IoU.sculpture": 0.5318000030517578, + "IoU.hood": 0.5004999923706055, + "IoU.sconce": 0.2609000015258789, + "IoU.vase": 0.2784000015258789, + "IoU.traffic light": 0.24719999313354493, + "IoU.tray": 0.030199999809265136, + "IoU.ashcan": 0.3420999908447266, + "IoU.fan": 0.4736000061035156, + "IoU.pier": 0.25659999847412107, + "IoU.crt screen": 0.03619999885559082, + "IoU.plate": 0.4634000015258789, + "IoU.monitor": 0.14829999923706055, + "IoU.bulletin board": 0.41259998321533203, + "IoU.shower": 0.012899999618530273, + "IoU.radiator": 0.580999984741211, + "IoU.glass": 0.06699999809265136, + "IoU.clock": 0.2722999954223633, + "IoU.flag": 0.3238999938964844, + "Acc.wall": 0.8680000305175781, + "Acc.building": 0.92, + "Acc.sky": 0.960999984741211, + "Acc.floor": 0.8869000244140625, + "Acc.tree": 0.8841999816894531, + "Acc.ceiling": 0.8881999969482421, + "Acc.road": 0.8762999725341797, + "Acc.bed ": 0.9602999877929688, + "Acc.windowpane": 0.7670999908447266, + "Acc.grass": 0.8322000122070312, + "Acc.cabinet": 0.7116999816894531, + "Acc.sidewalk": 0.8070999908447266, + "Acc.person": 0.9120999908447266, + "Acc.earth": 0.4722999954223633, + "Acc.door": 0.5940000152587891, + "Acc.table": 0.7054000091552735, + "Acc.mountain": 0.7336000061035156, + "Acc.plant": 0.5966999816894532, + "Acc.curtain": 0.854800033569336, + "Acc.chair": 0.6519999694824219, + "Acc.car": 0.9191000366210937, + "Acc.water": 0.6555999755859375, + "Acc.painting": 0.8369999694824218, + "Acc.sofa": 0.8248999786376953, + "Acc.shelf": 0.6256999969482422, + "Acc.house": 0.6686000061035157, + "Acc.sea": 0.725, + "Acc.mirror": 0.7498000335693359, + "Acc.rug": 0.7761000061035156, + "Acc.field": 0.41779998779296873, + "Acc.armchair": 0.5006000137329102, + "Acc.seat": 0.8194999694824219, + "Acc.fence": 0.4533000183105469, + "Acc.desk": 0.7487000274658203, + "Acc.rock": 0.6025, + "Acc.wardrobe": 0.7629000091552735, + "Acc.lamp": 0.7119000244140625, + "Acc.bathtub": 0.8848000335693359, + "Acc.railing": 0.4286000061035156, + "Acc.cushion": 0.6345000076293945, + "Acc.base": 0.5840999984741211, + "Acc.box": 0.28700000762939454, + "Acc.column": 0.5670999908447265, + "Acc.signboard": 0.4172999954223633, + "Acc.chest of drawers": 0.6125, + "Acc.counter": 0.4134000015258789, + "Acc.sand": 0.5993000030517578, + "Acc.sink": 0.7325, + "Acc.skyscraper": 0.8226000213623047, + "Acc.fireplace": 0.8736000061035156, + "Acc.refrigerator": 0.8072000122070313, + "Acc.grandstand": 0.7258000183105469, + "Acc.path": 0.37090000152587893, + "Acc.stairs": 0.3034000015258789, + "Acc.runway": 0.9344000244140624, + "Acc.case": 0.6420999908447266, + "Acc.pool table": 0.971500015258789, + "Acc.pillow": 0.5884999847412109, + "Acc.screen door": 0.6815000152587891, + "Acc.stairway": 0.35209999084472654, + "Acc.river": 0.42119998931884767, + "Acc.bridge": 0.8223000335693359, + "Acc.bookcase": 0.48650001525878905, + "Acc.blind": 0.4218000030517578, + "Acc.coffee table": 0.8469000244140625, + "Acc.toilet": 0.8947000122070312, + "Acc.flower": 0.5227000045776368, + "Acc.book": 0.6090000152587891, + "Acc.hill": 0.13770000457763673, + "Acc.bench": 
0.49939998626708987, + "Acc.countertop": 0.7154000091552735, + "Acc.stove": 0.8170999908447265, + "Acc.palm": 0.6894000244140625, + "Acc.kitchen island": 0.764000015258789, + "Acc.computer": 0.8388999938964844, + "Acc.swivel chair": 0.5772999954223633, + "Acc.boat": 0.8422000122070312, + "Acc.bar": 0.6956999969482421, + "Acc.arcade machine": 0.7148999786376953, + "Acc.hovel": 0.5654999923706054, + "Acc.bus": 0.9209999847412109, + "Acc.towel": 0.7405999755859375, + "Acc.light": 0.47779998779296873, + "Acc.truck": 0.387599983215332, + "Acc.tower": 0.3940000152587891, + "Acc.chandelier": 0.7602999877929687, + "Acc.awning": 0.34110000610351565, + "Acc.streetlight": 0.24049999237060546, + "Acc.booth": 0.5268000030517578, + "Acc.television receiver": 0.7641999816894531, + "Acc.airplane": 0.6572000122070313, + "Acc.dirt track": 0.04090000152587891, + "Acc.apparel": 0.4325, + "Acc.pole": 0.16870000839233398, + "Acc.land": 0.14170000076293945, + "Acc.bannister": 0.10829999923706055, + "Acc.escalator": 0.3372999954223633, + "Acc.ottoman": 0.6393000030517578, + "Acc.bottle": 0.28170000076293944, + "Acc.buffet": 0.7756999969482422, + "Acc.poster": 0.3438999938964844, + "Acc.stage": 0.41069999694824216, + "Acc.van": 0.4983000183105469, + "Acc.ship": 0.9431999969482422, + "Acc.fountain": 0.14869999885559082, + "Acc.conveyer belt": 0.9252999877929687, + "Acc.canopy": 0.34080001831054685, + "Acc.washer": 0.7077999877929687, + "Acc.plaything": 0.33290000915527346, + "Acc.swimming pool": 0.8109999847412109, + "Acc.stool": 0.43720001220703125, + "Acc.barrel": 0.6537999725341797, + "Acc.basket": 0.2979999923706055, + "Acc.waterfall": 0.7013999938964843, + "Acc.tent": 0.9941000366210937, + "Acc.bag": 0.16440000534057617, + "Acc.minibike": 0.7025, + "Acc.cradle": 0.9680000305175781, + "Acc.oven": 0.47650001525878904, + "Acc.ball": 0.6211000061035157, + "Acc.food": 0.5947000122070313, + "Acc.step": 0.14239999771118164, + "Acc.tank": 0.6020000076293945, + "Acc.trade name": 0.21170000076293946, + "Acc.microwave": 0.4070999908447266, + "Acc.pot": 0.42520000457763674, + "Acc.animal": 0.6022999954223632, + "Acc.bicycle": 0.7316000366210937, + "Acc.lake": 0.7463999938964844, + "Acc.dishwasher": 0.52, + "Acc.screen": 0.9245999908447265, + "Acc.blanket": 0.10569999694824218, + "Acc.sculpture": 0.6618000030517578, + "Acc.hood": 0.544099998474121, + "Acc.sconce": 0.33040000915527346, + "Acc.vase": 0.44040000915527344, + "Acc.traffic light": 0.43340000152587893, + "Acc.tray": 0.04639999866485596, + "Acc.ashcan": 0.49189998626708986, + "Acc.fan": 0.7079000091552734, + "Acc.pier": 0.6163999938964844, + "Acc.crt screen": 0.10649999618530273, + "Acc.plate": 0.5945000076293945, + "Acc.monitor": 0.17440000534057618, + "Acc.bulletin board": 0.6045000076293945, + "Acc.shower": 0.03910000085830689, + "Acc.radiator": 0.6968000030517578, + "Acc.glass": 0.07309999942779541, + "Acc.clock": 0.3240999984741211, + "Acc.flag": 0.3666999816894531 + } + }, + "40": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8166, + "mIoU": 0.45399999999999996, + "mAcc": 0.5886, + "IoU.wall": 0.7566000366210938, + "IoU.building": 0.8233000183105469, + "IoU.sky": 0.9377999877929688, + "IoU.floor": 0.8066000366210937, + "IoU.tree": 0.7316999816894532, + "IoU.ceiling": 0.827699966430664, + "IoU.road": 0.8193000030517578, + "IoU.bed ": 0.8634999847412109, + "IoU.windowpane": 0.6002999877929688, + "IoU.grass": 0.65, + "IoU.cabinet": 0.5945000076293945, + 
"IoU.sidewalk": 0.6234000015258789, + "IoU.person": 0.775, + "IoU.earth": 0.34630001068115235, + "IoU.door": 0.4381999969482422, + "IoU.table": 0.5216999816894531, + "IoU.mountain": 0.577599983215332, + "IoU.plant": 0.5011999893188477, + "IoU.curtain": 0.7131999969482422, + "IoU.chair": 0.4983000183105469, + "IoU.car": 0.8043000030517579, + "IoU.water": 0.5577999877929688, + "IoU.painting": 0.6687000274658204, + "IoU.sofa": 0.6224000167846679, + "IoU.shelf": 0.4256999969482422, + "IoU.house": 0.49279998779296874, + "IoU.sea": 0.6070999908447265, + "IoU.mirror": 0.6444999694824218, + "IoU.rug": 0.672300033569336, + "IoU.field": 0.25860000610351563, + "IoU.armchair": 0.38630001068115233, + "IoU.seat": 0.5820000076293945, + "IoU.fence": 0.31489999771118166, + "IoU.desk": 0.41880001068115236, + "IoU.rock": 0.4445000076293945, + "IoU.wardrobe": 0.5238000106811523, + "IoU.lamp": 0.515099983215332, + "IoU.bathtub": 0.7344000244140625, + "IoU.railing": 0.30889999389648437, + "IoU.cushion": 0.5168000030517578, + "IoU.base": 0.30739999771118165, + "IoU.box": 0.22049999237060547, + "IoU.column": 0.4420000076293945, + "IoU.signboard": 0.33540000915527346, + "IoU.chest of drawers": 0.37709999084472656, + "IoU.counter": 0.2918000030517578, + "IoU.sand": 0.347400016784668, + "IoU.sink": 0.6759999847412109, + "IoU.skyscraper": 0.6386999893188476, + "IoU.fireplace": 0.6908000183105468, + "IoU.refrigerator": 0.6912999725341797, + "IoU.grandstand": 0.4590999984741211, + "IoU.path": 0.2428000068664551, + "IoU.stairs": 0.25610000610351563, + "IoU.runway": 0.6943000030517578, + "IoU.case": 0.49759998321533205, + "IoU.pool table": 0.9148000335693359, + "IoU.pillow": 0.5279999923706055, + "IoU.screen door": 0.6068999862670899, + "IoU.stairway": 0.26239999771118167, + "IoU.river": 0.2656999969482422, + "IoU.bridge": 0.642300033569336, + "IoU.bookcase": 0.3231999969482422, + "IoU.blind": 0.397599983215332, + "IoU.coffee table": 0.5215999984741211, + "IoU.toilet": 0.7565000152587891, + "IoU.flower": 0.34029998779296877, + "IoU.book": 0.42200000762939455, + "IoU.hill": 0.07780000209808349, + "IoU.bench": 0.36720001220703125, + "IoU.countertop": 0.5943000030517578, + "IoU.stove": 0.6755000305175781, + "IoU.palm": 0.47900001525878905, + "IoU.kitchen island": 0.29870000839233396, + "IoU.computer": 0.6791000366210938, + "IoU.swivel chair": 0.3990000152587891, + "IoU.boat": 0.6722000122070313, + "IoU.bar": 0.5068000030517578, + "IoU.arcade machine": 0.5795000076293946, + "IoU.hovel": 0.5043999862670898, + "IoU.bus": 0.749000015258789, + "IoU.towel": 0.5572999954223633, + "IoU.light": 0.39790000915527346, + "IoU.truck": 0.19700000762939454, + "IoU.tower": 0.34330001831054685, + "IoU.chandelier": 0.5825, + "IoU.awning": 0.33229999542236327, + "IoU.streetlight": 0.18040000915527343, + "IoU.booth": 0.4211000061035156, + "IoU.television receiver": 0.6061000061035157, + "IoU.airplane": 0.5509999847412109, + "IoU.dirt track": 0.25610000610351563, + "IoU.apparel": 0.3370000076293945, + "IoU.pole": 0.17329999923706055, + "IoU.land": 0.08010000228881836, + "IoU.bannister": 0.09329999923706055, + "IoU.escalator": 0.28219999313354494, + "IoU.ottoman": 0.47380001068115235, + "IoU.bottle": 0.33619998931884765, + "IoU.buffet": 0.6358000183105469, + "IoU.poster": 0.28969999313354494, + "IoU.stage": 0.14449999809265138, + "IoU.van": 0.4013999938964844, + "IoU.ship": 0.4734000015258789, + "IoU.fountain": 0.20979999542236327, + "IoU.conveyer belt": 0.6638999938964844, + "IoU.canopy": 0.27030000686645506, + "IoU.washer": 0.6988999938964844, 
+ "IoU.plaything": 0.2306999969482422, + "IoU.swimming pool": 0.5608000183105468, + "IoU.stool": 0.23899999618530274, + "IoU.barrel": 0.26860000610351564, + "IoU.basket": 0.198799991607666, + "IoU.waterfall": 0.5456000137329101, + "IoU.tent": 0.8687000274658203, + "IoU.bag": 0.10829999923706055, + "IoU.minibike": 0.5213000106811524, + "IoU.cradle": 0.7672000122070313, + "IoU.oven": 0.149399995803833, + "IoU.ball": 0.38549999237060545, + "IoU.food": 0.4916999816894531, + "IoU.step": 0.11739999771118165, + "IoU.tank": 0.5088000106811523, + "IoU.trade name": 0.2521999931335449, + "IoU.microwave": 0.34880001068115235, + "IoU.pot": 0.35970001220703124, + "IoU.animal": 0.5495000076293945, + "IoU.bicycle": 0.467599983215332, + "IoU.lake": 0.5115999984741211, + "IoU.dishwasher": 0.5118000030517578, + "IoU.screen": 0.6518000030517578, + "IoU.blanket": 0.09149999618530273, + "IoU.sculpture": 0.40650001525878904, + "IoU.hood": 0.4608000183105469, + "IoU.sconce": 0.27799999237060546, + "IoU.vase": 0.2588999938964844, + "IoU.traffic light": 0.25229999542236325, + "IoU.tray": 0.039900000095367434, + "IoU.ashcan": 0.2919000053405762, + "IoU.fan": 0.457599983215332, + "IoU.pier": 0.2093000030517578, + "IoU.crt screen": 0.033399999141693115, + "IoU.plate": 0.42080001831054686, + "IoU.monitor": 0.038900001049041746, + "IoU.bulletin board": 0.4179999923706055, + "IoU.shower": 0.003100000023841858, + "IoU.radiator": 0.5706000137329101, + "IoU.glass": 0.07579999923706054, + "IoU.clock": 0.23059999465942382, + "IoU.flag": 0.325, + "Acc.wall": 0.8666999816894532, + "Acc.building": 0.9209999847412109, + "Acc.sky": 0.9595999908447266, + "Acc.floor": 0.8855000305175781, + "Acc.tree": 0.8919000244140625, + "Acc.ceiling": 0.8895999908447265, + "Acc.road": 0.8894999694824218, + "Acc.bed ": 0.9569000244140625, + "Acc.windowpane": 0.755, + "Acc.grass": 0.8211000061035156, + "Acc.cabinet": 0.7391999816894531, + "Acc.sidewalk": 0.7959999847412109, + "Acc.person": 0.9154000091552734, + "Acc.earth": 0.46130001068115234, + "Acc.door": 0.5854000091552735, + "Acc.table": 0.6727999877929688, + "Acc.mountain": 0.7512000274658203, + "Acc.plant": 0.5956999969482422, + "Acc.curtain": 0.8497000122070313, + "Acc.chair": 0.660199966430664, + "Acc.car": 0.900199966430664, + "Acc.water": 0.6773999786376953, + "Acc.painting": 0.8651999664306641, + "Acc.sofa": 0.8202999877929688, + "Acc.shelf": 0.5833000183105469, + "Acc.house": 0.6368000030517578, + "Acc.sea": 0.8316999816894531, + "Acc.mirror": 0.7454000091552735, + "Acc.rug": 0.7970999908447266, + "Acc.field": 0.43270000457763674, + "Acc.armchair": 0.542400016784668, + "Acc.seat": 0.8097000122070312, + "Acc.fence": 0.4311999893188477, + "Acc.desk": 0.7361000061035157, + "Acc.rock": 0.5838999938964844, + "Acc.wardrobe": 0.7016999816894531, + "Acc.lamp": 0.7080000305175781, + "Acc.bathtub": 0.7897000122070312, + "Acc.railing": 0.4395000076293945, + "Acc.cushion": 0.6498999786376953, + "Acc.base": 0.5693000030517578, + "Acc.box": 0.2815999984741211, + "Acc.column": 0.557400016784668, + "Acc.signboard": 0.43470001220703125, + "Acc.chest of drawers": 0.5841999816894531, + "Acc.counter": 0.405099983215332, + "Acc.sand": 0.605, + "Acc.sink": 0.7520999908447266, + "Acc.skyscraper": 0.8069000244140625, + "Acc.fireplace": 0.88, + "Acc.refrigerator": 0.8512999725341797, + "Acc.grandstand": 0.7116000366210937, + "Acc.path": 0.3716999816894531, + "Acc.stairs": 0.3775, + "Acc.runway": 0.9022000122070313, + "Acc.case": 0.6608000183105469, + "Acc.pool table": 0.9734999847412109, + "Acc.pillow": 
0.6245000076293945, + "Acc.screen door": 0.7273999786376953, + "Acc.stairway": 0.3691999816894531, + "Acc.river": 0.5338999938964843, + "Acc.bridge": 0.8468000030517578, + "Acc.bookcase": 0.47700000762939454, + "Acc.blind": 0.4702999877929688, + "Acc.coffee table": 0.8416999816894531, + "Acc.toilet": 0.8955999755859375, + "Acc.flower": 0.5118999862670899, + "Acc.book": 0.6174000167846679, + "Acc.hill": 0.14199999809265137, + "Acc.bench": 0.46119998931884765, + "Acc.countertop": 0.7504000091552734, + "Acc.stove": 0.8041999816894532, + "Acc.palm": 0.6934999847412109, + "Acc.kitchen island": 0.7043000030517578, + "Acc.computer": 0.8458999633789063, + "Acc.swivel chair": 0.5231999969482422, + "Acc.boat": 0.8547000122070313, + "Acc.bar": 0.6616000366210938, + "Acc.arcade machine": 0.7375, + "Acc.hovel": 0.5981999969482422, + "Acc.bus": 0.8994000244140625, + "Acc.towel": 0.7338999938964844, + "Acc.light": 0.48700000762939455, + "Acc.truck": 0.29889999389648436, + "Acc.tower": 0.5166999816894531, + "Acc.chandelier": 0.7455999755859375, + "Acc.awning": 0.4118000030517578, + "Acc.streetlight": 0.2503000068664551, + "Acc.booth": 0.515099983215332, + "Acc.television receiver": 0.7516999816894532, + "Acc.airplane": 0.6493000030517578, + "Acc.dirt track": 0.34810001373291016, + "Acc.apparel": 0.4741999816894531, + "Acc.pole": 0.22079999923706053, + "Acc.land": 0.1722999954223633, + "Acc.bannister": 0.13590000152587892, + "Acc.escalator": 0.32360000610351564, + "Acc.ottoman": 0.6569999694824219, + "Acc.bottle": 0.5452999877929687, + "Acc.buffet": 0.7525, + "Acc.poster": 0.3347999954223633, + "Acc.stage": 0.3606000137329102, + "Acc.van": 0.4759999847412109, + "Acc.ship": 0.48060001373291017, + "Acc.fountain": 0.21420000076293946, + "Acc.conveyer belt": 0.9263999938964844, + "Acc.canopy": 0.3146999931335449, + "Acc.washer": 0.7194000244140625, + "Acc.plaything": 0.3502999877929687, + "Acc.swimming pool": 0.8397000122070313, + "Acc.stool": 0.35560001373291017, + "Acc.barrel": 0.6286000061035156, + "Acc.basket": 0.26540000915527345, + "Acc.waterfall": 0.6494000244140625, + "Acc.tent": 0.9941000366210937, + "Acc.bag": 0.120600004196167, + "Acc.minibike": 0.6272000122070313, + "Acc.cradle": 0.9656999969482422, + "Acc.oven": 0.39360000610351564, + "Acc.ball": 0.46310001373291015, + "Acc.food": 0.5704000091552734, + "Acc.step": 0.14359999656677247, + "Acc.tank": 0.607599983215332, + "Acc.trade name": 0.2826000022888184, + "Acc.microwave": 0.3990999984741211, + "Acc.pot": 0.42220001220703124, + "Acc.animal": 0.6054999923706055, + "Acc.bicycle": 0.7486000061035156, + "Acc.lake": 0.6426000213623047, + "Acc.dishwasher": 0.6783999633789063, + "Acc.screen": 0.8881999969482421, + "Acc.blanket": 0.10470000267028809, + "Acc.sculpture": 0.6163999938964844, + "Acc.hood": 0.5356000137329101, + "Acc.sconce": 0.37459999084472656, + "Acc.vase": 0.40700000762939453, + "Acc.traffic light": 0.48880001068115236, + "Acc.tray": 0.059800000190734864, + "Acc.ashcan": 0.3883000183105469, + "Acc.fan": 0.7326000213623047, + "Acc.pier": 0.4936000061035156, + "Acc.crt screen": 0.11050000190734863, + "Acc.plate": 0.5602000045776367, + "Acc.monitor": 0.04199999809265137, + "Acc.bulletin board": 0.6327999877929688, + "Acc.shower": 0.012100000381469727, + "Acc.radiator": 0.6951999664306641, + "Acc.glass": 0.08510000228881837, + "Acc.clock": 0.2725, + "Acc.flag": 0.3622999954223633 + } + }, + "41": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 
0.8145, + "mIoU": 0.44, + "mAcc": 0.5678, + "IoU.wall": 0.7555000305175781, + "IoU.building": 0.8173000335693359, + "IoU.sky": 0.9395999908447266, + "IoU.floor": 0.8043000030517579, + "IoU.tree": 0.7344999694824219, + "IoU.ceiling": 0.8280000305175781, + "IoU.road": 0.811500015258789, + "IoU.bed ": 0.8613999938964844, + "IoU.windowpane": 0.6063000106811524, + "IoU.grass": 0.6730999755859375, + "IoU.cabinet": 0.5790999984741211, + "IoU.sidewalk": 0.6247999954223633, + "IoU.person": 0.7791999816894531, + "IoU.earth": 0.3543000030517578, + "IoU.door": 0.45970001220703127, + "IoU.table": 0.5379000091552735, + "IoU.mountain": 0.5893000030517578, + "IoU.plant": 0.5015000152587891, + "IoU.curtain": 0.7187999725341797, + "IoU.chair": 0.4990999984741211, + "IoU.car": 0.7962999725341797, + "IoU.water": 0.5070000076293946, + "IoU.painting": 0.6719999694824219, + "IoU.sofa": 0.6233000183105468, + "IoU.shelf": 0.4136000061035156, + "IoU.house": 0.4236000061035156, + "IoU.sea": 0.5222999954223633, + "IoU.mirror": 0.6352999877929687, + "IoU.rug": 0.6816000366210937, + "IoU.field": 0.29100000381469726, + "IoU.armchair": 0.41119998931884766, + "IoU.seat": 0.610999984741211, + "IoU.fence": 0.36840000152587893, + "IoU.desk": 0.40060001373291015, + "IoU.rock": 0.4002000045776367, + "IoU.wardrobe": 0.455, + "IoU.lamp": 0.5158000183105469, + "IoU.bathtub": 0.7605999755859375, + "IoU.railing": 0.3268999862670898, + "IoU.cushion": 0.5202000045776367, + "IoU.base": 0.3, + "IoU.box": 0.2234000015258789, + "IoU.column": 0.44880001068115233, + "IoU.signboard": 0.33419998168945314, + "IoU.chest of drawers": 0.35619998931884767, + "IoU.counter": 0.26479999542236327, + "IoU.sand": 0.38060001373291014, + "IoU.sink": 0.637400016784668, + "IoU.skyscraper": 0.5556000137329101, + "IoU.fireplace": 0.6934999847412109, + "IoU.refrigerator": 0.7288999938964844, + "IoU.grandstand": 0.44529998779296875, + "IoU.path": 0.23469999313354492, + "IoU.stairs": 0.2747999954223633, + "IoU.runway": 0.7058000183105468, + "IoU.case": 0.47470001220703123, + "IoU.pool table": 0.8847000122070312, + "IoU.pillow": 0.5297000122070312, + "IoU.screen door": 0.5704000091552734, + "IoU.stairway": 0.2705999946594238, + "IoU.river": 0.15109999656677245, + "IoU.bridge": 0.6163999938964844, + "IoU.bookcase": 0.2925, + "IoU.blind": 0.3706000137329102, + "IoU.coffee table": 0.5168999862670899, + "IoU.toilet": 0.765199966430664, + "IoU.flower": 0.3509999847412109, + "IoU.book": 0.43, + "IoU.hill": 0.08609999656677246, + "IoU.bench": 0.46529998779296877, + "IoU.countertop": 0.5838999938964844, + "IoU.stove": 0.6430999755859375, + "IoU.palm": 0.47650001525878904, + "IoU.kitchen island": 0.344900016784668, + "IoU.computer": 0.565, + "IoU.swivel chair": 0.4097999954223633, + "IoU.boat": 0.675, + "IoU.bar": 0.4540999984741211, + "IoU.arcade machine": 0.39169998168945314, + "IoU.hovel": 0.38369998931884763, + "IoU.bus": 0.7094000244140625, + "IoU.towel": 0.5329000091552735, + "IoU.light": 0.41939998626708985, + "IoU.truck": 0.12760000228881835, + "IoU.tower": 0.34240001678466797, + "IoU.chandelier": 0.5786000061035156, + "IoU.awning": 0.37310001373291013, + "IoU.streetlight": 0.18, + "IoU.booth": 0.49220001220703125, + "IoU.television receiver": 0.6268000030517578, + "IoU.airplane": 0.567400016784668, + "IoU.dirt track": 0.31209999084472656, + "IoU.apparel": 0.3081999969482422, + "IoU.pole": 0.20280000686645508, + "IoU.land": 0.059699997901916504, + "IoU.bannister": 0.08289999961853027, + "IoU.escalator": 0.32169998168945313, + "IoU.ottoman": 0.45369998931884764, + 
"IoU.bottle": 0.1447000026702881, + "IoU.buffet": 0.5638000106811524, + "IoU.poster": 0.24170000076293946, + "IoU.stage": 0.11850000381469726, + "IoU.van": 0.40450000762939453, + "IoU.ship": 0.4563999938964844, + "IoU.fountain": 0.16829999923706054, + "IoU.conveyer belt": 0.5152000045776367, + "IoU.canopy": 0.23659999847412108, + "IoU.washer": 0.6469000244140625, + "IoU.plaything": 0.235, + "IoU.swimming pool": 0.575, + "IoU.stool": 0.22100000381469725, + "IoU.barrel": 0.0125, + "IoU.basket": 0.19120000839233398, + "IoU.waterfall": 0.5745999908447266, + "IoU.tent": 0.8573999786376953, + "IoU.bag": 0.07789999961853028, + "IoU.minibike": 0.6061000061035157, + "IoU.cradle": 0.7391999816894531, + "IoU.oven": 0.15079999923706056, + "IoU.ball": 0.36779998779296874, + "IoU.food": 0.5590999984741211, + "IoU.step": 0.14289999961853028, + "IoU.tank": 0.47119998931884766, + "IoU.trade name": 0.19, + "IoU.microwave": 0.3284000015258789, + "IoU.pot": 0.332599983215332, + "IoU.animal": 0.549099998474121, + "IoU.bicycle": 0.5081999969482421, + "IoU.lake": 0.18229999542236328, + "IoU.dishwasher": 0.42650001525878906, + "IoU.screen": 0.6229000091552734, + "IoU.blanket": 0.06369999885559081, + "IoU.sculpture": 0.46610000610351565, + "IoU.hood": 0.469900016784668, + "IoU.sconce": 0.2769000053405762, + "IoU.vase": 0.24239999771118165, + "IoU.traffic light": 0.26680000305175783, + "IoU.tray": 0.05239999771118164, + "IoU.ashcan": 0.26530000686645505, + "IoU.fan": 0.4316999816894531, + "IoU.pier": 0.27850000381469725, + "IoU.crt screen": 0.0017000000178813935, + "IoU.plate": 0.40099998474121096, + "IoU.monitor": 0.037599999904632565, + "IoU.bulletin board": 0.35830001831054686, + "IoU.shower": 0.01740000009536743, + "IoU.radiator": 0.5579000091552735, + "IoU.glass": 0.06309999942779541, + "IoU.clock": 0.19520000457763673, + "IoU.flag": 0.3409000015258789, + "Acc.wall": 0.8619999694824219, + "Acc.building": 0.9213999938964844, + "Acc.sky": 0.9643000030517578, + "Acc.floor": 0.8895999908447265, + "Acc.tree": 0.8841000366210937, + "Acc.ceiling": 0.8847000122070312, + "Acc.road": 0.8843000030517578, + "Acc.bed ": 0.9541000366210938, + "Acc.windowpane": 0.7637999725341796, + "Acc.grass": 0.8408999633789063, + "Acc.cabinet": 0.714800033569336, + "Acc.sidewalk": 0.8141999816894532, + "Acc.person": 0.9151000213623047, + "Acc.earth": 0.47720001220703123, + "Acc.door": 0.6331000137329101, + "Acc.table": 0.6944000244140625, + "Acc.mountain": 0.7398999786376953, + "Acc.plant": 0.5925, + "Acc.curtain": 0.8601999664306641, + "Acc.chair": 0.6519999694824219, + "Acc.car": 0.9093000030517578, + "Acc.water": 0.640199966430664, + "Acc.painting": 0.8731999969482422, + "Acc.sofa": 0.7958000183105469, + "Acc.shelf": 0.589900016784668, + "Acc.house": 0.5647999954223633, + "Acc.sea": 0.7691999816894531, + "Acc.mirror": 0.7533999633789062, + "Acc.rug": 0.7908999633789062, + "Acc.field": 0.46549999237060546, + "Acc.armchair": 0.6131000137329101, + "Acc.seat": 0.8213999938964843, + "Acc.fence": 0.5261000061035156, + "Acc.desk": 0.7468000030517579, + "Acc.rock": 0.6134000015258789, + "Acc.wardrobe": 0.6613999938964844, + "Acc.lamp": 0.6908000183105468, + "Acc.bathtub": 0.8238999938964844, + "Acc.railing": 0.46439998626708984, + "Acc.cushion": 0.6427999877929688, + "Acc.base": 0.5459999847412109, + "Acc.box": 0.2843000030517578, + "Acc.column": 0.562400016784668, + "Acc.signboard": 0.45849998474121095, + "Acc.chest of drawers": 0.5168999862670899, + "Acc.counter": 0.3759000015258789, + "Acc.sand": 0.5577000045776367, + "Acc.sink": 
0.7347000122070313, + "Acc.skyscraper": 0.6954000091552734, + "Acc.fireplace": 0.9019000244140625, + "Acc.refrigerator": 0.8351999664306641, + "Acc.grandstand": 0.6802999877929687, + "Acc.path": 0.330099983215332, + "Acc.stairs": 0.3893999862670898, + "Acc.runway": 0.8902999877929687, + "Acc.case": 0.5986999893188476, + "Acc.pool table": 0.9695999908447266, + "Acc.pillow": 0.6170000076293946, + "Acc.screen door": 0.6844999694824219, + "Acc.stairway": 0.3806999969482422, + "Acc.river": 0.291299991607666, + "Acc.bridge": 0.8287999725341797, + "Acc.bookcase": 0.47580001831054686, + "Acc.blind": 0.419900016784668, + "Acc.coffee table": 0.8526000213623047, + "Acc.toilet": 0.8837000274658203, + "Acc.flower": 0.5525, + "Acc.book": 0.6331000137329101, + "Acc.hill": 0.176200008392334, + "Acc.bench": 0.5477000045776367, + "Acc.countertop": 0.7356999969482422, + "Acc.stove": 0.799000015258789, + "Acc.palm": 0.6968000030517578, + "Acc.kitchen island": 0.7469000244140624, + "Acc.computer": 0.7216999816894532, + "Acc.swivel chair": 0.5852999877929688, + "Acc.boat": 0.8290000152587891, + "Acc.bar": 0.602599983215332, + "Acc.arcade machine": 0.46349998474121096, + "Acc.hovel": 0.49470001220703125, + "Acc.bus": 0.8969000244140625, + "Acc.towel": 0.7083000183105469, + "Acc.light": 0.5238999938964843, + "Acc.truck": 0.19079999923706054, + "Acc.tower": 0.5329999923706055, + "Acc.chandelier": 0.7920999908447266, + "Acc.awning": 0.4766999816894531, + "Acc.streetlight": 0.24540000915527344, + "Acc.booth": 0.5661999893188476, + "Acc.television receiver": 0.7754000091552734, + "Acc.airplane": 0.6693000030517579, + "Acc.dirt track": 0.4075, + "Acc.apparel": 0.4384000015258789, + "Acc.pole": 0.2628000068664551, + "Acc.land": 0.11350000381469727, + "Acc.bannister": 0.1165999984741211, + "Acc.escalator": 0.4052000045776367, + "Acc.ottoman": 0.6437000274658203, + "Acc.bottle": 0.17450000762939452, + "Acc.buffet": 0.7036000061035156, + "Acc.poster": 0.2770999908447266, + "Acc.stage": 0.32599998474121095, + "Acc.van": 0.4722999954223633, + "Acc.ship": 0.4715999984741211, + "Acc.fountain": 0.17319999694824217, + "Acc.conveyer belt": 0.7422000122070312, + "Acc.canopy": 0.29469999313354495, + "Acc.washer": 0.6791000366210938, + "Acc.plaything": 0.36849998474121093, + "Acc.swimming pool": 0.8059999847412109, + "Acc.stool": 0.3004999923706055, + "Acc.barrel": 0.03119999885559082, + "Acc.basket": 0.23559999465942383, + "Acc.waterfall": 0.6737000274658204, + "Acc.tent": 0.9945999908447266, + "Acc.bag": 0.08819999694824218, + "Acc.minibike": 0.7291000366210938, + "Acc.cradle": 0.9586000061035156, + "Acc.oven": 0.41319999694824217, + "Acc.ball": 0.46810001373291016, + "Acc.food": 0.6483999633789063, + "Acc.step": 0.16280000686645507, + "Acc.tank": 0.5347999954223632, + "Acc.trade name": 0.21069999694824218, + "Acc.microwave": 0.3615999984741211, + "Acc.pot": 0.3965000152587891, + "Acc.animal": 0.6127000045776367, + "Acc.bicycle": 0.6944000244140625, + "Acc.lake": 0.288700008392334, + "Acc.dishwasher": 0.5647999954223633, + "Acc.screen": 0.8569999694824219, + "Acc.blanket": 0.07230000019073486, + "Acc.sculpture": 0.6494999694824218, + "Acc.hood": 0.5159000015258789, + "Acc.sconce": 0.3509000015258789, + "Acc.vase": 0.375, + "Acc.traffic light": 0.45619998931884764, + "Acc.tray": 0.08899999618530273, + "Acc.ashcan": 0.40959999084472654, + "Acc.fan": 0.7118000030517578, + "Acc.pier": 0.47299999237060547, + "Acc.crt screen": 0.005199999809265137, + "Acc.plate": 0.5413999938964844, + "Acc.monitor": 0.05300000190734863, + "Acc.bulletin 
board": 0.5672000122070312, + "Acc.shower": 0.04949999809265137, + "Acc.radiator": 0.6516000366210938, + "Acc.glass": 0.06920000076293946, + "Acc.clock": 0.21809999465942384, + "Acc.flag": 0.39189998626708983 + } + }, + "42": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8169, + "mIoU": 0.45380000000000004, + "mAcc": 0.585, + "IoU.wall": 0.7591000366210937, + "IoU.building": 0.8201000213623046, + "IoU.sky": 0.9393000030517578, + "IoU.floor": 0.8045999908447266, + "IoU.tree": 0.7390000152587891, + "IoU.ceiling": 0.830999984741211, + "IoU.road": 0.8120999908447266, + "IoU.bed ": 0.8622000122070312, + "IoU.windowpane": 0.6061999893188477, + "IoU.grass": 0.6615000152587891, + "IoU.cabinet": 0.5811000061035156, + "IoU.sidewalk": 0.6254999923706055, + "IoU.person": 0.7654000091552734, + "IoU.earth": 0.33799999237060546, + "IoU.door": 0.47009998321533203, + "IoU.table": 0.5475, + "IoU.mountain": 0.5754999923706055, + "IoU.plant": 0.4988999938964844, + "IoU.curtain": 0.7190000152587891, + "IoU.chair": 0.5038999938964843, + "IoU.car": 0.7986000061035157, + "IoU.water": 0.5254999923706055, + "IoU.painting": 0.6793000030517579, + "IoU.sofa": 0.6302999877929687, + "IoU.shelf": 0.4193000030517578, + "IoU.house": 0.482400016784668, + "IoU.sea": 0.6220000076293946, + "IoU.mirror": 0.6444000244140625, + "IoU.rug": 0.6663999938964844, + "IoU.field": 0.2830999946594238, + "IoU.armchair": 0.43540000915527344, + "IoU.seat": 0.5850999832153321, + "IoU.fence": 0.4115999984741211, + "IoU.desk": 0.43150001525878906, + "IoU.rock": 0.3875, + "IoU.wardrobe": 0.4879999923706055, + "IoU.lamp": 0.5236000061035156, + "IoU.bathtub": 0.7487000274658203, + "IoU.railing": 0.3188999938964844, + "IoU.cushion": 0.5020999908447266, + "IoU.base": 0.28329999923706056, + "IoU.box": 0.2109000015258789, + "IoU.column": 0.445, + "IoU.signboard": 0.3427999877929688, + "IoU.chest of drawers": 0.38529998779296876, + "IoU.counter": 0.30739999771118165, + "IoU.sand": 0.3681999969482422, + "IoU.sink": 0.6552999877929687, + "IoU.skyscraper": 0.5647000122070313, + "IoU.fireplace": 0.705, + "IoU.refrigerator": 0.7154000091552735, + "IoU.grandstand": 0.4458000183105469, + "IoU.path": 0.2275, + "IoU.stairs": 0.27049999237060546, + "IoU.runway": 0.6933999633789063, + "IoU.case": 0.5013000106811524, + "IoU.pool table": 0.8876000213623046, + "IoU.pillow": 0.55, + "IoU.screen door": 0.5886000061035156, + "IoU.stairway": 0.2620000076293945, + "IoU.river": 0.2281999969482422, + "IoU.bridge": 0.6705000305175781, + "IoU.bookcase": 0.3106999969482422, + "IoU.blind": 0.3933000183105469, + "IoU.coffee table": 0.5665999984741211, + "IoU.toilet": 0.8055000305175781, + "IoU.flower": 0.3479000091552734, + "IoU.book": 0.42389999389648436, + "IoU.hill": 0.07940000057220459, + "IoU.bench": 0.40200000762939453, + "IoU.countertop": 0.5763999938964843, + "IoU.stove": 0.6859999847412109, + "IoU.palm": 0.49520000457763674, + "IoU.kitchen island": 0.3240999984741211, + "IoU.computer": 0.6111000061035157, + "IoU.swivel chair": 0.43189998626708986, + "IoU.boat": 0.6944000244140625, + "IoU.bar": 0.5006000137329102, + "IoU.arcade machine": 0.41389999389648435, + "IoU.hovel": 0.4656999969482422, + "IoU.bus": 0.7019999694824218, + "IoU.towel": 0.545099983215332, + "IoU.light": 0.39930000305175783, + "IoU.truck": 0.16049999237060547, + "IoU.tower": 0.23639999389648436, + "IoU.chandelier": 0.5877000045776367, + "IoU.awning": 0.283700008392334, + "IoU.streetlight": 0.1925, + 
"IoU.booth": 0.33599998474121096, + "IoU.television receiver": 0.6061000061035157, + "IoU.airplane": 0.6181000137329101, + "IoU.dirt track": 0.17399999618530274, + "IoU.apparel": 0.29319999694824217, + "IoU.pole": 0.25659999847412107, + "IoU.land": 0.06630000114440918, + "IoU.bannister": 0.11199999809265136, + "IoU.escalator": 0.2902000045776367, + "IoU.ottoman": 0.46599998474121096, + "IoU.bottle": 0.3370999908447266, + "IoU.buffet": 0.5420000076293945, + "IoU.poster": 0.21549999237060546, + "IoU.stage": 0.11539999961853027, + "IoU.van": 0.40099998474121096, + "IoU.ship": 0.7269000244140625, + "IoU.fountain": 0.19120000839233398, + "IoU.conveyer belt": 0.5543000030517579, + "IoU.canopy": 0.22270000457763672, + "IoU.washer": 0.6738999938964844, + "IoU.plaything": 0.2175, + "IoU.swimming pool": 0.6275, + "IoU.stool": 0.25809999465942385, + "IoU.barrel": 0.43560001373291013, + "IoU.basket": 0.22549999237060547, + "IoU.waterfall": 0.5924000167846679, + "IoU.tent": 0.8983999633789063, + "IoU.bag": 0.1390999984741211, + "IoU.minibike": 0.5547999954223632, + "IoU.cradle": 0.7458999633789063, + "IoU.oven": 0.143100004196167, + "IoU.ball": 0.3290000152587891, + "IoU.food": 0.5431999969482422, + "IoU.step": 0.15619999885559083, + "IoU.tank": 0.49630001068115237, + "IoU.trade name": 0.24059999465942383, + "IoU.microwave": 0.33689998626708983, + "IoU.pot": 0.32549999237060545, + "IoU.animal": 0.5940999984741211, + "IoU.bicycle": 0.48520000457763673, + "IoU.lake": 0.555099983215332, + "IoU.dishwasher": 0.5209000015258789, + "IoU.screen": 0.5797000122070313, + "IoU.blanket": 0.04489999771118164, + "IoU.sculpture": 0.4681999969482422, + "IoU.hood": 0.4556999969482422, + "IoU.sconce": 0.3468000030517578, + "IoU.vase": 0.2428000068664551, + "IoU.traffic light": 0.24709999084472656, + "IoU.tray": 0.048899998664855955, + "IoU.ashcan": 0.28600000381469726, + "IoU.fan": 0.4779000091552734, + "IoU.pier": 0.3079999923706055, + "IoU.crt screen": 0.00029999999329447744, + "IoU.plate": 0.36950000762939456, + "IoU.monitor": 0.030199999809265136, + "IoU.bulletin board": 0.33549999237060546, + "IoU.shower": 0.005199999809265137, + "IoU.radiator": 0.5740999984741211, + "IoU.glass": 0.07920000076293945, + "IoU.clock": 0.23819999694824218, + "IoU.flag": 0.31829999923706054, + "Acc.wall": 0.867699966430664, + "Acc.building": 0.9175, + "Acc.sky": 0.9647000122070313, + "Acc.floor": 0.8886000061035156, + "Acc.tree": 0.8830999755859374, + "Acc.ceiling": 0.8919999694824219, + "Acc.road": 0.8887999725341796, + "Acc.bed ": 0.9544999694824219, + "Acc.windowpane": 0.7591999816894531, + "Acc.grass": 0.8087999725341797, + "Acc.cabinet": 0.6976000213623047, + "Acc.sidewalk": 0.7998000335693359, + "Acc.person": 0.9275, + "Acc.earth": 0.4416999816894531, + "Acc.door": 0.6340999984741211, + "Acc.table": 0.7102999877929688, + "Acc.mountain": 0.736500015258789, + "Acc.plant": 0.5938000106811523, + "Acc.curtain": 0.8597000122070313, + "Acc.chair": 0.658499984741211, + "Acc.car": 0.9148999786376953, + "Acc.water": 0.6447000122070312, + "Acc.painting": 0.8541999816894531, + "Acc.sofa": 0.8168000030517578, + "Acc.shelf": 0.6154999923706055, + "Acc.house": 0.6234999847412109, + "Acc.sea": 0.8731999969482422, + "Acc.mirror": 0.7615000152587891, + "Acc.rug": 0.7144999694824219, + "Acc.field": 0.5018999862670899, + "Acc.armchair": 0.6304000091552734, + "Acc.seat": 0.8291999816894531, + "Acc.fence": 0.5906000137329102, + "Acc.desk": 0.7122000122070312, + "Acc.rock": 0.629000015258789, + "Acc.wardrobe": 0.7066000366210937, + "Acc.lamp": 
0.6694000244140625, + "Acc.bathtub": 0.8126000213623047, + "Acc.railing": 0.475, + "Acc.cushion": 0.6118000030517579, + "Acc.base": 0.4602000045776367, + "Acc.box": 0.25459999084472656, + "Acc.column": 0.5770999908447265, + "Acc.signboard": 0.4597999954223633, + "Acc.chest of drawers": 0.5888999938964844, + "Acc.counter": 0.4079000091552734, + "Acc.sand": 0.5059999847412109, + "Acc.sink": 0.7544999694824219, + "Acc.skyscraper": 0.7073000335693359, + "Acc.fireplace": 0.9022000122070313, + "Acc.refrigerator": 0.8587999725341797, + "Acc.grandstand": 0.6781999969482422, + "Acc.path": 0.316200008392334, + "Acc.stairs": 0.3870000076293945, + "Acc.runway": 0.9355000305175781, + "Acc.case": 0.7116000366210937, + "Acc.pool table": 0.970999984741211, + "Acc.pillow": 0.6872000122070312, + "Acc.screen door": 0.6794000244140626, + "Acc.stairway": 0.35520000457763673, + "Acc.river": 0.4722999954223633, + "Acc.bridge": 0.8587999725341797, + "Acc.bookcase": 0.5411999893188476, + "Acc.blind": 0.4620000076293945, + "Acc.coffee table": 0.8183000183105469, + "Acc.toilet": 0.8863999938964844, + "Acc.flower": 0.5386999893188477, + "Acc.book": 0.6168999862670899, + "Acc.hill": 0.1672999954223633, + "Acc.bench": 0.4983000183105469, + "Acc.countertop": 0.7266999816894532, + "Acc.stove": 0.8126999664306641, + "Acc.palm": 0.6919000244140625, + "Acc.kitchen island": 0.7579000091552734, + "Acc.computer": 0.749000015258789, + "Acc.swivel chair": 0.6206999969482422, + "Acc.boat": 0.8091000366210938, + "Acc.bar": 0.6673999786376953, + "Acc.arcade machine": 0.46189998626708983, + "Acc.hovel": 0.5349000167846679, + "Acc.bus": 0.9101000213623047, + "Acc.towel": 0.6966000366210937, + "Acc.light": 0.4631999969482422, + "Acc.truck": 0.24209999084472655, + "Acc.tower": 0.36900001525878906, + "Acc.chandelier": 0.7716999816894531, + "Acc.awning": 0.3540999984741211, + "Acc.streetlight": 0.24030000686645508, + "Acc.booth": 0.5354000091552734, + "Acc.television receiver": 0.785199966430664, + "Acc.airplane": 0.7077999877929687, + "Acc.dirt track": 0.25489999771118166, + "Acc.apparel": 0.43139999389648437, + "Acc.pole": 0.3909000015258789, + "Acc.land": 0.16590000152587892, + "Acc.bannister": 0.16680000305175782, + "Acc.escalator": 0.38099998474121094, + "Acc.ottoman": 0.6122000122070312, + "Acc.bottle": 0.4922999954223633, + "Acc.buffet": 0.6711000061035156, + "Acc.poster": 0.2625, + "Acc.stage": 0.3325, + "Acc.van": 0.47380001068115235, + "Acc.ship": 0.7975, + "Acc.fountain": 0.20379999160766601, + "Acc.conveyer belt": 0.86, + "Acc.canopy": 0.2940999984741211, + "Acc.washer": 0.6787999725341797, + "Acc.plaything": 0.32229999542236326, + "Acc.swimming pool": 0.8408000183105468, + "Acc.stool": 0.36439998626708986, + "Acc.barrel": 0.5704999923706054, + "Acc.basket": 0.3111000061035156, + "Acc.waterfall": 0.712699966430664, + "Acc.tent": 0.9891000366210938, + "Acc.bag": 0.17549999237060546, + "Acc.minibike": 0.7083999633789062, + "Acc.cradle": 0.9681999969482422, + "Acc.oven": 0.3908000183105469, + "Acc.ball": 0.39240001678466796, + "Acc.food": 0.6537999725341797, + "Acc.step": 0.19200000762939454, + "Acc.tank": 0.606599998474121, + "Acc.trade name": 0.2795999908447266, + "Acc.microwave": 0.37779998779296875, + "Acc.pot": 0.38549999237060545, + "Acc.animal": 0.6404000091552734, + "Acc.bicycle": 0.7313999938964844, + "Acc.lake": 0.6372000122070313, + "Acc.dishwasher": 0.6456999969482422, + "Acc.screen": 0.9065000152587891, + "Acc.blanket": 0.04989999771118164, + "Acc.sculpture": 0.6122000122070312, + "Acc.hood": 0.5277999877929688, + 
"Acc.sconce": 0.46360000610351565, + "Acc.vase": 0.37200000762939456, + "Acc.traffic light": 0.4140999984741211, + "Acc.tray": 0.08960000038146973, + "Acc.ashcan": 0.4363999938964844, + "Acc.fan": 0.6830000305175781, + "Acc.pier": 0.5083000183105468, + "Acc.crt screen": 0.0009000000357627869, + "Acc.plate": 0.47819999694824217, + "Acc.monitor": 0.040300002098083494, + "Acc.bulletin board": 0.484900016784668, + "Acc.shower": 0.022899999618530273, + "Acc.radiator": 0.7266000366210937, + "Acc.glass": 0.08850000381469726, + "Acc.clock": 0.28959999084472654, + "Acc.flag": 0.34619998931884766 + } + }, + "43": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8178, + "mIoU": 0.4457, + "mAcc": 0.5446, + "IoU.wall": 0.7572000122070313, + "IoU.building": 0.8208000183105468, + "IoU.sky": 0.9376999664306641, + "IoU.floor": 0.7962000274658203, + "IoU.tree": 0.7316000366210937, + "IoU.ceiling": 0.8269999694824218, + "IoU.road": 0.8155000305175781, + "IoU.bed ": 0.8608999633789063, + "IoU.windowpane": 0.6008000183105469, + "IoU.grass": 0.6562999725341797, + "IoU.cabinet": 0.582599983215332, + "IoU.sidewalk": 0.6245999908447266, + "IoU.person": 0.7758000183105469, + "IoU.earth": 0.36020000457763673, + "IoU.door": 0.46849998474121096, + "IoU.table": 0.5527999877929688, + "IoU.mountain": 0.5679999923706055, + "IoU.plant": 0.49650001525878906, + "IoU.curtain": 0.7286000061035156, + "IoU.chair": 0.51, + "IoU.car": 0.8183999633789063, + "IoU.water": 0.5491999816894532, + "IoU.painting": 0.6847000122070312, + "IoU.sofa": 0.6288999938964843, + "IoU.shelf": 0.4152000045776367, + "IoU.house": 0.47049999237060547, + "IoU.sea": 0.6086000061035156, + "IoU.mirror": 0.639000015258789, + "IoU.rug": 0.6370999908447266, + "IoU.field": 0.273799991607666, + "IoU.armchair": 0.4231000137329102, + "IoU.seat": 0.5841999816894531, + "IoU.fence": 0.38540000915527345, + "IoU.desk": 0.43979999542236325, + "IoU.rock": 0.40830001831054685, + "IoU.wardrobe": 0.46490001678466797, + "IoU.lamp": 0.5315000152587891, + "IoU.bathtub": 0.7136000061035156, + "IoU.railing": 0.30590000152587893, + "IoU.cushion": 0.514900016784668, + "IoU.base": 0.25309999465942384, + "IoU.box": 0.23340000152587892, + "IoU.column": 0.4381999969482422, + "IoU.signboard": 0.3290000152587891, + "IoU.chest of drawers": 0.3641999816894531, + "IoU.counter": 0.26040000915527345, + "IoU.sand": 0.36880001068115237, + "IoU.sink": 0.6606999969482422, + "IoU.skyscraper": 0.5666999816894531, + "IoU.fireplace": 0.7236000061035156, + "IoU.refrigerator": 0.7506999969482422, + "IoU.grandstand": 0.45720001220703127, + "IoU.path": 0.21450000762939453, + "IoU.stairs": 0.2745999908447266, + "IoU.runway": 0.6338000106811523, + "IoU.case": 0.47830001831054686, + "IoU.pool table": 0.9169000244140625, + "IoU.pillow": 0.5129000091552735, + "IoU.screen door": 0.6052999877929688, + "IoU.stairway": 0.30450000762939455, + "IoU.river": 0.2059000015258789, + "IoU.bridge": 0.7152999877929688, + "IoU.bookcase": 0.33439998626708983, + "IoU.blind": 0.38369998931884763, + "IoU.coffee table": 0.5800999832153321, + "IoU.toilet": 0.8136000061035156, + "IoU.flower": 0.33279998779296877, + "IoU.book": 0.41700000762939454, + "IoU.hill": 0.07050000190734863, + "IoU.bench": 0.43779998779296875, + "IoU.countertop": 0.5540000152587891, + "IoU.stove": 0.6912999725341797, + "IoU.palm": 0.419900016784668, + "IoU.kitchen island": 0.33180000305175783, + "IoU.computer": 0.6140000152587891, + "IoU.swivel chair": 
0.4008000183105469, + "IoU.boat": 0.717699966430664, + "IoU.bar": 0.4690999984741211, + "IoU.arcade machine": 0.3606000137329102, + "IoU.hovel": 0.42650001525878906, + "IoU.bus": 0.7758000183105469, + "IoU.towel": 0.5459999847412109, + "IoU.light": 0.26059999465942385, + "IoU.truck": 0.21420000076293946, + "IoU.tower": 0.293799991607666, + "IoU.chandelier": 0.5790000152587891, + "IoU.awning": 0.22739999771118163, + "IoU.streetlight": 0.15069999694824218, + "IoU.booth": 0.3747999954223633, + "IoU.television receiver": 0.6036000061035156, + "IoU.airplane": 0.5974000167846679, + "IoU.dirt track": 0.12789999961853027, + "IoU.apparel": 0.30829999923706053, + "IoU.pole": 0.25040000915527344, + "IoU.land": 0.016200000047683717, + "IoU.bannister": 0.06739999771118164, + "IoU.escalator": 0.19110000610351563, + "IoU.ottoman": 0.47389999389648435, + "IoU.bottle": 0.32060001373291014, + "IoU.buffet": 0.3838999938964844, + "IoU.poster": 0.16540000915527345, + "IoU.stage": 0.17280000686645508, + "IoU.van": 0.39779998779296877, + "IoU.ship": 0.4786000061035156, + "IoU.fountain": 0.1975, + "IoU.conveyer belt": 0.5856999969482422, + "IoU.canopy": 0.228799991607666, + "IoU.washer": 0.7069999694824218, + "IoU.plaything": 0.22780000686645507, + "IoU.swimming pool": 0.5515000152587891, + "IoU.stool": 0.21879999160766603, + "IoU.barrel": 0.539000015258789, + "IoU.basket": 0.20610000610351562, + "IoU.waterfall": 0.6122999954223632, + "IoU.tent": 0.9297000122070312, + "IoU.bag": 0.0815999984741211, + "IoU.minibike": 0.5363000106811523, + "IoU.cradle": 0.7369999694824219, + "IoU.oven": 0.18469999313354493, + "IoU.ball": 0.3686000061035156, + "IoU.food": 0.5272000122070313, + "IoU.step": 0.13350000381469726, + "IoU.tank": 0.509000015258789, + "IoU.trade name": 0.16079999923706054, + "IoU.microwave": 0.31610000610351563, + "IoU.pot": 0.33040000915527346, + "IoU.animal": 0.5747000122070313, + "IoU.bicycle": 0.46169998168945314, + "IoU.lake": 0.48779998779296874, + "IoU.dishwasher": 0.5397000122070312, + "IoU.screen": 0.6666999816894531, + "IoU.blanket": 0.035999999046325684, + "IoU.sculpture": 0.4184000015258789, + "IoU.hood": 0.39360000610351564, + "IoU.sconce": 0.28200000762939453, + "IoU.vase": 0.2552000045776367, + "IoU.traffic light": 0.20829999923706055, + "IoU.tray": 0.009599999785423278, + "IoU.ashcan": 0.32990001678466796, + "IoU.fan": 0.4829999923706055, + "IoU.pier": 0.29850000381469727, + "IoU.crt screen": 0.0, + "IoU.plate": 0.37270000457763675, + "IoU.monitor": 0.10359999656677246, + "IoU.bulletin board": 0.3641999816894531, + "IoU.shower": 0.0, + "IoU.radiator": 0.5265999984741211, + "IoU.glass": 0.0671999979019165, + "IoU.clock": 0.20190000534057617, + "IoU.flag": 0.3136000061035156, + "Acc.wall": 0.8952999877929687, + "Acc.building": 0.9281999969482422, + "Acc.sky": 0.9773999786376953, + "Acc.floor": 0.9076999664306641, + "Acc.tree": 0.8744999694824219, + "Acc.ceiling": 0.8973999786376953, + "Acc.road": 0.9062000274658203, + "Acc.bed ": 0.9506999969482421, + "Acc.windowpane": 0.7330000305175781, + "Acc.grass": 0.8043000030517579, + "Acc.cabinet": 0.7311000061035157, + "Acc.sidewalk": 0.7748000335693359, + "Acc.person": 0.8969999694824219, + "Acc.earth": 0.5231999969482422, + "Acc.door": 0.6356000137329102, + "Acc.table": 0.7169000244140625, + "Acc.mountain": 0.6980999755859375, + "Acc.plant": 0.5781999969482422, + "Acc.curtain": 0.8212000274658203, + "Acc.chair": 0.6456999969482422, + "Acc.car": 0.8943000030517578, + "Acc.water": 0.6743000030517579, + "Acc.painting": 0.8341999816894531, + "Acc.sofa": 
0.7947000122070312, + "Acc.shelf": 0.5966999816894532, + "Acc.house": 0.5843999862670899, + "Acc.sea": 0.8105000305175781, + "Acc.mirror": 0.7012999725341796, + "Acc.rug": 0.6733000183105469, + "Acc.field": 0.47009998321533203, + "Acc.armchair": 0.625999984741211, + "Acc.seat": 0.7805999755859375, + "Acc.fence": 0.5281999969482422, + "Acc.desk": 0.6630000305175782, + "Acc.rock": 0.5972000122070312, + "Acc.wardrobe": 0.6459999847412109, + "Acc.lamp": 0.6302000045776367, + "Acc.bathtub": 0.7575, + "Acc.railing": 0.46180000305175783, + "Acc.cushion": 0.6031000137329101, + "Acc.base": 0.4306000137329102, + "Acc.box": 0.3011000061035156, + "Acc.column": 0.560099983215332, + "Acc.signboard": 0.4179999923706055, + "Acc.chest of drawers": 0.5515999984741211, + "Acc.counter": 0.35279998779296873, + "Acc.sand": 0.48889999389648436, + "Acc.sink": 0.7069000244140625, + "Acc.skyscraper": 0.6552999877929687, + "Acc.fireplace": 0.8791999816894531, + "Acc.refrigerator": 0.8216000366210937, + "Acc.grandstand": 0.6883000183105469, + "Acc.path": 0.2861000061035156, + "Acc.stairs": 0.36779998779296874, + "Acc.runway": 0.8261000061035156, + "Acc.case": 0.624000015258789, + "Acc.pool table": 0.9527999877929687, + "Acc.pillow": 0.5913000106811523, + "Acc.screen door": 0.6644999694824218, + "Acc.stairway": 0.40560001373291016, + "Acc.river": 0.45439998626708983, + "Acc.bridge": 0.8408000183105468, + "Acc.bookcase": 0.5581000137329102, + "Acc.blind": 0.42540000915527343, + "Acc.coffee table": 0.7808999633789062, + "Acc.toilet": 0.8665000152587891, + "Acc.flower": 0.46299999237060546, + "Acc.book": 0.54, + "Acc.hill": 0.13550000190734862, + "Acc.bench": 0.5181000137329101, + "Acc.countertop": 0.7140000152587891, + "Acc.stove": 0.7555999755859375, + "Acc.palm": 0.5281999969482422, + "Acc.kitchen island": 0.5497000122070312, + "Acc.computer": 0.7101999664306641, + "Acc.swivel chair": 0.49650001525878906, + "Acc.boat": 0.8202999877929688, + "Acc.bar": 0.6041999816894531, + "Acc.arcade machine": 0.3952000045776367, + "Acc.hovel": 0.4595000076293945, + "Acc.bus": 0.902300033569336, + "Acc.towel": 0.6605999755859375, + "Acc.light": 0.27290000915527346, + "Acc.truck": 0.28719999313354494, + "Acc.tower": 0.40049999237060546, + "Acc.chandelier": 0.7011000061035156, + "Acc.awning": 0.24549999237060546, + "Acc.streetlight": 0.16200000762939454, + "Acc.booth": 0.4372999954223633, + "Acc.television receiver": 0.7227999877929687, + "Acc.airplane": 0.6437999725341796, + "Acc.dirt track": 0.17219999313354492, + "Acc.apparel": 0.42139999389648436, + "Acc.pole": 0.3402000045776367, + "Acc.land": 0.029800000190734862, + "Acc.bannister": 0.0840999984741211, + "Acc.escalator": 0.21479999542236328, + "Acc.ottoman": 0.582400016784668, + "Acc.bottle": 0.46450000762939453, + "Acc.buffet": 0.4354999923706055, + "Acc.poster": 0.21260000228881837, + "Acc.stage": 0.2852000045776367, + "Acc.van": 0.457599983215332, + "Acc.ship": 0.49810001373291013, + "Acc.fountain": 0.20299999237060548, + "Acc.conveyer belt": 0.7308000183105469, + "Acc.canopy": 0.3189999961853027, + "Acc.washer": 0.7133000183105469, + "Acc.plaything": 0.3509000015258789, + "Acc.swimming pool": 0.6908999633789062, + "Acc.stool": 0.2706999969482422, + "Acc.barrel": 0.5861000061035156, + "Acc.basket": 0.24879999160766603, + "Acc.waterfall": 0.6841000366210938, + "Acc.tent": 0.9893000030517578, + "Acc.bag": 0.08930000305175781, + "Acc.minibike": 0.6179999923706054, + "Acc.cradle": 0.9561000061035156, + "Acc.oven": 0.5229999923706055, + "Acc.ball": 0.42889999389648437, + 
"Acc.food": 0.634900016784668, + "Acc.step": 0.14760000228881837, + "Acc.tank": 0.5911999893188477, + "Acc.trade name": 0.16920000076293945, + "Acc.microwave": 0.3290999984741211, + "Acc.pot": 0.3731999969482422, + "Acc.animal": 0.6034999847412109, + "Acc.bicycle": 0.6819999694824219, + "Acc.lake": 0.5345000076293945, + "Acc.dishwasher": 0.6213999938964844, + "Acc.screen": 0.8941999816894531, + "Acc.blanket": 0.03920000076293945, + "Acc.sculpture": 0.5479000091552735, + "Acc.hood": 0.4097999954223633, + "Acc.sconce": 0.32599998474121095, + "Acc.vase": 0.33529998779296877, + "Acc.traffic light": 0.2688999938964844, + "Acc.tray": 0.012599999904632569, + "Acc.ashcan": 0.5077999877929688, + "Acc.fan": 0.5772000122070312, + "Acc.pier": 0.39380001068115233, + "Acc.crt screen": 0.0, + "Acc.plate": 0.445, + "Acc.monitor": 0.13079999923706054, + "Acc.bulletin board": 0.44209999084472656, + "Acc.shower": 0.0, + "Acc.radiator": 0.5843999862670899, + "Acc.glass": 0.07199999809265137, + "Acc.clock": 0.23420000076293945, + "Acc.flag": 0.3340999984741211 + } + }, + "44": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8334999999999999, + "mIoU": 0.4985, + "mAcc": 0.6261, + "IoU.wall": 0.7738999938964843, + "IoU.building": 0.8356999969482422, + "IoU.sky": 0.936500015258789, + "IoU.floor": 0.8154000091552734, + "IoU.tree": 0.7380000305175781, + "IoU.ceiling": 0.8358999633789063, + "IoU.road": 0.8297000122070313, + "IoU.bed ": 0.8969999694824219, + "IoU.windowpane": 0.6193000030517578, + "IoU.grass": 0.6837999725341797, + "IoU.cabinet": 0.6161999893188477, + "IoU.sidewalk": 0.6513999938964844, + "IoU.person": 0.8020999908447266, + "IoU.earth": 0.3791999816894531, + "IoU.door": 0.5072999954223633, + "IoU.table": 0.5984999847412109, + "IoU.mountain": 0.58, + "IoU.plant": 0.5068000030517578, + "IoU.curtain": 0.7244999694824219, + "IoU.chair": 0.5643999862670899, + "IoU.car": 0.8451999664306641, + "IoU.water": 0.6125, + "IoU.painting": 0.6993000030517578, + "IoU.sofa": 0.7195999908447266, + "IoU.shelf": 0.4152000045776367, + "IoU.house": 0.5559000015258789, + "IoU.sea": 0.6956999969482421, + "IoU.mirror": 0.6883999633789063, + "IoU.rug": 0.6683000183105469, + "IoU.field": 0.34720001220703123, + "IoU.armchair": 0.48970001220703124, + "IoU.seat": 0.6386000061035156, + "IoU.fence": 0.45540000915527346, + "IoU.desk": 0.4834999847412109, + "IoU.rock": 0.47970001220703123, + "IoU.wardrobe": 0.5847999954223633, + "IoU.lamp": 0.5597000122070312, + "IoU.bathtub": 0.8679000091552734, + "IoU.railing": 0.37849998474121094, + "IoU.cushion": 0.5977000045776367, + "IoU.base": 0.3259000015258789, + "IoU.box": 0.25549999237060544, + "IoU.column": 0.45860000610351564, + "IoU.signboard": 0.33799999237060546, + "IoU.chest of drawers": 0.33919998168945314, + "IoU.counter": 0.3691999816894531, + "IoU.sand": 0.502400016784668, + "IoU.sink": 0.7022000122070312, + "IoU.skyscraper": 0.5840999984741211, + "IoU.fireplace": 0.7247000122070313, + "IoU.refrigerator": 0.7426000213623047, + "IoU.grandstand": 0.5420999908447266, + "IoU.path": 0.2521999931335449, + "IoU.stairs": 0.24350000381469727, + "IoU.runway": 0.7348999786376953, + "IoU.case": 0.5645000076293946, + "IoU.pool table": 0.9133999633789063, + "IoU.pillow": 0.5881000137329102, + "IoU.screen door": 0.6936000061035156, + "IoU.stairway": 0.311299991607666, + "IoU.river": 0.15619999885559083, + "IoU.bridge": 0.5743000030517578, + "IoU.bookcase": 0.35319999694824217, + "IoU.blind": 
0.44279998779296875, + "IoU.coffee table": 0.5779000091552734, + "IoU.toilet": 0.8204000091552734, + "IoU.flower": 0.3315000152587891, + "IoU.book": 0.4722999954223633, + "IoU.hill": 0.12010000228881836, + "IoU.bench": 0.49139999389648437, + "IoU.countertop": 0.5802000045776368, + "IoU.stove": 0.7463999938964844, + "IoU.palm": 0.5215000152587891, + "IoU.kitchen island": 0.4531999969482422, + "IoU.computer": 0.7702999877929687, + "IoU.swivel chair": 0.5095999908447265, + "IoU.boat": 0.644000015258789, + "IoU.bar": 0.5590000152587891, + "IoU.arcade machine": 0.7151999664306641, + "IoU.hovel": 0.485, + "IoU.bus": 0.9061000061035156, + "IoU.towel": 0.6229999923706054, + "IoU.light": 0.42450000762939455, + "IoU.truck": 0.2901000022888184, + "IoU.tower": 0.30100000381469727, + "IoU.chandelier": 0.6329000091552734, + "IoU.awning": 0.2965999984741211, + "IoU.streetlight": 0.19680000305175782, + "IoU.booth": 0.41650001525878905, + "IoU.television receiver": 0.6409999847412109, + "IoU.airplane": 0.5725, + "IoU.dirt track": 0.006700000166893006, + "IoU.apparel": 0.3747999954223633, + "IoU.pole": 0.15119999885559082, + "IoU.land": 0.027699999809265137, + "IoU.bannister": 0.10779999732971192, + "IoU.escalator": 0.5070999908447266, + "IoU.ottoman": 0.484900016784668, + "IoU.bottle": 0.3397999954223633, + "IoU.buffet": 0.48080001831054686, + "IoU.poster": 0.14609999656677247, + "IoU.stage": 0.1647999954223633, + "IoU.van": 0.41779998779296873, + "IoU.ship": 0.21760000228881837, + "IoU.fountain": 0.25739999771118166, + "IoU.conveyer belt": 0.6725, + "IoU.canopy": 0.1947999954223633, + "IoU.washer": 0.7279000091552734, + "IoU.plaything": 0.30290000915527343, + "IoU.swimming pool": 0.6648999786376953, + "IoU.stool": 0.359900016784668, + "IoU.barrel": 0.5002999877929688, + "IoU.basket": 0.32610000610351564, + "IoU.waterfall": 0.5488000106811524, + "IoU.tent": 0.8976999664306641, + "IoU.bag": 0.1468000030517578, + "IoU.minibike": 0.7033000183105469, + "IoU.cradle": 0.7994000244140625, + "IoU.oven": 0.3615999984741211, + "IoU.ball": 0.48009998321533204, + "IoU.food": 0.49590000152587893, + "IoU.step": 0.08680000305175781, + "IoU.tank": 0.5513000106811523, + "IoU.trade name": 0.18920000076293944, + "IoU.microwave": 0.7618000030517578, + "IoU.pot": 0.47450000762939454, + "IoU.animal": 0.6336000061035156, + "IoU.bicycle": 0.5850999832153321, + "IoU.lake": 0.6375999832153321, + "IoU.dishwasher": 0.6302999877929687, + "IoU.screen": 0.5466999816894531, + "IoU.blanket": 0.14100000381469727, + "IoU.sculpture": 0.6561000061035156, + "IoU.hood": 0.5461000061035156, + "IoU.sconce": 0.3691999816894531, + "IoU.vase": 0.34970001220703123, + "IoU.traffic light": 0.27829999923706056, + "IoU.tray": 0.08649999618530274, + "IoU.ashcan": 0.39439998626708983, + "IoU.fan": 0.534099998474121, + "IoU.pier": 0.18729999542236328, + "IoU.crt screen": 0.04369999885559082, + "IoU.plate": 0.48630001068115236, + "IoU.monitor": 0.23209999084472657, + "IoU.bulletin board": 0.489900016784668, + "IoU.shower": 0.01350000023841858, + "IoU.radiator": 0.5543000030517579, + "IoU.glass": 0.15279999732971192, + "IoU.clock": 0.32310001373291014, + "IoU.flag": 0.47869998931884766, + "Acc.wall": 0.8723000335693359, + "Acc.building": 0.9409999847412109, + "Acc.sky": 0.9601999664306641, + "Acc.floor": 0.8908999633789062, + "Acc.tree": 0.8941000366210937, + "Acc.ceiling": 0.9044999694824218, + "Acc.road": 0.8876999664306641, + "Acc.bed ": 0.9677999877929687, + "Acc.windowpane": 0.7693000030517578, + "Acc.grass": 0.8076999664306641, + "Acc.cabinet": 
0.7523999786376954, + "Acc.sidewalk": 0.8169999694824219, + "Acc.person": 0.9216999816894531, + "Acc.earth": 0.5352000045776367, + "Acc.door": 0.673499984741211, + "Acc.table": 0.7554000091552734, + "Acc.mountain": 0.7001000213623046, + "Acc.plant": 0.5811000061035156, + "Acc.curtain": 0.8683000183105469, + "Acc.chair": 0.7080000305175781, + "Acc.car": 0.928499984741211, + "Acc.water": 0.7580000305175781, + "Acc.painting": 0.8723000335693359, + "Acc.sofa": 0.8512999725341797, + "Acc.shelf": 0.5527000045776367, + "Acc.house": 0.6520999908447266, + "Acc.sea": 0.8741999816894531, + "Acc.mirror": 0.8063999938964844, + "Acc.rug": 0.8037000274658204, + "Acc.field": 0.5349000167846679, + "Acc.armchair": 0.6688999938964844, + "Acc.seat": 0.8416999816894531, + "Acc.fence": 0.6179999923706054, + "Acc.desk": 0.740999984741211, + "Acc.rock": 0.6654000091552734, + "Acc.wardrobe": 0.7637999725341796, + "Acc.lamp": 0.740999984741211, + "Acc.bathtub": 0.9097000122070312, + "Acc.railing": 0.4893000030517578, + "Acc.cushion": 0.7151999664306641, + "Acc.base": 0.6405999755859375, + "Acc.box": 0.32229999542236326, + "Acc.column": 0.5570999908447266, + "Acc.signboard": 0.41700000762939454, + "Acc.chest of drawers": 0.6177999877929687, + "Acc.counter": 0.46130001068115234, + "Acc.sand": 0.762300033569336, + "Acc.sink": 0.7758999633789062, + "Acc.skyscraper": 0.6805000305175781, + "Acc.fireplace": 0.9323000335693359, + "Acc.refrigerator": 0.8455999755859375, + "Acc.grandstand": 0.7201000213623047, + "Acc.path": 0.3768000030517578, + "Acc.stairs": 0.3243000030517578, + "Acc.runway": 0.9713999938964843, + "Acc.case": 0.7044000244140625, + "Acc.pool table": 0.9741000366210938, + "Acc.pillow": 0.6958000183105468, + "Acc.screen door": 0.7827999877929688, + "Acc.stairway": 0.47939998626708985, + "Acc.river": 0.27149999618530274, + "Acc.bridge": 0.6854000091552734, + "Acc.bookcase": 0.5231999969482422, + "Acc.blind": 0.509000015258789, + "Acc.coffee table": 0.8568000030517579, + "Acc.toilet": 0.9076000213623047, + "Acc.flower": 0.5322999954223633, + "Acc.book": 0.6566999816894531, + "Acc.hill": 0.25790000915527345, + "Acc.bench": 0.5959000015258789, + "Acc.countertop": 0.6968000030517578, + "Acc.stove": 0.8801000213623047, + "Acc.palm": 0.7312000274658204, + "Acc.kitchen island": 0.765199966430664, + "Acc.computer": 0.9194000244140625, + "Acc.swivel chair": 0.7205999755859375, + "Acc.boat": 0.8505999755859375, + "Acc.bar": 0.715999984741211, + "Acc.arcade machine": 0.7880000305175782, + "Acc.hovel": 0.5265000152587891, + "Acc.bus": 0.9637999725341797, + "Acc.towel": 0.7686000061035156, + "Acc.light": 0.5245999908447265, + "Acc.truck": 0.3877000045776367, + "Acc.tower": 0.49959999084472656, + "Acc.chandelier": 0.8036000061035157, + "Acc.awning": 0.34119998931884765, + "Acc.streetlight": 0.2954000091552734, + "Acc.booth": 0.4565000152587891, + "Acc.television receiver": 0.7787999725341797, + "Acc.airplane": 0.6363999938964844, + "Acc.dirt track": 0.03460000038146973, + "Acc.apparel": 0.4829000091552734, + "Acc.pole": 0.18850000381469725, + "Acc.land": 0.05909999847412109, + "Acc.bannister": 0.145, + "Acc.escalator": 0.7376000213623047, + "Acc.ottoman": 0.6936000061035156, + "Acc.bottle": 0.5338000106811523, + "Acc.buffet": 0.6422000122070313, + "Acc.poster": 0.17540000915527343, + "Acc.stage": 0.43959999084472656, + "Acc.van": 0.5031999969482421, + "Acc.ship": 0.22459999084472657, + "Acc.fountain": 0.2642000007629395, + "Acc.conveyer belt": 0.9255000305175781, + "Acc.canopy": 0.23200000762939454, + "Acc.washer": 
0.7433999633789062, + "Acc.plaything": 0.4677999877929688, + "Acc.swimming pool": 0.89, + "Acc.stool": 0.5225999832153321, + "Acc.barrel": 0.6512000274658203, + "Acc.basket": 0.4627000045776367, + "Acc.waterfall": 0.7426000213623047, + "Acc.tent": 0.9913999938964844, + "Acc.bag": 0.1609000015258789, + "Acc.minibike": 0.7991999816894532, + "Acc.cradle": 0.9761000061035157, + "Acc.oven": 0.5279000091552735, + "Acc.ball": 0.5606000137329101, + "Acc.food": 0.5413000106811523, + "Acc.step": 0.11960000038146973, + "Acc.tank": 0.6569000244140625, + "Acc.trade name": 0.2034000015258789, + "Acc.microwave": 0.8491999816894531, + "Acc.pot": 0.5675, + "Acc.animal": 0.6694999694824219, + "Acc.bicycle": 0.7512000274658203, + "Acc.lake": 0.7816000366210938, + "Acc.dishwasher": 0.7143000030517578, + "Acc.screen": 0.7608999633789062, + "Acc.blanket": 0.1784000015258789, + "Acc.sculpture": 0.8043000030517579, + "Acc.hood": 0.7073000335693359, + "Acc.sconce": 0.465, + "Acc.vase": 0.5415000152587891, + "Acc.traffic light": 0.49290000915527343, + "Acc.tray": 0.13359999656677246, + "Acc.ashcan": 0.5443999862670899, + "Acc.fan": 0.7393000030517578, + "Acc.pier": 0.4597999954223633, + "Acc.crt screen": 0.11010000228881836, + "Acc.plate": 0.6944999694824219, + "Acc.monitor": 0.2825, + "Acc.bulletin board": 0.6793000030517579, + "Acc.shower": 0.05010000228881836, + "Acc.radiator": 0.6651000213623047, + "Acc.glass": 0.16809999465942382, + "Acc.clock": 0.3656999969482422, + "Acc.flag": 0.5504000091552734 + } + }, + "45": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8283, + "mIoU": 0.486, + "mAcc": 0.6163000000000001, + "IoU.wall": 0.7733000183105468, + "IoU.building": 0.8294000244140625, + "IoU.sky": 0.9347000122070312, + "IoU.floor": 0.8065000152587891, + "IoU.tree": 0.7331999969482422, + "IoU.ceiling": 0.8297000122070313, + "IoU.road": 0.8287000274658203, + "IoU.bed ": 0.8856999969482422, + "IoU.windowpane": 0.615099983215332, + "IoU.grass": 0.6904000091552734, + "IoU.cabinet": 0.6068999862670899, + "IoU.sidewalk": 0.6419999694824219, + "IoU.person": 0.7998000335693359, + "IoU.earth": 0.36529998779296874, + "IoU.door": 0.5015999984741211, + "IoU.table": 0.577599983215332, + "IoU.mountain": 0.5570999908447266, + "IoU.plant": 0.5052999877929687, + "IoU.curtain": 0.7227999877929687, + "IoU.chair": 0.548499984741211, + "IoU.car": 0.8444000244140625, + "IoU.water": 0.5570000076293945, + "IoU.painting": 0.7043000030517578, + "IoU.sofa": 0.6763999938964844, + "IoU.shelf": 0.4236000061035156, + "IoU.house": 0.5040999984741211, + "IoU.sea": 0.645199966430664, + "IoU.mirror": 0.6562000274658203, + "IoU.rug": 0.6462999725341797, + "IoU.field": 0.3309000015258789, + "IoU.armchair": 0.445099983215332, + "IoU.seat": 0.6156999969482422, + "IoU.fence": 0.42220001220703124, + "IoU.desk": 0.4743000030517578, + "IoU.rock": 0.4672999954223633, + "IoU.wardrobe": 0.5688000106811524, + "IoU.lamp": 0.552400016784668, + "IoU.bathtub": 0.8527999877929687, + "IoU.railing": 0.34509998321533203, + "IoU.cushion": 0.5647000122070313, + "IoU.base": 0.3061000061035156, + "IoU.box": 0.23829999923706055, + "IoU.column": 0.47150001525878904, + "IoU.signboard": 0.33419998168945314, + "IoU.chest of drawers": 0.35720001220703124, + "IoU.counter": 0.340099983215332, + "IoU.sand": 0.49209999084472655, + "IoU.sink": 0.6869000244140625, + "IoU.skyscraper": 0.5545999908447266, + "IoU.fireplace": 0.7351000213623047, + "IoU.refrigerator": 0.7473000335693359, + 
"IoU.grandstand": 0.5018000030517578, + "IoU.path": 0.2535000038146973, + "IoU.stairs": 0.20510000228881836, + "IoU.runway": 0.7295999908447266, + "IoU.case": 0.5447999954223632, + "IoU.pool table": 0.9065000152587891, + "IoU.pillow": 0.5406000137329101, + "IoU.screen door": 0.6311000061035156, + "IoU.stairway": 0.28649999618530275, + "IoU.river": 0.18790000915527344, + "IoU.bridge": 0.6744999694824219, + "IoU.bookcase": 0.3611000061035156, + "IoU.blind": 0.445099983215332, + "IoU.coffee table": 0.5479999923706055, + "IoU.toilet": 0.8256999969482421, + "IoU.flower": 0.30809999465942384, + "IoU.book": 0.46119998931884765, + "IoU.hill": 0.11569999694824219, + "IoU.bench": 0.4702000045776367, + "IoU.countertop": 0.5922000122070312, + "IoU.stove": 0.7027999877929687, + "IoU.palm": 0.49270000457763674, + "IoU.kitchen island": 0.4588999938964844, + "IoU.computer": 0.7519999694824219, + "IoU.swivel chair": 0.5052000045776367, + "IoU.boat": 0.6576999664306641, + "IoU.bar": 0.5238000106811523, + "IoU.arcade machine": 0.7111000061035156, + "IoU.hovel": 0.40970001220703123, + "IoU.bus": 0.8886000061035156, + "IoU.towel": 0.6475, + "IoU.light": 0.4022000122070313, + "IoU.truck": 0.271200008392334, + "IoU.tower": 0.2830999946594238, + "IoU.chandelier": 0.63, + "IoU.awning": 0.2759000015258789, + "IoU.streetlight": 0.1940999984741211, + "IoU.booth": 0.4241999816894531, + "IoU.television receiver": 0.6422000122070313, + "IoU.airplane": 0.5665999984741211, + "IoU.dirt track": 9.999999776482581e-05, + "IoU.apparel": 0.3390000152587891, + "IoU.pole": 0.16170000076293944, + "IoU.land": 0.03009999990463257, + "IoU.bannister": 0.09439999580383301, + "IoU.escalator": 0.49139999389648437, + "IoU.ottoman": 0.4518000030517578, + "IoU.bottle": 0.35639999389648436, + "IoU.buffet": 0.5461999893188476, + "IoU.poster": 0.21700000762939453, + "IoU.stage": 0.18780000686645507, + "IoU.van": 0.44669998168945313, + "IoU.ship": 0.3125, + "IoU.fountain": 0.2134000015258789, + "IoU.conveyer belt": 0.6731999969482422, + "IoU.canopy": 0.18889999389648438, + "IoU.washer": 0.7312000274658204, + "IoU.plaything": 0.2538999938964844, + "IoU.swimming pool": 0.6234999847412109, + "IoU.stool": 0.4, + "IoU.barrel": 0.42389999389648436, + "IoU.basket": 0.3234000015258789, + "IoU.waterfall": 0.6737999725341797, + "IoU.tent": 0.8044999694824219, + "IoU.bag": 0.1534000015258789, + "IoU.minibike": 0.6980000305175781, + "IoU.cradle": 0.7869000244140625, + "IoU.oven": 0.3290999984741211, + "IoU.ball": 0.47169998168945315, + "IoU.food": 0.524900016784668, + "IoU.step": 0.08710000038146973, + "IoU.tank": 0.5561999893188476, + "IoU.trade name": 0.19959999084472657, + "IoU.microwave": 0.7826000213623047, + "IoU.pot": 0.44099998474121094, + "IoU.animal": 0.6252000045776367, + "IoU.bicycle": 0.5683000183105469, + "IoU.lake": 0.14739999771118165, + "IoU.dishwasher": 0.5677000045776367, + "IoU.screen": 0.5075999832153321, + "IoU.blanket": 0.1225, + "IoU.sculpture": 0.6186000061035156, + "IoU.hood": 0.5377999877929688, + "IoU.sconce": 0.35630001068115236, + "IoU.vase": 0.335, + "IoU.traffic light": 0.27549999237060546, + "IoU.tray": 0.08060000419616699, + "IoU.ashcan": 0.39310001373291015, + "IoU.fan": 0.5338999938964843, + "IoU.pier": 0.18350000381469728, + "IoU.crt screen": 0.047899999618530274, + "IoU.plate": 0.49349998474121093, + "IoU.monitor": 0.2563999938964844, + "IoU.bulletin board": 0.4790999984741211, + "IoU.shower": 0.013200000524520875, + "IoU.radiator": 0.5241999816894531, + "IoU.glass": 0.13239999771118163, + "IoU.clock": 
0.32049999237060545, + "IoU.flag": 0.46619998931884765, + "Acc.wall": 0.8755000305175781, + "Acc.building": 0.9338999938964844, + "Acc.sky": 0.957699966430664, + "Acc.floor": 0.8863999938964844, + "Acc.tree": 0.8930999755859375, + "Acc.ceiling": 0.8933999633789063, + "Acc.road": 0.8913999938964844, + "Acc.bed ": 0.9638999938964844, + "Acc.windowpane": 0.7616999816894531, + "Acc.grass": 0.8290000152587891, + "Acc.cabinet": 0.7368000030517579, + "Acc.sidewalk": 0.8031999969482422, + "Acc.person": 0.922300033569336, + "Acc.earth": 0.5104000091552734, + "Acc.door": 0.6438999938964843, + "Acc.table": 0.7387999725341797, + "Acc.mountain": 0.6993000030517578, + "Acc.plant": 0.5836999893188477, + "Acc.curtain": 0.8637000274658203, + "Acc.chair": 0.6951000213623046, + "Acc.car": 0.9294999694824219, + "Acc.water": 0.6961000061035156, + "Acc.painting": 0.8634999847412109, + "Acc.sofa": 0.8252999877929688, + "Acc.shelf": 0.5715999984741211, + "Acc.house": 0.6393000030517578, + "Acc.sea": 0.8294000244140625, + "Acc.mirror": 0.7858999633789062, + "Acc.rug": 0.7870999908447266, + "Acc.field": 0.49520000457763674, + "Acc.armchair": 0.6322000122070313, + "Acc.seat": 0.8473000335693359, + "Acc.fence": 0.5781000137329102, + "Acc.desk": 0.7406999969482422, + "Acc.rock": 0.6397999954223633, + "Acc.wardrobe": 0.765, + "Acc.lamp": 0.7426000213623047, + "Acc.bathtub": 0.9213999938964844, + "Acc.railing": 0.46169998168945314, + "Acc.cushion": 0.7080999755859375, + "Acc.base": 0.5943000030517578, + "Acc.box": 0.30639999389648437, + "Acc.column": 0.5758000183105468, + "Acc.signboard": 0.42060001373291017, + "Acc.chest of drawers": 0.6526000213623047, + "Acc.counter": 0.42369998931884767, + "Acc.sand": 0.7326000213623047, + "Acc.sink": 0.7670999908447266, + "Acc.skyscraper": 0.6391999816894531, + "Acc.fireplace": 0.9104000091552734, + "Acc.refrigerator": 0.86, + "Acc.grandstand": 0.7184999847412109, + "Acc.path": 0.38209999084472657, + "Acc.stairs": 0.28969999313354494, + "Acc.runway": 0.967699966430664, + "Acc.case": 0.6947000122070313, + "Acc.pool table": 0.9713999938964843, + "Acc.pillow": 0.6409999847412109, + "Acc.screen door": 0.7591999816894531, + "Acc.stairway": 0.4608000183105469, + "Acc.river": 0.43340000152587893, + "Acc.bridge": 0.8111000061035156, + "Acc.bookcase": 0.5549000167846679, + "Acc.blind": 0.5186999893188476, + "Acc.coffee table": 0.8606999969482422, + "Acc.toilet": 0.9152999877929687, + "Acc.flower": 0.4983000183105469, + "Acc.book": 0.6526000213623047, + "Acc.hill": 0.20309999465942383, + "Acc.bench": 0.5784000015258789, + "Acc.countertop": 0.7441000366210937, + "Acc.stove": 0.8366000366210937, + "Acc.palm": 0.6994999694824219, + "Acc.kitchen island": 0.7895999908447265, + "Acc.computer": 0.9029000091552735, + "Acc.swivel chair": 0.7052999877929688, + "Acc.boat": 0.8462999725341797, + "Acc.bar": 0.6755999755859375, + "Acc.arcade machine": 0.800199966430664, + "Acc.hovel": 0.44669998168945313, + "Acc.bus": 0.9530000305175781, + "Acc.towel": 0.7926000213623047, + "Acc.light": 0.5018000030517578, + "Acc.truck": 0.35560001373291017, + "Acc.tower": 0.4909999847412109, + "Acc.chandelier": 0.8011000061035156, + "Acc.awning": 0.3340999984741211, + "Acc.streetlight": 0.2790999984741211, + "Acc.booth": 0.45279998779296876, + "Acc.television receiver": 0.7543000030517578, + "Acc.airplane": 0.6594000244140625, + "Acc.dirt track": 0.00019999999552965163, + "Acc.apparel": 0.43130001068115237, + "Acc.pole": 0.2038999938964844, + "Acc.land": 0.05239999771118164, + "Acc.bannister": 0.144399995803833, + 
"Acc.escalator": 0.6755000305175781, + "Acc.ottoman": 0.6818000030517578, + "Acc.bottle": 0.5668000030517578, + "Acc.buffet": 0.7330999755859375, + "Acc.poster": 0.2645999908447266, + "Acc.stage": 0.4540999984741211, + "Acc.van": 0.5320000076293945, + "Acc.ship": 0.3218999862670898, + "Acc.fountain": 0.22120000839233397, + "Acc.conveyer belt": 0.9362000274658203, + "Acc.canopy": 0.26600000381469724, + "Acc.washer": 0.7569999694824219, + "Acc.plaything": 0.37849998474121094, + "Acc.swimming pool": 0.8704000091552735, + "Acc.stool": 0.5204000091552734, + "Acc.barrel": 0.6512000274658203, + "Acc.basket": 0.45430000305175783, + "Acc.waterfall": 0.8918000030517578, + "Acc.tent": 0.9919999694824219, + "Acc.bag": 0.1775, + "Acc.minibike": 0.802300033569336, + "Acc.cradle": 0.9743000030517578, + "Acc.oven": 0.43900001525878907, + "Acc.ball": 0.5140999984741211, + "Acc.food": 0.5740999984741211, + "Acc.step": 0.11279999732971191, + "Acc.tank": 0.6495999908447265, + "Acc.trade name": 0.21350000381469728, + "Acc.microwave": 0.8898999786376953, + "Acc.pot": 0.5243000030517578, + "Acc.animal": 0.6569000244140625, + "Acc.bicycle": 0.7130999755859375, + "Acc.lake": 0.17170000076293945, + "Acc.dishwasher": 0.6630999755859375, + "Acc.screen": 0.7658999633789062, + "Acc.blanket": 0.1390999984741211, + "Acc.sculpture": 0.7283000183105469, + "Acc.hood": 0.7044000244140625, + "Acc.sconce": 0.44369998931884763, + "Acc.vase": 0.5247999954223633, + "Acc.traffic light": 0.504900016784668, + "Acc.tray": 0.12899999618530272, + "Acc.ashcan": 0.5491999816894532, + "Acc.fan": 0.7225, + "Acc.pier": 0.44540000915527345, + "Acc.crt screen": 0.11890000343322754, + "Acc.plate": 0.6958999633789062, + "Acc.monitor": 0.3122999954223633, + "Acc.bulletin board": 0.6809999847412109, + "Acc.shower": 0.055199999809265134, + "Acc.radiator": 0.6431999969482421, + "Acc.glass": 0.14539999961853028, + "Acc.clock": 0.36689998626708986, + "Acc.flag": 0.5393000030517578 + } + }, + "46": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8220999999999999, + "mIoU": 0.4738, + "mAcc": 0.6065999999999999, + "IoU.wall": 0.7652999877929687, + "IoU.building": 0.8283000183105469, + "IoU.sky": 0.9344000244140624, + "IoU.floor": 0.803499984741211, + "IoU.tree": 0.7337000274658203, + "IoU.ceiling": 0.8254000091552735, + "IoU.road": 0.8194000244140625, + "IoU.bed ": 0.8731999969482422, + "IoU.windowpane": 0.607599983215332, + "IoU.grass": 0.6994999694824219, + "IoU.cabinet": 0.5786000061035156, + "IoU.sidewalk": 0.634000015258789, + "IoU.person": 0.7926000213623047, + "IoU.earth": 0.3643000030517578, + "IoU.door": 0.44779998779296876, + "IoU.table": 0.5581000137329102, + "IoU.mountain": 0.5779999923706055, + "IoU.plant": 0.5072999954223633, + "IoU.curtain": 0.7197000122070313, + "IoU.chair": 0.5268999862670899, + "IoU.car": 0.8284999847412109, + "IoU.water": 0.5272999954223633, + "IoU.painting": 0.6901999664306641, + "IoU.sofa": 0.65, + "IoU.shelf": 0.4234000015258789, + "IoU.house": 0.5140999984741211, + "IoU.sea": 0.5768999862670898, + "IoU.mirror": 0.6375999832153321, + "IoU.rug": 0.6586000061035157, + "IoU.field": 0.2905999946594238, + "IoU.armchair": 0.43509998321533205, + "IoU.seat": 0.6168000030517579, + "IoU.fence": 0.39540000915527346, + "IoU.desk": 0.4847999954223633, + "IoU.rock": 0.49279998779296874, + "IoU.wardrobe": 0.5281999969482422, + "IoU.lamp": 0.5443999862670899, + "IoU.bathtub": 0.7959999847412109, + "IoU.railing": 0.34650001525878904, + 
"IoU.cushion": 0.5527999877929688, + "IoU.base": 0.3046999931335449, + "IoU.box": 0.22370000839233398, + "IoU.column": 0.4520999908447266, + "IoU.signboard": 0.3290999984741211, + "IoU.chest of drawers": 0.3345999908447266, + "IoU.counter": 0.32369998931884764, + "IoU.sand": 0.39869998931884765, + "IoU.sink": 0.6793000030517579, + "IoU.skyscraper": 0.5772000122070312, + "IoU.fireplace": 0.7194000244140625, + "IoU.refrigerator": 0.6991999816894531, + "IoU.grandstand": 0.5084000015258789, + "IoU.path": 0.21350000381469728, + "IoU.stairs": 0.24219999313354493, + "IoU.runway": 0.7134999847412109, + "IoU.case": 0.5070000076293946, + "IoU.pool table": 0.9061000061035156, + "IoU.pillow": 0.5443000030517579, + "IoU.screen door": 0.5299000167846679, + "IoU.stairway": 0.35130001068115235, + "IoU.river": 0.17040000915527342, + "IoU.bridge": 0.6433999633789063, + "IoU.bookcase": 0.32630001068115233, + "IoU.blind": 0.3859000015258789, + "IoU.coffee table": 0.5454999923706054, + "IoU.toilet": 0.8188999938964844, + "IoU.flower": 0.36389999389648436, + "IoU.book": 0.43020000457763674, + "IoU.hill": 0.07380000114440918, + "IoU.bench": 0.39479999542236327, + "IoU.countertop": 0.5508000183105469, + "IoU.stove": 0.6909999847412109, + "IoU.palm": 0.49540000915527344, + "IoU.kitchen island": 0.37740001678466795, + "IoU.computer": 0.699800033569336, + "IoU.swivel chair": 0.4743000030517578, + "IoU.boat": 0.6386999893188476, + "IoU.bar": 0.47509998321533203, + "IoU.arcade machine": 0.7644999694824218, + "IoU.hovel": 0.27430000305175783, + "IoU.bus": 0.8794000244140625, + "IoU.towel": 0.6320000076293946, + "IoU.light": 0.4254999923706055, + "IoU.truck": 0.301299991607666, + "IoU.tower": 0.3384000015258789, + "IoU.chandelier": 0.6104000091552735, + "IoU.awning": 0.30190000534057615, + "IoU.streetlight": 0.1868000030517578, + "IoU.booth": 0.495, + "IoU.television receiver": 0.5936000061035156, + "IoU.airplane": 0.5486999893188477, + "IoU.dirt track": 0.06960000038146973, + "IoU.apparel": 0.3295999908447266, + "IoU.pole": 0.14800000190734863, + "IoU.land": 0.06159999847412109, + "IoU.bannister": 0.1018000030517578, + "IoU.escalator": 0.32849998474121095, + "IoU.ottoman": 0.46580001831054685, + "IoU.bottle": 0.1743000030517578, + "IoU.buffet": 0.49340000152587893, + "IoU.poster": 0.26040000915527345, + "IoU.stage": 0.17290000915527343, + "IoU.van": 0.4068000030517578, + "IoU.ship": 0.835, + "IoU.fountain": 0.2620000076293945, + "IoU.conveyer belt": 0.6477999877929688, + "IoU.canopy": 0.20180000305175783, + "IoU.washer": 0.7406999969482422, + "IoU.plaything": 0.26760000228881836, + "IoU.swimming pool": 0.6288000106811523, + "IoU.stool": 0.347400016784668, + "IoU.barrel": 0.32849998474121095, + "IoU.basket": 0.2775, + "IoU.waterfall": 0.6969999694824218, + "IoU.tent": 0.8623000335693359, + "IoU.bag": 0.1259000015258789, + "IoU.minibike": 0.6583999633789063, + "IoU.cradle": 0.8047000122070312, + "IoU.oven": 0.3047999954223633, + "IoU.ball": 0.32979999542236327, + "IoU.food": 0.515, + "IoU.step": 0.11539999961853027, + "IoU.tank": 0.5225999832153321, + "IoU.trade name": 0.20899999618530274, + "IoU.microwave": 0.6755000305175781, + "IoU.pot": 0.34330001831054685, + "IoU.animal": 0.632400016784668, + "IoU.bicycle": 0.5652999877929688, + "IoU.lake": 0.17780000686645508, + "IoU.dishwasher": 0.5195999908447265, + "IoU.screen": 0.5170000076293946, + "IoU.blanket": 0.15199999809265136, + "IoU.sculpture": 0.547400016784668, + "IoU.hood": 0.4829999923706055, + "IoU.sconce": 0.31540000915527344, + "IoU.vase": 0.3213000106811523, + 
"IoU.traffic light": 0.2595000076293945, + "IoU.tray": 0.055300002098083494, + "IoU.ashcan": 0.3925, + "IoU.fan": 0.5065999984741211, + "IoU.pier": 0.18629999160766603, + "IoU.crt screen": 0.033399999141693115, + "IoU.plate": 0.4825, + "IoU.monitor": 0.18290000915527344, + "IoU.bulletin board": 0.457599983215332, + "IoU.shower": 0.009300000071525573, + "IoU.radiator": 0.5081999969482421, + "IoU.glass": 0.099399995803833, + "IoU.clock": 0.2844000053405762, + "IoU.flag": 0.6090999984741211, + "Acc.wall": 0.8701000213623047, + "Acc.building": 0.9294000244140626, + "Acc.sky": 0.9568000030517578, + "Acc.floor": 0.8827999877929688, + "Acc.tree": 0.893499984741211, + "Acc.ceiling": 0.8911000061035156, + "Acc.road": 0.8801000213623047, + "Acc.bed ": 0.9616000366210937, + "Acc.windowpane": 0.7708000183105469, + "Acc.grass": 0.8344000244140625, + "Acc.cabinet": 0.7077999877929687, + "Acc.sidewalk": 0.8102999877929687, + "Acc.person": 0.9191999816894532, + "Acc.earth": 0.5145999908447265, + "Acc.door": 0.5841999816894531, + "Acc.table": 0.7287999725341797, + "Acc.mountain": 0.7269000244140625, + "Acc.plant": 0.5933000183105469, + "Acc.curtain": 0.8575, + "Acc.chair": 0.6765000152587891, + "Acc.car": 0.9291000366210938, + "Acc.water": 0.6841999816894532, + "Acc.painting": 0.8469999694824218, + "Acc.sofa": 0.8255000305175781, + "Acc.shelf": 0.5861999893188476, + "Acc.house": 0.6623999786376953, + "Acc.sea": 0.7187999725341797, + "Acc.mirror": 0.7487999725341797, + "Acc.rug": 0.7930000305175782, + "Acc.field": 0.40759998321533203, + "Acc.armchair": 0.6118000030517579, + "Acc.seat": 0.8331999969482422, + "Acc.fence": 0.5629999923706055, + "Acc.desk": 0.7808000183105469, + "Acc.rock": 0.6579000091552735, + "Acc.wardrobe": 0.726500015258789, + "Acc.lamp": 0.7341000366210938, + "Acc.bathtub": 0.8513999938964844, + "Acc.railing": 0.46119998931884765, + "Acc.cushion": 0.6962000274658203, + "Acc.base": 0.5684999847412109, + "Acc.box": 0.3011000061035156, + "Acc.column": 0.581500015258789, + "Acc.signboard": 0.414900016784668, + "Acc.chest of drawers": 0.6295999908447265, + "Acc.counter": 0.4240999984741211, + "Acc.sand": 0.6193000030517578, + "Acc.sink": 0.7570999908447266, + "Acc.skyscraper": 0.6659999847412109, + "Acc.fireplace": 0.8908999633789062, + "Acc.refrigerator": 0.8176999664306641, + "Acc.grandstand": 0.7301999664306641, + "Acc.path": 0.31629999160766603, + "Acc.stairs": 0.3729999923706055, + "Acc.runway": 0.9730999755859375, + "Acc.case": 0.6655000305175781, + "Acc.pool table": 0.9698999786376953, + "Acc.pillow": 0.6308000183105469, + "Acc.screen door": 0.6576999664306641, + "Acc.stairway": 0.5229000091552735, + "Acc.river": 0.42770000457763674, + "Acc.bridge": 0.8469000244140625, + "Acc.bookcase": 0.5054000091552734, + "Acc.blind": 0.4259999847412109, + "Acc.coffee table": 0.8583999633789062, + "Acc.toilet": 0.9009999847412109, + "Acc.flower": 0.572400016784668, + "Acc.book": 0.622599983215332, + "Acc.hill": 0.14739999771118165, + "Acc.bench": 0.5054999923706055, + "Acc.countertop": 0.6948999786376953, + "Acc.stove": 0.8122000122070312, + "Acc.palm": 0.7306999969482422, + "Acc.kitchen island": 0.7863999938964844, + "Acc.computer": 0.8411000061035157, + "Acc.swivel chair": 0.6780999755859375, + "Acc.boat": 0.8444999694824219, + "Acc.bar": 0.6891000366210938, + "Acc.arcade machine": 0.8620999908447265, + "Acc.hovel": 0.27829999923706056, + "Acc.bus": 0.9479000091552734, + "Acc.towel": 0.7659999847412109, + "Acc.light": 0.5111000061035156, + "Acc.truck": 0.40970001220703123, + "Acc.tower": 
0.5565000152587891, + "Acc.chandelier": 0.7926999664306641, + "Acc.awning": 0.3659000015258789, + "Acc.streetlight": 0.26110000610351564, + "Acc.booth": 0.5468999862670898, + "Acc.television receiver": 0.7437000274658203, + "Acc.airplane": 0.6526000213623047, + "Acc.dirt track": 0.2259000015258789, + "Acc.apparel": 0.43139999389648437, + "Acc.pole": 0.18780000686645507, + "Acc.land": 0.10930000305175781, + "Acc.bannister": 0.15890000343322755, + "Acc.escalator": 0.40580001831054685, + "Acc.ottoman": 0.6855000305175781, + "Acc.bottle": 0.20280000686645508, + "Acc.buffet": 0.6827999877929688, + "Acc.poster": 0.30690000534057615, + "Acc.stage": 0.46680000305175784, + "Acc.van": 0.48279998779296873, + "Acc.ship": 0.8727999877929687, + "Acc.fountain": 0.2709000015258789, + "Acc.conveyer belt": 0.9306999969482422, + "Acc.canopy": 0.30760000228881834, + "Acc.washer": 0.755, + "Acc.plaything": 0.40310001373291016, + "Acc.swimming pool": 0.8566999816894532, + "Acc.stool": 0.5109999847412109, + "Acc.barrel": 0.650199966430664, + "Acc.basket": 0.40330001831054685, + "Acc.waterfall": 0.8886000061035156, + "Acc.tent": 0.9952999877929688, + "Acc.bag": 0.14390000343322754, + "Acc.minibike": 0.7570999908447266, + "Acc.cradle": 0.9786000061035156, + "Acc.oven": 0.5343999862670898, + "Acc.ball": 0.3509999847412109, + "Acc.food": 0.5772000122070312, + "Acc.step": 0.1606999969482422, + "Acc.tank": 0.6256999969482422, + "Acc.trade name": 0.22559999465942382, + "Acc.microwave": 0.7622000122070313, + "Acc.pot": 0.4022999954223633, + "Acc.animal": 0.6698999786376953, + "Acc.bicycle": 0.7456999969482422, + "Acc.lake": 0.20979999542236327, + "Acc.dishwasher": 0.6063999938964844, + "Acc.screen": 0.7412999725341797, + "Acc.blanket": 0.17549999237060546, + "Acc.sculpture": 0.6559999847412109, + "Acc.hood": 0.6327999877929688, + "Acc.sconce": 0.3838000106811523, + "Acc.vase": 0.4904999923706055, + "Acc.traffic light": 0.48639999389648436, + "Acc.tray": 0.0934000015258789, + "Acc.ashcan": 0.552599983215332, + "Acc.fan": 0.7311000061035157, + "Acc.pier": 0.44790000915527345, + "Acc.crt screen": 0.10539999961853028, + "Acc.plate": 0.6569000244140625, + "Acc.monitor": 0.21969999313354494, + "Acc.bulletin board": 0.6825, + "Acc.shower": 0.051999998092651364, + "Acc.radiator": 0.6183000183105469, + "Acc.glass": 0.10729999542236328, + "Acc.clock": 0.32740001678466796, + "Acc.flag": 0.6854000091552734 + } + }, + "47": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8178, + "mIoU": 0.4623, + "mAcc": 0.5943999999999999, + "IoU.wall": 0.7587999725341796, + "IoU.building": 0.8262999725341796, + "IoU.sky": 0.9347000122070312, + "IoU.floor": 0.8047000122070312, + "IoU.tree": 0.722699966430664, + "IoU.ceiling": 0.8222000122070312, + "IoU.road": 0.8116999816894531, + "IoU.bed ": 0.8676000213623047, + "IoU.windowpane": 0.610999984741211, + "IoU.grass": 0.6644000244140625, + "IoU.cabinet": 0.5791999816894531, + "IoU.sidewalk": 0.63, + "IoU.person": 0.7822000122070313, + "IoU.earth": 0.33189998626708983, + "IoU.door": 0.4488999938964844, + "IoU.table": 0.56, + "IoU.mountain": 0.575900001525879, + "IoU.plant": 0.49630001068115237, + "IoU.curtain": 0.7305000305175782, + "IoU.chair": 0.518400001525879, + "IoU.car": 0.8238999938964844, + "IoU.water": 0.5381000137329102, + "IoU.painting": 0.677300033569336, + "IoU.sofa": 0.6465000152587891, + "IoU.shelf": 0.4379999923706055, + "IoU.house": 0.5359999847412109, + "IoU.sea": 0.553499984741211, + 
"IoU.mirror": 0.6361999893188477, + "IoU.rug": 0.6783000183105469, + "IoU.field": 0.2745000076293945, + "IoU.armchair": 0.42830001831054687, + "IoU.seat": 0.5993999862670898, + "IoU.fence": 0.35900001525878905, + "IoU.desk": 0.4329999923706055, + "IoU.rock": 0.4433000183105469, + "IoU.wardrobe": 0.5256999969482422, + "IoU.lamp": 0.5254000091552734, + "IoU.bathtub": 0.7705000305175781, + "IoU.railing": 0.31059999465942384, + "IoU.cushion": 0.5266999816894531, + "IoU.base": 0.3072999954223633, + "IoU.box": 0.22540000915527345, + "IoU.column": 0.45939998626708983, + "IoU.signboard": 0.32630001068115233, + "IoU.chest of drawers": 0.3185000038146973, + "IoU.counter": 0.2740999984741211, + "IoU.sand": 0.4209000015258789, + "IoU.sink": 0.6670999908447266, + "IoU.skyscraper": 0.6090999984741211, + "IoU.fireplace": 0.689800033569336, + "IoU.refrigerator": 0.697699966430664, + "IoU.grandstand": 0.41259998321533203, + "IoU.path": 0.23670000076293946, + "IoU.stairs": 0.21100000381469727, + "IoU.runway": 0.7341999816894531, + "IoU.case": 0.4929999923706055, + "IoU.pool table": 0.9127999877929688, + "IoU.pillow": 0.5197000122070312, + "IoU.screen door": 0.6234000015258789, + "IoU.stairway": 0.24469999313354493, + "IoU.river": 0.17309999465942383, + "IoU.bridge": 0.6447000122070312, + "IoU.bookcase": 0.31690000534057616, + "IoU.blind": 0.39, + "IoU.coffee table": 0.5109999847412109, + "IoU.toilet": 0.8140000152587891, + "IoU.flower": 0.35220001220703123, + "IoU.book": 0.4184000015258789, + "IoU.hill": 0.07440000057220458, + "IoU.bench": 0.38290000915527345, + "IoU.countertop": 0.5245000076293945, + "IoU.stove": 0.705, + "IoU.palm": 0.47720001220703123, + "IoU.kitchen island": 0.3808000183105469, + "IoU.computer": 0.6947000122070313, + "IoU.swivel chair": 0.4366999816894531, + "IoU.boat": 0.6006999969482422, + "IoU.bar": 0.519000015258789, + "IoU.arcade machine": 0.6629000091552735, + "IoU.hovel": 0.4645999908447266, + "IoU.bus": 0.8230999755859375, + "IoU.towel": 0.5854999923706055, + "IoU.light": 0.3945000076293945, + "IoU.truck": 0.311299991607666, + "IoU.tower": 0.2955999946594238, + "IoU.chandelier": 0.5863000106811523, + "IoU.awning": 0.2886000061035156, + "IoU.streetlight": 0.18270000457763672, + "IoU.booth": 0.43560001373291013, + "IoU.television receiver": 0.5922000122070312, + "IoU.airplane": 0.5772999954223633, + "IoU.dirt track": 0.11, + "IoU.apparel": 0.30540000915527343, + "IoU.pole": 0.16100000381469726, + "IoU.land": 0.04989999771118164, + "IoU.bannister": 0.06260000228881836, + "IoU.escalator": 0.26649999618530273, + "IoU.ottoman": 0.42470001220703124, + "IoU.bottle": 0.18899999618530272, + "IoU.buffet": 0.5766999816894531, + "IoU.poster": 0.3081999969482422, + "IoU.stage": 0.12670000076293944, + "IoU.van": 0.39630001068115234, + "IoU.ship": 0.9019999694824219, + "IoU.fountain": 0.19540000915527345, + "IoU.conveyer belt": 0.6894000244140625, + "IoU.canopy": 0.25290000915527344, + "IoU.washer": 0.6341999816894531, + "IoU.plaything": 0.23610000610351561, + "IoU.swimming pool": 0.5631000137329102, + "IoU.stool": 0.3193000030517578, + "IoU.barrel": 0.5102999877929687, + "IoU.basket": 0.21239999771118165, + "IoU.waterfall": 0.5661000061035156, + "IoU.tent": 0.8733999633789062, + "IoU.bag": 0.1478999996185303, + "IoU.minibike": 0.5622000122070312, + "IoU.cradle": 0.7705000305175781, + "IoU.oven": 0.16809999465942382, + "IoU.ball": 0.4931000137329102, + "IoU.food": 0.5220999908447266, + "IoU.step": 0.10609999656677246, + "IoU.tank": 0.5334000015258789, + "IoU.trade name": 0.20969999313354493, + 
"IoU.microwave": 0.36779998779296874, + "IoU.pot": 0.36630001068115237, + "IoU.animal": 0.5461000061035156, + "IoU.bicycle": 0.48009998321533204, + "IoU.lake": 0.5827999877929687, + "IoU.dishwasher": 0.44619998931884763, + "IoU.screen": 0.6161000061035157, + "IoU.blanket": 0.12829999923706054, + "IoU.sculpture": 0.5261999893188477, + "IoU.hood": 0.5256000137329102, + "IoU.sconce": 0.27030000686645506, + "IoU.vase": 0.2865999984741211, + "IoU.traffic light": 0.25260000228881835, + "IoU.tray": 0.030399999618530273, + "IoU.ashcan": 0.35200000762939454, + "IoU.fan": 0.46290000915527346, + "IoU.pier": 0.21760000228881837, + "IoU.crt screen": 0.03619999885559082, + "IoU.plate": 0.48220001220703124, + "IoU.monitor": 0.1209000015258789, + "IoU.bulletin board": 0.38159999847412107, + "IoU.shower": 0.02059999942779541, + "IoU.radiator": 0.5384000015258789, + "IoU.glass": 0.0734000015258789, + "IoU.clock": 0.2798999977111816, + "IoU.flag": 0.3004999923706055, + "Acc.wall": 0.8683000183105469, + "Acc.building": 0.9244999694824219, + "Acc.sky": 0.9577999877929687, + "Acc.floor": 0.8870999908447266, + "Acc.tree": 0.8812000274658203, + "Acc.ceiling": 0.8830999755859374, + "Acc.road": 0.8718000030517579, + "Acc.bed ": 0.9616000366210937, + "Acc.windowpane": 0.7715000152587891, + "Acc.grass": 0.8305000305175781, + "Acc.cabinet": 0.716500015258789, + "Acc.sidewalk": 0.8166000366210937, + "Acc.person": 0.9151000213623047, + "Acc.earth": 0.4565999984741211, + "Acc.door": 0.6038000106811523, + "Acc.table": 0.715, + "Acc.mountain": 0.7331999969482422, + "Acc.plant": 0.5997000122070313, + "Acc.curtain": 0.8608999633789063, + "Acc.chair": 0.6655000305175781, + "Acc.car": 0.9230999755859375, + "Acc.water": 0.6487000274658203, + "Acc.painting": 0.8248999786376953, + "Acc.sofa": 0.8222000122070312, + "Acc.shelf": 0.6216999816894532, + "Acc.house": 0.6680999755859375, + "Acc.sea": 0.7501000213623047, + "Acc.mirror": 0.7501000213623047, + "Acc.rug": 0.78, + "Acc.field": 0.4134999847412109, + "Acc.armchair": 0.5795000076293946, + "Acc.seat": 0.8326000213623047, + "Acc.fence": 0.5, + "Acc.desk": 0.7469000244140624, + "Acc.rock": 0.6006999969482422, + "Acc.wardrobe": 0.7658000183105469, + "Acc.lamp": 0.7218000030517578, + "Acc.bathtub": 0.8575, + "Acc.railing": 0.4225, + "Acc.cushion": 0.6523999786376953, + "Acc.base": 0.571500015258789, + "Acc.box": 0.28100000381469725, + "Acc.column": 0.5709999847412109, + "Acc.signboard": 0.41, + "Acc.chest of drawers": 0.6052999877929688, + "Acc.counter": 0.3745000076293945, + "Acc.sand": 0.6024000167846679, + "Acc.sink": 0.7361000061035157, + "Acc.skyscraper": 0.765, + "Acc.fireplace": 0.8644000244140625, + "Acc.refrigerator": 0.8247000122070313, + "Acc.grandstand": 0.7348999786376953, + "Acc.path": 0.3697999954223633, + "Acc.stairs": 0.30020000457763674, + "Acc.runway": 0.9622000122070312, + "Acc.case": 0.6758999633789062, + "Acc.pool table": 0.9712000274658203, + "Acc.pillow": 0.6127999877929687, + "Acc.screen door": 0.7547000122070312, + "Acc.stairway": 0.36630001068115237, + "Acc.river": 0.4531999969482422, + "Acc.bridge": 0.8051000213623047, + "Acc.bookcase": 0.49259998321533205, + "Acc.blind": 0.44159999847412107, + "Acc.coffee table": 0.8583000183105469, + "Acc.toilet": 0.9006999969482422, + "Acc.flower": 0.5325, + "Acc.book": 0.6127999877929687, + "Acc.hill": 0.14800000190734863, + "Acc.bench": 0.4988999938964844, + "Acc.countertop": 0.6840000152587891, + "Acc.stove": 0.8336000061035156, + "Acc.palm": 0.7036000061035156, + "Acc.kitchen island": 0.7369000244140625, + 
"Acc.computer": 0.8856999969482422, + "Acc.swivel chair": 0.575999984741211, + "Acc.boat": 0.847699966430664, + "Acc.bar": 0.732699966430664, + "Acc.arcade machine": 0.7581999969482421, + "Acc.hovel": 0.5427000045776367, + "Acc.bus": 0.9522000122070312, + "Acc.towel": 0.7597000122070312, + "Acc.light": 0.46279998779296877, + "Acc.truck": 0.43340000152587893, + "Acc.tower": 0.40240001678466797, + "Acc.chandelier": 0.7618000030517578, + "Acc.awning": 0.35639999389648436, + "Acc.streetlight": 0.24819999694824219, + "Acc.booth": 0.5729999923706055, + "Acc.television receiver": 0.7273999786376953, + "Acc.airplane": 0.6527999877929688, + "Acc.dirt track": 0.12770000457763672, + "Acc.apparel": 0.4341999816894531, + "Acc.pole": 0.1934000015258789, + "Acc.land": 0.10989999771118164, + "Acc.bannister": 0.11350000381469727, + "Acc.escalator": 0.2972999954223633, + "Acc.ottoman": 0.6429000091552735, + "Acc.bottle": 0.23040000915527345, + "Acc.buffet": 0.767300033569336, + "Acc.poster": 0.3788999938964844, + "Acc.stage": 0.42209999084472655, + "Acc.van": 0.47470001220703123, + "Acc.ship": 0.9523000335693359, + "Acc.fountain": 0.20690000534057618, + "Acc.conveyer belt": 0.9277999877929688, + "Acc.canopy": 0.34970001220703123, + "Acc.washer": 0.7145999908447266, + "Acc.plaything": 0.37740001678466795, + "Acc.swimming pool": 0.8023999786376953, + "Acc.stool": 0.4868000030517578, + "Acc.barrel": 0.6481999969482422, + "Acc.basket": 0.28549999237060547, + "Acc.waterfall": 0.7083999633789062, + "Acc.tent": 0.9945999908447266, + "Acc.bag": 0.1697999954223633, + "Acc.minibike": 0.6826000213623047, + "Acc.cradle": 0.9719999694824218, + "Acc.oven": 0.4441999816894531, + "Acc.ball": 0.5883000183105469, + "Acc.food": 0.591500015258789, + "Acc.step": 0.13789999961853028, + "Acc.tank": 0.6393000030517578, + "Acc.trade name": 0.22280000686645507, + "Acc.microwave": 0.4136000061035156, + "Acc.pot": 0.4284000015258789, + "Acc.animal": 0.5816999816894531, + "Acc.bicycle": 0.6951000213623046, + "Acc.lake": 0.7766999816894531, + "Acc.dishwasher": 0.5509000015258789, + "Acc.screen": 0.9083999633789063, + "Acc.blanket": 0.144399995803833, + "Acc.sculpture": 0.6413999938964844, + "Acc.hood": 0.577599983215332, + "Acc.sconce": 0.33119998931884764, + "Acc.vase": 0.45529998779296876, + "Acc.traffic light": 0.45470001220703127, + "Acc.tray": 0.04369999885559082, + "Acc.ashcan": 0.4818000030517578, + "Acc.fan": 0.7037999725341797, + "Acc.pier": 0.5, + "Acc.crt screen": 0.10130000114440918, + "Acc.plate": 0.619900016784668, + "Acc.monitor": 0.139399995803833, + "Acc.bulletin board": 0.547599983215332, + "Acc.shower": 0.04289999961853028, + "Acc.radiator": 0.6390999984741211, + "Acc.glass": 0.08029999732971191, + "Acc.clock": 0.3268999862670898, + "Acc.flag": 0.34040000915527346 + } + }, + "48": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8169, + "mIoU": 0.4571, + "mAcc": 0.5938, + "IoU.wall": 0.7563999938964844, + "IoU.building": 0.8209999847412109, + "IoU.sky": 0.9375, + "IoU.floor": 0.8052999877929687, + "IoU.tree": 0.730999984741211, + "IoU.ceiling": 0.822699966430664, + "IoU.road": 0.8201000213623046, + "IoU.bed ": 0.870999984741211, + "IoU.windowpane": 0.6113999938964844, + "IoU.grass": 0.6372000122070313, + "IoU.cabinet": 0.5961000061035157, + "IoU.sidewalk": 0.6304999923706055, + "IoU.person": 0.7773999786376953, + "IoU.earth": 0.3388999938964844, + "IoU.door": 0.45439998626708983, + "IoU.table": 0.5395999908447265, + 
"IoU.mountain": 0.5606999969482422, + "IoU.plant": 0.5025999832153321, + "IoU.curtain": 0.7262999725341797, + "IoU.chair": 0.5070000076293946, + "IoU.car": 0.8061000061035156, + "IoU.water": 0.5636999893188477, + "IoU.painting": 0.6666999816894531, + "IoU.sofa": 0.6231999969482422, + "IoU.shelf": 0.43389999389648437, + "IoU.house": 0.43150001525878906, + "IoU.sea": 0.6211999893188477, + "IoU.mirror": 0.6518000030517578, + "IoU.rug": 0.648499984741211, + "IoU.field": 0.2528000068664551, + "IoU.armchair": 0.40299999237060546, + "IoU.seat": 0.5947999954223633, + "IoU.fence": 0.31059999465942384, + "IoU.desk": 0.42080001831054686, + "IoU.rock": 0.42770000457763674, + "IoU.wardrobe": 0.5488000106811524, + "IoU.lamp": 0.5220999908447266, + "IoU.bathtub": 0.7416999816894532, + "IoU.railing": 0.3146999931335449, + "IoU.cushion": 0.5322000122070313, + "IoU.base": 0.28639999389648435, + "IoU.box": 0.23299999237060548, + "IoU.column": 0.44959999084472657, + "IoU.signboard": 0.3352000045776367, + "IoU.chest of drawers": 0.3715999984741211, + "IoU.counter": 0.258700008392334, + "IoU.sand": 0.3725, + "IoU.sink": 0.6756999969482422, + "IoU.skyscraper": 0.5947999954223633, + "IoU.fireplace": 0.694800033569336, + "IoU.refrigerator": 0.6861000061035156, + "IoU.grandstand": 0.4368000030517578, + "IoU.path": 0.24819999694824219, + "IoU.stairs": 0.2588999938964844, + "IoU.runway": 0.685, + "IoU.case": 0.5313000106811523, + "IoU.pool table": 0.9141999816894532, + "IoU.pillow": 0.5518999862670898, + "IoU.screen door": 0.6659999847412109, + "IoU.stairway": 0.2947999954223633, + "IoU.river": 0.23989999771118165, + "IoU.bridge": 0.6270000076293946, + "IoU.bookcase": 0.3071999931335449, + "IoU.blind": 0.4370000076293945, + "IoU.coffee table": 0.5220000076293946, + "IoU.toilet": 0.7869999694824219, + "IoU.flower": 0.32459999084472657, + "IoU.book": 0.4261000061035156, + "IoU.hill": 0.08939999580383301, + "IoU.bench": 0.4034000015258789, + "IoU.countertop": 0.5843999862670899, + "IoU.stove": 0.6833000183105469, + "IoU.palm": 0.48150001525878905, + "IoU.kitchen island": 0.31579999923706054, + "IoU.computer": 0.7073000335693359, + "IoU.swivel chair": 0.4381999969482422, + "IoU.boat": 0.6897000122070313, + "IoU.bar": 0.532400016784668, + "IoU.arcade machine": 0.6456999969482422, + "IoU.hovel": 0.5372000122070313, + "IoU.bus": 0.7212000274658203, + "IoU.towel": 0.5879999923706055, + "IoU.light": 0.3858000183105469, + "IoU.truck": 0.22489999771118163, + "IoU.tower": 0.33849998474121096, + "IoU.chandelier": 0.5904000091552735, + "IoU.awning": 0.3252000045776367, + "IoU.streetlight": 0.18200000762939453, + "IoU.booth": 0.39310001373291015, + "IoU.television receiver": 0.609900016784668, + "IoU.airplane": 0.5606000137329101, + "IoU.dirt track": 0.27110000610351564, + "IoU.apparel": 0.32099998474121094, + "IoU.pole": 0.15420000076293947, + "IoU.land": 0.04519999980926514, + "IoU.bannister": 0.07769999980926513, + "IoU.escalator": 0.28219999313354494, + "IoU.ottoman": 0.49279998779296874, + "IoU.bottle": 0.35009998321533203, + "IoU.buffet": 0.5918000030517578, + "IoU.poster": 0.2702000045776367, + "IoU.stage": 0.145600004196167, + "IoU.van": 0.3579000091552734, + "IoU.ship": 0.6869000244140625, + "IoU.fountain": 0.18420000076293946, + "IoU.conveyer belt": 0.6929000091552734, + "IoU.canopy": 0.21549999237060546, + "IoU.washer": 0.6947000122070313, + "IoU.plaything": 0.24209999084472655, + "IoU.swimming pool": 0.49990001678466794, + "IoU.stool": 0.2747999954223633, + "IoU.barrel": 0.23690000534057618, + "IoU.basket": 
0.21260000228881837, + "IoU.waterfall": 0.5383000183105469, + "IoU.tent": 0.8708999633789063, + "IoU.bag": 0.08949999809265137, + "IoU.minibike": 0.5002999877929688, + "IoU.cradle": 0.8002999877929687, + "IoU.oven": 0.16020000457763672, + "IoU.ball": 0.4211000061035156, + "IoU.food": 0.513400001525879, + "IoU.step": 0.11970000267028809, + "IoU.tank": 0.509900016784668, + "IoU.trade name": 0.2584000015258789, + "IoU.microwave": 0.35369998931884766, + "IoU.pot": 0.38599998474121094, + "IoU.animal": 0.5729999923706055, + "IoU.bicycle": 0.4468000030517578, + "IoU.lake": 0.5850999832153321, + "IoU.dishwasher": 0.4766999816894531, + "IoU.screen": 0.6231999969482422, + "IoU.blanket": 0.13579999923706054, + "IoU.sculpture": 0.39189998626708983, + "IoU.hood": 0.46060001373291015, + "IoU.sconce": 0.2840999984741211, + "IoU.vase": 0.2770999908447266, + "IoU.traffic light": 0.2352000045776367, + "IoU.tray": 0.03869999885559082, + "IoU.ashcan": 0.28420000076293944, + "IoU.fan": 0.4633000183105469, + "IoU.pier": 0.21639999389648437, + "IoU.crt screen": 0.025199999809265138, + "IoU.plate": 0.4518999862670898, + "IoU.monitor": 0.025399999618530275, + "IoU.bulletin board": 0.3527000045776367, + "IoU.shower": 0.00800000011920929, + "IoU.radiator": 0.5297999954223633, + "IoU.glass": 0.07789999961853028, + "IoU.clock": 0.24420000076293946, + "IoU.flag": 0.32419998168945313, + "Acc.wall": 0.8656999969482422, + "Acc.building": 0.9229000091552735, + "Acc.sky": 0.9588999938964844, + "Acc.floor": 0.8852999877929687, + "Acc.tree": 0.8898999786376953, + "Acc.ceiling": 0.8831999969482421, + "Acc.road": 0.8838999938964843, + "Acc.bed ": 0.9588999938964844, + "Acc.windowpane": 0.7644000244140625, + "Acc.grass": 0.8061000061035156, + "Acc.cabinet": 0.7425, + "Acc.sidewalk": 0.8044999694824219, + "Acc.person": 0.9165000152587891, + "Acc.earth": 0.4606999969482422, + "Acc.door": 0.599900016784668, + "Acc.table": 0.6923999786376953, + "Acc.mountain": 0.7415000152587891, + "Acc.plant": 0.5970999908447265, + "Acc.curtain": 0.856500015258789, + "Acc.chair": 0.6673999786376953, + "Acc.car": 0.8997000122070312, + "Acc.water": 0.6812999725341797, + "Acc.painting": 0.8586000061035156, + "Acc.sofa": 0.8038999938964844, + "Acc.shelf": 0.5902999877929688, + "Acc.house": 0.549000015258789, + "Acc.sea": 0.8341999816894531, + "Acc.mirror": 0.7594000244140625, + "Acc.rug": 0.7926999664306641, + "Acc.field": 0.41470001220703123, + "Acc.armchair": 0.5818999862670898, + "Acc.seat": 0.7991000366210937, + "Acc.fence": 0.43400001525878906, + "Acc.desk": 0.7255999755859375, + "Acc.rock": 0.589900016784668, + "Acc.wardrobe": 0.7343000030517578, + "Acc.lamp": 0.7190000152587891, + "Acc.bathtub": 0.7951000213623047, + "Acc.railing": 0.43849998474121094, + "Acc.cushion": 0.6683999633789063, + "Acc.base": 0.5386999893188477, + "Acc.box": 0.2940999984741211, + "Acc.column": 0.5709000015258789, + "Acc.signboard": 0.4284999847412109, + "Acc.chest of drawers": 0.5715999984741211, + "Acc.counter": 0.34369998931884765, + "Acc.sand": 0.6024000167846679, + "Acc.sink": 0.7529000091552734, + "Acc.skyscraper": 0.7423000335693359, + "Acc.fireplace": 0.8813999938964844, + "Acc.refrigerator": 0.8480000305175781, + "Acc.grandstand": 0.72, + "Acc.path": 0.38110000610351563, + "Acc.stairs": 0.3708000183105469, + "Acc.runway": 0.9037000274658203, + "Acc.case": 0.6916000366210937, + "Acc.pool table": 0.9730999755859375, + "Acc.pillow": 0.6512000274658203, + "Acc.screen door": 0.8004000091552734, + "Acc.stairway": 0.414900016784668, + "Acc.river": 0.49740001678466794, 
+ "Acc.bridge": 0.8383000183105469, + "Acc.bookcase": 0.47060001373291016, + "Acc.blind": 0.49779998779296875, + "Acc.coffee table": 0.8441000366210938, + "Acc.toilet": 0.9054000091552734, + "Acc.flower": 0.5034999847412109, + "Acc.book": 0.630099983215332, + "Acc.hill": 0.1713999938964844, + "Acc.bench": 0.4993000030517578, + "Acc.countertop": 0.7440000152587891, + "Acc.stove": 0.8151000213623046, + "Acc.palm": 0.701500015258789, + "Acc.kitchen island": 0.6716999816894531, + "Acc.computer": 0.8812999725341797, + "Acc.swivel chair": 0.5586000061035157, + "Acc.boat": 0.8577999877929687, + "Acc.bar": 0.71, + "Acc.arcade machine": 0.7944999694824219, + "Acc.hovel": 0.620999984741211, + "Acc.bus": 0.9094000244140625, + "Acc.towel": 0.7593000030517578, + "Acc.light": 0.4734999847412109, + "Acc.truck": 0.33860000610351565, + "Acc.tower": 0.534000015258789, + "Acc.chandelier": 0.7473000335693359, + "Acc.awning": 0.39540000915527346, + "Acc.streetlight": 0.25959999084472657, + "Acc.booth": 0.49970001220703125, + "Acc.television receiver": 0.75, + "Acc.airplane": 0.6552999877929687, + "Acc.dirt track": 0.4961999893188477, + "Acc.apparel": 0.46529998779296877, + "Acc.pole": 0.19170000076293944, + "Acc.land": 0.08939999580383301, + "Acc.bannister": 0.12489999771118164, + "Acc.escalator": 0.32970001220703127, + "Acc.ottoman": 0.6702999877929687, + "Acc.bottle": 0.5411000061035156, + "Acc.buffet": 0.7533000183105468, + "Acc.poster": 0.31829999923706054, + "Acc.stage": 0.37860000610351563, + "Acc.van": 0.43040000915527343, + "Acc.ship": 0.7083000183105469, + "Acc.fountain": 0.2125, + "Acc.conveyer belt": 0.9268000030517578, + "Acc.canopy": 0.2996999931335449, + "Acc.washer": 0.7193000030517578, + "Acc.plaything": 0.37810001373291013, + "Acc.swimming pool": 0.8283000183105469, + "Acc.stool": 0.41959999084472654, + "Acc.barrel": 0.6480000305175782, + "Acc.basket": 0.28149999618530275, + "Acc.waterfall": 0.6252999877929688, + "Acc.tent": 0.9933999633789062, + "Acc.bag": 0.09819999694824219, + "Acc.minibike": 0.6059000015258789, + "Acc.cradle": 0.9722000122070312, + "Acc.oven": 0.4290999984741211, + "Acc.ball": 0.507400016784668, + "Acc.food": 0.5931999969482422, + "Acc.step": 0.1452999973297119, + "Acc.tank": 0.614900016784668, + "Acc.trade name": 0.296299991607666, + "Acc.microwave": 0.40119998931884765, + "Acc.pot": 0.4613999938964844, + "Acc.animal": 0.6168999862670899, + "Acc.bicycle": 0.727699966430664, + "Acc.lake": 0.7469999694824219, + "Acc.dishwasher": 0.610999984741211, + "Acc.screen": 0.9148000335693359, + "Acc.blanket": 0.1543000030517578, + "Acc.sculpture": 0.5818999862670898, + "Acc.hood": 0.5522000122070313, + "Acc.sconce": 0.3845000076293945, + "Acc.vase": 0.43439998626708987, + "Acc.traffic light": 0.48650001525878905, + "Acc.tray": 0.055500001907348634, + "Acc.ashcan": 0.37849998474121094, + "Acc.fan": 0.7337000274658203, + "Acc.pier": 0.4929999923706055, + "Acc.crt screen": 0.0771999979019165, + "Acc.plate": 0.5986000061035156, + "Acc.monitor": 0.027899999618530274, + "Acc.bulletin board": 0.5540000152587891, + "Acc.shower": 0.018700000047683716, + "Acc.radiator": 0.6363000106811524, + "Acc.glass": 0.08680000305175781, + "Acc.clock": 0.2925, + "Acc.flag": 0.36630001068115237 + } + }, + "49": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8154, + "mIoU": 0.4482, + "mAcc": 0.5796, + "IoU.wall": 0.7531999969482421, + "IoU.building": 0.8168000030517578, + "IoU.sky": 0.9387999725341797, + 
"IoU.floor": 0.8065000152587891, + "IoU.tree": 0.7319000244140625, + "IoU.ceiling": 0.8225, + "IoU.road": 0.8168000030517578, + "IoU.bed ": 0.8686000061035156, + "IoU.windowpane": 0.6102000045776367, + "IoU.grass": 0.6587000274658203, + "IoU.cabinet": 0.5854999923706055, + "IoU.sidewalk": 0.6218000030517579, + "IoU.person": 0.7819000244140625, + "IoU.earth": 0.34450000762939453, + "IoU.door": 0.477400016784668, + "IoU.table": 0.5459000015258789, + "IoU.mountain": 0.5845999908447266, + "IoU.plant": 0.4958000183105469, + "IoU.curtain": 0.7233999633789062, + "IoU.chair": 0.5138000106811523, + "IoU.car": 0.7944999694824219, + "IoU.water": 0.5231999969482422, + "IoU.painting": 0.6744000244140625, + "IoU.sofa": 0.6334000015258789, + "IoU.shelf": 0.4140999984741211, + "IoU.house": 0.4377000045776367, + "IoU.sea": 0.5552000045776367, + "IoU.mirror": 0.6462999725341797, + "IoU.rug": 0.6687000274658204, + "IoU.field": 0.27540000915527346, + "IoU.armchair": 0.43150001525878906, + "IoU.seat": 0.6154000091552735, + "IoU.fence": 0.3981999969482422, + "IoU.desk": 0.39490001678466796, + "IoU.rock": 0.41869998931884767, + "IoU.wardrobe": 0.47319999694824216, + "IoU.lamp": 0.5215999984741211, + "IoU.bathtub": 0.7261000061035157, + "IoU.railing": 0.315, + "IoU.cushion": 0.5293000030517578, + "IoU.base": 0.2893000030517578, + "IoU.box": 0.2209000015258789, + "IoU.column": 0.45430000305175783, + "IoU.signboard": 0.3365999984741211, + "IoU.chest of drawers": 0.3543000030517578, + "IoU.counter": 0.24739999771118165, + "IoU.sand": 0.38619998931884764, + "IoU.sink": 0.625999984741211, + "IoU.skyscraper": 0.5029999923706054, + "IoU.fireplace": 0.6908000183105468, + "IoU.refrigerator": 0.7234999847412109, + "IoU.grandstand": 0.37759998321533206, + "IoU.path": 0.24979999542236328, + "IoU.stairs": 0.2721999931335449, + "IoU.runway": 0.7344000244140625, + "IoU.case": 0.47650001525878904, + "IoU.pool table": 0.8862000274658203, + "IoU.pillow": 0.5545000076293946, + "IoU.screen door": 0.6229999923706054, + "IoU.stairway": 0.30559999465942383, + "IoU.river": 0.15550000190734864, + "IoU.bridge": 0.6295999908447265, + "IoU.bookcase": 0.2955999946594238, + "IoU.blind": 0.3686000061035156, + "IoU.coffee table": 0.5138999938964843, + "IoU.toilet": 0.7766000366210938, + "IoU.flower": 0.327599983215332, + "IoU.book": 0.41950000762939454, + "IoU.hill": 0.10090000152587891, + "IoU.bench": 0.46110000610351565, + "IoU.countertop": 0.5866999816894531, + "IoU.stove": 0.6787000274658204, + "IoU.palm": 0.4816999816894531, + "IoU.kitchen island": 0.3672999954223633, + "IoU.computer": 0.6190999984741211, + "IoU.swivel chair": 0.4470000076293945, + "IoU.boat": 0.6873000335693359, + "IoU.bar": 0.4897999954223633, + "IoU.arcade machine": 0.4363999938964844, + "IoU.hovel": 0.45169998168945313, + "IoU.bus": 0.6958000183105468, + "IoU.towel": 0.5631000137329102, + "IoU.light": 0.4127999877929687, + "IoU.truck": 0.1840999984741211, + "IoU.tower": 0.3666999816894531, + "IoU.chandelier": 0.5879000091552734, + "IoU.awning": 0.3436000061035156, + "IoU.streetlight": 0.183700008392334, + "IoU.booth": 0.4009000015258789, + "IoU.television receiver": 0.5929999923706055, + "IoU.airplane": 0.5609999847412109, + "IoU.dirt track": 0.39240001678466796, + "IoU.apparel": 0.271299991607666, + "IoU.pole": 0.20709999084472655, + "IoU.land": 0.03720000028610229, + "IoU.bannister": 0.09550000190734863, + "IoU.escalator": 0.3865999984741211, + "IoU.ottoman": 0.47400001525878904, + "IoU.bottle": 0.16989999771118164, + "IoU.buffet": 0.5358000183105469, + "IoU.poster": 
0.24950000762939453, + "IoU.stage": 0.11020000457763672, + "IoU.van": 0.35459999084472654, + "IoU.ship": 0.6343000030517578, + "IoU.fountain": 0.168799991607666, + "IoU.conveyer belt": 0.6277999877929688, + "IoU.canopy": 0.2128000068664551, + "IoU.washer": 0.655, + "IoU.plaything": 0.255, + "IoU.swimming pool": 0.5827000045776367, + "IoU.stool": 0.2531999969482422, + "IoU.barrel": 0.09930000305175782, + "IoU.basket": 0.20299999237060548, + "IoU.waterfall": 0.5420999908447266, + "IoU.tent": 0.8662999725341797, + "IoU.bag": 0.08989999771118164, + "IoU.minibike": 0.5633000183105469, + "IoU.cradle": 0.7495999908447266, + "IoU.oven": 0.15199999809265136, + "IoU.ball": 0.3895999908447266, + "IoU.food": 0.5522999954223633, + "IoU.step": 0.138100004196167, + "IoU.tank": 0.49990001678466794, + "IoU.trade name": 0.20420000076293945, + "IoU.microwave": 0.33189998626708983, + "IoU.pot": 0.34709999084472654, + "IoU.animal": 0.5672000122070312, + "IoU.bicycle": 0.46919998168945315, + "IoU.lake": 0.31559999465942384, + "IoU.dishwasher": 0.45439998626708983, + "IoU.screen": 0.6944000244140625, + "IoU.blanket": 0.09539999961853027, + "IoU.sculpture": 0.44479999542236326, + "IoU.hood": 0.47450000762939454, + "IoU.sconce": 0.2827000045776367, + "IoU.vase": 0.24489999771118165, + "IoU.traffic light": 0.25209999084472656, + "IoU.tray": 0.05190000057220459, + "IoU.ashcan": 0.27549999237060546, + "IoU.fan": 0.43509998321533205, + "IoU.pier": 0.27360000610351565, + "IoU.crt screen": 0.024600000381469728, + "IoU.plate": 0.42279998779296873, + "IoU.monitor": 0.023299999237060547, + "IoU.bulletin board": 0.3711000061035156, + "IoU.shower": 0.02619999885559082, + "IoU.radiator": 0.5161999893188477, + "IoU.glass": 0.06679999828338623, + "IoU.clock": 0.20110000610351564, + "IoU.flag": 0.3931999969482422, + "Acc.wall": 0.859800033569336, + "Acc.building": 0.923499984741211, + "Acc.sky": 0.9633000183105469, + "Acc.floor": 0.8886000061035156, + "Acc.tree": 0.8795999908447265, + "Acc.ceiling": 0.8795999908447265, + "Acc.road": 0.8812999725341797, + "Acc.bed ": 0.9605000305175782, + "Acc.windowpane": 0.7688999938964843, + "Acc.grass": 0.8269000244140625, + "Acc.cabinet": 0.7191999816894531, + "Acc.sidewalk": 0.8193000030517578, + "Acc.person": 0.9161000061035156, + "Acc.earth": 0.46680000305175784, + "Acc.door": 0.6461000061035156, + "Acc.table": 0.6983000183105469, + "Acc.mountain": 0.7423000335693359, + "Acc.plant": 0.5884000015258789, + "Acc.curtain": 0.8651999664306641, + "Acc.chair": 0.6655999755859375, + "Acc.car": 0.9052999877929687, + "Acc.water": 0.6598999786376953, + "Acc.painting": 0.8722000122070312, + "Acc.sofa": 0.7831999969482422, + "Acc.shelf": 0.5879999923706055, + "Acc.house": 0.5422999954223633, + "Acc.sea": 0.7943000030517579, + "Acc.mirror": 0.7751000213623047, + "Acc.rug": 0.780199966430664, + "Acc.field": 0.44459999084472657, + "Acc.armchair": 0.6643000030517578, + "Acc.seat": 0.8095999908447266, + "Acc.fence": 0.5795000076293946, + "Acc.desk": 0.729800033569336, + "Acc.rock": 0.6343999862670898, + "Acc.wardrobe": 0.6819000244140625, + "Acc.lamp": 0.6963999938964843, + "Acc.bathtub": 0.7959999847412109, + "Acc.railing": 0.4465999984741211, + "Acc.cushion": 0.6552999877929687, + "Acc.base": 0.5347000122070312, + "Acc.box": 0.28549999237060547, + "Acc.column": 0.5763999938964843, + "Acc.signboard": 0.4493000030517578, + "Acc.chest of drawers": 0.5170000076293946, + "Acc.counter": 0.34369998931884765, + "Acc.sand": 0.5722000122070312, + "Acc.sink": 0.7343000030517578, + "Acc.skyscraper": 
0.6193999862670898, + "Acc.fireplace": 0.89, + "Acc.refrigerator": 0.8576000213623047, + "Acc.grandstand": 0.6880999755859375, + "Acc.path": 0.357400016784668, + "Acc.stairs": 0.37689998626708987, + "Acc.runway": 0.9101000213623047, + "Acc.case": 0.6013999938964844, + "Acc.pool table": 0.9687000274658203, + "Acc.pillow": 0.6483999633789063, + "Acc.screen door": 0.7430000305175781, + "Acc.stairway": 0.44799999237060545, + "Acc.river": 0.29620000839233396, + "Acc.bridge": 0.821500015258789, + "Acc.bookcase": 0.4793000030517578, + "Acc.blind": 0.40919998168945315, + "Acc.coffee table": 0.8583000183105469, + "Acc.toilet": 0.8941000366210937, + "Acc.flower": 0.5506999969482422, + "Acc.book": 0.6224000167846679, + "Acc.hill": 0.20969999313354493, + "Acc.bench": 0.5388000106811524, + "Acc.countertop": 0.7290000152587891, + "Acc.stove": 0.8231999969482422, + "Acc.palm": 0.7006999969482421, + "Acc.kitchen island": 0.7361000061035157, + "Acc.computer": 0.7879000091552735, + "Acc.swivel chair": 0.6120000076293945, + "Acc.boat": 0.8351999664306641, + "Acc.bar": 0.6680000305175782, + "Acc.arcade machine": 0.5063000106811524, + "Acc.hovel": 0.518400001525879, + "Acc.bus": 0.9066999816894531, + "Acc.towel": 0.724800033569336, + "Acc.light": 0.5166999816894531, + "Acc.truck": 0.2713999938964844, + "Acc.tower": 0.5661000061035156, + "Acc.chandelier": 0.7937000274658204, + "Acc.awning": 0.4459000015258789, + "Acc.streetlight": 0.25329999923706054, + "Acc.booth": 0.5536000061035157, + "Acc.television receiver": 0.7530000305175781, + "Acc.airplane": 0.6529000091552735, + "Acc.dirt track": 0.4772999954223633, + "Acc.apparel": 0.3963999938964844, + "Acc.pole": 0.26059999465942385, + "Acc.land": 0.06920000076293946, + "Acc.bannister": 0.14050000190734863, + "Acc.escalator": 0.48619998931884767, + "Acc.ottoman": 0.6715000152587891, + "Acc.bottle": 0.20629999160766602, + "Acc.buffet": 0.699800033569336, + "Acc.poster": 0.29620000839233396, + "Acc.stage": 0.3490999984741211, + "Acc.van": 0.41569999694824217, + "Acc.ship": 0.6769999694824219, + "Acc.fountain": 0.17780000686645508, + "Acc.conveyer belt": 0.8008999633789062, + "Acc.canopy": 0.2836000061035156, + "Acc.washer": 0.6775, + "Acc.plaything": 0.39560001373291015, + "Acc.swimming pool": 0.8030999755859375, + "Acc.stool": 0.37209999084472656, + "Acc.barrel": 0.3163999938964844, + "Acc.basket": 0.25379999160766603, + "Acc.waterfall": 0.6193000030517578, + "Acc.tent": 0.9943000030517578, + "Acc.bag": 0.10010000228881837, + "Acc.minibike": 0.6762999725341797, + "Acc.cradle": 0.9648999786376953, + "Acc.oven": 0.4068000030517578, + "Acc.ball": 0.515, + "Acc.food": 0.6345000076293945, + "Acc.step": 0.16489999771118163, + "Acc.tank": 0.5754999923706055, + "Acc.trade name": 0.22870000839233398, + "Acc.microwave": 0.36779998779296874, + "Acc.pot": 0.4152000045776367, + "Acc.animal": 0.6284999847412109, + "Acc.bicycle": 0.6648000335693359, + "Acc.lake": 0.49340000152587893, + "Acc.dishwasher": 0.5952000045776367, + "Acc.screen": 0.9065000152587891, + "Acc.blanket": 0.10819999694824219, + "Acc.sculpture": 0.6377000045776368, + "Acc.hood": 0.5241999816894531, + "Acc.sconce": 0.3606999969482422, + "Acc.vase": 0.39630001068115234, + "Acc.traffic light": 0.4463000106811523, + "Acc.tray": 0.08420000076293946, + "Acc.ashcan": 0.41869998931884767, + "Acc.fan": 0.7162999725341797, + "Acc.pier": 0.45599998474121095, + "Acc.crt screen": 0.07260000228881835, + "Acc.plate": 0.5738000106811524, + "Acc.monitor": 0.03059999942779541, + "Acc.bulletin board": 0.5608000183105468, + 
"Acc.shower": 0.05139999866485596, + "Acc.radiator": 0.6108000183105469, + "Acc.glass": 0.07380000114440918, + "Acc.clock": 0.22790000915527345, + "Acc.flag": 0.4527000045776367 + } + }, + "50": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8173999999999999, + "mIoU": 0.4571, + "mAcc": 0.5905, + "IoU.wall": 0.7579000091552734, + "IoU.building": 0.8190000152587891, + "IoU.sky": 0.9388999938964844, + "IoU.floor": 0.8062999725341797, + "IoU.tree": 0.7376000213623047, + "IoU.ceiling": 0.829800033569336, + "IoU.road": 0.8162000274658203, + "IoU.bed ": 0.8658000183105469, + "IoU.windowpane": 0.6084999847412109, + "IoU.grass": 0.6494999694824218, + "IoU.cabinet": 0.582599983215332, + "IoU.sidewalk": 0.629000015258789, + "IoU.person": 0.7706999969482422, + "IoU.earth": 0.3356999969482422, + "IoU.door": 0.4859000015258789, + "IoU.table": 0.5616999816894531, + "IoU.mountain": 0.5740000152587891, + "IoU.plant": 0.4986000061035156, + "IoU.curtain": 0.7241000366210938, + "IoU.chair": 0.5193000030517578, + "IoU.car": 0.797300033569336, + "IoU.water": 0.5366999816894531, + "IoU.painting": 0.6811000061035156, + "IoU.sofa": 0.6265999984741211, + "IoU.shelf": 0.42060001373291017, + "IoU.house": 0.47319999694824216, + "IoU.sea": 0.6370999908447266, + "IoU.mirror": 0.6555000305175781, + "IoU.rug": 0.668499984741211, + "IoU.field": 0.2809000015258789, + "IoU.armchair": 0.44049999237060544, + "IoU.seat": 0.604900016784668, + "IoU.fence": 0.4040999984741211, + "IoU.desk": 0.42380001068115236, + "IoU.rock": 0.3963999938964844, + "IoU.wardrobe": 0.5075, + "IoU.lamp": 0.5293999862670898, + "IoU.bathtub": 0.7166999816894531, + "IoU.railing": 0.32209999084472657, + "IoU.cushion": 0.5158000183105469, + "IoU.base": 0.2669000053405762, + "IoU.box": 0.21850000381469725, + "IoU.column": 0.45299999237060545, + "IoU.signboard": 0.34439998626708984, + "IoU.chest of drawers": 0.3547999954223633, + "IoU.counter": 0.27030000686645506, + "IoU.sand": 0.36939998626708986, + "IoU.sink": 0.6458999633789062, + "IoU.skyscraper": 0.5075999832153321, + "IoU.fireplace": 0.7105000305175782, + "IoU.refrigerator": 0.7301999664306641, + "IoU.grandstand": 0.4015999984741211, + "IoU.path": 0.2272999954223633, + "IoU.stairs": 0.2618000030517578, + "IoU.runway": 0.6875, + "IoU.case": 0.5045999908447265, + "IoU.pool table": 0.8798999786376953, + "IoU.pillow": 0.5729999923706055, + "IoU.screen door": 0.6222000122070312, + "IoU.stairway": 0.2780999946594238, + "IoU.river": 0.20149999618530273, + "IoU.bridge": 0.6812999725341797, + "IoU.bookcase": 0.3160000038146973, + "IoU.blind": 0.4097999954223633, + "IoU.coffee table": 0.5740999984741211, + "IoU.toilet": 0.8055000305175781, + "IoU.flower": 0.3313999938964844, + "IoU.book": 0.41630001068115235, + "IoU.hill": 0.10220000267028809, + "IoU.bench": 0.41049999237060547, + "IoU.countertop": 0.574900016784668, + "IoU.stove": 0.6726000213623047, + "IoU.palm": 0.4956000137329102, + "IoU.kitchen island": 0.37270000457763675, + "IoU.computer": 0.6972000122070312, + "IoU.swivel chair": 0.4584000015258789, + "IoU.boat": 0.6994000244140625, + "IoU.bar": 0.5138000106811523, + "IoU.arcade machine": 0.43970001220703125, + "IoU.hovel": 0.4329999923706055, + "IoU.bus": 0.6444999694824218, + "IoU.towel": 0.5783000183105469, + "IoU.light": 0.3877000045776367, + "IoU.truck": 0.233799991607666, + "IoU.tower": 0.2645999908447266, + "IoU.chandelier": 0.6020000076293945, + "IoU.awning": 0.2820999908447266, + "IoU.streetlight": 
0.1946999931335449, + "IoU.booth": 0.32299999237060545, + "IoU.television receiver": 0.5927000045776367, + "IoU.airplane": 0.5750999832153321, + "IoU.dirt track": 0.22899999618530273, + "IoU.apparel": 0.2904999923706055, + "IoU.pole": 0.26, + "IoU.land": 0.035799999237060544, + "IoU.bannister": 0.10449999809265137, + "IoU.escalator": 0.4022999954223633, + "IoU.ottoman": 0.5038999938964843, + "IoU.bottle": 0.2740999984741211, + "IoU.buffet": 0.4891999816894531, + "IoU.poster": 0.22799999237060548, + "IoU.stage": 0.12050000190734864, + "IoU.van": 0.3604999923706055, + "IoU.ship": 0.7676999664306641, + "IoU.fountain": 0.176200008392334, + "IoU.conveyer belt": 0.6804000091552734, + "IoU.canopy": 0.19190000534057616, + "IoU.washer": 0.6894000244140625, + "IoU.plaything": 0.23020000457763673, + "IoU.swimming pool": 0.6397000122070312, + "IoU.stool": 0.27510000228881837, + "IoU.barrel": 0.4884999847412109, + "IoU.basket": 0.21450000762939453, + "IoU.waterfall": 0.5670000076293945, + "IoU.tent": 0.8906999969482422, + "IoU.bag": 0.1381999969482422, + "IoU.minibike": 0.518499984741211, + "IoU.cradle": 0.7697000122070312, + "IoU.oven": 0.1472000026702881, + "IoU.ball": 0.3806999969482422, + "IoU.food": 0.5559999847412109, + "IoU.step": 0.15699999809265136, + "IoU.tank": 0.509000015258789, + "IoU.trade name": 0.23530000686645508, + "IoU.microwave": 0.3383000183105469, + "IoU.pot": 0.32369998931884764, + "IoU.animal": 0.5950999832153321, + "IoU.bicycle": 0.48200000762939454, + "IoU.lake": 0.5561000061035156, + "IoU.dishwasher": 0.5252000045776367, + "IoU.screen": 0.5156999969482422, + "IoU.blanket": 0.0653000020980835, + "IoU.sculpture": 0.47880001068115235, + "IoU.hood": 0.47779998779296873, + "IoU.sconce": 0.35450000762939454, + "IoU.vase": 0.25409999847412107, + "IoU.traffic light": 0.24379999160766602, + "IoU.tray": 0.04630000114440918, + "IoU.ashcan": 0.2911000061035156, + "IoU.fan": 0.47509998321533203, + "IoU.pier": 0.2596999931335449, + "IoU.crt screen": 0.0014000000059604645, + "IoU.plate": 0.43099998474121093, + "IoU.monitor": 0.02190000057220459, + "IoU.bulletin board": 0.3591999816894531, + "IoU.shower": 0.00550000011920929, + "IoU.radiator": 0.5358000183105469, + "IoU.glass": 0.084399995803833, + "IoU.clock": 0.23170000076293945, + "IoU.flag": 0.3420999908447266, + "Acc.wall": 0.865999984741211, + "Acc.building": 0.9194999694824219, + "Acc.sky": 0.9636000061035156, + "Acc.floor": 0.8879000091552735, + "Acc.tree": 0.8804000091552734, + "Acc.ceiling": 0.8901000213623047, + "Acc.road": 0.8863999938964844, + "Acc.bed ": 0.959800033569336, + "Acc.windowpane": 0.7620999908447266, + "Acc.grass": 0.7965000152587891, + "Acc.cabinet": 0.7, + "Acc.sidewalk": 0.8062000274658203, + "Acc.person": 0.9294000244140626, + "Acc.earth": 0.4445000076293945, + "Acc.door": 0.6501000213623047, + "Acc.table": 0.7175, + "Acc.mountain": 0.7401000213623047, + "Acc.plant": 0.5968000030517578, + "Acc.curtain": 0.8640000152587891, + "Acc.chair": 0.6726000213623047, + "Acc.car": 0.9118000030517578, + "Acc.water": 0.6630000305175782, + "Acc.painting": 0.8486000061035156, + "Acc.sofa": 0.7863999938964844, + "Acc.shelf": 0.6204999923706055, + "Acc.house": 0.5775, + "Acc.sea": 0.865999984741211, + "Acc.mirror": 0.7820999908447266, + "Acc.rug": 0.7187999725341797, + "Acc.field": 0.48, + "Acc.armchair": 0.6741999816894532, + "Acc.seat": 0.8240000152587891, + "Acc.fence": 0.599900016784668, + "Acc.desk": 0.6927999877929687, + "Acc.rock": 0.6338000106811523, + "Acc.wardrobe": 0.7195999908447266, + "Acc.lamp": 0.6751000213623047, 
+ "Acc.bathtub": 0.7865000152587891, + "Acc.railing": 0.45990001678466796, + "Acc.cushion": 0.6247999954223633, + "Acc.base": 0.429900016784668, + "Acc.box": 0.26360000610351564, + "Acc.column": 0.5938999938964844, + "Acc.signboard": 0.4518000030517578, + "Acc.chest of drawers": 0.5691999816894531, + "Acc.counter": 0.3516999816894531, + "Acc.sand": 0.5168999862670899, + "Acc.sink": 0.7491999816894531, + "Acc.skyscraper": 0.630099983215332, + "Acc.fireplace": 0.8983000183105468, + "Acc.refrigerator": 0.8834999847412109, + "Acc.grandstand": 0.7041000366210938, + "Acc.path": 0.31989999771118166, + "Acc.stairs": 0.3708000183105469, + "Acc.runway": 0.9526000213623047, + "Acc.case": 0.7269999694824218, + "Acc.pool table": 0.9719000244140625, + "Acc.pillow": 0.7163999938964843, + "Acc.screen door": 0.7191999816894531, + "Acc.stairway": 0.39490001678466796, + "Acc.river": 0.44740001678466795, + "Acc.bridge": 0.846500015258789, + "Acc.bookcase": 0.5531000137329102, + "Acc.blind": 0.47049999237060547, + "Acc.coffee table": 0.8223999786376953, + "Acc.toilet": 0.8919999694824219, + "Acc.flower": 0.5356999969482422, + "Acc.book": 0.6022999954223632, + "Acc.hill": 0.22200000762939454, + "Acc.bench": 0.5027000045776367, + "Acc.countertop": 0.7208000183105469, + "Acc.stove": 0.8125, + "Acc.palm": 0.6938999938964844, + "Acc.kitchen island": 0.7755999755859375, + "Acc.computer": 0.8508999633789063, + "Acc.swivel chair": 0.6498000335693359, + "Acc.boat": 0.8216000366210937, + "Acc.bar": 0.7143000030517578, + "Acc.arcade machine": 0.48650001525878905, + "Acc.hovel": 0.4931999969482422, + "Acc.bus": 0.9233000183105469, + "Acc.towel": 0.7325, + "Acc.light": 0.4520999908447266, + "Acc.truck": 0.3477000045776367, + "Acc.tower": 0.42700000762939455, + "Acc.chandelier": 0.7755999755859375, + "Acc.awning": 0.35650001525878905, + "Acc.streetlight": 0.2440999984741211, + "Acc.booth": 0.5095000076293945, + "Acc.television receiver": 0.7647000122070312, + "Acc.airplane": 0.6633000183105469, + "Acc.dirt track": 0.35770000457763673, + "Acc.apparel": 0.4318000030517578, + "Acc.pole": 0.3809000015258789, + "Acc.land": 0.09199999809265137, + "Acc.bannister": 0.16549999237060548, + "Acc.escalator": 0.5213000106811524, + "Acc.ottoman": 0.6643000030517578, + "Acc.bottle": 0.38349998474121094, + "Acc.buffet": 0.6601000213623047, + "Acc.poster": 0.28450000762939454, + "Acc.stage": 0.3845000076293945, + "Acc.van": 0.435, + "Acc.ship": 0.8658999633789063, + "Acc.fountain": 0.19899999618530273, + "Acc.conveyer belt": 0.9126000213623047, + "Acc.canopy": 0.26680000305175783, + "Acc.washer": 0.6943000030517578, + "Acc.plaything": 0.37810001373291013, + "Acc.swimming pool": 0.8444999694824219, + "Acc.stool": 0.4109999847412109, + "Acc.barrel": 0.6195999908447266, + "Acc.basket": 0.30120000839233396, + "Acc.waterfall": 0.655199966430664, + "Acc.tent": 0.9895999908447266, + "Acc.bag": 0.1722999954223633, + "Acc.minibike": 0.6625, + "Acc.cradle": 0.9751999664306641, + "Acc.oven": 0.400099983215332, + "Acc.ball": 0.4727999877929687, + "Acc.food": 0.6520999908447266, + "Acc.step": 0.19360000610351563, + "Acc.tank": 0.6333000183105468, + "Acc.trade name": 0.27790000915527346, + "Acc.microwave": 0.3808000183105469, + "Acc.pot": 0.3838000106811523, + "Acc.animal": 0.6415000152587891, + "Acc.bicycle": 0.7133999633789062, + "Acc.lake": 0.6458000183105469, + "Acc.dishwasher": 0.6291999816894531, + "Acc.screen": 0.9080999755859375, + "Acc.blanket": 0.07429999828338624, + "Acc.sculpture": 0.6415000152587891, + "Acc.hood": 0.5604999923706054, + 
"Acc.sconce": 0.47689998626708985, + "Acc.vase": 0.39430000305175783, + "Acc.traffic light": 0.42270000457763673, + "Acc.tray": 0.07619999885559083, + "Acc.ashcan": 0.43959999084472656, + "Acc.fan": 0.6756999969482422, + "Acc.pier": 0.43979999542236325, + "Acc.crt screen": 0.004300000071525574, + "Acc.plate": 0.555, + "Acc.monitor": 0.025199999809265138, + "Acc.bulletin board": 0.49029998779296874, + "Acc.shower": 0.026600000858306886, + "Acc.radiator": 0.6656999969482422, + "Acc.glass": 0.09520000457763672, + "Acc.clock": 0.2854000091552734, + "Acc.flag": 0.3740999984741211 + } + }, + "51": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8188, + "mIoU": 0.4518, + "mAcc": 0.5532, + "IoU.wall": 0.7568000030517578, + "IoU.building": 0.8202999877929688, + "IoU.sky": 0.9380000305175781, + "IoU.floor": 0.7994000244140625, + "IoU.tree": 0.7318000030517579, + "IoU.ceiling": 0.8255999755859375, + "IoU.road": 0.8223999786376953, + "IoU.bed ": 0.865999984741211, + "IoU.windowpane": 0.6022000122070312, + "IoU.grass": 0.639000015258789, + "IoU.cabinet": 0.5897999954223633, + "IoU.sidewalk": 0.630099983215332, + "IoU.person": 0.7813999938964844, + "IoU.earth": 0.35009998321533203, + "IoU.door": 0.48029998779296873, + "IoU.table": 0.5618000030517578, + "IoU.mountain": 0.572599983215332, + "IoU.plant": 0.4879999923706055, + "IoU.curtain": 0.7304000091552735, + "IoU.chair": 0.523499984741211, + "IoU.car": 0.814800033569336, + "IoU.water": 0.5604000091552734, + "IoU.painting": 0.6780000305175782, + "IoU.sofa": 0.6179999923706054, + "IoU.shelf": 0.42889999389648437, + "IoU.house": 0.47049999237060547, + "IoU.sea": 0.6480999755859375, + "IoU.mirror": 0.6512999725341797, + "IoU.rug": 0.6483000183105468, + "IoU.field": 0.2564999961853027, + "IoU.armchair": 0.4229000091552734, + "IoU.seat": 0.6215999984741211, + "IoU.fence": 0.40549999237060547, + "IoU.desk": 0.4329999923706055, + "IoU.rock": 0.4343000030517578, + "IoU.wardrobe": 0.47450000762939454, + "IoU.lamp": 0.5363999938964844, + "IoU.bathtub": 0.7098999786376953, + "IoU.railing": 0.30959999084472656, + "IoU.cushion": 0.5175, + "IoU.base": 0.24729999542236328, + "IoU.box": 0.23329999923706055, + "IoU.column": 0.4493000030517578, + "IoU.signboard": 0.32779998779296876, + "IoU.chest of drawers": 0.3465999984741211, + "IoU.counter": 0.22389999389648438, + "IoU.sand": 0.3736999893188477, + "IoU.sink": 0.6637999725341797, + "IoU.skyscraper": 0.4956000137329102, + "IoU.fireplace": 0.7195999908447266, + "IoU.refrigerator": 0.7787999725341797, + "IoU.grandstand": 0.4079000091552734, + "IoU.path": 0.21110000610351562, + "IoU.stairs": 0.2780999946594238, + "IoU.runway": 0.625999984741211, + "IoU.case": 0.5052999877929687, + "IoU.pool table": 0.9173999786376953, + "IoU.pillow": 0.5290999984741211, + "IoU.screen door": 0.6063999938964844, + "IoU.stairway": 0.32540000915527345, + "IoU.river": 0.1906999969482422, + "IoU.bridge": 0.7069999694824218, + "IoU.bookcase": 0.3422000122070312, + "IoU.blind": 0.3995000076293945, + "IoU.coffee table": 0.5947000122070313, + "IoU.toilet": 0.8212999725341796, + "IoU.flower": 0.3079999923706055, + "IoU.book": 0.4172999954223633, + "IoU.hill": 0.08970000267028809, + "IoU.bench": 0.4204999923706055, + "IoU.countertop": 0.539099998474121, + "IoU.stove": 0.6973000335693359, + "IoU.palm": 0.43790000915527344, + "IoU.kitchen island": 0.3711999893188477, + "IoU.computer": 0.7137000274658203, + "IoU.swivel chair": 0.4136000061035156, + 
"IoU.boat": 0.7183000183105469, + "IoU.bar": 0.5056000137329102, + "IoU.arcade machine": 0.4070999908447266, + "IoU.hovel": 0.3958000183105469, + "IoU.bus": 0.7544000244140625, + "IoU.towel": 0.5777000045776367, + "IoU.light": 0.24190000534057618, + "IoU.truck": 0.24889999389648437, + "IoU.tower": 0.3234000015258789, + "IoU.chandelier": 0.582400016784668, + "IoU.awning": 0.21129999160766602, + "IoU.streetlight": 0.15739999771118163, + "IoU.booth": 0.36150001525878905, + "IoU.television receiver": 0.5731999969482422, + "IoU.airplane": 0.5831000137329102, + "IoU.dirt track": 0.16270000457763673, + "IoU.apparel": 0.29219999313354494, + "IoU.pole": 0.25959999084472657, + "IoU.land": 0.013200000524520875, + "IoU.bannister": 0.06980000019073486, + "IoU.escalator": 0.3014999961853027, + "IoU.ottoman": 0.5066999816894531, + "IoU.bottle": 0.3110000038146973, + "IoU.buffet": 0.39490001678466796, + "IoU.poster": 0.17899999618530274, + "IoU.stage": 0.17829999923706055, + "IoU.van": 0.3536000061035156, + "IoU.ship": 0.7037000274658203, + "IoU.fountain": 0.19280000686645507, + "IoU.conveyer belt": 0.7333999633789062, + "IoU.canopy": 0.19120000839233398, + "IoU.washer": 0.7237999725341797, + "IoU.plaything": 0.2656999969482422, + "IoU.swimming pool": 0.5611999893188476, + "IoU.stool": 0.2578000068664551, + "IoU.barrel": 0.5747000122070313, + "IoU.basket": 0.205, + "IoU.waterfall": 0.5777999877929687, + "IoU.tent": 0.9225, + "IoU.bag": 0.09350000381469727, + "IoU.minibike": 0.504000015258789, + "IoU.cradle": 0.7301000213623047, + "IoU.oven": 0.19610000610351563, + "IoU.ball": 0.38709999084472657, + "IoU.food": 0.5513999938964844, + "IoU.step": 0.13649999618530273, + "IoU.tank": 0.5345000076293945, + "IoU.trade name": 0.16420000076293945, + "IoU.microwave": 0.32099998474121094, + "IoU.pot": 0.32790000915527345, + "IoU.animal": 0.5709000015258789, + "IoU.bicycle": 0.44869998931884764, + "IoU.lake": 0.4734000015258789, + "IoU.dishwasher": 0.5502999877929687, + "IoU.screen": 0.6419999694824219, + "IoU.blanket": 0.04510000228881836, + "IoU.sculpture": 0.43439998626708987, + "IoU.hood": 0.40360000610351565, + "IoU.sconce": 0.2877000045776367, + "IoU.vase": 0.271200008392334, + "IoU.traffic light": 0.22479999542236329, + "IoU.tray": 0.005299999713897705, + "IoU.ashcan": 0.3252000045776367, + "IoU.fan": 0.4761000061035156, + "IoU.pier": 0.28959999084472654, + "IoU.crt screen": 0.0, + "IoU.plate": 0.4045999908447266, + "IoU.monitor": 0.056500000953674315, + "IoU.bulletin board": 0.35009998321533203, + "IoU.shower": 0.0005999999865889549, + "IoU.radiator": 0.49150001525878906, + "IoU.glass": 0.06579999923706055, + "IoU.clock": 0.20879999160766602, + "IoU.flag": 0.34810001373291016, + "Acc.wall": 0.894000015258789, + "Acc.building": 0.9308999633789062, + "Acc.sky": 0.9769000244140625, + "Acc.floor": 0.9068000030517578, + "Acc.tree": 0.8722000122070312, + "Acc.ceiling": 0.8941999816894531, + "Acc.road": 0.9037999725341797, + "Acc.bed ": 0.9563999938964843, + "Acc.windowpane": 0.7337000274658203, + "Acc.grass": 0.7901000213623047, + "Acc.cabinet": 0.7356999969482422, + "Acc.sidewalk": 0.7830000305175782, + "Acc.person": 0.9006999969482422, + "Acc.earth": 0.517400016784668, + "Acc.door": 0.6466000366210938, + "Acc.table": 0.7204000091552735, + "Acc.mountain": 0.706500015258789, + "Acc.plant": 0.5786999893188477, + "Acc.curtain": 0.8276000213623047, + "Acc.chair": 0.6579000091552735, + "Acc.car": 0.89, + "Acc.water": 0.6905000305175781, + "Acc.painting": 0.8219999694824218, + "Acc.sofa": 0.7613999938964844, + 
"Acc.shelf": 0.6138999938964844, + "Acc.house": 0.5552999877929687, + "Acc.sea": 0.8284999847412109, + "Acc.mirror": 0.7169999694824218, + "Acc.rug": 0.6845999908447266, + "Acc.field": 0.42009998321533204, + "Acc.armchair": 0.6598999786376953, + "Acc.seat": 0.7930000305175782, + "Acc.fence": 0.5615999984741211, + "Acc.desk": 0.6463999938964844, + "Acc.rock": 0.6229000091552734, + "Acc.wardrobe": 0.6479000091552735, + "Acc.lamp": 0.6343999862670898, + "Acc.bathtub": 0.7608000183105469, + "Acc.railing": 0.4418000030517578, + "Acc.cushion": 0.6108000183105469, + "Acc.base": 0.41639999389648436, + "Acc.box": 0.3053000068664551, + "Acc.column": 0.5791999816894531, + "Acc.signboard": 0.40849998474121096, + "Acc.chest of drawers": 0.5431999969482422, + "Acc.counter": 0.2961000061035156, + "Acc.sand": 0.5027000045776367, + "Acc.sink": 0.7055999755859375, + "Acc.skyscraper": 0.5683000183105469, + "Acc.fireplace": 0.8687999725341797, + "Acc.refrigerator": 0.845, + "Acc.grandstand": 0.7004000091552735, + "Acc.path": 0.281299991607666, + "Acc.stairs": 0.3579999923706055, + "Acc.runway": 0.8280999755859375, + "Acc.case": 0.6565000152587891, + "Acc.pool table": 0.9533000183105469, + "Acc.pillow": 0.6138000106811523, + "Acc.screen door": 0.6718000030517578, + "Acc.stairway": 0.4725, + "Acc.river": 0.4463999938964844, + "Acc.bridge": 0.8308999633789063, + "Acc.bookcase": 0.5856999969482422, + "Acc.blind": 0.43009998321533205, + "Acc.coffee table": 0.7833999633789063, + "Acc.toilet": 0.8762000274658203, + "Acc.flower": 0.4545000076293945, + "Acc.book": 0.5363999938964844, + "Acc.hill": 0.17639999389648436, + "Acc.bench": 0.49770000457763675, + "Acc.countertop": 0.701500015258789, + "Acc.stove": 0.7688999938964843, + "Acc.palm": 0.5515999984741211, + "Acc.kitchen island": 0.5836000061035156, + "Acc.computer": 0.8262000274658203, + "Acc.swivel chair": 0.5041999816894531, + "Acc.boat": 0.8283000183105469, + "Acc.bar": 0.6841999816894532, + "Acc.arcade machine": 0.44459999084472657, + "Acc.hovel": 0.4204999923706055, + "Acc.bus": 0.9079000091552735, + "Acc.towel": 0.6816999816894531, + "Acc.light": 0.256200008392334, + "Acc.truck": 0.33860000610351565, + "Acc.tower": 0.4420000076293945, + "Acc.chandelier": 0.7051999664306641, + "Acc.awning": 0.22889999389648438, + "Acc.streetlight": 0.17020000457763673, + "Acc.booth": 0.4434000015258789, + "Acc.television receiver": 0.71, + "Acc.airplane": 0.6286000061035156, + "Acc.dirt track": 0.2440999984741211, + "Acc.apparel": 0.41130001068115235, + "Acc.pole": 0.3454000091552734, + "Acc.land": 0.024800000190734865, + "Acc.bannister": 0.0859000015258789, + "Acc.escalator": 0.3597999954223633, + "Acc.ottoman": 0.6297999954223633, + "Acc.bottle": 0.43380001068115237, + "Acc.buffet": 0.4595000076293945, + "Acc.poster": 0.23639999389648436, + "Acc.stage": 0.3377000045776367, + "Acc.van": 0.42119998931884767, + "Acc.ship": 0.7401999664306641, + "Acc.fountain": 0.19790000915527345, + "Acc.conveyer belt": 0.7868000030517578, + "Acc.canopy": 0.29139999389648436, + "Acc.washer": 0.7294000244140625, + "Acc.plaything": 0.4234999847412109, + "Acc.swimming pool": 0.701500015258789, + "Acc.stool": 0.3345999908447266, + "Acc.barrel": 0.6218000030517579, + "Acc.basket": 0.25239999771118166, + "Acc.waterfall": 0.625099983215332, + "Acc.tent": 0.9894999694824219, + "Acc.bag": 0.1015999984741211, + "Acc.minibike": 0.5870999908447265, + "Acc.cradle": 0.9648999786376953, + "Acc.oven": 0.5758000183105468, + "Acc.ball": 0.4570000076293945, + "Acc.food": 0.6494999694824218, + "Acc.step": 
0.15229999542236328, + "Acc.tank": 0.6265999984741211, + "Acc.trade name": 0.17799999237060546, + "Acc.microwave": 0.3347999954223633, + "Acc.pot": 0.375, + "Acc.animal": 0.5979000091552734, + "Acc.bicycle": 0.6441999816894531, + "Acc.lake": 0.5459999847412109, + "Acc.dishwasher": 0.6081000137329101, + "Acc.screen": 0.8780999755859376, + "Acc.blanket": 0.049000000953674315, + "Acc.sculpture": 0.5740999984741211, + "Acc.hood": 0.4186000061035156, + "Acc.sconce": 0.33439998626708983, + "Acc.vase": 0.3554999923706055, + "Acc.traffic light": 0.29620000839233396, + "Acc.tray": 0.0064999997615814206, + "Acc.ashcan": 0.5047999954223633, + "Acc.fan": 0.5770999908447265, + "Acc.pier": 0.38709999084472657, + "Acc.crt screen": 0.0, + "Acc.plate": 0.4841999816894531, + "Acc.monitor": 0.0603000020980835, + "Acc.bulletin board": 0.4175, + "Acc.shower": 0.0009000000357627869, + "Acc.radiator": 0.5459000015258789, + "Acc.glass": 0.07099999904632569, + "Acc.clock": 0.25010000228881835, + "Acc.flag": 0.36970001220703125 + } + }, + "52": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.83, + "mIoU": 0.493, + "mAcc": 0.6214999999999999, + "IoU.wall": 0.7751000213623047, + "IoU.building": 0.8238999938964844, + "IoU.sky": 0.9383000183105469, + "IoU.floor": 0.8105000305175781, + "IoU.tree": 0.7437000274658203, + "IoU.ceiling": 0.8369000244140625, + "IoU.road": 0.8323999786376953, + "IoU.bed ": 0.8873999786376953, + "IoU.windowpane": 0.6184999847412109, + "IoU.grass": 0.6973999786376953, + "IoU.cabinet": 0.6004000091552735, + "IoU.sidewalk": 0.6512000274658203, + "IoU.person": 0.8002999877929687, + "IoU.earth": 0.35950000762939455, + "IoU.door": 0.5025999832153321, + "IoU.table": 0.5861000061035156, + "IoU.mountain": 0.5677999877929687, + "IoU.plant": 0.4911999893188477, + "IoU.curtain": 0.7398999786376953, + "IoU.chair": 0.5627000045776367, + "IoU.car": 0.8434999847412109, + "IoU.water": 0.5779000091552734, + "IoU.painting": 0.7088999938964844, + "IoU.sofa": 0.6894999694824219, + "IoU.shelf": 0.4179000091552734, + "IoU.house": 0.4443999862670898, + "IoU.sea": 0.6476999664306641, + "IoU.mirror": 0.6719999694824219, + "IoU.rug": 0.6608000183105469, + "IoU.field": 0.3268999862670898, + "IoU.armchair": 0.45, + "IoU.seat": 0.627400016784668, + "IoU.fence": 0.4413999938964844, + "IoU.desk": 0.4634000015258789, + "IoU.rock": 0.4570999908447266, + "IoU.wardrobe": 0.5395000076293945, + "IoU.lamp": 0.5643999862670899, + "IoU.bathtub": 0.8483000183105469, + "IoU.railing": 0.35009998321533203, + "IoU.cushion": 0.5613999938964844, + "IoU.base": 0.32549999237060545, + "IoU.box": 0.2680999946594238, + "IoU.column": 0.4740999984741211, + "IoU.signboard": 0.3370000076293945, + "IoU.chest of drawers": 0.3566999816894531, + "IoU.counter": 0.3034000015258789, + "IoU.sand": 0.4925, + "IoU.sink": 0.6961000061035156, + "IoU.skyscraper": 0.5520000076293945, + "IoU.fireplace": 0.7291000366210938, + "IoU.refrigerator": 0.7273999786376953, + "IoU.grandstand": 0.5147999954223633, + "IoU.path": 0.24850000381469728, + "IoU.stairs": 0.20489999771118164, + "IoU.runway": 0.7181999969482422, + "IoU.case": 0.558499984741211, + "IoU.pool table": 0.9127999877929688, + "IoU.pillow": 0.5525, + "IoU.screen door": 0.6879000091552734, + "IoU.stairway": 0.2822999954223633, + "IoU.river": 0.16139999389648438, + "IoU.bridge": 0.6776999664306641, + "IoU.bookcase": 0.32810001373291015, + "IoU.blind": 0.43709999084472656, + "IoU.coffee table": 0.5509999847412109, + 
"IoU.toilet": 0.8333999633789062, + "IoU.flower": 0.31559999465942384, + "IoU.book": 0.4715999984741211, + "IoU.hill": 0.1468000030517578, + "IoU.bench": 0.45849998474121095, + "IoU.countertop": 0.5800999832153321, + "IoU.stove": 0.7091999816894531, + "IoU.palm": 0.4893000030517578, + "IoU.kitchen island": 0.47439998626708985, + "IoU.computer": 0.7561000061035156, + "IoU.swivel chair": 0.5147000122070312, + "IoU.boat": 0.7040000152587891, + "IoU.bar": 0.5320999908447266, + "IoU.arcade machine": 0.8241999816894531, + "IoU.hovel": 0.31090000152587893, + "IoU.bus": 0.8586000061035156, + "IoU.towel": 0.605099983215332, + "IoU.light": 0.3893000030517578, + "IoU.truck": 0.23819999694824218, + "IoU.tower": 0.22020000457763672, + "IoU.chandelier": 0.6361999893188477, + "IoU.awning": 0.2671999931335449, + "IoU.streetlight": 0.20200000762939452, + "IoU.booth": 0.45529998779296876, + "IoU.television receiver": 0.6448000335693359, + "IoU.airplane": 0.6008000183105469, + "IoU.dirt track": 0.004199999868869781, + "IoU.apparel": 0.3490999984741211, + "IoU.pole": 0.16799999237060548, + "IoU.land": 0.03980000019073486, + "IoU.bannister": 0.13609999656677246, + "IoU.escalator": 0.4393000030517578, + "IoU.ottoman": 0.46349998474121096, + "IoU.bottle": 0.3447999954223633, + "IoU.buffet": 0.5900999832153321, + "IoU.poster": 0.25709999084472657, + "IoU.stage": 0.1809000015258789, + "IoU.van": 0.427400016784668, + "IoU.ship": 0.3028000068664551, + "IoU.fountain": 0.2118000030517578, + "IoU.conveyer belt": 0.6745999908447265, + "IoU.canopy": 0.23760000228881836, + "IoU.washer": 0.7275, + "IoU.plaything": 0.2652000045776367, + "IoU.swimming pool": 0.6293000030517578, + "IoU.stool": 0.40830001831054685, + "IoU.barrel": 0.45119998931884764, + "IoU.basket": 0.35900001525878905, + "IoU.waterfall": 0.6575, + "IoU.tent": 0.789000015258789, + "IoU.bag": 0.16020000457763672, + "IoU.minibike": 0.6862000274658203, + "IoU.cradle": 0.8087999725341797, + "IoU.oven": 0.38099998474121094, + "IoU.ball": 0.48459999084472655, + "IoU.food": 0.5247999954223633, + "IoU.step": 0.12859999656677246, + "IoU.tank": 0.5452000045776367, + "IoU.trade name": 0.19329999923706054, + "IoU.microwave": 0.7876999664306641, + "IoU.pot": 0.43810001373291013, + "IoU.animal": 0.6445999908447265, + "IoU.bicycle": 0.5820000076293945, + "IoU.lake": 0.5391999816894532, + "IoU.dishwasher": 0.6368999862670899, + "IoU.screen": 0.5013999938964844, + "IoU.blanket": 0.15050000190734864, + "IoU.sculpture": 0.6704000091552734, + "IoU.hood": 0.5336000061035157, + "IoU.sconce": 0.38779998779296876, + "IoU.vase": 0.33049999237060546, + "IoU.traffic light": 0.2765999984741211, + "IoU.tray": 0.10710000038146973, + "IoU.ashcan": 0.3834000015258789, + "IoU.fan": 0.525099983215332, + "IoU.pier": 0.18139999389648437, + "IoU.crt screen": 0.04550000190734863, + "IoU.plate": 0.4809000015258789, + "IoU.monitor": 0.25379999160766603, + "IoU.bulletin board": 0.5202999877929687, + "IoU.shower": 0.03380000114440918, + "IoU.radiator": 0.572400016784668, + "IoU.glass": 0.134399995803833, + "IoU.clock": 0.33169998168945314, + "IoU.flag": 0.42459999084472655, + "Acc.wall": 0.8763999938964844, + "Acc.building": 0.9330000305175781, + "Acc.sky": 0.9627999877929687, + "Acc.floor": 0.8859999847412109, + "Acc.tree": 0.8919000244140625, + "Acc.ceiling": 0.9018000030517578, + "Acc.road": 0.899000015258789, + "Acc.bed ": 0.9637000274658203, + "Acc.windowpane": 0.7643000030517578, + "Acc.grass": 0.8370999908447265, + "Acc.cabinet": 0.7313999938964844, + "Acc.sidewalk": 0.8048000335693359, + 
"Acc.person": 0.9233999633789063, + "Acc.earth": 0.5047000122070312, + "Acc.door": 0.6465000152587891, + "Acc.table": 0.7472000122070312, + "Acc.mountain": 0.7080999755859375, + "Acc.plant": 0.5779999923706055, + "Acc.curtain": 0.8738999938964844, + "Acc.chair": 0.7062999725341796, + "Acc.car": 0.9277999877929688, + "Acc.water": 0.6970999908447265, + "Acc.painting": 0.8611000061035157, + "Acc.sofa": 0.8391999816894531, + "Acc.shelf": 0.5672000122070312, + "Acc.house": 0.5881000137329102, + "Acc.sea": 0.8191999816894531, + "Acc.mirror": 0.775, + "Acc.rug": 0.8105999755859375, + "Acc.field": 0.5054000091552734, + "Acc.armchair": 0.6311999893188477, + "Acc.seat": 0.8498999786376953, + "Acc.fence": 0.6011999893188477, + "Acc.desk": 0.742699966430664, + "Acc.rock": 0.6206999969482422, + "Acc.wardrobe": 0.7518000030517578, + "Acc.lamp": 0.7475, + "Acc.bathtub": 0.8969999694824219, + "Acc.railing": 0.4590000152587891, + "Acc.cushion": 0.706500015258789, + "Acc.base": 0.6290999984741211, + "Acc.box": 0.3370000076293945, + "Acc.column": 0.5793000030517578, + "Acc.signboard": 0.42770000457763674, + "Acc.chest of drawers": 0.629900016784668, + "Acc.counter": 0.37869998931884763, + "Acc.sand": 0.7340000152587891, + "Acc.sink": 0.7669000244140625, + "Acc.skyscraper": 0.6505999755859375, + "Acc.fireplace": 0.9129000091552735, + "Acc.refrigerator": 0.8233999633789062, + "Acc.grandstand": 0.7123000335693359, + "Acc.path": 0.3672999954223633, + "Acc.stairs": 0.2936000061035156, + "Acc.runway": 0.9712999725341797, + "Acc.case": 0.7155000305175782, + "Acc.pool table": 0.972699966430664, + "Acc.pillow": 0.6572000122070313, + "Acc.screen door": 0.7991000366210937, + "Acc.stairway": 0.4583000183105469, + "Acc.river": 0.38979999542236327, + "Acc.bridge": 0.7998999786376954, + "Acc.bookcase": 0.529000015258789, + "Acc.blind": 0.5118000030517578, + "Acc.coffee table": 0.8537000274658203, + "Acc.toilet": 0.9076000213623047, + "Acc.flower": 0.5065000152587891, + "Acc.book": 0.6591000366210937, + "Acc.hill": 0.26290000915527345, + "Acc.bench": 0.5404999923706054, + "Acc.countertop": 0.7412999725341797, + "Acc.stove": 0.8498999786376953, + "Acc.palm": 0.6711000061035156, + "Acc.kitchen island": 0.7722000122070313, + "Acc.computer": 0.9073999786376953, + "Acc.swivel chair": 0.7094000244140625, + "Acc.boat": 0.8620999908447265, + "Acc.bar": 0.7019000244140625, + "Acc.arcade machine": 0.9191000366210937, + "Acc.hovel": 0.33419998168945314, + "Acc.bus": 0.9391999816894532, + "Acc.towel": 0.7854000091552734, + "Acc.light": 0.4693000030517578, + "Acc.truck": 0.31489999771118166, + "Acc.tower": 0.3502999877929687, + "Acc.chandelier": 0.7970999908447266, + "Acc.awning": 0.3213999938964844, + "Acc.streetlight": 0.2890999984741211, + "Acc.booth": 0.4834000015258789, + "Acc.television receiver": 0.7783999633789063, + "Acc.airplane": 0.6891999816894532, + "Acc.dirt track": 0.009599999785423278, + "Acc.apparel": 0.4570999908447266, + "Acc.pole": 0.21549999237060546, + "Acc.land": 0.07539999961853028, + "Acc.bannister": 0.21059999465942383, + "Acc.escalator": 0.560099983215332, + "Acc.ottoman": 0.6791999816894532, + "Acc.bottle": 0.5615999984741211, + "Acc.buffet": 0.7863999938964844, + "Acc.poster": 0.2979999923706055, + "Acc.stage": 0.45630001068115233, + "Acc.van": 0.5102000045776367, + "Acc.ship": 0.33990001678466797, + "Acc.fountain": 0.21860000610351563, + "Acc.conveyer belt": 0.947699966430664, + "Acc.canopy": 0.26649999618530273, + "Acc.washer": 0.7466999816894532, + "Acc.plaything": 0.3813000106811523, + "Acc.swimming 
pool": 0.8508999633789063, + "Acc.stool": 0.5245000076293945, + "Acc.barrel": 0.6512000274658203, + "Acc.basket": 0.47560001373291017, + "Acc.waterfall": 0.9044999694824218, + "Acc.tent": 0.9894000244140625, + "Acc.bag": 0.18309999465942384, + "Acc.minibike": 0.7987999725341797, + "Acc.cradle": 0.9704000091552735, + "Acc.oven": 0.5018000030517578, + "Acc.ball": 0.5383000183105469, + "Acc.food": 0.5850999832153321, + "Acc.step": 0.16290000915527345, + "Acc.tank": 0.6454000091552734, + "Acc.trade name": 0.20540000915527343, + "Acc.microwave": 0.8908000183105469, + "Acc.pot": 0.5216999816894531, + "Acc.animal": 0.6780000305175782, + "Acc.bicycle": 0.7177999877929687, + "Acc.lake": 0.6638999938964844, + "Acc.dishwasher": 0.7263999938964844, + "Acc.screen": 0.7356999969482422, + "Acc.blanket": 0.17120000839233399, + "Acc.sculpture": 0.7816000366210938, + "Acc.hood": 0.7045999908447266, + "Acc.sconce": 0.4765999984741211, + "Acc.vase": 0.5481999969482422, + "Acc.traffic light": 0.4911000061035156, + "Acc.tray": 0.14960000038146973, + "Acc.ashcan": 0.5602999877929687, + "Acc.fan": 0.7283999633789062, + "Acc.pier": 0.43470001220703125, + "Acc.crt screen": 0.11090000152587891, + "Acc.plate": 0.6677999877929688, + "Acc.monitor": 0.3061000061035156, + "Acc.bulletin board": 0.7143000030517578, + "Acc.shower": 0.05010000228881836, + "Acc.radiator": 0.6719000244140625, + "Acc.glass": 0.14739999771118165, + "Acc.clock": 0.3722999954223633, + "Acc.flag": 0.4922999954223633 + } + }, + "53": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8265, + "mIoU": 0.4832, + "mAcc": 0.6161, + "IoU.wall": 0.7686000061035156, + "IoU.building": 0.8326000213623047, + "IoU.sky": 0.9369000244140625, + "IoU.floor": 0.8094999694824219, + "IoU.tree": 0.7383999633789062, + "IoU.ceiling": 0.8312999725341796, + "IoU.road": 0.8356999969482422, + "IoU.bed ": 0.8794000244140625, + "IoU.windowpane": 0.606500015258789, + "IoU.grass": 0.7097000122070313, + "IoU.cabinet": 0.5845999908447266, + "IoU.sidewalk": 0.6570999908447266, + "IoU.person": 0.7916999816894531, + "IoU.earth": 0.3729999923706055, + "IoU.door": 0.4734000015258789, + "IoU.table": 0.5677000045776367, + "IoU.mountain": 0.5906999969482422, + "IoU.plant": 0.5006000137329102, + "IoU.curtain": 0.7233999633789062, + "IoU.chair": 0.5379999923706055, + "IoU.car": 0.8247000122070313, + "IoU.water": 0.5590999984741211, + "IoU.painting": 0.6969999694824218, + "IoU.sofa": 0.6748999786376954, + "IoU.shelf": 0.41150001525878904, + "IoU.house": 0.48759998321533204, + "IoU.sea": 0.6266999816894532, + "IoU.mirror": 0.635, + "IoU.rug": 0.6631999969482422, + "IoU.field": 0.28389999389648435, + "IoU.armchair": 0.4438000106811523, + "IoU.seat": 0.6275999832153321, + "IoU.fence": 0.41630001068115235, + "IoU.desk": 0.4815999984741211, + "IoU.rock": 0.48709999084472655, + "IoU.wardrobe": 0.5268999862670899, + "IoU.lamp": 0.5531000137329102, + "IoU.bathtub": 0.7630999755859375, + "IoU.railing": 0.3397000122070313, + "IoU.cushion": 0.5475, + "IoU.base": 0.3231999969482422, + "IoU.box": 0.2535000038146973, + "IoU.column": 0.45970001220703127, + "IoU.signboard": 0.34669998168945315, + "IoU.chest of drawers": 0.34369998931884765, + "IoU.counter": 0.31940000534057617, + "IoU.sand": 0.4102999877929687, + "IoU.sink": 0.6966999816894531, + "IoU.skyscraper": 0.5593999862670899, + "IoU.fireplace": 0.714800033569336, + "IoU.refrigerator": 0.6833999633789063, + "IoU.grandstand": 0.5008000183105469, + "IoU.path": 
0.23209999084472657, + "IoU.stairs": 0.25989999771118166, + "IoU.runway": 0.7212000274658203, + "IoU.case": 0.5272000122070313, + "IoU.pool table": 0.8994000244140625, + "IoU.pillow": 0.5452999877929687, + "IoU.screen door": 0.535, + "IoU.stairway": 0.37490001678466794, + "IoU.river": 0.166299991607666, + "IoU.bridge": 0.624900016784668, + "IoU.bookcase": 0.32310001373291014, + "IoU.blind": 0.37659999847412107, + "IoU.coffee table": 0.5297999954223633, + "IoU.toilet": 0.8295999908447266, + "IoU.flower": 0.3629999923706055, + "IoU.book": 0.43520000457763675, + "IoU.hill": 0.13789999961853028, + "IoU.bench": 0.40450000762939453, + "IoU.countertop": 0.5454999923706054, + "IoU.stove": 0.7219000244140625, + "IoU.palm": 0.5063999938964844, + "IoU.kitchen island": 0.39299999237060546, + "IoU.computer": 0.7073999786376953, + "IoU.swivel chair": 0.48330001831054686, + "IoU.boat": 0.669000015258789, + "IoU.bar": 0.4834000015258789, + "IoU.arcade machine": 0.8398999786376953, + "IoU.hovel": 0.35650001525878905, + "IoU.bus": 0.8679000091552734, + "IoU.towel": 0.6002000045776367, + "IoU.light": 0.42069999694824217, + "IoU.truck": 0.2468000030517578, + "IoU.tower": 0.3263999938964844, + "IoU.chandelier": 0.6052000045776367, + "IoU.awning": 0.3103000068664551, + "IoU.streetlight": 0.1940999984741211, + "IoU.booth": 0.48869998931884767, + "IoU.television receiver": 0.62, + "IoU.airplane": 0.5475, + "IoU.dirt track": 0.024800000190734865, + "IoU.apparel": 0.3028000068664551, + "IoU.pole": 0.15079999923706056, + "IoU.land": 0.060799999237060545, + "IoU.bannister": 0.13270000457763673, + "IoU.escalator": 0.3025, + "IoU.ottoman": 0.45720001220703127, + "IoU.bottle": 0.2570000076293945, + "IoU.buffet": 0.5041999816894531, + "IoU.poster": 0.25309999465942384, + "IoU.stage": 0.18200000762939453, + "IoU.van": 0.3347999954223633, + "IoU.ship": 0.7755000305175781, + "IoU.fountain": 0.22969999313354492, + "IoU.conveyer belt": 0.6755000305175781, + "IoU.canopy": 0.21479999542236328, + "IoU.washer": 0.7458999633789063, + "IoU.plaything": 0.26110000610351564, + "IoU.swimming pool": 0.48400001525878905, + "IoU.stool": 0.3683000183105469, + "IoU.barrel": 0.4438000106811523, + "IoU.basket": 0.25790000915527345, + "IoU.waterfall": 0.664000015258789, + "IoU.tent": 0.8708999633789063, + "IoU.bag": 0.11210000038146972, + "IoU.minibike": 0.6836000061035157, + "IoU.cradle": 0.7918000030517578, + "IoU.oven": 0.3240000152587891, + "IoU.ball": 0.4136000061035156, + "IoU.food": 0.5161999893188477, + "IoU.step": 0.13760000228881836, + "IoU.tank": 0.543499984741211, + "IoU.trade name": 0.21450000762939453, + "IoU.microwave": 0.7069999694824218, + "IoU.pot": 0.3679999923706055, + "IoU.animal": 0.6541000366210937, + "IoU.bicycle": 0.5661999893188476, + "IoU.lake": 0.6240999984741211, + "IoU.dishwasher": 0.5854999923706055, + "IoU.screen": 0.492400016784668, + "IoU.blanket": 0.15289999961853026, + "IoU.sculpture": 0.5529000091552735, + "IoU.hood": 0.5065000152587891, + "IoU.sconce": 0.31079999923706053, + "IoU.vase": 0.34040000915527346, + "IoU.traffic light": 0.2635000038146973, + "IoU.tray": 0.06510000228881836, + "IoU.ashcan": 0.42439998626708986, + "IoU.fan": 0.5168000030517578, + "IoU.pier": 0.24420000076293946, + "IoU.crt screen": 0.03259999990463257, + "IoU.plate": 0.48939998626708986, + "IoU.monitor": 0.20790000915527343, + "IoU.bulletin board": 0.4897999954223633, + "IoU.shower": 0.009700000286102295, + "IoU.radiator": 0.5734000015258789, + "IoU.glass": 0.10069999694824219, + "IoU.clock": 0.2773999977111816, + "IoU.flag": 
0.609900016784668, + "Acc.wall": 0.8697000122070313, + "Acc.building": 0.9291000366210938, + "Acc.sky": 0.9591999816894531, + "Acc.floor": 0.8837000274658203, + "Acc.tree": 0.8943000030517578, + "Acc.ceiling": 0.8998999786376953, + "Acc.road": 0.8959999847412109, + "Acc.bed ": 0.9633999633789062, + "Acc.windowpane": 0.7687999725341796, + "Acc.grass": 0.846500015258789, + "Acc.cabinet": 0.7194999694824219, + "Acc.sidewalk": 0.8212999725341796, + "Acc.person": 0.917300033569336, + "Acc.earth": 0.5345999908447265, + "Acc.door": 0.6186000061035156, + "Acc.table": 0.7366999816894532, + "Acc.mountain": 0.7351999664306641, + "Acc.plant": 0.5804999923706055, + "Acc.curtain": 0.8608000183105469, + "Acc.chair": 0.6995999908447266, + "Acc.car": 0.9287999725341797, + "Acc.water": 0.6891999816894532, + "Acc.painting": 0.8480999755859375, + "Acc.sofa": 0.8375, + "Acc.shelf": 0.5620000076293945, + "Acc.house": 0.6619000244140625, + "Acc.sea": 0.778499984741211, + "Acc.mirror": 0.7423000335693359, + "Acc.rug": 0.8183000183105469, + "Acc.field": 0.39380001068115233, + "Acc.armchair": 0.6318000030517578, + "Acc.seat": 0.8290000152587891, + "Acc.fence": 0.5961000061035157, + "Acc.desk": 0.7694999694824218, + "Acc.rock": 0.6411000061035156, + "Acc.wardrobe": 0.731500015258789, + "Acc.lamp": 0.7361000061035157, + "Acc.bathtub": 0.8123999786376953, + "Acc.railing": 0.43849998474121094, + "Acc.cushion": 0.7097000122070313, + "Acc.base": 0.5783000183105469, + "Acc.box": 0.3338999938964844, + "Acc.column": 0.5883000183105469, + "Acc.signboard": 0.43389999389648437, + "Acc.chest of drawers": 0.6431999969482421, + "Acc.counter": 0.43099998474121093, + "Acc.sand": 0.6281000137329101, + "Acc.sink": 0.7693000030517578, + "Acc.skyscraper": 0.7051999664306641, + "Acc.fireplace": 0.897300033569336, + "Acc.refrigerator": 0.8006999969482422, + "Acc.grandstand": 0.715, + "Acc.path": 0.32529998779296876, + "Acc.stairs": 0.40430000305175784, + "Acc.runway": 0.9712999725341797, + "Acc.case": 0.7087000274658203, + "Acc.pool table": 0.9722000122070312, + "Acc.pillow": 0.6277999877929688, + "Acc.screen door": 0.6612999725341797, + "Acc.stairway": 0.5427000045776367, + "Acc.river": 0.43150001525878906, + "Acc.bridge": 0.8256999969482421, + "Acc.bookcase": 0.49900001525878906, + "Acc.blind": 0.4159000015258789, + "Acc.coffee table": 0.8569999694824219, + "Acc.toilet": 0.9026000213623047, + "Acc.flower": 0.5522000122070313, + "Acc.book": 0.6302000045776367, + "Acc.hill": 0.2440999984741211, + "Acc.bench": 0.5033000183105468, + "Acc.countertop": 0.696500015258789, + "Acc.stove": 0.8456999969482422, + "Acc.palm": 0.7005000305175781, + "Acc.kitchen island": 0.7515000152587891, + "Acc.computer": 0.8361000061035156, + "Acc.swivel chair": 0.6586000061035157, + "Acc.boat": 0.8580000305175781, + "Acc.bar": 0.7066999816894531, + "Acc.arcade machine": 0.9093000030517578, + "Acc.hovel": 0.3631000137329102, + "Acc.bus": 0.9312999725341797, + "Acc.towel": 0.7380000305175781, + "Acc.light": 0.5034000015258789, + "Acc.truck": 0.33380001068115234, + "Acc.tower": 0.5063000106811524, + "Acc.chandelier": 0.7626000213623046, + "Acc.awning": 0.37709999084472656, + "Acc.streetlight": 0.2669000053405762, + "Acc.booth": 0.5583000183105469, + "Acc.television receiver": 0.7626999664306641, + "Acc.airplane": 0.6619999694824219, + "Acc.dirt track": 0.06739999771118164, + "Acc.apparel": 0.40830001831054685, + "Acc.pole": 0.18989999771118163, + "Acc.land": 0.10720000267028809, + "Acc.bannister": 0.20860000610351562, + "Acc.escalator": 0.36450000762939455, + 
"Acc.ottoman": 0.6363999938964844, + "Acc.bottle": 0.3434000015258789, + "Acc.buffet": 0.6966999816894531, + "Acc.poster": 0.30270000457763674, + "Acc.stage": 0.47220001220703123, + "Acc.van": 0.4009000015258789, + "Acc.ship": 0.8006999969482422, + "Acc.fountain": 0.23719999313354492, + "Acc.conveyer belt": 0.9293000030517579, + "Acc.canopy": 0.31170000076293947, + "Acc.washer": 0.7512999725341797, + "Acc.plaything": 0.39549999237060546, + "Acc.swimming pool": 0.849800033569336, + "Acc.stool": 0.5086999893188476, + "Acc.barrel": 0.762300033569336, + "Acc.basket": 0.3761000061035156, + "Acc.waterfall": 0.9048999786376953, + "Acc.tent": 0.995, + "Acc.bag": 0.12329999923706055, + "Acc.minibike": 0.7698999786376953, + "Acc.cradle": 0.9791999816894531, + "Acc.oven": 0.5454999923706054, + "Acc.ball": 0.4602000045776367, + "Acc.food": 0.5608000183105468, + "Acc.step": 0.18260000228881837, + "Acc.tank": 0.6366999816894531, + "Acc.trade name": 0.22790000915527345, + "Acc.microwave": 0.7965000152587891, + "Acc.pot": 0.4290999984741211, + "Acc.animal": 0.6920999908447265, + "Acc.bicycle": 0.7394999694824219, + "Acc.lake": 0.7069000244140625, + "Acc.dishwasher": 0.6676999664306641, + "Acc.screen": 0.7325, + "Acc.blanket": 0.1768000030517578, + "Acc.sculpture": 0.6493000030517578, + "Acc.hood": 0.6719000244140625, + "Acc.sconce": 0.38709999084472657, + "Acc.vase": 0.5190999984741211, + "Acc.traffic light": 0.5045999908447265, + "Acc.tray": 0.10420000076293945, + "Acc.ashcan": 0.5886999893188477, + "Acc.fan": 0.7393000030517578, + "Acc.pier": 0.6152000045776367, + "Acc.crt screen": 0.09779999732971191, + "Acc.plate": 0.6343000030517578, + "Acc.monitor": 0.2535000038146973, + "Acc.bulletin board": 0.729800033569336, + "Acc.shower": 0.05, + "Acc.radiator": 0.6843000030517579, + "Acc.glass": 0.1075, + "Acc.clock": 0.3195000076293945, + "Acc.flag": 0.6941999816894531 + } + }, + "54": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.821, + "mIoU": 0.4677, + "mAcc": 0.6008, + "IoU.wall": 0.759000015258789, + "IoU.building": 0.8284999847412109, + "IoU.sky": 0.9388999938964844, + "IoU.floor": 0.8070999908447266, + "IoU.tree": 0.7362999725341797, + "IoU.ceiling": 0.8287000274658203, + "IoU.road": 0.8179000091552734, + "IoU.bed ": 0.8730000305175781, + "IoU.windowpane": 0.6090000152587891, + "IoU.grass": 0.6704000091552734, + "IoU.cabinet": 0.580999984741211, + "IoU.sidewalk": 0.6343000030517578, + "IoU.person": 0.785, + "IoU.earth": 0.3477000045776367, + "IoU.door": 0.465099983215332, + "IoU.table": 0.5609000015258789, + "IoU.mountain": 0.6129000091552734, + "IoU.plant": 0.5095999908447265, + "IoU.curtain": 0.7341000366210938, + "IoU.chair": 0.5272999954223633, + "IoU.car": 0.8226000213623047, + "IoU.water": 0.5472000122070313, + "IoU.painting": 0.7005000305175781, + "IoU.sofa": 0.6658999633789062, + "IoU.shelf": 0.4229999923706055, + "IoU.house": 0.5418999862670898, + "IoU.sea": 0.5606999969482422, + "IoU.mirror": 0.6218999862670899, + "IoU.rug": 0.6719999694824219, + "IoU.field": 0.27280000686645506, + "IoU.armchair": 0.40700000762939453, + "IoU.seat": 0.6211000061035157, + "IoU.fence": 0.39680000305175783, + "IoU.desk": 0.4375, + "IoU.rock": 0.4779999923706055, + "IoU.wardrobe": 0.5161000061035156, + "IoU.lamp": 0.5265000152587891, + "IoU.bathtub": 0.7452999877929688, + "IoU.railing": 0.3202000045776367, + "IoU.cushion": 0.5272000122070313, + "IoU.base": 0.3325, + "IoU.box": 0.2325, + "IoU.column": 0.47, + 
"IoU.signboard": 0.3315999984741211, + "IoU.chest of drawers": 0.3340000152587891, + "IoU.counter": 0.2534000015258789, + "IoU.sand": 0.4265999984741211, + "IoU.sink": 0.6688999938964844, + "IoU.skyscraper": 0.5979000091552734, + "IoU.fireplace": 0.6937000274658203, + "IoU.refrigerator": 0.7119999694824218, + "IoU.grandstand": 0.3820000076293945, + "IoU.path": 0.26379999160766604, + "IoU.stairs": 0.2168000030517578, + "IoU.runway": 0.7083000183105469, + "IoU.case": 0.49540000915527344, + "IoU.pool table": 0.9122000122070313, + "IoU.pillow": 0.5236000061035156, + "IoU.screen door": 0.6084000015258789, + "IoU.stairway": 0.23040000915527345, + "IoU.river": 0.15850000381469725, + "IoU.bridge": 0.6641000366210937, + "IoU.bookcase": 0.31409999847412107, + "IoU.blind": 0.39169998168945314, + "IoU.coffee table": 0.5061000061035156, + "IoU.toilet": 0.8208999633789062, + "IoU.flower": 0.39610000610351564, + "IoU.book": 0.40419998168945315, + "IoU.hill": 0.12619999885559083, + "IoU.bench": 0.40279998779296877, + "IoU.countertop": 0.5318000030517578, + "IoU.stove": 0.691500015258789, + "IoU.palm": 0.48709999084472655, + "IoU.kitchen island": 0.3675, + "IoU.computer": 0.7, + "IoU.swivel chair": 0.4490000152587891, + "IoU.boat": 0.6040999984741211, + "IoU.bar": 0.5065999984741211, + "IoU.arcade machine": 0.7527999877929688, + "IoU.hovel": 0.4743000030517578, + "IoU.bus": 0.8413999938964843, + "IoU.towel": 0.5379999923706055, + "IoU.light": 0.3959000015258789, + "IoU.truck": 0.26479999542236327, + "IoU.tower": 0.2231999969482422, + "IoU.chandelier": 0.5763000106811523, + "IoU.awning": 0.3014999961853027, + "IoU.streetlight": 0.18770000457763672, + "IoU.booth": 0.4127999877929687, + "IoU.television receiver": 0.5979000091552734, + "IoU.airplane": 0.5745999908447266, + "IoU.dirt track": 0.018600000143051146, + "IoU.apparel": 0.3159000015258789, + "IoU.pole": 0.17959999084472655, + "IoU.land": 0.06710000038146972, + "IoU.bannister": 0.09850000381469727, + "IoU.escalator": 0.30760000228881834, + "IoU.ottoman": 0.43220001220703125, + "IoU.bottle": 0.17530000686645508, + "IoU.buffet": 0.6436000061035156, + "IoU.poster": 0.3377000045776367, + "IoU.stage": 0.12930000305175782, + "IoU.van": 0.32560001373291014, + "IoU.ship": 0.9158999633789062, + "IoU.fountain": 0.20799999237060546, + "IoU.conveyer belt": 0.6972000122070312, + "IoU.canopy": 0.2642000007629395, + "IoU.washer": 0.6675, + "IoU.plaything": 0.23299999237060548, + "IoU.swimming pool": 0.4493000030517578, + "IoU.stool": 0.3268999862670898, + "IoU.barrel": 0.5127000045776368, + "IoU.basket": 0.19860000610351564, + "IoU.waterfall": 0.6256999969482422, + "IoU.tent": 0.8513999938964844, + "IoU.bag": 0.14409999847412108, + "IoU.minibike": 0.6156999969482422, + "IoU.cradle": 0.8081999969482422, + "IoU.oven": 0.145, + "IoU.ball": 0.5027000045776367, + "IoU.food": 0.5208000183105469, + "IoU.step": 0.14409999847412108, + "IoU.tank": 0.4936999893188477, + "IoU.trade name": 0.19780000686645507, + "IoU.microwave": 0.3711000061035156, + "IoU.pot": 0.3825, + "IoU.animal": 0.5636000061035156, + "IoU.bicycle": 0.5045999908447265, + "IoU.lake": 0.5370999908447266, + "IoU.dishwasher": 0.5597000122070312, + "IoU.screen": 0.6379999923706055, + "IoU.blanket": 0.16270000457763673, + "IoU.sculpture": 0.4986000061035156, + "IoU.hood": 0.5470000076293945, + "IoU.sconce": 0.27350000381469725, + "IoU.vase": 0.29149999618530276, + "IoU.traffic light": 0.26629999160766604, + "IoU.tray": 0.03130000114440918, + "IoU.ashcan": 0.38790000915527345, + "IoU.fan": 0.4645999908447266, + 
"IoU.pier": 0.21729999542236328, + "IoU.crt screen": 0.03809999942779541, + "IoU.plate": 0.47259998321533203, + "IoU.monitor": 0.12989999771118163, + "IoU.bulletin board": 0.44529998779296875, + "IoU.shower": 0.03309999942779541, + "IoU.radiator": 0.5591999816894532, + "IoU.glass": 0.07969999790191651, + "IoU.clock": 0.2819000053405762, + "IoU.flag": 0.3725, + "Acc.wall": 0.8663999938964844, + "Acc.building": 0.925199966430664, + "Acc.sky": 0.9625, + "Acc.floor": 0.8836000061035156, + "Acc.tree": 0.8873999786376953, + "Acc.ceiling": 0.8891999816894531, + "Acc.road": 0.8830000305175781, + "Acc.bed ": 0.9627999877929687, + "Acc.windowpane": 0.7706999969482422, + "Acc.grass": 0.8291999816894531, + "Acc.cabinet": 0.7223000335693359, + "Acc.sidewalk": 0.8111000061035156, + "Acc.person": 0.9116999816894531, + "Acc.earth": 0.4911000061035156, + "Acc.door": 0.625099983215332, + "Acc.table": 0.7183000183105469, + "Acc.mountain": 0.7594999694824218, + "Acc.plant": 0.6052000045776367, + "Acc.curtain": 0.8595999908447266, + "Acc.chair": 0.688499984741211, + "Acc.car": 0.9229000091552735, + "Acc.water": 0.6525, + "Acc.painting": 0.8411000061035157, + "Acc.sofa": 0.8433000183105469, + "Acc.shelf": 0.6052000045776367, + "Acc.house": 0.6712999725341797, + "Acc.sea": 0.7418000030517579, + "Acc.mirror": 0.7304000091552735, + "Acc.rug": 0.7963999938964844, + "Acc.field": 0.40330001831054685, + "Acc.armchair": 0.5627000045776367, + "Acc.seat": 0.8319000244140625, + "Acc.fence": 0.5625, + "Acc.desk": 0.7415000152587891, + "Acc.rock": 0.6091999816894531, + "Acc.wardrobe": 0.7473000335693359, + "Acc.lamp": 0.7222000122070312, + "Acc.bathtub": 0.8126000213623047, + "Acc.railing": 0.44479999542236326, + "Acc.cushion": 0.6595999908447265, + "Acc.base": 0.5815999984741211, + "Acc.box": 0.29399999618530276, + "Acc.column": 0.5802000045776368, + "Acc.signboard": 0.41889999389648436, + "Acc.chest of drawers": 0.645, + "Acc.counter": 0.35639999389648436, + "Acc.sand": 0.6127999877929687, + "Acc.sink": 0.7423000335693359, + "Acc.skyscraper": 0.7576999664306641, + "Acc.fireplace": 0.8925, + "Acc.refrigerator": 0.8238999938964844, + "Acc.grandstand": 0.7204000091552735, + "Acc.path": 0.3763999938964844, + "Acc.stairs": 0.32490001678466796, + "Acc.runway": 0.9451000213623046, + "Acc.case": 0.67, + "Acc.pool table": 0.9744000244140625, + "Acc.pillow": 0.6213000106811524, + "Acc.screen door": 0.7706999969482422, + "Acc.stairway": 0.3309000015258789, + "Acc.river": 0.41970001220703124, + "Acc.bridge": 0.8366000366210937, + "Acc.bookcase": 0.49509998321533205, + "Acc.blind": 0.43, + "Acc.coffee table": 0.8637000274658203, + "Acc.toilet": 0.9027999877929688, + "Acc.flower": 0.537599983215332, + "Acc.book": 0.6202999877929688, + "Acc.hill": 0.235, + "Acc.bench": 0.5281000137329102, + "Acc.countertop": 0.699800033569336, + "Acc.stove": 0.8237000274658203, + "Acc.palm": 0.6986000061035156, + "Acc.kitchen island": 0.6716000366210938, + "Acc.computer": 0.8851000213623047, + "Acc.swivel chair": 0.5829999923706055, + "Acc.boat": 0.8494000244140625, + "Acc.bar": 0.7440000152587891, + "Acc.arcade machine": 0.8386000061035156, + "Acc.hovel": 0.5338999938964843, + "Acc.bus": 0.9433000183105469, + "Acc.towel": 0.7358999633789063, + "Acc.light": 0.4686000061035156, + "Acc.truck": 0.3631000137329102, + "Acc.tower": 0.32529998779296876, + "Acc.chandelier": 0.7470999908447266, + "Acc.awning": 0.3643000030517578, + "Acc.streetlight": 0.2559000015258789, + "Acc.booth": 0.5879000091552734, + "Acc.television receiver": 0.7626999664306641, + 
"Acc.airplane": 0.6505999755859375, + "Acc.dirt track": 0.020799999237060548, + "Acc.apparel": 0.43909999847412107, + "Acc.pole": 0.21559999465942384, + "Acc.land": 0.12930000305175782, + "Acc.bannister": 0.17629999160766602, + "Acc.escalator": 0.34939998626708985, + "Acc.ottoman": 0.6333000183105468, + "Acc.bottle": 0.2215999984741211, + "Acc.buffet": 0.8519000244140625, + "Acc.poster": 0.43220001220703125, + "Acc.stage": 0.3981999969482422, + "Acc.van": 0.40180000305175784, + "Acc.ship": 0.9511000061035156, + "Acc.fountain": 0.2109000015258789, + "Acc.conveyer belt": 0.9241999816894532, + "Acc.canopy": 0.31670000076293947, + "Acc.washer": 0.7, + "Acc.plaything": 0.3388999938964844, + "Acc.swimming pool": 0.7737000274658203, + "Acc.stool": 0.49040000915527343, + "Acc.barrel": 0.6844000244140624, + "Acc.basket": 0.26860000610351564, + "Acc.waterfall": 0.7798000335693359, + "Acc.tent": 0.9945999908447266, + "Acc.bag": 0.163799991607666, + "Acc.minibike": 0.7308000183105469, + "Acc.cradle": 0.9455999755859374, + "Acc.oven": 0.4315999984741211, + "Acc.ball": 0.6281999969482421, + "Acc.food": 0.5827000045776367, + "Acc.step": 0.18930000305175781, + "Acc.tank": 0.5861000061035156, + "Acc.trade name": 0.20909999847412108, + "Acc.microwave": 0.4168000030517578, + "Acc.pot": 0.4520999908447266, + "Acc.animal": 0.6052000045776367, + "Acc.bicycle": 0.699800033569336, + "Acc.lake": 0.7573999786376953, + "Acc.dishwasher": 0.6705999755859375, + "Acc.screen": 0.8993000030517578, + "Acc.blanket": 0.18059999465942383, + "Acc.sculpture": 0.6655000305175781, + "Acc.hood": 0.6224000167846679, + "Acc.sconce": 0.33369998931884765, + "Acc.vase": 0.49470001220703125, + "Acc.traffic light": 0.45939998626708983, + "Acc.tray": 0.04650000095367432, + "Acc.ashcan": 0.529000015258789, + "Acc.fan": 0.7266999816894532, + "Acc.pier": 0.5425, + "Acc.crt screen": 0.10140000343322754, + "Acc.plate": 0.5990999984741211, + "Acc.monitor": 0.14829999923706055, + "Acc.bulletin board": 0.6633000183105469, + "Acc.shower": 0.043299999237060544, + "Acc.radiator": 0.6570999908447266, + "Acc.glass": 0.08770000457763671, + "Acc.clock": 0.3290999984741211, + "Acc.flag": 0.41830001831054686 + } + }, + "55": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8220000000000001, + "mIoU": 0.46520000000000006, + "mAcc": 0.6031, + "IoU.wall": 0.7598000335693359, + "IoU.building": 0.8298999786376953, + "IoU.sky": 0.9391999816894532, + "IoU.floor": 0.8106999969482422, + "IoU.tree": 0.7387000274658203, + "IoU.ceiling": 0.8333000183105469, + "IoU.road": 0.8220999908447265, + "IoU.bed ": 0.8702999877929688, + "IoU.windowpane": 0.6106999969482422, + "IoU.grass": 0.6561000061035156, + "IoU.cabinet": 0.6040000152587891, + "IoU.sidewalk": 0.6361999893188477, + "IoU.person": 0.7847000122070312, + "IoU.earth": 0.3584000015258789, + "IoU.door": 0.48, + "IoU.table": 0.5329999923706055, + "IoU.mountain": 0.6034999847412109, + "IoU.plant": 0.5145999908447265, + "IoU.curtain": 0.7219000244140625, + "IoU.chair": 0.5193999862670898, + "IoU.car": 0.8154000091552734, + "IoU.water": 0.572599983215332, + "IoU.painting": 0.6754000091552734, + "IoU.sofa": 0.6544000244140625, + "IoU.shelf": 0.4347999954223633, + "IoU.house": 0.5077999877929688, + "IoU.sea": 0.6322000122070313, + "IoU.mirror": 0.6256000137329102, + "IoU.rug": 0.6655000305175781, + "IoU.field": 0.28030000686645506, + "IoU.armchair": 0.39220001220703127, + "IoU.seat": 0.5834000015258789, + "IoU.fence": 
0.40119998931884765, + "IoU.desk": 0.4438000106811523, + "IoU.rock": 0.44810001373291014, + "IoU.wardrobe": 0.569900016784668, + "IoU.lamp": 0.5216999816894531, + "IoU.bathtub": 0.7465000152587891, + "IoU.railing": 0.31790000915527344, + "IoU.cushion": 0.5309000015258789, + "IoU.base": 0.31489999771118166, + "IoU.box": 0.23010000228881836, + "IoU.column": 0.46180000305175783, + "IoU.signboard": 0.34060001373291016, + "IoU.chest of drawers": 0.38970001220703127, + "IoU.counter": 0.24459999084472656, + "IoU.sand": 0.38470001220703126, + "IoU.sink": 0.6744999694824219, + "IoU.skyscraper": 0.5643000030517578, + "IoU.fireplace": 0.6795999908447266, + "IoU.refrigerator": 0.7084999847412109, + "IoU.grandstand": 0.44470001220703126, + "IoU.path": 0.2663999938964844, + "IoU.stairs": 0.25549999237060544, + "IoU.runway": 0.6947000122070313, + "IoU.case": 0.5154999923706055, + "IoU.pool table": 0.9198000335693359, + "IoU.pillow": 0.5552999877929687, + "IoU.screen door": 0.5966999816894532, + "IoU.stairway": 0.28149999618530275, + "IoU.river": 0.19870000839233398, + "IoU.bridge": 0.6480000305175782, + "IoU.bookcase": 0.32119998931884763, + "IoU.blind": 0.4297999954223633, + "IoU.coffee table": 0.5218999862670899, + "IoU.toilet": 0.795, + "IoU.flower": 0.340099983215332, + "IoU.book": 0.4075, + "IoU.hill": 0.13430000305175782, + "IoU.bench": 0.3906999969482422, + "IoU.countertop": 0.576500015258789, + "IoU.stove": 0.6761000061035156, + "IoU.palm": 0.4779000091552734, + "IoU.kitchen island": 0.2996999931335449, + "IoU.computer": 0.7051999664306641, + "IoU.swivel chair": 0.4525, + "IoU.boat": 0.688499984741211, + "IoU.bar": 0.5027000045776367, + "IoU.arcade machine": 0.7788999938964843, + "IoU.hovel": 0.5616999816894531, + "IoU.bus": 0.7719999694824219, + "IoU.towel": 0.5379000091552735, + "IoU.light": 0.39610000610351564, + "IoU.truck": 0.21239999771118165, + "IoU.tower": 0.3465999984741211, + "IoU.chandelier": 0.5829000091552734, + "IoU.awning": 0.3502000045776367, + "IoU.streetlight": 0.19049999237060547, + "IoU.booth": 0.4129000091552734, + "IoU.television receiver": 0.6090000152587891, + "IoU.airplane": 0.545, + "IoU.dirt track": 0.21399999618530274, + "IoU.apparel": 0.3536000061035156, + "IoU.pole": 0.16049999237060547, + "IoU.land": 0.053499999046325686, + "IoU.bannister": 0.09039999961853028, + "IoU.escalator": 0.27700000762939453, + "IoU.ottoman": 0.4570000076293945, + "IoU.bottle": 0.35580001831054686, + "IoU.buffet": 0.6494999694824218, + "IoU.poster": 0.2680999946594238, + "IoU.stage": 0.13170000076293945, + "IoU.van": 0.318700008392334, + "IoU.ship": 0.7533000183105468, + "IoU.fountain": 0.21049999237060546, + "IoU.conveyer belt": 0.6487000274658203, + "IoU.canopy": 0.2695999908447266, + "IoU.washer": 0.67, + "IoU.plaything": 0.241200008392334, + "IoU.swimming pool": 0.5116999816894531, + "IoU.stool": 0.3018000030517578, + "IoU.barrel": 0.42529998779296874, + "IoU.basket": 0.22459999084472657, + "IoU.waterfall": 0.5202999877929687, + "IoU.tent": 0.8125, + "IoU.bag": 0.0840999984741211, + "IoU.minibike": 0.5327000045776367, + "IoU.cradle": 0.8051000213623047, + "IoU.oven": 0.18629999160766603, + "IoU.ball": 0.3906999969482422, + "IoU.food": 0.5379999923706055, + "IoU.step": 0.14319999694824218, + "IoU.tank": 0.494900016784668, + "IoU.trade name": 0.24629999160766602, + "IoU.microwave": 0.3493000030517578, + "IoU.pot": 0.38860000610351564, + "IoU.animal": 0.5736000061035156, + "IoU.bicycle": 0.47630001068115235, + "IoU.lake": 0.5370000076293945, + "IoU.dishwasher": 0.48529998779296873, + 
"IoU.screen": 0.6422000122070313, + "IoU.blanket": 0.12170000076293945, + "IoU.sculpture": 0.43090000152587893, + "IoU.hood": 0.48630001068115236, + "IoU.sconce": 0.3065999984741211, + "IoU.vase": 0.2631999969482422, + "IoU.traffic light": 0.24790000915527344, + "IoU.tray": 0.03380000114440918, + "IoU.ashcan": 0.3240000152587891, + "IoU.fan": 0.45119998931884764, + "IoU.pier": 0.26209999084472657, + "IoU.crt screen": 0.03480000019073486, + "IoU.plate": 0.45220001220703127, + "IoU.monitor": 0.02690000057220459, + "IoU.bulletin board": 0.4231000137329102, + "IoU.shower": 0.009900000095367432, + "IoU.radiator": 0.5597999954223633, + "IoU.glass": 0.07639999866485596, + "IoU.clock": 0.23579999923706055, + "IoU.flag": 0.4152000045776367, + "Acc.wall": 0.8666000366210938, + "Acc.building": 0.9231999969482422, + "Acc.sky": 0.9605000305175782, + "Acc.floor": 0.8837000274658203, + "Acc.tree": 0.8929000091552735, + "Acc.ceiling": 0.8947000122070312, + "Acc.road": 0.8875, + "Acc.bed ": 0.9583999633789062, + "Acc.windowpane": 0.7705000305175781, + "Acc.grass": 0.8190000152587891, + "Acc.cabinet": 0.7483999633789062, + "Acc.sidewalk": 0.8068000030517578, + "Acc.person": 0.9144000244140625, + "Acc.earth": 0.4856000137329102, + "Acc.door": 0.620099983215332, + "Acc.table": 0.6891000366210938, + "Acc.mountain": 0.7662999725341797, + "Acc.plant": 0.6079999923706054, + "Acc.curtain": 0.8533000183105469, + "Acc.chair": 0.6883999633789063, + "Acc.car": 0.9079000091552735, + "Acc.water": 0.6927999877929687, + "Acc.painting": 0.8718000030517579, + "Acc.sofa": 0.8331999969482422, + "Acc.shelf": 0.5950999832153321, + "Acc.house": 0.6483999633789063, + "Acc.sea": 0.8180999755859375, + "Acc.mirror": 0.7270999908447265, + "Acc.rug": 0.8080999755859375, + "Acc.field": 0.4638999938964844, + "Acc.armchair": 0.5754999923706055, + "Acc.seat": 0.8213999938964843, + "Acc.fence": 0.5654000091552734, + "Acc.desk": 0.7506999969482422, + "Acc.rock": 0.6136999893188476, + "Acc.wardrobe": 0.7352999877929688, + "Acc.lamp": 0.7161000061035157, + "Acc.bathtub": 0.7959999847412109, + "Acc.railing": 0.4418000030517578, + "Acc.cushion": 0.6648000335693359, + "Acc.base": 0.5588999938964844, + "Acc.box": 0.2996999931335449, + "Acc.column": 0.58, + "Acc.signboard": 0.4358000183105469, + "Acc.chest of drawers": 0.6047000122070313, + "Acc.counter": 0.3231999969482422, + "Acc.sand": 0.615099983215332, + "Acc.sink": 0.7556999969482422, + "Acc.skyscraper": 0.7191000366210938, + "Acc.fireplace": 0.8776999664306641, + "Acc.refrigerator": 0.8523999786376953, + "Acc.grandstand": 0.7290000152587891, + "Acc.path": 0.385, + "Acc.stairs": 0.4031999969482422, + "Acc.runway": 0.9222000122070313, + "Acc.case": 0.6720999908447266, + "Acc.pool table": 0.9762999725341797, + "Acc.pillow": 0.6580000305175782, + "Acc.screen door": 0.767300033569336, + "Acc.stairway": 0.3890999984741211, + "Acc.river": 0.42830001831054687, + "Acc.bridge": 0.8458999633789063, + "Acc.bookcase": 0.479900016784668, + "Acc.blind": 0.48069999694824217, + "Acc.coffee table": 0.8394000244140625, + "Acc.toilet": 0.9091000366210937, + "Acc.flower": 0.48880001068115236, + "Acc.book": 0.625099983215332, + "Acc.hill": 0.24100000381469727, + "Acc.bench": 0.48009998321533204, + "Acc.countertop": 0.7445999908447266, + "Acc.stove": 0.8173000335693359, + "Acc.palm": 0.6968000030517578, + "Acc.kitchen island": 0.6573999786376953, + "Acc.computer": 0.8737000274658203, + "Acc.swivel chair": 0.5804000091552735, + "Acc.boat": 0.8569999694824219, + "Acc.bar": 0.7087000274658203, + "Acc.arcade 
machine": 0.9201000213623047, + "Acc.hovel": 0.6405999755859375, + "Acc.bus": 0.9011000061035156, + "Acc.towel": 0.7261000061035157, + "Acc.light": 0.4809000015258789, + "Acc.truck": 0.32479999542236326, + "Acc.tower": 0.5429000091552735, + "Acc.chandelier": 0.729800033569336, + "Acc.awning": 0.4254999923706055, + "Acc.streetlight": 0.27469999313354493, + "Acc.booth": 0.4752000045776367, + "Acc.television receiver": 0.7668000030517578, + "Acc.airplane": 0.6498999786376953, + "Acc.dirt track": 0.37270000457763675, + "Acc.apparel": 0.4931000137329102, + "Acc.pole": 0.201299991607666, + "Acc.land": 0.11079999923706055, + "Acc.bannister": 0.1468000030517578, + "Acc.escalator": 0.31610000610351563, + "Acc.ottoman": 0.6379000091552735, + "Acc.bottle": 0.5456999969482422, + "Acc.buffet": 0.8280000305175781, + "Acc.poster": 0.3160000038146973, + "Acc.stage": 0.3390999984741211, + "Acc.van": 0.3833000183105469, + "Acc.ship": 0.7662999725341797, + "Acc.fountain": 0.21610000610351562, + "Acc.conveyer belt": 0.9255999755859375, + "Acc.canopy": 0.30809999465942384, + "Acc.washer": 0.6895999908447266, + "Acc.plaything": 0.34599998474121096, + "Acc.swimming pool": 0.8086000061035157, + "Acc.stool": 0.4356999969482422, + "Acc.barrel": 0.650199966430664, + "Acc.basket": 0.3127000045776367, + "Acc.waterfall": 0.6168999862670899, + "Acc.tent": 0.9933999633789062, + "Acc.bag": 0.08949999809265137, + "Acc.minibike": 0.6381000137329101, + "Acc.cradle": 0.9755000305175782, + "Acc.oven": 0.49590000152587893, + "Acc.ball": 0.5204999923706055, + "Acc.food": 0.6020000076293945, + "Acc.step": 0.17190000534057617, + "Acc.tank": 0.5868000030517578, + "Acc.trade name": 0.27540000915527346, + "Acc.microwave": 0.39380001068115233, + "Acc.pot": 0.4670999908447266, + "Acc.animal": 0.6325, + "Acc.bicycle": 0.7391999816894531, + "Acc.lake": 0.7565000152587891, + "Acc.dishwasher": 0.6347000122070312, + "Acc.screen": 0.8591999816894531, + "Acc.blanket": 0.1325, + "Acc.sculpture": 0.6093000030517578, + "Acc.hood": 0.5943000030517578, + "Acc.sconce": 0.41330001831054686, + "Acc.vase": 0.4570999908447266, + "Acc.traffic light": 0.5027000045776367, + "Acc.tray": 0.048899998664855955, + "Acc.ashcan": 0.44349998474121094, + "Acc.fan": 0.7452999877929688, + "Acc.pier": 0.716500015258789, + "Acc.crt screen": 0.11140000343322753, + "Acc.plate": 0.605999984741211, + "Acc.monitor": 0.028199999332427977, + "Acc.bulletin board": 0.6937999725341797, + "Acc.shower": 0.03369999885559082, + "Acc.radiator": 0.6666999816894531, + "Acc.glass": 0.08430000305175782, + "Acc.clock": 0.28030000686645506, + "Acc.flag": 0.47299999237060547 + } + }, + "56": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8195999999999999, + "mIoU": 0.4539, + "mAcc": 0.5882999999999999, + "IoU.wall": 0.7608999633789062, + "IoU.building": 0.8255999755859375, + "IoU.sky": 0.9412999725341797, + "IoU.floor": 0.8080999755859375, + "IoU.tree": 0.7398999786376953, + "IoU.ceiling": 0.8308000183105468, + "IoU.road": 0.8188999938964844, + "IoU.bed ": 0.8705999755859375, + "IoU.windowpane": 0.6063999938964844, + "IoU.grass": 0.6668000030517578, + "IoU.cabinet": 0.5977999877929687, + "IoU.sidewalk": 0.638499984741211, + "IoU.person": 0.7891000366210937, + "IoU.earth": 0.35400001525878905, + "IoU.door": 0.487400016784668, + "IoU.table": 0.548499984741211, + "IoU.mountain": 0.6186999893188476, + "IoU.plant": 0.5072000122070313, + "IoU.curtain": 0.7201000213623047, + "IoU.chair": 
0.5147999954223633, + "IoU.car": 0.8012000274658203, + "IoU.water": 0.5356000137329101, + "IoU.painting": 0.6716000366210938, + "IoU.sofa": 0.6469999694824219, + "IoU.shelf": 0.4315999984741211, + "IoU.house": 0.4493999862670898, + "IoU.sea": 0.5640999984741211, + "IoU.mirror": 0.6286000061035156, + "IoU.rug": 0.6716999816894531, + "IoU.field": 0.26780000686645505, + "IoU.armchair": 0.39540000915527346, + "IoU.seat": 0.5934000015258789, + "IoU.fence": 0.4191999816894531, + "IoU.desk": 0.4065999984741211, + "IoU.rock": 0.42779998779296874, + "IoU.wardrobe": 0.5258000183105469, + "IoU.lamp": 0.5159000015258789, + "IoU.bathtub": 0.7212999725341797, + "IoU.railing": 0.32549999237060545, + "IoU.cushion": 0.5309999847412109, + "IoU.base": 0.31079999923706053, + "IoU.box": 0.23299999237060548, + "IoU.column": 0.469900016784668, + "IoU.signboard": 0.34040000915527346, + "IoU.chest of drawers": 0.34549999237060547, + "IoU.counter": 0.23909999847412108, + "IoU.sand": 0.39169998168945314, + "IoU.sink": 0.6390999984741211, + "IoU.skyscraper": 0.512400016784668, + "IoU.fireplace": 0.678499984741211, + "IoU.refrigerator": 0.7329000091552734, + "IoU.grandstand": 0.38240001678466795, + "IoU.path": 0.26049999237060545, + "IoU.stairs": 0.30020000457763674, + "IoU.runway": 0.6975, + "IoU.case": 0.485, + "IoU.pool table": 0.8820999908447266, + "IoU.pillow": 0.5559000015258789, + "IoU.screen door": 0.5790000152587891, + "IoU.stairway": 0.30260000228881834, + "IoU.river": 0.12729999542236328, + "IoU.bridge": 0.6377999877929688, + "IoU.bookcase": 0.3308000183105469, + "IoU.blind": 0.3579999923706055, + "IoU.coffee table": 0.5109999847412109, + "IoU.toilet": 0.7758999633789062, + "IoU.flower": 0.3522999954223633, + "IoU.book": 0.4381999969482422, + "IoU.hill": 0.12420000076293945, + "IoU.bench": 0.4204999923706055, + "IoU.countertop": 0.5727999877929687, + "IoU.stove": 0.6595999908447265, + "IoU.palm": 0.48689998626708986, + "IoU.kitchen island": 0.3758000183105469, + "IoU.computer": 0.6769000244140625, + "IoU.swivel chair": 0.46279998779296877, + "IoU.boat": 0.6875, + "IoU.bar": 0.4506999969482422, + "IoU.arcade machine": 0.6931999969482422, + "IoU.hovel": 0.46939998626708984, + "IoU.bus": 0.7294999694824219, + "IoU.towel": 0.5408000183105469, + "IoU.light": 0.41959999084472654, + "IoU.truck": 0.19620000839233398, + "IoU.tower": 0.3327000045776367, + "IoU.chandelier": 0.5793999862670899, + "IoU.awning": 0.3383000183105469, + "IoU.streetlight": 0.18700000762939453, + "IoU.booth": 0.38529998779296876, + "IoU.television receiver": 0.5816999816894531, + "IoU.airplane": 0.5570999908447266, + "IoU.dirt track": 0.24979999542236328, + "IoU.apparel": 0.36009998321533204, + "IoU.pole": 0.17860000610351562, + "IoU.land": 0.049000000953674315, + "IoU.bannister": 0.07360000133514405, + "IoU.escalator": 0.4311000061035156, + "IoU.ottoman": 0.44729999542236326, + "IoU.bottle": 0.18219999313354493, + "IoU.buffet": 0.535099983215332, + "IoU.poster": 0.2221999931335449, + "IoU.stage": 0.11600000381469727, + "IoU.van": 0.38360000610351563, + "IoU.ship": 0.34630001068115235, + "IoU.fountain": 0.19610000610351563, + "IoU.conveyer belt": 0.6876999664306641, + "IoU.canopy": 0.22700000762939454, + "IoU.washer": 0.6463999938964844, + "IoU.plaything": 0.21770000457763672, + "IoU.swimming pool": 0.5161999893188477, + "IoU.stool": 0.28989999771118163, + "IoU.barrel": 0.11970000267028809, + "IoU.basket": 0.22040000915527344, + "IoU.waterfall": 0.5365000152587891, + "IoU.tent": 0.8501000213623047, + "IoU.bag": 0.08850000381469726, + 
"IoU.minibike": 0.6068999862670899, + "IoU.cradle": 0.8112000274658203, + "IoU.oven": 0.17600000381469727, + "IoU.ball": 0.3963999938964844, + "IoU.food": 0.5563000106811523, + "IoU.step": 0.13170000076293945, + "IoU.tank": 0.47970001220703123, + "IoU.trade name": 0.19270000457763672, + "IoU.microwave": 0.335099983215332, + "IoU.pot": 0.34110000610351565, + "IoU.animal": 0.5893999862670899, + "IoU.bicycle": 0.5370999908447266, + "IoU.lake": 0.37860000610351563, + "IoU.dishwasher": 0.537400016784668, + "IoU.screen": 0.6325, + "IoU.blanket": 0.14140000343322753, + "IoU.sculpture": 0.49759998321533205, + "IoU.hood": 0.4993000030517578, + "IoU.sconce": 0.2972999954223633, + "IoU.vase": 0.22450000762939454, + "IoU.traffic light": 0.25540000915527344, + "IoU.tray": 0.056700000762939455, + "IoU.ashcan": 0.3184000015258789, + "IoU.fan": 0.43380001068115237, + "IoU.pier": 0.2861000061035156, + "IoU.crt screen": 0.03609999895095825, + "IoU.plate": 0.45939998626708983, + "IoU.monitor": 0.03180000066757202, + "IoU.bulletin board": 0.4306999969482422, + "IoU.shower": 0.018799999952316283, + "IoU.radiator": 0.5502999877929687, + "IoU.glass": 0.06940000057220459, + "IoU.clock": 0.19850000381469726, + "IoU.flag": 0.4256999969482422, + "Acc.wall": 0.8618000030517579, + "Acc.building": 0.9272000122070313, + "Acc.sky": 0.9655000305175782, + "Acc.floor": 0.8888999938964843, + "Acc.tree": 0.8822000122070313, + "Acc.ceiling": 0.8895999908447265, + "Acc.road": 0.885, + "Acc.bed ": 0.9605000305175782, + "Acc.windowpane": 0.7716999816894531, + "Acc.grass": 0.826500015258789, + "Acc.cabinet": 0.7343000030517578, + "Acc.sidewalk": 0.8262000274658203, + "Acc.person": 0.914000015258789, + "Acc.earth": 0.47959999084472654, + "Acc.door": 0.6545999908447265, + "Acc.table": 0.6944999694824219, + "Acc.mountain": 0.7641999816894531, + "Acc.plant": 0.6038999938964844, + "Acc.curtain": 0.8604000091552735, + "Acc.chair": 0.6748000335693359, + "Acc.car": 0.9062999725341797, + "Acc.water": 0.6647000122070312, + "Acc.painting": 0.8841999816894531, + "Acc.sofa": 0.8027999877929688, + "Acc.shelf": 0.6129000091552734, + "Acc.house": 0.5579999923706055, + "Acc.sea": 0.7808000183105469, + "Acc.mirror": 0.7452999877929688, + "Acc.rug": 0.8002999877929687, + "Acc.field": 0.445, + "Acc.armchair": 0.6311000061035156, + "Acc.seat": 0.8116000366210937, + "Acc.fence": 0.610999984741211, + "Acc.desk": 0.7369999694824219, + "Acc.rock": 0.6293000030517578, + "Acc.wardrobe": 0.7238999938964844, + "Acc.lamp": 0.6926000213623047, + "Acc.bathtub": 0.7794000244140625, + "Acc.railing": 0.45310001373291015, + "Acc.cushion": 0.6462000274658203, + "Acc.base": 0.5568999862670898, + "Acc.box": 0.30639999389648437, + "Acc.column": 0.5938000106811523, + "Acc.signboard": 0.4559000015258789, + "Acc.chest of drawers": 0.5302000045776367, + "Acc.counter": 0.33279998779296877, + "Acc.sand": 0.5872999954223633, + "Acc.sink": 0.7420999908447266, + "Acc.skyscraper": 0.6430000305175781, + "Acc.fireplace": 0.8977999877929688, + "Acc.refrigerator": 0.8616000366210937, + "Acc.grandstand": 0.6987999725341797, + "Acc.path": 0.3643000030517578, + "Acc.stairs": 0.44560001373291014, + "Acc.runway": 0.9116999816894531, + "Acc.case": 0.6165999984741211, + "Acc.pool table": 0.9775, + "Acc.pillow": 0.6566000366210938, + "Acc.screen door": 0.7306999969482422, + "Acc.stairway": 0.39119998931884764, + "Acc.river": 0.25860000610351563, + "Acc.bridge": 0.819800033569336, + "Acc.bookcase": 0.5106999969482422, + "Acc.blind": 0.39599998474121095, + "Acc.coffee table": 
0.8544999694824219, + "Acc.toilet": 0.8975, + "Acc.flower": 0.5540000152587891, + "Acc.book": 0.6572000122070313, + "Acc.hill": 0.2375, + "Acc.bench": 0.4956999969482422, + "Acc.countertop": 0.7391999816894531, + "Acc.stove": 0.825, + "Acc.palm": 0.7019000244140625, + "Acc.kitchen island": 0.7469999694824219, + "Acc.computer": 0.836500015258789, + "Acc.swivel chair": 0.6526000213623047, + "Acc.boat": 0.8322000122070312, + "Acc.bar": 0.6234000015258789, + "Acc.arcade machine": 0.8208000183105468, + "Acc.hovel": 0.5111999893188477, + "Acc.bus": 0.905, + "Acc.towel": 0.6962000274658203, + "Acc.light": 0.5154000091552734, + "Acc.truck": 0.29649999618530276, + "Acc.tower": 0.4906000137329102, + "Acc.chandelier": 0.7801000213623047, + "Acc.awning": 0.4102999877929687, + "Acc.streetlight": 0.26860000610351564, + "Acc.booth": 0.5431999969482422, + "Acc.television receiver": 0.7756999969482422, + "Acc.airplane": 0.6469999694824219, + "Acc.dirt track": 0.30209999084472655, + "Acc.apparel": 0.48939998626708986, + "Acc.pole": 0.22389999389648438, + "Acc.land": 0.1, + "Acc.bannister": 0.12640000343322755, + "Acc.escalator": 0.5650999832153321, + "Acc.ottoman": 0.6159000015258789, + "Acc.bottle": 0.23049999237060548, + "Acc.buffet": 0.6998999786376953, + "Acc.poster": 0.26079999923706054, + "Acc.stage": 0.3377000045776367, + "Acc.van": 0.45110000610351564, + "Acc.ship": 0.3615999984741211, + "Acc.fountain": 0.20040000915527345, + "Acc.conveyer belt": 0.885199966430664, + "Acc.canopy": 0.2869000053405762, + "Acc.washer": 0.6608000183105469, + "Acc.plaything": 0.3522999954223633, + "Acc.swimming pool": 0.6498999786376953, + "Acc.stool": 0.41049999237060547, + "Acc.barrel": 0.5811000061035156, + "Acc.basket": 0.27700000762939453, + "Acc.waterfall": 0.63, + "Acc.tent": 0.9940000152587891, + "Acc.bag": 0.09819999694824219, + "Acc.minibike": 0.7454000091552735, + "Acc.cradle": 0.9641000366210938, + "Acc.oven": 0.4615000152587891, + "Acc.ball": 0.4986000061035156, + "Acc.food": 0.6193999862670898, + "Acc.step": 0.15649999618530275, + "Acc.tank": 0.5656999969482421, + "Acc.trade name": 0.2153000068664551, + "Acc.microwave": 0.3693000030517578, + "Acc.pot": 0.4127999877929687, + "Acc.animal": 0.6711000061035156, + "Acc.bicycle": 0.6919999694824219, + "Acc.lake": 0.6606999969482422, + "Acc.dishwasher": 0.7083999633789062, + "Acc.screen": 0.821500015258789, + "Acc.blanket": 0.15720000267028808, + "Acc.sculpture": 0.6448000335693359, + "Acc.hood": 0.5675, + "Acc.sconce": 0.37540000915527344, + "Acc.vase": 0.4052000045776367, + "Acc.traffic light": 0.46950000762939453, + "Acc.tray": 0.08689999580383301, + "Acc.ashcan": 0.4575, + "Acc.fan": 0.714800033569336, + "Acc.pier": 0.5211000061035156, + "Acc.crt screen": 0.1084000015258789, + "Acc.plate": 0.6186000061035156, + "Acc.monitor": 0.037300000190734865, + "Acc.bulletin board": 0.6587000274658203, + "Acc.shower": 0.05289999961853027, + "Acc.radiator": 0.6413999938964844, + "Acc.glass": 0.07599999904632568, + "Acc.clock": 0.22670000076293945, + "Acc.flag": 0.4884999847412109 + } + }, + "57": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8217, + "mIoU": 0.46619999999999995, + "mAcc": 0.6, + "IoU.wall": 0.7625, + "IoU.building": 0.8266999816894531, + "IoU.sky": 0.9408000183105468, + "IoU.floor": 0.8102999877929687, + "IoU.tree": 0.7431999969482422, + "IoU.ceiling": 0.834800033569336, + "IoU.road": 0.8181999969482422, + "IoU.bed ": 0.8676000213623047, + "IoU.windowpane": 
0.6034999847412109, + "IoU.grass": 0.6801000213623047, + "IoU.cabinet": 0.5924000167846679, + "IoU.sidewalk": 0.637400016784668, + "IoU.person": 0.7798999786376953, + "IoU.earth": 0.33990001678466797, + "IoU.door": 0.49509998321533205, + "IoU.table": 0.5666999816894531, + "IoU.mountain": 0.605099983215332, + "IoU.plant": 0.5070999908447266, + "IoU.curtain": 0.7223999786376953, + "IoU.chair": 0.5218999862670899, + "IoU.car": 0.8077999877929688, + "IoU.water": 0.5549000167846679, + "IoU.painting": 0.6886000061035156, + "IoU.sofa": 0.6462000274658203, + "IoU.shelf": 0.4231000137329102, + "IoU.house": 0.5029000091552734, + "IoU.sea": 0.6318000030517578, + "IoU.mirror": 0.6279000091552734, + "IoU.rug": 0.6691000366210937, + "IoU.field": 0.3031999969482422, + "IoU.armchair": 0.4040999984741211, + "IoU.seat": 0.5883000183105469, + "IoU.fence": 0.4179000091552734, + "IoU.desk": 0.4322999954223633, + "IoU.rock": 0.4165999984741211, + "IoU.wardrobe": 0.5225, + "IoU.lamp": 0.53, + "IoU.bathtub": 0.7348999786376953, + "IoU.railing": 0.33490001678466796, + "IoU.cushion": 0.52, + "IoU.base": 0.288700008392334, + "IoU.box": 0.23, + "IoU.column": 0.45919998168945314, + "IoU.signboard": 0.35009998321533203, + "IoU.chest of drawers": 0.3429000091552734, + "IoU.counter": 0.29459999084472654, + "IoU.sand": 0.36279998779296874, + "IoU.sink": 0.6523999786376953, + "IoU.skyscraper": 0.5133000183105468, + "IoU.fireplace": 0.6951000213623046, + "IoU.refrigerator": 0.7245999908447266, + "IoU.grandstand": 0.42150001525878905, + "IoU.path": 0.2759000015258789, + "IoU.stairs": 0.28579999923706056, + "IoU.runway": 0.6762000274658203, + "IoU.case": 0.489900016784668, + "IoU.pool table": 0.8873999786376953, + "IoU.pillow": 0.5743999862670899, + "IoU.screen door": 0.5022000122070313, + "IoU.stairway": 0.2830999946594238, + "IoU.river": 0.14649999618530274, + "IoU.bridge": 0.6833000183105469, + "IoU.bookcase": 0.3381999969482422, + "IoU.blind": 0.39470001220703127, + "IoU.coffee table": 0.5820000076293945, + "IoU.toilet": 0.7995999908447265, + "IoU.flower": 0.35220001220703123, + "IoU.book": 0.43509998321533205, + "IoU.hill": 0.11520000457763672, + "IoU.bench": 0.4154000091552734, + "IoU.countertop": 0.5559999847412109, + "IoU.stove": 0.6962000274658203, + "IoU.palm": 0.49009998321533205, + "IoU.kitchen island": 0.3615999984741211, + "IoU.computer": 0.7068000030517578, + "IoU.swivel chair": 0.5129999923706055, + "IoU.boat": 0.6698000335693359, + "IoU.bar": 0.5190999984741211, + "IoU.arcade machine": 0.7294000244140625, + "IoU.hovel": 0.52, + "IoU.bus": 0.7361000061035157, + "IoU.towel": 0.5659999847412109, + "IoU.light": 0.38610000610351564, + "IoU.truck": 0.205, + "IoU.tower": 0.24510000228881837, + "IoU.chandelier": 0.5970000076293945, + "IoU.awning": 0.322599983215332, + "IoU.streetlight": 0.206200008392334, + "IoU.booth": 0.3479000091552734, + "IoU.television receiver": 0.579900016784668, + "IoU.airplane": 0.5920999908447265, + "IoU.dirt track": 0.105600004196167, + "IoU.apparel": 0.3213999938964844, + "IoU.pole": 0.26680000305175783, + "IoU.land": 0.04639999866485596, + "IoU.bannister": 0.12510000228881835, + "IoU.escalator": 0.4102000045776367, + "IoU.ottoman": 0.46290000915527346, + "IoU.bottle": 0.30639999389648437, + "IoU.buffet": 0.5606999969482422, + "IoU.poster": 0.20280000686645508, + "IoU.stage": 0.12609999656677245, + "IoU.van": 0.37130001068115237, + "IoU.ship": 0.7643000030517578, + "IoU.fountain": 0.19989999771118164, + "IoU.conveyer belt": 0.6443000030517578, + "IoU.canopy": 0.21389999389648437, + 
"IoU.washer": 0.6555999755859375, + "IoU.plaything": 0.21069999694824218, + "IoU.swimming pool": 0.6375999832153321, + "IoU.stool": 0.3056999969482422, + "IoU.barrel": 0.38209999084472657, + "IoU.basket": 0.23549999237060548, + "IoU.waterfall": 0.5929000091552734, + "IoU.tent": 0.8911000061035156, + "IoU.bag": 0.13380000114440918, + "IoU.minibike": 0.5977999877929687, + "IoU.cradle": 0.8098000335693359, + "IoU.oven": 0.181200008392334, + "IoU.ball": 0.39529998779296877, + "IoU.food": 0.5318999862670899, + "IoU.step": 0.1413000011444092, + "IoU.tank": 0.4779000091552734, + "IoU.trade name": 0.2336000061035156, + "IoU.microwave": 0.33880001068115234, + "IoU.pot": 0.33669998168945314, + "IoU.animal": 0.6573999786376953, + "IoU.bicycle": 0.5236000061035156, + "IoU.lake": 0.5263999938964844, + "IoU.dishwasher": 0.5718000030517578, + "IoU.screen": 0.5638999938964844, + "IoU.blanket": 0.105, + "IoU.sculpture": 0.48619998931884767, + "IoU.hood": 0.4890999984741211, + "IoU.sconce": 0.36259998321533204, + "IoU.vase": 0.23420000076293945, + "IoU.traffic light": 0.26079999923706054, + "IoU.tray": 0.044800000190734865, + "IoU.ashcan": 0.3143000030517578, + "IoU.fan": 0.4786000061035156, + "IoU.pier": 0.4061000061035156, + "IoU.crt screen": 0.0007999999821186065, + "IoU.plate": 0.4343000030517578, + "IoU.monitor": 0.022200000286102296, + "IoU.bulletin board": 0.4025, + "IoU.shower": 0.005899999737739563, + "IoU.radiator": 0.5695999908447266, + "IoU.glass": 0.08770000457763671, + "IoU.clock": 0.23110000610351564, + "IoU.flag": 0.40240001678466797, + "Acc.wall": 0.867699966430664, + "Acc.building": 0.9226000213623047, + "Acc.sky": 0.9647000122070313, + "Acc.floor": 0.8905000305175781, + "Acc.tree": 0.8826000213623046, + "Acc.ceiling": 0.8976999664306641, + "Acc.road": 0.8919000244140625, + "Acc.bed ": 0.9595999908447266, + "Acc.windowpane": 0.7630000305175781, + "Acc.grass": 0.8168000030517578, + "Acc.cabinet": 0.7106999969482422, + "Acc.sidewalk": 0.8061000061035156, + "Acc.person": 0.9255000305175781, + "Acc.earth": 0.45360000610351564, + "Acc.door": 0.6562000274658203, + "Acc.table": 0.7195999908447266, + "Acc.mountain": 0.7719000244140625, + "Acc.plant": 0.6031999969482422, + "Acc.curtain": 0.8654000091552735, + "Acc.chair": 0.6855999755859375, + "Acc.car": 0.915, + "Acc.water": 0.6768000030517578, + "Acc.painting": 0.8623000335693359, + "Acc.sofa": 0.8137999725341797, + "Acc.shelf": 0.6127000045776367, + "Acc.house": 0.6143000030517578, + "Acc.sea": 0.822699966430664, + "Acc.mirror": 0.7362999725341797, + "Acc.rug": 0.73, + "Acc.field": 0.5154000091552734, + "Acc.armchair": 0.6272000122070313, + "Acc.seat": 0.829800033569336, + "Acc.fence": 0.6202999877929688, + "Acc.desk": 0.7073000335693359, + "Acc.rock": 0.6318999862670899, + "Acc.wardrobe": 0.7148999786376953, + "Acc.lamp": 0.6719000244140625, + "Acc.bathtub": 0.7859999847412109, + "Acc.railing": 0.4686000061035156, + "Acc.cushion": 0.6204000091552735, + "Acc.base": 0.46869998931884765, + "Acc.box": 0.28, + "Acc.column": 0.6043000030517578, + "Acc.signboard": 0.45630001068115233, + "Acc.chest of drawers": 0.5766999816894531, + "Acc.counter": 0.3863999938964844, + "Acc.sand": 0.5115000152587891, + "Acc.sink": 0.7581999969482421, + "Acc.skyscraper": 0.6527999877929688, + "Acc.fireplace": 0.9015000152587891, + "Acc.refrigerator": 0.8769999694824219, + "Acc.grandstand": 0.7022000122070312, + "Acc.path": 0.36689998626708986, + "Acc.stairs": 0.41689998626708985, + "Acc.runway": 0.9356999969482422, + "Acc.case": 0.7472000122070312, + "Acc.pool table": 
0.9776000213623047, + "Acc.pillow": 0.7248999786376953, + "Acc.screen door": 0.6352000045776367, + "Acc.stairway": 0.35900001525878905, + "Acc.river": 0.36880001068115237, + "Acc.bridge": 0.845, + "Acc.bookcase": 0.571500015258789, + "Acc.blind": 0.44349998474121094, + "Acc.coffee table": 0.8112999725341797, + "Acc.toilet": 0.8958999633789062, + "Acc.flower": 0.5316999816894531, + "Acc.book": 0.6281000137329101, + "Acc.hill": 0.22940000534057617, + "Acc.bench": 0.5027000045776367, + "Acc.countertop": 0.7133999633789062, + "Acc.stove": 0.8329000091552734, + "Acc.palm": 0.6937000274658203, + "Acc.kitchen island": 0.7506999969482422, + "Acc.computer": 0.8634999847412109, + "Acc.swivel chair": 0.7255000305175782, + "Acc.boat": 0.7969000244140625, + "Acc.bar": 0.7269000244140625, + "Acc.arcade machine": 0.7863999938964844, + "Acc.hovel": 0.5936999893188477, + "Acc.bus": 0.9231999969482422, + "Acc.towel": 0.7023999786376953, + "Acc.light": 0.442599983215332, + "Acc.truck": 0.320099983215332, + "Acc.tower": 0.39169998168945314, + "Acc.chandelier": 0.7698000335693359, + "Acc.awning": 0.3813000106811523, + "Acc.streetlight": 0.26899999618530274, + "Acc.booth": 0.5613999938964844, + "Acc.television receiver": 0.7715000152587891, + "Acc.airplane": 0.6780999755859375, + "Acc.dirt track": 0.144399995803833, + "Acc.apparel": 0.4475, + "Acc.pole": 0.39110000610351564, + "Acc.land": 0.12300000190734864, + "Acc.bannister": 0.24100000381469727, + "Acc.escalator": 0.5563999938964844, + "Acc.ottoman": 0.5816999816894531, + "Acc.bottle": 0.465099983215332, + "Acc.buffet": 0.7483000183105468, + "Acc.poster": 0.24920000076293947, + "Acc.stage": 0.37689998626708987, + "Acc.van": 0.44099998474121094, + "Acc.ship": 0.8320999908447265, + "Acc.fountain": 0.20579999923706055, + "Acc.conveyer belt": 0.9027999877929688, + "Acc.canopy": 0.276299991607666, + "Acc.washer": 0.6576000213623047, + "Acc.plaything": 0.3163999938964844, + "Acc.swimming pool": 0.8065000152587891, + "Acc.stool": 0.42509998321533204, + "Acc.barrel": 0.634900016784668, + "Acc.basket": 0.31120000839233397, + "Acc.waterfall": 0.7056999969482421, + "Acc.tent": 0.9902999877929688, + "Acc.bag": 0.16639999389648438, + "Acc.minibike": 0.7626000213623046, + "Acc.cradle": 0.9712999725341797, + "Acc.oven": 0.48650001525878905, + "Acc.ball": 0.4988999938964844, + "Acc.food": 0.612599983215332, + "Acc.step": 0.17309999465942383, + "Acc.tank": 0.6131999969482422, + "Acc.trade name": 0.26930000305175783, + "Acc.microwave": 0.3793000030517578, + "Acc.pot": 0.402599983215332, + "Acc.animal": 0.7066000366210937, + "Acc.bicycle": 0.7172000122070312, + "Acc.lake": 0.6412000274658203, + "Acc.dishwasher": 0.6870999908447266, + "Acc.screen": 0.8458999633789063, + "Acc.blanket": 0.114399995803833, + "Acc.sculpture": 0.6281999969482421, + "Acc.hood": 0.5902999877929688, + "Acc.sconce": 0.4915999984741211, + "Acc.vase": 0.4070999908447266, + "Acc.traffic light": 0.42720001220703124, + "Acc.tray": 0.0719000005722046, + "Acc.ashcan": 0.45729999542236327, + "Acc.fan": 0.6816000366210937, + "Acc.pier": 0.7580000305175781, + "Acc.crt screen": 0.0025, + "Acc.plate": 0.5597000122070312, + "Acc.monitor": 0.024100000858306884, + "Acc.bulletin board": 0.562599983215332, + "Acc.shower": 0.041599998474121096, + "Acc.radiator": 0.6911000061035156, + "Acc.glass": 0.09819999694824219, + "Acc.clock": 0.2834000015258789, + "Acc.flag": 0.44529998779296875 + } + }, + "58": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + 
"metric": { + "aAcc": 0.8227, + "mIoU": 0.45990000000000003, + "mAcc": 0.5589999999999999, + "IoU.wall": 0.7594000244140625, + "IoU.building": 0.8256999969482421, + "IoU.sky": 0.939800033569336, + "IoU.floor": 0.8016000366210938, + "IoU.tree": 0.7373999786376954, + "IoU.ceiling": 0.8305999755859375, + "IoU.road": 0.8240000152587891, + "IoU.bed ": 0.8673000335693359, + "IoU.windowpane": 0.597599983215332, + "IoU.grass": 0.6725, + "IoU.cabinet": 0.5961000061035157, + "IoU.sidewalk": 0.6408999633789062, + "IoU.person": 0.785199966430664, + "IoU.earth": 0.3615999984741211, + "IoU.door": 0.48369998931884767, + "IoU.table": 0.5765999984741211, + "IoU.mountain": 0.5881999969482422, + "IoU.plant": 0.4990999984741211, + "IoU.curtain": 0.7287000274658203, + "IoU.chair": 0.5277000045776368, + "IoU.car": 0.822699966430664, + "IoU.water": 0.596599998474121, + "IoU.painting": 0.6938999938964844, + "IoU.sofa": 0.6444999694824218, + "IoU.shelf": 0.4284000015258789, + "IoU.house": 0.45860000610351564, + "IoU.sea": 0.639900016784668, + "IoU.mirror": 0.6247999954223633, + "IoU.rug": 0.6458999633789062, + "IoU.field": 0.2830999946594238, + "IoU.armchair": 0.39939998626708983, + "IoU.seat": 0.6081999969482422, + "IoU.fence": 0.4347999954223633, + "IoU.desk": 0.44919998168945313, + "IoU.rock": 0.42830001831054687, + "IoU.wardrobe": 0.5193999862670898, + "IoU.lamp": 0.5445000076293945, + "IoU.bathtub": 0.7208999633789063, + "IoU.railing": 0.3193000030517578, + "IoU.cushion": 0.5247999954223633, + "IoU.base": 0.2688999938964844, + "IoU.box": 0.253700008392334, + "IoU.column": 0.46450000762939453, + "IoU.signboard": 0.3277000045776367, + "IoU.chest of drawers": 0.33740001678466797, + "IoU.counter": 0.2478000068664551, + "IoU.sand": 0.3741999816894531, + "IoU.sink": 0.6637000274658204, + "IoU.skyscraper": 0.48080001831054686, + "IoU.fireplace": 0.6975, + "IoU.refrigerator": 0.774000015258789, + "IoU.grandstand": 0.42689998626708986, + "IoU.path": 0.2215999984741211, + "IoU.stairs": 0.29270000457763673, + "IoU.runway": 0.5940000152587891, + "IoU.case": 0.49020000457763674, + "IoU.pool table": 0.9197000122070312, + "IoU.pillow": 0.5375, + "IoU.screen door": 0.55, + "IoU.stairway": 0.3383000183105469, + "IoU.river": 0.17530000686645508, + "IoU.bridge": 0.7079000091552734, + "IoU.bookcase": 0.36290000915527343, + "IoU.blind": 0.3833000183105469, + "IoU.coffee table": 0.5997999954223633, + "IoU.toilet": 0.8204000091552734, + "IoU.flower": 0.34959999084472654, + "IoU.book": 0.4318000030517578, + "IoU.hill": 0.08489999771118165, + "IoU.bench": 0.4538999938964844, + "IoU.countertop": 0.49770000457763675, + "IoU.stove": 0.7094999694824219, + "IoU.palm": 0.44110000610351563, + "IoU.kitchen island": 0.37310001373291013, + "IoU.computer": 0.7197000122070313, + "IoU.swivel chair": 0.45689998626708983, + "IoU.boat": 0.6988999938964844, + "IoU.bar": 0.5072999954223633, + "IoU.arcade machine": 0.5366999816894531, + "IoU.hovel": 0.5056999969482422, + "IoU.bus": 0.8076000213623047, + "IoU.towel": 0.5663000106811523, + "IoU.light": 0.23549999237060548, + "IoU.truck": 0.22280000686645507, + "IoU.tower": 0.29, + "IoU.chandelier": 0.5863000106811523, + "IoU.awning": 0.2620000076293945, + "IoU.streetlight": 0.16209999084472657, + "IoU.booth": 0.3136000061035156, + "IoU.television receiver": 0.5736999893188477, + "IoU.airplane": 0.5725, + "IoU.dirt track": 0.07210000038146973, + "IoU.apparel": 0.36529998779296874, + "IoU.pole": 0.23079999923706054, + "IoU.land": 0.01309999942779541, + "IoU.bannister": 0.07019999980926514, + 
"IoU.escalator": 0.3618000030517578, + "IoU.ottoman": 0.4527000045776367, + "IoU.bottle": 0.32810001373291015, + "IoU.buffet": 0.3975, + "IoU.poster": 0.21520000457763672, + "IoU.stage": 0.1540999984741211, + "IoU.van": 0.3611000061035156, + "IoU.ship": 0.4306999969482422, + "IoU.fountain": 0.20010000228881836, + "IoU.conveyer belt": 0.7862000274658203, + "IoU.canopy": 0.23340000152587892, + "IoU.washer": 0.6925, + "IoU.plaything": 0.23790000915527343, + "IoU.swimming pool": 0.5416999816894531, + "IoU.stool": 0.28459999084472654, + "IoU.barrel": 0.5863999938964843, + "IoU.basket": 0.23100000381469726, + "IoU.waterfall": 0.6356999969482422, + "IoU.tent": 0.9409999847412109, + "IoU.bag": 0.08119999885559082, + "IoU.minibike": 0.555099983215332, + "IoU.cradle": 0.7997000122070312, + "IoU.oven": 0.20809999465942383, + "IoU.ball": 0.3958000183105469, + "IoU.food": 0.5316999816894531, + "IoU.step": 0.12270000457763672, + "IoU.tank": 0.5465000152587891, + "IoU.trade name": 0.16260000228881835, + "IoU.microwave": 0.31829999923706054, + "IoU.pot": 0.3436000061035156, + "IoU.animal": 0.6302000045776367, + "IoU.bicycle": 0.4665999984741211, + "IoU.lake": 0.5818999862670898, + "IoU.dishwasher": 0.5858000183105468, + "IoU.screen": 0.6851000213623046, + "IoU.blanket": 0.11489999771118165, + "IoU.sculpture": 0.4393000030517578, + "IoU.hood": 0.45790000915527346, + "IoU.sconce": 0.30989999771118165, + "IoU.vase": 0.26059999465942385, + "IoU.traffic light": 0.22459999084472657, + "IoU.tray": 0.008199999928474427, + "IoU.ashcan": 0.3763999938964844, + "IoU.fan": 0.4861000061035156, + "IoU.pier": 0.2940999984741211, + "IoU.crt screen": 0.0, + "IoU.plate": 0.4493999862670898, + "IoU.monitor": 0.081899995803833, + "IoU.bulletin board": 0.37509998321533206, + "IoU.shower": 0.00949999988079071, + "IoU.radiator": 0.4929999923706055, + "IoU.glass": 0.06860000133514405, + "IoU.clock": 0.20950000762939452, + "IoU.flag": 0.38599998474121094, + "Acc.wall": 0.893499984741211, + "Acc.building": 0.9323000335693359, + "Acc.sky": 0.977699966430664, + "Acc.floor": 0.9102999877929687, + "Acc.tree": 0.8748000335693359, + "Acc.ceiling": 0.9016999816894531, + "Acc.road": 0.9065000152587891, + "Acc.bed ": 0.9575, + "Acc.windowpane": 0.7376000213623047, + "Acc.grass": 0.8136000061035156, + "Acc.cabinet": 0.7434999847412109, + "Acc.sidewalk": 0.7909999847412109, + "Acc.person": 0.8956999969482422, + "Acc.earth": 0.5379000091552735, + "Acc.door": 0.6548999786376953, + "Acc.table": 0.7294000244140625, + "Acc.mountain": 0.7211000061035157, + "Acc.plant": 0.5879000091552734, + "Acc.curtain": 0.8180000305175781, + "Acc.chair": 0.6681999969482422, + "Acc.car": 0.8931999969482421, + "Acc.water": 0.7244000244140625, + "Acc.painting": 0.8352999877929688, + "Acc.sofa": 0.8012999725341797, + "Acc.shelf": 0.6040000152587891, + "Acc.house": 0.5422999954223633, + "Acc.sea": 0.7898999786376953, + "Acc.mirror": 0.685199966430664, + "Acc.rug": 0.6883999633789063, + "Acc.field": 0.4590000152587891, + "Acc.armchair": 0.6184999847412109, + "Acc.seat": 0.7955000305175781, + "Acc.fence": 0.5936999893188477, + "Acc.desk": 0.658499984741211, + "Acc.rock": 0.6136000061035156, + "Acc.wardrobe": 0.6701000213623047, + "Acc.lamp": 0.6437000274658203, + "Acc.bathtub": 0.7597000122070312, + "Acc.railing": 0.46540000915527346, + "Acc.cushion": 0.6093000030517578, + "Acc.base": 0.46240001678466797, + "Acc.box": 0.3281999969482422, + "Acc.column": 0.5970999908447265, + "Acc.signboard": 0.40889999389648435, + "Acc.chest of drawers": 0.5622999954223633, + 
"Acc.counter": 0.32880001068115233, + "Acc.sand": 0.5015999984741211, + "Acc.sink": 0.706500015258789, + "Acc.skyscraper": 0.57, + "Acc.fireplace": 0.8688999938964844, + "Acc.refrigerator": 0.8494000244140625, + "Acc.grandstand": 0.7163999938964843, + "Acc.path": 0.28600000381469726, + "Acc.stairs": 0.395099983215332, + "Acc.runway": 0.7944000244140625, + "Acc.case": 0.6458999633789062, + "Acc.pool table": 0.9601000213623047, + "Acc.pillow": 0.6222000122070312, + "Acc.screen door": 0.6433000183105468, + "Acc.stairway": 0.44110000610351563, + "Acc.river": 0.41150001525878904, + "Acc.bridge": 0.8344999694824219, + "Acc.bookcase": 0.5902000045776368, + "Acc.blind": 0.4165999984741211, + "Acc.coffee table": 0.7733000183105468, + "Acc.toilet": 0.8798999786376953, + "Acc.flower": 0.47950000762939454, + "Acc.book": 0.5518999862670898, + "Acc.hill": 0.154399995803833, + "Acc.bench": 0.5261999893188477, + "Acc.countertop": 0.6598999786376953, + "Acc.stove": 0.7818000030517578, + "Acc.palm": 0.5545999908447266, + "Acc.kitchen island": 0.592599983215332, + "Acc.computer": 0.8298999786376953, + "Acc.swivel chair": 0.5527999877929688, + "Acc.boat": 0.8119000244140625, + "Acc.bar": 0.694800033569336, + "Acc.arcade machine": 0.5825, + "Acc.hovel": 0.547400016784668, + "Acc.bus": 0.9163999938964844, + "Acc.towel": 0.6505999755859375, + "Acc.light": 0.24600000381469728, + "Acc.truck": 0.3040999984741211, + "Acc.tower": 0.3990000152587891, + "Acc.chandelier": 0.7112000274658203, + "Acc.awning": 0.2801000022888184, + "Acc.streetlight": 0.18420000076293946, + "Acc.booth": 0.4290999984741211, + "Acc.television receiver": 0.710999984741211, + "Acc.airplane": 0.614900016784668, + "Acc.dirt track": 0.08329999923706055, + "Acc.apparel": 0.48689998626708986, + "Acc.pole": 0.3078000068664551, + "Acc.land": 0.022799999713897706, + "Acc.bannister": 0.09439999580383301, + "Acc.escalator": 0.4315999984741211, + "Acc.ottoman": 0.5429000091552735, + "Acc.bottle": 0.48709999084472655, + "Acc.buffet": 0.46029998779296877, + "Acc.poster": 0.2890999984741211, + "Acc.stage": 0.33220001220703127, + "Acc.van": 0.42060001373291017, + "Acc.ship": 0.4488999938964844, + "Acc.fountain": 0.20350000381469727, + "Acc.conveyer belt": 0.8744999694824219, + "Acc.canopy": 0.3006999969482422, + "Acc.washer": 0.694800033569336, + "Acc.plaything": 0.35770000457763673, + "Acc.swimming pool": 0.6291999816894531, + "Acc.stool": 0.34580001831054685, + "Acc.barrel": 0.6245000076293945, + "Acc.basket": 0.2688999938964844, + "Acc.waterfall": 0.6858999633789062, + "Acc.tent": 0.9894999694824219, + "Acc.bag": 0.08739999771118163, + "Acc.minibike": 0.6458000183105469, + "Acc.cradle": 0.9588999938964844, + "Acc.oven": 0.5884999847412109, + "Acc.ball": 0.4527000045776367, + "Acc.food": 0.6090999984741211, + "Acc.step": 0.13760000228881836, + "Acc.tank": 0.6168000030517579, + "Acc.trade name": 0.17389999389648436, + "Acc.microwave": 0.3308000183105469, + "Acc.pot": 0.38919998168945313, + "Acc.animal": 0.6562999725341797, + "Acc.bicycle": 0.6313999938964844, + "Acc.lake": 0.6816000366210937, + "Acc.dishwasher": 0.6587000274658203, + "Acc.screen": 0.877300033569336, + "Acc.blanket": 0.12369999885559083, + "Acc.sculpture": 0.579900016784668, + "Acc.hood": 0.4881999969482422, + "Acc.sconce": 0.3527000045776367, + "Acc.vase": 0.36200000762939455, + "Acc.traffic light": 0.2979000091552734, + "Acc.tray": 0.01100000023841858, + "Acc.ashcan": 0.5441999816894532, + "Acc.fan": 0.581500015258789, + "Acc.pier": 0.4186000061035156, + "Acc.crt screen": 0.0, + 
"Acc.plate": 0.5225999832153321, + "Acc.monitor": 0.088100004196167, + "Acc.bulletin board": 0.46430000305175784, + "Acc.shower": 0.012699999809265138, + "Acc.radiator": 0.5391999816894532, + "Acc.glass": 0.07380000114440918, + "Acc.clock": 0.2347999954223633, + "Acc.flag": 0.41380001068115235 + } + }, + "59": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8284999999999999, + "mIoU": 0.48969999999999997, + "mAcc": 0.6186, + "IoU.wall": 0.770199966430664, + "IoU.building": 0.8279000091552734, + "IoU.sky": 0.9373999786376953, + "IoU.floor": 0.8133000183105469, + "IoU.tree": 0.74, + "IoU.ceiling": 0.8323000335693359, + "IoU.road": 0.8280000305175781, + "IoU.bed ": 0.8812999725341797, + "IoU.windowpane": 0.6140999984741211, + "IoU.grass": 0.7030999755859375, + "IoU.cabinet": 0.591500015258789, + "IoU.sidewalk": 0.6498000335693359, + "IoU.person": 0.7969000244140625, + "IoU.earth": 0.3741999816894531, + "IoU.door": 0.4740999984741211, + "IoU.table": 0.5754000091552735, + "IoU.mountain": 0.597599983215332, + "IoU.plant": 0.500999984741211, + "IoU.curtain": 0.7220999908447265, + "IoU.chair": 0.5583000183105469, + "IoU.car": 0.8327999877929687, + "IoU.water": 0.5840000152587891, + "IoU.painting": 0.7068000030517578, + "IoU.sofa": 0.6654000091552734, + "IoU.shelf": 0.43779998779296875, + "IoU.house": 0.49400001525878906, + "IoU.sea": 0.6598999786376953, + "IoU.mirror": 0.6415000152587891, + "IoU.rug": 0.6934999847412109, + "IoU.field": 0.32029998779296875, + "IoU.armchair": 0.45380001068115233, + "IoU.seat": 0.6437999725341796, + "IoU.fence": 0.4161000061035156, + "IoU.desk": 0.48580001831054687, + "IoU.rock": 0.5252999877929687, + "IoU.wardrobe": 0.5397999954223632, + "IoU.lamp": 0.5556999969482422, + "IoU.bathtub": 0.7995999908447265, + "IoU.railing": 0.3429000091552734, + "IoU.cushion": 0.5677999877929687, + "IoU.base": 0.34950000762939454, + "IoU.box": 0.25860000610351563, + "IoU.column": 0.4727999877929687, + "IoU.signboard": 0.3452999877929688, + "IoU.chest of drawers": 0.3110000038146973, + "IoU.counter": 0.30139999389648436, + "IoU.sand": 0.43259998321533205, + "IoU.sink": 0.7055000305175781, + "IoU.skyscraper": 0.5465000152587891, + "IoU.fireplace": 0.7286000061035156, + "IoU.refrigerator": 0.6837000274658203, + "IoU.grandstand": 0.49200000762939455, + "IoU.path": 0.268799991607666, + "IoU.stairs": 0.28170000076293944, + "IoU.runway": 0.7219999694824218, + "IoU.case": 0.5304000091552734, + "IoU.pool table": 0.9083999633789063, + "IoU.pillow": 0.5077000045776368, + "IoU.screen door": 0.5388999938964844, + "IoU.stairway": 0.3431999969482422, + "IoU.river": 0.18700000762939453, + "IoU.bridge": 0.519900016784668, + "IoU.bookcase": 0.347400016784668, + "IoU.blind": 0.3195000076293945, + "IoU.coffee table": 0.5313000106811523, + "IoU.toilet": 0.8405999755859375, + "IoU.flower": 0.38110000610351563, + "IoU.book": 0.4488999938964844, + "IoU.hill": 0.135, + "IoU.bench": 0.519900016784668, + "IoU.countertop": 0.5345999908447265, + "IoU.stove": 0.7405999755859375, + "IoU.palm": 0.4956999969482422, + "IoU.kitchen island": 0.3625, + "IoU.computer": 0.7437000274658203, + "IoU.swivel chair": 0.4916999816894531, + "IoU.boat": 0.7066000366210937, + "IoU.bar": 0.48119998931884767, + "IoU.arcade machine": 0.6912000274658203, + "IoU.hovel": 0.39689998626708983, + "IoU.bus": 0.8788999938964843, + "IoU.towel": 0.6379000091552735, + "IoU.light": 0.43290000915527344, + "IoU.truck": 0.3784000015258789, + "IoU.tower": 
0.30620000839233397, + "IoU.chandelier": 0.6220999908447266, + "IoU.awning": 0.31659999847412107, + "IoU.streetlight": 0.20709999084472655, + "IoU.booth": 0.4590999984741211, + "IoU.television receiver": 0.6268000030517578, + "IoU.airplane": 0.5783000183105469, + "IoU.dirt track": 0.015, + "IoU.apparel": 0.3168000030517578, + "IoU.pole": 0.1568000030517578, + "IoU.land": 0.06570000171661378, + "IoU.bannister": 0.12739999771118163, + "IoU.escalator": 0.48209999084472654, + "IoU.ottoman": 0.40310001373291016, + "IoU.bottle": 0.18600000381469728, + "IoU.buffet": 0.5238999938964843, + "IoU.poster": 0.19969999313354492, + "IoU.stage": 0.18389999389648437, + "IoU.van": 0.36369998931884767, + "IoU.ship": 0.40200000762939453, + "IoU.fountain": 0.25010000228881835, + "IoU.conveyer belt": 0.6880000305175781, + "IoU.canopy": 0.23569999694824217, + "IoU.washer": 0.7526999664306641, + "IoU.plaything": 0.30829999923706053, + "IoU.swimming pool": 0.6102000045776367, + "IoU.stool": 0.3743000030517578, + "IoU.barrel": 0.6041999816894531, + "IoU.basket": 0.24360000610351562, + "IoU.waterfall": 0.6252999877929688, + "IoU.tent": 0.8755999755859375, + "IoU.bag": 0.15460000038146973, + "IoU.minibike": 0.6980000305175781, + "IoU.cradle": 0.7933999633789063, + "IoU.oven": 0.3716999816894531, + "IoU.ball": 0.48209999084472654, + "IoU.food": 0.422400016784668, + "IoU.step": 0.12869999885559083, + "IoU.tank": 0.5365000152587891, + "IoU.trade name": 0.1915999984741211, + "IoU.microwave": 0.7866999816894531, + "IoU.pot": 0.44150001525878907, + "IoU.animal": 0.6481999969482422, + "IoU.bicycle": 0.5661000061035156, + "IoU.lake": 0.6819999694824219, + "IoU.dishwasher": 0.6675, + "IoU.screen": 0.5127000045776368, + "IoU.blanket": 0.09819999694824219, + "IoU.sculpture": 0.653499984741211, + "IoU.hood": 0.49650001525878906, + "IoU.sconce": 0.29270000457763673, + "IoU.vase": 0.34110000610351565, + "IoU.traffic light": 0.2663999938964844, + "IoU.tray": 0.07119999885559082, + "IoU.ashcan": 0.4465999984741211, + "IoU.fan": 0.5177999877929688, + "IoU.pier": 0.1881999969482422, + "IoU.crt screen": 0.04070000171661377, + "IoU.plate": 0.4715999984741211, + "IoU.monitor": 0.16959999084472657, + "IoU.bulletin board": 0.47080001831054685, + "IoU.shower": 0.009900000095367432, + "IoU.radiator": 0.6213999938964844, + "IoU.glass": 0.11699999809265137, + "IoU.clock": 0.2909000015258789, + "IoU.flag": 0.6081000137329101, + "Acc.wall": 0.867699966430664, + "Acc.building": 0.9301000213623047, + "Acc.sky": 0.9616999816894531, + "Acc.floor": 0.8962000274658203, + "Acc.tree": 0.8891000366210937, + "Acc.ceiling": 0.902300033569336, + "Acc.road": 0.8876000213623046, + "Acc.bed ": 0.9683000183105469, + "Acc.windowpane": 0.762300033569336, + "Acc.grass": 0.8527999877929687, + "Acc.cabinet": 0.7244999694824219, + "Acc.sidewalk": 0.825, + "Acc.person": 0.9145999908447265, + "Acc.earth": 0.5391999816894532, + "Acc.door": 0.6318000030517578, + "Acc.table": 0.7338999938964844, + "Acc.mountain": 0.7293000030517578, + "Acc.plant": 0.5836000061035156, + "Acc.curtain": 0.8801999664306641, + "Acc.chair": 0.7063999938964843, + "Acc.car": 0.9241999816894532, + "Acc.water": 0.705, + "Acc.painting": 0.8526000213623047, + "Acc.sofa": 0.8083999633789063, + "Acc.shelf": 0.5845000076293946, + "Acc.house": 0.6815000152587891, + "Acc.sea": 0.8237000274658203, + "Acc.mirror": 0.7502999877929688, + "Acc.rug": 0.8058000183105469, + "Acc.field": 0.42580001831054687, + "Acc.armchair": 0.664000015258789, + "Acc.seat": 0.8348999786376953, + "Acc.fence": 0.5663999938964843, + 
"Acc.desk": 0.7619000244140625, + "Acc.rock": 0.677300033569336, + "Acc.wardrobe": 0.73, + "Acc.lamp": 0.7358000183105469, + "Acc.bathtub": 0.855, + "Acc.railing": 0.4581999969482422, + "Acc.cushion": 0.7320999908447265, + "Acc.base": 0.6061000061035157, + "Acc.box": 0.32880001068115233, + "Acc.column": 0.6002999877929688, + "Acc.signboard": 0.43200000762939456, + "Acc.chest of drawers": 0.6234999847412109, + "Acc.counter": 0.43689998626708987, + "Acc.sand": 0.6347000122070312, + "Acc.sink": 0.7694999694824218, + "Acc.skyscraper": 0.6629000091552735, + "Acc.fireplace": 0.9218000030517578, + "Acc.refrigerator": 0.8226000213623047, + "Acc.grandstand": 0.7538999938964843, + "Acc.path": 0.39310001373291015, + "Acc.stairs": 0.4134999847412109, + "Acc.runway": 0.9583000183105469, + "Acc.case": 0.7106999969482422, + "Acc.pool table": 0.9708000183105469, + "Acc.pillow": 0.5718000030517578, + "Acc.screen door": 0.6633000183105469, + "Acc.stairway": 0.48310001373291017, + "Acc.river": 0.4695999908447266, + "Acc.bridge": 0.7216000366210937, + "Acc.bookcase": 0.5322999954223633, + "Acc.blind": 0.36520000457763674, + "Acc.coffee table": 0.8708000183105469, + "Acc.toilet": 0.9055000305175781, + "Acc.flower": 0.5684999847412109, + "Acc.book": 0.6368999862670899, + "Acc.hill": 0.26049999237060545, + "Acc.bench": 0.620099983215332, + "Acc.countertop": 0.6862000274658203, + "Acc.stove": 0.8636000061035156, + "Acc.palm": 0.6969999694824218, + "Acc.kitchen island": 0.700999984741211, + "Acc.computer": 0.8845999908447265, + "Acc.swivel chair": 0.6354999923706055, + "Acc.boat": 0.8572000122070312, + "Acc.bar": 0.6911000061035156, + "Acc.arcade machine": 0.7812999725341797, + "Acc.hovel": 0.4038999938964844, + "Acc.bus": 0.9308000183105469, + "Acc.towel": 0.7505999755859375, + "Acc.light": 0.5191999816894531, + "Acc.truck": 0.48279998779296873, + "Acc.tower": 0.4584000015258789, + "Acc.chandelier": 0.7775, + "Acc.awning": 0.3809000015258789, + "Acc.streetlight": 0.28280000686645507, + "Acc.booth": 0.5931999969482422, + "Acc.television receiver": 0.7563999938964844, + "Acc.airplane": 0.6593000030517578, + "Acc.dirt track": 0.0325, + "Acc.apparel": 0.42830001831054687, + "Acc.pole": 0.20139999389648439, + "Acc.land": 0.1309000015258789, + "Acc.bannister": 0.1975, + "Acc.escalator": 0.6573999786376953, + "Acc.ottoman": 0.5913000106811523, + "Acc.bottle": 0.22149999618530272, + "Acc.buffet": 0.7283999633789062, + "Acc.poster": 0.24510000228881837, + "Acc.stage": 0.46, + "Acc.van": 0.41779998779296873, + "Acc.ship": 0.41400001525878904, + "Acc.fountain": 0.25379999160766603, + "Acc.conveyer belt": 0.9323999786376953, + "Acc.canopy": 0.325099983215332, + "Acc.washer": 0.7761000061035156, + "Acc.plaything": 0.4609000015258789, + "Acc.swimming pool": 0.8602999877929688, + "Acc.stool": 0.5109000015258789, + "Acc.barrel": 0.7926999664306641, + "Acc.basket": 0.35580001831054686, + "Acc.waterfall": 0.8829000091552734, + "Acc.tent": 0.9926000213623047, + "Acc.bag": 0.17200000762939452, + "Acc.minibike": 0.790199966430664, + "Acc.cradle": 0.9819000244140625, + "Acc.oven": 0.5081999969482421, + "Acc.ball": 0.5506999969482422, + "Acc.food": 0.45529998779296876, + "Acc.step": 0.1777000045776367, + "Acc.tank": 0.6452999877929687, + "Acc.trade name": 0.201299991607666, + "Acc.microwave": 0.8844999694824218, + "Acc.pot": 0.5245000076293945, + "Acc.animal": 0.6844999694824219, + "Acc.bicycle": 0.7158000183105468, + "Acc.lake": 0.7770999908447266, + "Acc.dishwasher": 0.7166999816894531, + "Acc.screen": 0.7222000122070312, + 
"Acc.blanket": 0.11239999771118164, + "Acc.sculpture": 0.7583999633789062, + "Acc.hood": 0.6751000213623047, + "Acc.sconce": 0.35560001373291017, + "Acc.vase": 0.5320000076293945, + "Acc.traffic light": 0.45470001220703127, + "Acc.tray": 0.09960000038146972, + "Acc.ashcan": 0.6147999954223633, + "Acc.fan": 0.7345999908447266, + "Acc.pier": 0.40099998474121096, + "Acc.crt screen": 0.12149999618530273, + "Acc.plate": 0.6443000030517578, + "Acc.monitor": 0.19860000610351564, + "Acc.bulletin board": 0.7241000366210938, + "Acc.shower": 0.05, + "Acc.radiator": 0.7205999755859375, + "Acc.glass": 0.125600004196167, + "Acc.clock": 0.3159000015258789, + "Acc.flag": 0.7012999725341796 + } + }, + "60": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8243, + "mIoU": 0.4782, + "mAcc": 0.6106, + "IoU.wall": 0.7622000122070313, + "IoU.building": 0.8288999938964844, + "IoU.sky": 0.9362000274658203, + "IoU.floor": 0.8141000366210938, + "IoU.tree": 0.7345999908447266, + "IoU.ceiling": 0.8301000213623047, + "IoU.road": 0.8247000122070313, + "IoU.bed ": 0.8708000183105469, + "IoU.windowpane": 0.6102999877929688, + "IoU.grass": 0.6840000152587891, + "IoU.cabinet": 0.586500015258789, + "IoU.sidewalk": 0.6476000213623047, + "IoU.person": 0.7958999633789062, + "IoU.earth": 0.365, + "IoU.door": 0.4818000030517578, + "IoU.table": 0.5775, + "IoU.mountain": 0.5945999908447266, + "IoU.plant": 0.5225999832153321, + "IoU.curtain": 0.730999984741211, + "IoU.chair": 0.5441999816894532, + "IoU.car": 0.8276000213623047, + "IoU.water": 0.5231000137329102, + "IoU.painting": 0.6991999816894531, + "IoU.sofa": 0.6526999664306641, + "IoU.shelf": 0.43340000152587893, + "IoU.house": 0.5336000061035157, + "IoU.sea": 0.6006999969482422, + "IoU.mirror": 0.6368999862670899, + "IoU.rug": 0.6995999908447266, + "IoU.field": 0.3134000015258789, + "IoU.armchair": 0.3827000045776367, + "IoU.seat": 0.640199966430664, + "IoU.fence": 0.3606999969482422, + "IoU.desk": 0.45849998474121095, + "IoU.rock": 0.46279998779296877, + "IoU.wardrobe": 0.5434000015258789, + "IoU.lamp": 0.5409999847412109, + "IoU.bathtub": 0.8158000183105468, + "IoU.railing": 0.33599998474121096, + "IoU.cushion": 0.5470000076293945, + "IoU.base": 0.3634000015258789, + "IoU.box": 0.251200008392334, + "IoU.column": 0.4734000015258789, + "IoU.signboard": 0.3425, + "IoU.chest of drawers": 0.3104999923706055, + "IoU.counter": 0.3390000152587891, + "IoU.sand": 0.43, + "IoU.sink": 0.6605999755859375, + "IoU.skyscraper": 0.6519999694824219, + "IoU.fireplace": 0.7101999664306641, + "IoU.refrigerator": 0.7134999847412109, + "IoU.grandstand": 0.39049999237060545, + "IoU.path": 0.25829999923706054, + "IoU.stairs": 0.2543000030517578, + "IoU.runway": 0.6888999938964844, + "IoU.case": 0.492400016784668, + "IoU.pool table": 0.914000015258789, + "IoU.pillow": 0.5236999893188476, + "IoU.screen door": 0.5677999877929687, + "IoU.stairway": 0.2669000053405762, + "IoU.river": 0.12489999771118164, + "IoU.bridge": 0.6527999877929688, + "IoU.bookcase": 0.33349998474121095, + "IoU.blind": 0.3933000183105469, + "IoU.coffee table": 0.5183000183105468, + "IoU.toilet": 0.8344000244140625, + "IoU.flower": 0.34400001525878904, + "IoU.book": 0.45060001373291014, + "IoU.hill": 0.11670000076293946, + "IoU.bench": 0.4727999877929687, + "IoU.countertop": 0.507400016784668, + "IoU.stove": 0.7362999725341797, + "IoU.palm": 0.5041999816894531, + "IoU.kitchen island": 0.36259998321533204, + "IoU.computer": 
0.7208999633789063, + "IoU.swivel chair": 0.42270000457763673, + "IoU.boat": 0.6229000091552734, + "IoU.bar": 0.5577000045776367, + "IoU.arcade machine": 0.7319000244140625, + "IoU.hovel": 0.4784000015258789, + "IoU.bus": 0.8608999633789063, + "IoU.towel": 0.5856000137329102, + "IoU.light": 0.40310001373291016, + "IoU.truck": 0.34200000762939453, + "IoU.tower": 0.2847999954223633, + "IoU.chandelier": 0.609900016784668, + "IoU.awning": 0.30440000534057615, + "IoU.streetlight": 0.20440000534057617, + "IoU.booth": 0.3377000045776367, + "IoU.television receiver": 0.6272000122070313, + "IoU.airplane": 0.5754999923706055, + "IoU.dirt track": 0.018600000143051146, + "IoU.apparel": 0.3106999969482422, + "IoU.pole": 0.1734000015258789, + "IoU.land": 0.04409999847412109, + "IoU.bannister": 0.099399995803833, + "IoU.escalator": 0.36189998626708986, + "IoU.ottoman": 0.4266999816894531, + "IoU.bottle": 0.17690000534057618, + "IoU.buffet": 0.629000015258789, + "IoU.poster": 0.2592000007629395, + "IoU.stage": 0.16280000686645507, + "IoU.van": 0.4361000061035156, + "IoU.ship": 0.8173999786376953, + "IoU.fountain": 0.20399999618530273, + "IoU.conveyer belt": 0.6730000305175782, + "IoU.canopy": 0.3256999969482422, + "IoU.washer": 0.6922000122070312, + "IoU.plaything": 0.2539999961853027, + "IoU.swimming pool": 0.6243000030517578, + "IoU.stool": 0.34040000915527346, + "IoU.barrel": 0.5866999816894531, + "IoU.basket": 0.22989999771118164, + "IoU.waterfall": 0.6179999923706054, + "IoU.tent": 0.8622000122070312, + "IoU.bag": 0.16899999618530273, + "IoU.minibike": 0.6712000274658203, + "IoU.cradle": 0.8168000030517578, + "IoU.oven": 0.20059999465942382, + "IoU.ball": 0.5097999954223633, + "IoU.food": 0.5272000122070313, + "IoU.step": 0.136899995803833, + "IoU.tank": 0.5329000091552735, + "IoU.trade name": 0.18079999923706055, + "IoU.microwave": 0.43040000915527343, + "IoU.pot": 0.34, + "IoU.animal": 0.5761999893188476, + "IoU.bicycle": 0.5497000122070312, + "IoU.lake": 0.5197000122070312, + "IoU.dishwasher": 0.6368999862670899, + "IoU.screen": 0.5359000015258789, + "IoU.blanket": 0.14819999694824218, + "IoU.sculpture": 0.6608999633789062, + "IoU.hood": 0.5075, + "IoU.sconce": 0.273799991607666, + "IoU.vase": 0.3234000015258789, + "IoU.traffic light": 0.25510000228881835, + "IoU.tray": 0.04639999866485596, + "IoU.ashcan": 0.4061000061035156, + "IoU.fan": 0.47700000762939454, + "IoU.pier": 0.20280000686645508, + "IoU.crt screen": 0.04309999942779541, + "IoU.plate": 0.48520000457763673, + "IoU.monitor": 0.08340000152587891, + "IoU.bulletin board": 0.4558000183105469, + "IoU.shower": 0.013799999952316283, + "IoU.radiator": 0.6138999938964844, + "IoU.glass": 0.10890000343322753, + "IoU.clock": 0.2777000045776367, + "IoU.flag": 0.3114999961853027, + "Acc.wall": 0.8691000366210937, + "Acc.building": 0.922300033569336, + "Acc.sky": 0.9601999664306641, + "Acc.floor": 0.8923999786376953, + "Acc.tree": 0.8859999847412109, + "Acc.ceiling": 0.889000015258789, + "Acc.road": 0.8826999664306641, + "Acc.bed ": 0.9622000122070312, + "Acc.windowpane": 0.7618000030517578, + "Acc.grass": 0.8387999725341797, + "Acc.cabinet": 0.7252999877929688, + "Acc.sidewalk": 0.8272000122070312, + "Acc.person": 0.9141999816894532, + "Acc.earth": 0.5054999923706055, + "Acc.door": 0.6513999938964844, + "Acc.table": 0.7193000030517578, + "Acc.mountain": 0.745, + "Acc.plant": 0.6097000122070313, + "Acc.curtain": 0.8734999847412109, + "Acc.chair": 0.6955000305175781, + "Acc.car": 0.9255999755859375, + "Acc.water": 0.6277999877929688, + "Acc.painting": 
0.8370999908447265, + "Acc.sofa": 0.8319999694824218, + "Acc.shelf": 0.606500015258789, + "Acc.house": 0.7248999786376953, + "Acc.sea": 0.7751000213623047, + "Acc.mirror": 0.7493000030517578, + "Acc.rug": 0.7887000274658204, + "Acc.field": 0.4638999938964844, + "Acc.armchair": 0.5288000106811523, + "Acc.seat": 0.8347000122070313, + "Acc.fence": 0.5063999938964844, + "Acc.desk": 0.7743000030517578, + "Acc.rock": 0.6133000183105469, + "Acc.wardrobe": 0.7622000122070313, + "Acc.lamp": 0.7333000183105469, + "Acc.bathtub": 0.8868000030517578, + "Acc.railing": 0.4356999969482422, + "Acc.cushion": 0.6831999969482422, + "Acc.base": 0.6104000091552735, + "Acc.box": 0.31610000610351563, + "Acc.column": 0.5924000167846679, + "Acc.signboard": 0.43849998474121094, + "Acc.chest of drawers": 0.630099983215332, + "Acc.counter": 0.48650001525878905, + "Acc.sand": 0.6202999877929688, + "Acc.sink": 0.7505000305175781, + "Acc.skyscraper": 0.7718000030517578, + "Acc.fireplace": 0.9161000061035156, + "Acc.refrigerator": 0.8318000030517578, + "Acc.grandstand": 0.7362000274658204, + "Acc.path": 0.3761999893188477, + "Acc.stairs": 0.38860000610351564, + "Acc.runway": 0.9044999694824218, + "Acc.case": 0.6605000305175781, + "Acc.pool table": 0.9730999755859375, + "Acc.pillow": 0.6061000061035157, + "Acc.screen door": 0.7312000274658204, + "Acc.stairway": 0.3609999847412109, + "Acc.river": 0.3740999984741211, + "Acc.bridge": 0.7837999725341797, + "Acc.bookcase": 0.51, + "Acc.blind": 0.4540000152587891, + "Acc.coffee table": 0.8648999786376953, + "Acc.toilet": 0.9073999786376953, + "Acc.flower": 0.48080001831054686, + "Acc.book": 0.6416000366210938, + "Acc.hill": 0.266200008392334, + "Acc.bench": 0.6020000076293945, + "Acc.countertop": 0.643499984741211, + "Acc.stove": 0.8468000030517578, + "Acc.palm": 0.7497000122070312, + "Acc.kitchen island": 0.6716999816894531, + "Acc.computer": 0.892300033569336, + "Acc.swivel chair": 0.5495000076293945, + "Acc.boat": 0.8543000030517578, + "Acc.bar": 0.7738999938964843, + "Acc.arcade machine": 0.811500015258789, + "Acc.hovel": 0.5327000045776367, + "Acc.bus": 0.9448999786376953, + "Acc.towel": 0.773499984741211, + "Acc.light": 0.47689998626708985, + "Acc.truck": 0.4463999938964844, + "Acc.tower": 0.39110000610351564, + "Acc.chandelier": 0.7530000305175781, + "Acc.awning": 0.3643000030517578, + "Acc.streetlight": 0.2775, + "Acc.booth": 0.5145999908447265, + "Acc.television receiver": 0.7772000122070313, + "Acc.airplane": 0.6508999633789062, + "Acc.dirt track": 0.025799999237060545, + "Acc.apparel": 0.44279998779296875, + "Acc.pole": 0.21809999465942384, + "Acc.land": 0.08489999771118165, + "Acc.bannister": 0.144399995803833, + "Acc.escalator": 0.4613999938964844, + "Acc.ottoman": 0.6444999694824218, + "Acc.bottle": 0.21139999389648437, + "Acc.buffet": 0.8804000091552734, + "Acc.poster": 0.32599998474121095, + "Acc.stage": 0.43340000152587893, + "Acc.van": 0.5216999816894531, + "Acc.ship": 0.8498999786376953, + "Acc.fountain": 0.2052000045776367, + "Acc.conveyer belt": 0.9318000030517578, + "Acc.canopy": 0.40130001068115234, + "Acc.washer": 0.7625, + "Acc.plaything": 0.3877000045776367, + "Acc.swimming pool": 0.8351000213623047, + "Acc.stool": 0.48869998931884767, + "Acc.barrel": 0.7008999633789063, + "Acc.basket": 0.36720001220703125, + "Acc.waterfall": 0.8373999786376953, + "Acc.tent": 0.9931999969482422, + "Acc.bag": 0.1931999969482422, + "Acc.minibike": 0.7987999725341797, + "Acc.cradle": 0.9651999664306641, + "Acc.oven": 0.5270999908447266, + "Acc.ball": 0.6347000122070312, + 
"Acc.food": 0.5708000183105468, + "Acc.step": 0.17959999084472655, + "Acc.tank": 0.6359000015258789, + "Acc.trade name": 0.18899999618530272, + "Acc.microwave": 0.4875, + "Acc.pot": 0.4054000091552734, + "Acc.animal": 0.6233000183105468, + "Acc.bicycle": 0.7101000213623047, + "Acc.lake": 0.7427999877929687, + "Acc.dishwasher": 0.719800033569336, + "Acc.screen": 0.8056999969482422, + "Acc.blanket": 0.16700000762939454, + "Acc.sculpture": 0.8011000061035156, + "Acc.hood": 0.6168000030517579, + "Acc.sconce": 0.33380001068115234, + "Acc.vase": 0.5179000091552735, + "Acc.traffic light": 0.42689998626708986, + "Acc.tray": 0.06119999885559082, + "Acc.ashcan": 0.5543999862670899, + "Acc.fan": 0.6951999664306641, + "Acc.pier": 0.4584000015258789, + "Acc.crt screen": 0.11220000267028808, + "Acc.plate": 0.6480000305175782, + "Acc.monitor": 0.09279999732971192, + "Acc.bulletin board": 0.6898999786376954, + "Acc.shower": 0.05, + "Acc.radiator": 0.7156999969482422, + "Acc.glass": 0.11890000343322754, + "Acc.clock": 0.3334000015258789, + "Acc.flag": 0.34549999237060547 + } + }, + "61": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8236, + "mIoU": 0.46840000000000004, + "mAcc": 0.6048, + "IoU.wall": 0.7611000061035156, + "IoU.building": 0.8305999755859375, + "IoU.sky": 0.938499984741211, + "IoU.floor": 0.8119999694824219, + "IoU.tree": 0.7408000183105469, + "IoU.ceiling": 0.8333999633789062, + "IoU.road": 0.8208999633789062, + "IoU.bed ": 0.8713999938964844, + "IoU.windowpane": 0.6111000061035157, + "IoU.grass": 0.6676000213623047, + "IoU.cabinet": 0.6004000091552735, + "IoU.sidewalk": 0.6479000091552735, + "IoU.person": 0.7959999847412109, + "IoU.earth": 0.367400016784668, + "IoU.door": 0.4684000015258789, + "IoU.table": 0.55, + "IoU.mountain": 0.590900001525879, + "IoU.plant": 0.5272000122070313, + "IoU.curtain": 0.7281999969482422, + "IoU.chair": 0.5261999893188477, + "IoU.car": 0.8248999786376953, + "IoU.water": 0.5890999984741211, + "IoU.painting": 0.6918000030517578, + "IoU.sofa": 0.6512000274658203, + "IoU.shelf": 0.447599983215332, + "IoU.house": 0.505099983215332, + "IoU.sea": 0.6568000030517578, + "IoU.mirror": 0.6366999816894531, + "IoU.rug": 0.697699966430664, + "IoU.field": 0.2978000068664551, + "IoU.armchair": 0.36450000762939455, + "IoU.seat": 0.6361999893188477, + "IoU.fence": 0.32740001678466796, + "IoU.desk": 0.44279998779296875, + "IoU.rock": 0.4811000061035156, + "IoU.wardrobe": 0.5554000091552734, + "IoU.lamp": 0.5352999877929687, + "IoU.bathtub": 0.7563999938964844, + "IoU.railing": 0.35819999694824217, + "IoU.cushion": 0.5379000091552735, + "IoU.base": 0.337599983215332, + "IoU.box": 0.2559000015258789, + "IoU.column": 0.4647999954223633, + "IoU.signboard": 0.3502000045776367, + "IoU.chest of drawers": 0.3315999984741211, + "IoU.counter": 0.27260000228881837, + "IoU.sand": 0.40700000762939453, + "IoU.sink": 0.6766999816894531, + "IoU.skyscraper": 0.6651000213623047, + "IoU.fireplace": 0.6751000213623047, + "IoU.refrigerator": 0.7373000335693359, + "IoU.grandstand": 0.43709999084472656, + "IoU.path": 0.27600000381469725, + "IoU.stairs": 0.271200008392334, + "IoU.runway": 0.6736000061035157, + "IoU.case": 0.49509998321533205, + "IoU.pool table": 0.9208000183105469, + "IoU.pillow": 0.5577999877929688, + "IoU.screen door": 0.5831999969482422, + "IoU.stairway": 0.30200000762939455, + "IoU.river": 0.17920000076293946, + "IoU.bridge": 0.6365999984741211, + "IoU.bookcase": 0.3215000152587891, + 
"IoU.blind": 0.4268000030517578, + "IoU.coffee table": 0.5036999893188476, + "IoU.toilet": 0.7926999664306641, + "IoU.flower": 0.32919998168945314, + "IoU.book": 0.445, + "IoU.hill": 0.11340000152587891, + "IoU.bench": 0.41200000762939454, + "IoU.countertop": 0.5011000061035156, + "IoU.stove": 0.6948999786376953, + "IoU.palm": 0.4840999984741211, + "IoU.kitchen island": 0.308799991607666, + "IoU.computer": 0.7316999816894532, + "IoU.swivel chair": 0.41700000762939454, + "IoU.boat": 0.7076000213623047, + "IoU.bar": 0.5145000076293945, + "IoU.arcade machine": 0.7676999664306641, + "IoU.hovel": 0.5904999923706055, + "IoU.bus": 0.7872000122070313, + "IoU.towel": 0.587400016784668, + "IoU.light": 0.41200000762939454, + "IoU.truck": 0.3209000015258789, + "IoU.tower": 0.3572999954223633, + "IoU.chandelier": 0.6125, + "IoU.awning": 0.27360000610351565, + "IoU.streetlight": 0.19950000762939454, + "IoU.booth": 0.32990001678466796, + "IoU.television receiver": 0.6519999694824219, + "IoU.airplane": 0.5459999847412109, + "IoU.dirt track": 0.114399995803833, + "IoU.apparel": 0.34099998474121096, + "IoU.pole": 0.1372999954223633, + "IoU.land": 0.05269999980926514, + "IoU.bannister": 0.105600004196167, + "IoU.escalator": 0.29399999618530276, + "IoU.ottoman": 0.4316999816894531, + "IoU.bottle": 0.17600000381469727, + "IoU.buffet": 0.6705999755859375, + "IoU.poster": 0.2929000091552734, + "IoU.stage": 0.1331999969482422, + "IoU.van": 0.39180000305175783, + "IoU.ship": 0.31190000534057616, + "IoU.fountain": 0.20540000915527343, + "IoU.conveyer belt": 0.627400016784668, + "IoU.canopy": 0.31020000457763675, + "IoU.washer": 0.700999984741211, + "IoU.plaything": 0.2720000076293945, + "IoU.swimming pool": 0.6858000183105468, + "IoU.stool": 0.3118000030517578, + "IoU.barrel": 0.4818000030517578, + "IoU.basket": 0.21559999465942384, + "IoU.waterfall": 0.5066999816894531, + "IoU.tent": 0.8541000366210938, + "IoU.bag": 0.12460000038146973, + "IoU.minibike": 0.644000015258789, + "IoU.cradle": 0.792300033569336, + "IoU.oven": 0.17399999618530274, + "IoU.ball": 0.49650001525878906, + "IoU.food": 0.5193000030517578, + "IoU.step": 0.10430000305175781, + "IoU.tank": 0.5154000091552734, + "IoU.trade name": 0.2493000030517578, + "IoU.microwave": 0.36400001525878906, + "IoU.pot": 0.43759998321533206, + "IoU.animal": 0.6181000137329101, + "IoU.bicycle": 0.5161000061035156, + "IoU.lake": 0.447599983215332, + "IoU.dishwasher": 0.5179999923706055, + "IoU.screen": 0.48630001068115236, + "IoU.blanket": 0.14489999771118164, + "IoU.sculpture": 0.5540000152587891, + "IoU.hood": 0.47400001525878904, + "IoU.sconce": 0.28950000762939454, + "IoU.vase": 0.29739999771118164, + "IoU.traffic light": 0.21989999771118163, + "IoU.tray": 0.046799998283386234, + "IoU.ashcan": 0.34400001525878904, + "IoU.fan": 0.4613999938964844, + "IoU.pier": 0.2559000015258789, + "IoU.crt screen": 0.03490000009536743, + "IoU.plate": 0.44310001373291014, + "IoU.monitor": 0.025299999713897705, + "IoU.bulletin board": 0.43810001373291013, + "IoU.shower": 0.008999999761581421, + "IoU.radiator": 0.5879999923706055, + "IoU.glass": 0.10760000228881836, + "IoU.clock": 0.2043000030517578, + "IoU.flag": 0.3625, + "Acc.wall": 0.8647000122070313, + "Acc.building": 0.9237000274658204, + "Acc.sky": 0.961500015258789, + "Acc.floor": 0.890199966430664, + "Acc.tree": 0.8941999816894531, + "Acc.ceiling": 0.8944999694824218, + "Acc.road": 0.8856999969482422, + "Acc.bed ": 0.9594999694824219, + "Acc.windowpane": 0.7641000366210937, + "Acc.grass": 0.8220999908447265, + "Acc.cabinet": 
0.7387999725341797, + "Acc.sidewalk": 0.8130999755859375, + "Acc.person": 0.9180999755859375, + "Acc.earth": 0.5084999847412109, + "Acc.door": 0.6247999954223633, + "Acc.table": 0.6954000091552734, + "Acc.mountain": 0.7369000244140625, + "Acc.plant": 0.6102000045776367, + "Acc.curtain": 0.8690000152587891, + "Acc.chair": 0.683499984741211, + "Acc.car": 0.9129000091552735, + "Acc.water": 0.7179000091552734, + "Acc.painting": 0.8691000366210937, + "Acc.sofa": 0.8426000213623047, + "Acc.shelf": 0.622599983215332, + "Acc.house": 0.6651000213623047, + "Acc.sea": 0.7930999755859375, + "Acc.mirror": 0.7415000152587891, + "Acc.rug": 0.7998000335693359, + "Acc.field": 0.5036000061035156, + "Acc.armchair": 0.5272000122070313, + "Acc.seat": 0.8373000335693359, + "Acc.fence": 0.44220001220703126, + "Acc.desk": 0.7498999786376953, + "Acc.rock": 0.6448999786376953, + "Acc.wardrobe": 0.7387999725341797, + "Acc.lamp": 0.7416000366210938, + "Acc.bathtub": 0.8194999694824219, + "Acc.railing": 0.4709000015258789, + "Acc.cushion": 0.659000015258789, + "Acc.base": 0.582400016784668, + "Acc.box": 0.33939998626708984, + "Acc.column": 0.5840999984741211, + "Acc.signboard": 0.45599998474121095, + "Acc.chest of drawers": 0.5941999816894531, + "Acc.counter": 0.3915999984741211, + "Acc.sand": 0.652300033569336, + "Acc.sink": 0.7605999755859375, + "Acc.skyscraper": 0.7906999969482422, + "Acc.fireplace": 0.9170999908447266, + "Acc.refrigerator": 0.8741000366210937, + "Acc.grandstand": 0.7741000366210937, + "Acc.path": 0.3927000045776367, + "Acc.stairs": 0.41259998321533203, + "Acc.runway": 0.8666000366210938, + "Acc.case": 0.6338999938964843, + "Acc.pool table": 0.9744999694824219, + "Acc.pillow": 0.6679000091552735, + "Acc.screen door": 0.7706999969482422, + "Acc.stairway": 0.41470001220703123, + "Acc.river": 0.42220001220703124, + "Acc.bridge": 0.8466000366210937, + "Acc.bookcase": 0.46849998474121096, + "Acc.blind": 0.5115000152587891, + "Acc.coffee table": 0.8451000213623047, + "Acc.toilet": 0.9054000091552734, + "Acc.flower": 0.4759999847412109, + "Acc.book": 0.6537999725341797, + "Acc.hill": 0.23530000686645508, + "Acc.bench": 0.5036999893188476, + "Acc.countertop": 0.6479000091552735, + "Acc.stove": 0.8152999877929688, + "Acc.palm": 0.7070999908447265, + "Acc.kitchen island": 0.6668000030517578, + "Acc.computer": 0.890199966430664, + "Acc.swivel chair": 0.5508000183105469, + "Acc.boat": 0.857699966430664, + "Acc.bar": 0.6975, + "Acc.arcade machine": 0.8955999755859375, + "Acc.hovel": 0.6848999786376954, + "Acc.bus": 0.9205999755859375, + "Acc.towel": 0.768499984741211, + "Acc.light": 0.5091999816894531, + "Acc.truck": 0.46919998168945315, + "Acc.tower": 0.5620999908447266, + "Acc.chandelier": 0.7536000061035156, + "Acc.awning": 0.33790000915527346, + "Acc.streetlight": 0.2876000022888184, + "Acc.booth": 0.4490999984741211, + "Acc.television receiver": 0.7943000030517579, + "Acc.airplane": 0.6547000122070312, + "Acc.dirt track": 0.21719999313354493, + "Acc.apparel": 0.49259998321533205, + "Acc.pole": 0.1725, + "Acc.land": 0.09899999618530274, + "Acc.bannister": 0.17989999771118165, + "Acc.escalator": 0.35279998779296873, + "Acc.ottoman": 0.6184999847412109, + "Acc.bottle": 0.22260000228881835, + "Acc.buffet": 0.8358999633789063, + "Acc.poster": 0.34049999237060546, + "Acc.stage": 0.32849998474121095, + "Acc.van": 0.48439998626708985, + "Acc.ship": 0.3189999961853027, + "Acc.fountain": 0.2153000068664551, + "Acc.conveyer belt": 0.9259999847412109, + "Acc.canopy": 0.34099998474121096, + "Acc.washer": 
0.7295999908447266, + "Acc.plaything": 0.41569999694824217, + "Acc.swimming pool": 0.852699966430664, + "Acc.stool": 0.44240001678466795, + "Acc.barrel": 0.6480999755859375, + "Acc.basket": 0.2990999984741211, + "Acc.waterfall": 0.6370000076293946, + "Acc.tent": 0.9916000366210938, + "Acc.bag": 0.13399999618530273, + "Acc.minibike": 0.7616999816894531, + "Acc.cradle": 0.9641000366210938, + "Acc.oven": 0.4556999969482422, + "Acc.ball": 0.6562999725341797, + "Acc.food": 0.5918999862670898, + "Acc.step": 0.130600004196167, + "Acc.tank": 0.6190999984741211, + "Acc.trade name": 0.2745000076293945, + "Acc.microwave": 0.4129000091552734, + "Acc.pot": 0.5341999816894532, + "Acc.animal": 0.6680999755859375, + "Acc.bicycle": 0.7369000244140625, + "Acc.lake": 0.716500015258789, + "Acc.dishwasher": 0.6890000152587891, + "Acc.screen": 0.7180999755859375, + "Acc.blanket": 0.16950000762939454, + "Acc.sculpture": 0.7745999908447265, + "Acc.hood": 0.6052000045776367, + "Acc.sconce": 0.38599998474121094, + "Acc.vase": 0.47689998626708985, + "Acc.traffic light": 0.48400001525878905, + "Acc.tray": 0.06440000057220459, + "Acc.ashcan": 0.4647999954223633, + "Acc.fan": 0.7194999694824219, + "Acc.pier": 0.4836000061035156, + "Acc.crt screen": 0.1140999984741211, + "Acc.plate": 0.6518000030517578, + "Acc.monitor": 0.02690000057220459, + "Acc.bulletin board": 0.7623999786376953, + "Acc.shower": 0.03059999942779541, + "Acc.radiator": 0.7051000213623047, + "Acc.glass": 0.12050000190734864, + "Acc.clock": 0.2564999961853027, + "Acc.flag": 0.40130001068115234 + } + }, + "62": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8228, + "mIoU": 0.4619, + "mAcc": 0.5974, + "IoU.wall": 0.7604000091552734, + "IoU.building": 0.8252999877929688, + "IoU.sky": 0.9390000152587891, + "IoU.floor": 0.8095999908447266, + "IoU.tree": 0.7376000213623047, + "IoU.ceiling": 0.8330000305175781, + "IoU.road": 0.8172000122070312, + "IoU.bed ": 0.8704000091552735, + "IoU.windowpane": 0.6077999877929687, + "IoU.grass": 0.6887999725341797, + "IoU.cabinet": 0.5966999816894532, + "IoU.sidewalk": 0.6352000045776367, + "IoU.person": 0.7987999725341797, + "IoU.earth": 0.38720001220703126, + "IoU.door": 0.47970001220703123, + "IoU.table": 0.5740999984741211, + "IoU.mountain": 0.6136000061035156, + "IoU.plant": 0.5095999908447265, + "IoU.curtain": 0.731500015258789, + "IoU.chair": 0.5227000045776368, + "IoU.car": 0.8090000152587891, + "IoU.water": 0.5277000045776368, + "IoU.painting": 0.7023999786376953, + "IoU.sofa": 0.6602999877929687, + "IoU.shelf": 0.455099983215332, + "IoU.house": 0.46419998168945314, + "IoU.sea": 0.6336999893188476, + "IoU.mirror": 0.6379999923706055, + "IoU.rug": 0.6898999786376954, + "IoU.field": 0.3193000030517578, + "IoU.armchair": 0.38790000915527345, + "IoU.seat": 0.6476000213623047, + "IoU.fence": 0.4109999847412109, + "IoU.desk": 0.43, + "IoU.rock": 0.48279998779296873, + "IoU.wardrobe": 0.5411999893188476, + "IoU.lamp": 0.5293999862670898, + "IoU.bathtub": 0.752300033569336, + "IoU.railing": 0.365, + "IoU.cushion": 0.5358000183105469, + "IoU.base": 0.32380001068115233, + "IoU.box": 0.26799999237060546, + "IoU.column": 0.48470001220703124, + "IoU.signboard": 0.3525, + "IoU.chest of drawers": 0.3189999961853027, + "IoU.counter": 0.26629999160766604, + "IoU.sand": 0.42950000762939455, + "IoU.sink": 0.6343999862670898, + "IoU.skyscraper": 0.6052999877929688, + "IoU.fireplace": 0.6708000183105469, + "IoU.refrigerator": 
0.7026000213623047, + "IoU.grandstand": 0.3883000183105469, + "IoU.path": 0.2713999938964844, + "IoU.stairs": 0.30870000839233397, + "IoU.runway": 0.7073000335693359, + "IoU.case": 0.4866999816894531, + "IoU.pool table": 0.8998000335693359, + "IoU.pillow": 0.5409999847412109, + "IoU.screen door": 0.5125, + "IoU.stairway": 0.3813000106811523, + "IoU.river": 0.13640000343322753, + "IoU.bridge": 0.5375, + "IoU.bookcase": 0.31549999237060544, + "IoU.blind": 0.405, + "IoU.coffee table": 0.514900016784668, + "IoU.toilet": 0.7766000366210938, + "IoU.flower": 0.3381999969482422, + "IoU.book": 0.44990001678466796, + "IoU.hill": 0.12670000076293944, + "IoU.bench": 0.48689998626708986, + "IoU.countertop": 0.5206000137329102, + "IoU.stove": 0.6609999847412109, + "IoU.palm": 0.45869998931884765, + "IoU.kitchen island": 0.3497999954223633, + "IoU.computer": 0.6462000274658203, + "IoU.swivel chair": 0.44479999542236326, + "IoU.boat": 0.7072000122070312, + "IoU.bar": 0.47310001373291016, + "IoU.arcade machine": 0.5461000061035156, + "IoU.hovel": 0.538499984741211, + "IoU.bus": 0.72, + "IoU.towel": 0.5604000091552734, + "IoU.light": 0.4281000137329102, + "IoU.truck": 0.2722999954223633, + "IoU.tower": 0.37270000457763675, + "IoU.chandelier": 0.5918999862670898, + "IoU.awning": 0.3452000045776367, + "IoU.streetlight": 0.19639999389648438, + "IoU.booth": 0.2802000045776367, + "IoU.television receiver": 0.6163000106811524, + "IoU.airplane": 0.5481000137329102, + "IoU.dirt track": 0.30239999771118165, + "IoU.apparel": 0.32060001373291014, + "IoU.pole": 0.13760000228881836, + "IoU.land": 0.133100004196167, + "IoU.bannister": 0.13350000381469726, + "IoU.escalator": 0.4122999954223633, + "IoU.ottoman": 0.4366999816894531, + "IoU.bottle": 0.18950000762939453, + "IoU.buffet": 0.6329000091552734, + "IoU.poster": 0.29600000381469727, + "IoU.stage": 0.115, + "IoU.van": 0.3681999969482422, + "IoU.ship": 0.30200000762939455, + "IoU.fountain": 0.195, + "IoU.conveyer belt": 0.6926000213623047, + "IoU.canopy": 0.2552000045776367, + "IoU.washer": 0.6629000091552735, + "IoU.plaything": 0.26440000534057617, + "IoU.swimming pool": 0.675, + "IoU.stool": 0.2989999961853027, + "IoU.barrel": 0.02319999933242798, + "IoU.basket": 0.225, + "IoU.waterfall": 0.5463000106811523, + "IoU.tent": 0.9062999725341797, + "IoU.bag": 0.11319999694824219, + "IoU.minibike": 0.6595999908447265, + "IoU.cradle": 0.7876999664306641, + "IoU.oven": 0.15380000114440917, + "IoU.ball": 0.4677000045776367, + "IoU.food": 0.5343999862670898, + "IoU.step": 0.07659999847412109, + "IoU.tank": 0.49630001068115237, + "IoU.trade name": 0.20459999084472658, + "IoU.microwave": 0.3541999816894531, + "IoU.pot": 0.3984000015258789, + "IoU.animal": 0.5790999984741211, + "IoU.bicycle": 0.5304999923706055, + "IoU.lake": 0.4697000122070312, + "IoU.dishwasher": 0.5218000030517578, + "IoU.screen": 0.5416999816894531, + "IoU.blanket": 0.12710000038146974, + "IoU.sculpture": 0.5847000122070313, + "IoU.hood": 0.495, + "IoU.sconce": 0.29760000228881833, + "IoU.vase": 0.2744000053405762, + "IoU.traffic light": 0.22110000610351563, + "IoU.tray": 0.048000001907348634, + "IoU.ashcan": 0.31989999771118166, + "IoU.fan": 0.44009998321533206, + "IoU.pier": 0.2618000030517578, + "IoU.crt screen": 0.03200000047683716, + "IoU.plate": 0.4590999984741211, + "IoU.monitor": 0.04519999980926514, + "IoU.bulletin board": 0.4383000183105469, + "IoU.shower": 0.017899999618530272, + "IoU.radiator": 0.5754000091552735, + "IoU.glass": 0.08369999885559082, + "IoU.clock": 0.1490999984741211, + 
"IoU.flag": 0.35619998931884767, + "Acc.wall": 0.86, + "Acc.building": 0.9247000122070312, + "Acc.sky": 0.9647000122070313, + "Acc.floor": 0.8918000030517578, + "Acc.tree": 0.8819000244140625, + "Acc.ceiling": 0.8894000244140625, + "Acc.road": 0.8823999786376953, + "Acc.bed ": 0.9602999877929688, + "Acc.windowpane": 0.7623999786376953, + "Acc.grass": 0.8455000305175782, + "Acc.cabinet": 0.7297000122070313, + "Acc.sidewalk": 0.8312999725341796, + "Acc.person": 0.9179000091552735, + "Acc.earth": 0.5179999923706055, + "Acc.door": 0.6522000122070313, + "Acc.table": 0.7148999786376953, + "Acc.mountain": 0.7451999664306641, + "Acc.plant": 0.586500015258789, + "Acc.curtain": 0.8805999755859375, + "Acc.chair": 0.6733000183105469, + "Acc.car": 0.9058000183105469, + "Acc.water": 0.6591000366210937, + "Acc.painting": 0.8815000152587891, + "Acc.sofa": 0.821500015258789, + "Acc.shelf": 0.6295999908447265, + "Acc.house": 0.6118999862670899, + "Acc.sea": 0.8747000122070312, + "Acc.mirror": 0.755, + "Acc.rug": 0.7833999633789063, + "Acc.field": 0.5143000030517578, + "Acc.armchair": 0.610999984741211, + "Acc.seat": 0.8241999816894531, + "Acc.fence": 0.5820999908447265, + "Acc.desk": 0.7548999786376953, + "Acc.rock": 0.697699966430664, + "Acc.wardrobe": 0.7413999938964844, + "Acc.lamp": 0.7213999938964843, + "Acc.bathtub": 0.8318000030517578, + "Acc.railing": 0.5020999908447266, + "Acc.cushion": 0.6479000091552735, + "Acc.base": 0.5702000045776368, + "Acc.box": 0.3443000030517578, + "Acc.column": 0.611599998474121, + "Acc.signboard": 0.46580001831054685, + "Acc.chest of drawers": 0.5684999847412109, + "Acc.counter": 0.3868000030517578, + "Acc.sand": 0.6183000183105469, + "Acc.sink": 0.7306999969482422, + "Acc.skyscraper": 0.7329000091552734, + "Acc.fireplace": 0.9180000305175782, + "Acc.refrigerator": 0.8555999755859375, + "Acc.grandstand": 0.7251999664306641, + "Acc.path": 0.36950000762939456, + "Acc.stairs": 0.4386999893188477, + "Acc.runway": 0.8438999938964844, + "Acc.case": 0.6245999908447266, + "Acc.pool table": 0.9783999633789062, + "Acc.pillow": 0.6427999877929688, + "Acc.screen door": 0.6856999969482422, + "Acc.stairway": 0.5040999984741211, + "Acc.river": 0.28280000686645507, + "Acc.bridge": 0.7362999725341797, + "Acc.bookcase": 0.5143000030517578, + "Acc.blind": 0.46700000762939453, + "Acc.coffee table": 0.8527999877929687, + "Acc.toilet": 0.9023999786376953, + "Acc.flower": 0.5658000183105468, + "Acc.book": 0.6687999725341797, + "Acc.hill": 0.2543000030517578, + "Acc.bench": 0.5629000091552734, + "Acc.countertop": 0.6618000030517578, + "Acc.stove": 0.8161000061035156, + "Acc.palm": 0.725, + "Acc.kitchen island": 0.7583000183105468, + "Acc.computer": 0.7694000244140625, + "Acc.swivel chair": 0.6252999877929688, + "Acc.boat": 0.8425, + "Acc.bar": 0.6286000061035156, + "Acc.arcade machine": 0.6234000015258789, + "Acc.hovel": 0.6108000183105469, + "Acc.bus": 0.9263999938964844, + "Acc.towel": 0.7147000122070313, + "Acc.light": 0.5404999923706054, + "Acc.truck": 0.4254999923706055, + "Acc.tower": 0.5763000106811523, + "Acc.chandelier": 0.790199966430664, + "Acc.awning": 0.4416999816894531, + "Acc.streetlight": 0.2788999938964844, + "Acc.booth": 0.467599983215332, + "Acc.television receiver": 0.7837999725341797, + "Acc.airplane": 0.654000015258789, + "Acc.dirt track": 0.33430000305175783, + "Acc.apparel": 0.4304999923706055, + "Acc.pole": 0.173700008392334, + "Acc.land": 0.24030000686645508, + "Acc.bannister": 0.1875, + "Acc.escalator": 0.5700999832153321, + "Acc.ottoman": 0.6088999938964844, + 
"Acc.bottle": 0.23780000686645508, + "Acc.buffet": 0.8329000091552734, + "Acc.poster": 0.36150001525878905, + "Acc.stage": 0.2954000091552734, + "Acc.van": 0.43759998321533206, + "Acc.ship": 0.3085000038146973, + "Acc.fountain": 0.19680000305175782, + "Acc.conveyer belt": 0.9490000152587891, + "Acc.canopy": 0.3139999961853027, + "Acc.washer": 0.674000015258789, + "Acc.plaything": 0.45, + "Acc.swimming pool": 0.7452999877929688, + "Acc.stool": 0.4127999877929687, + "Acc.barrel": 0.17719999313354493, + "Acc.basket": 0.2943000030517578, + "Acc.waterfall": 0.6359999847412109, + "Acc.tent": 0.9919999694824219, + "Acc.bag": 0.12850000381469726, + "Acc.minibike": 0.7983999633789063, + "Acc.cradle": 0.9612999725341796, + "Acc.oven": 0.41069999694824216, + "Acc.ball": 0.5777000045776367, + "Acc.food": 0.5900999832153321, + "Acc.step": 0.0925, + "Acc.tank": 0.5716999816894531, + "Acc.trade name": 0.22280000686645507, + "Acc.microwave": 0.39110000610351564, + "Acc.pot": 0.4834999847412109, + "Acc.animal": 0.6752999877929687, + "Acc.bicycle": 0.691500015258789, + "Acc.lake": 0.6648000335693359, + "Acc.dishwasher": 0.7112000274658203, + "Acc.screen": 0.7208000183105469, + "Acc.blanket": 0.146899995803833, + "Acc.sculpture": 0.774000015258789, + "Acc.hood": 0.5890999984741211, + "Acc.sconce": 0.3806999969482422, + "Acc.vase": 0.45549999237060546, + "Acc.traffic light": 0.45939998626708983, + "Acc.tray": 0.0653000020980835, + "Acc.ashcan": 0.4909000015258789, + "Acc.fan": 0.696500015258789, + "Acc.pier": 0.46560001373291016, + "Acc.crt screen": 0.09979999542236329, + "Acc.plate": 0.6926000213623047, + "Acc.monitor": 0.06360000133514404, + "Acc.bulletin board": 0.7181999969482422, + "Acc.shower": 0.0584000015258789, + "Acc.radiator": 0.6866000366210937, + "Acc.glass": 0.09239999771118164, + "Acc.clock": 0.17389999389648436, + "Acc.flag": 0.39490001678466796 + } + }, + "63": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8243, + "mIoU": 0.4727, + "mAcc": 0.6081, + "IoU.wall": 0.7641999816894531, + "IoU.building": 0.8277999877929687, + "IoU.sky": 0.9395999908447266, + "IoU.floor": 0.8133000183105469, + "IoU.tree": 0.7441999816894531, + "IoU.ceiling": 0.8362999725341796, + "IoU.road": 0.8255999755859375, + "IoU.bed ": 0.8690000152587891, + "IoU.windowpane": 0.6095999908447266, + "IoU.grass": 0.6756999969482422, + "IoU.cabinet": 0.6011000061035157, + "IoU.sidewalk": 0.6375999832153321, + "IoU.person": 0.790199966430664, + "IoU.earth": 0.37009998321533205, + "IoU.door": 0.48009998321533204, + "IoU.table": 0.580999984741211, + "IoU.mountain": 0.5886000061035156, + "IoU.plant": 0.5231999969482422, + "IoU.curtain": 0.7280000305175781, + "IoU.chair": 0.5358000183105469, + "IoU.car": 0.8116999816894531, + "IoU.water": 0.5227000045776368, + "IoU.painting": 0.7170999908447265, + "IoU.sofa": 0.6616000366210938, + "IoU.shelf": 0.4443000030517578, + "IoU.house": 0.49630001068115237, + "IoU.sea": 0.6074000167846679, + "IoU.mirror": 0.6463999938964844, + "IoU.rug": 0.6658999633789062, + "IoU.field": 0.3206999969482422, + "IoU.armchair": 0.4079000091552734, + "IoU.seat": 0.6306000137329102, + "IoU.fence": 0.41369998931884766, + "IoU.desk": 0.4386999893188477, + "IoU.rock": 0.4656999969482422, + "IoU.wardrobe": 0.5272999954223633, + "IoU.lamp": 0.561500015258789, + "IoU.bathtub": 0.7565000152587891, + "IoU.railing": 0.3659000015258789, + "IoU.cushion": 0.5361999893188476, + "IoU.base": 0.3202000045776367, + "IoU.box": 0.245, + 
"IoU.column": 0.4804000091552734, + "IoU.signboard": 0.3566999816894531, + "IoU.chest of drawers": 0.32380001068115233, + "IoU.counter": 0.30790000915527344, + "IoU.sand": 0.38709999084472657, + "IoU.sink": 0.6586000061035157, + "IoU.skyscraper": 0.6401000213623047, + "IoU.fireplace": 0.7012999725341796, + "IoU.refrigerator": 0.6927999877929687, + "IoU.grandstand": 0.3915000152587891, + "IoU.path": 0.2347999954223633, + "IoU.stairs": 0.2875, + "IoU.runway": 0.6766999816894531, + "IoU.case": 0.5181999969482421, + "IoU.pool table": 0.8969999694824219, + "IoU.pillow": 0.5640000152587891, + "IoU.screen door": 0.44669998168945313, + "IoU.stairway": 0.3125, + "IoU.river": 0.12949999809265136, + "IoU.bridge": 0.610099983215332, + "IoU.bookcase": 0.3325, + "IoU.blind": 0.42150001525878905, + "IoU.coffee table": 0.5788000106811524, + "IoU.toilet": 0.8009999847412109, + "IoU.flower": 0.3515999984741211, + "IoU.book": 0.44290000915527344, + "IoU.hill": 0.11859999656677246, + "IoU.bench": 0.47720001220703123, + "IoU.countertop": 0.4956999969482422, + "IoU.stove": 0.6844000244140624, + "IoU.palm": 0.48869998931884767, + "IoU.kitchen island": 0.3890999984741211, + "IoU.computer": 0.7391999816894531, + "IoU.swivel chair": 0.4570999908447266, + "IoU.boat": 0.7097000122070313, + "IoU.bar": 0.5331999969482422, + "IoU.arcade machine": 0.6154000091552735, + "IoU.hovel": 0.569900016784668, + "IoU.bus": 0.7441999816894531, + "IoU.towel": 0.5758000183105468, + "IoU.light": 0.3983000183105469, + "IoU.truck": 0.3370000076293945, + "IoU.tower": 0.2893000030517578, + "IoU.chandelier": 0.6195999908447266, + "IoU.awning": 0.31629999160766603, + "IoU.streetlight": 0.2093000030517578, + "IoU.booth": 0.34439998626708984, + "IoU.television receiver": 0.6518000030517578, + "IoU.airplane": 0.5808000183105468, + "IoU.dirt track": 0.1165999984741211, + "IoU.apparel": 0.3068000030517578, + "IoU.pole": 0.19829999923706054, + "IoU.land": 0.12420000076293945, + "IoU.bannister": 0.15300000190734864, + "IoU.escalator": 0.48400001525878905, + "IoU.ottoman": 0.4418000030517578, + "IoU.bottle": 0.19270000457763672, + "IoU.buffet": 0.587599983215332, + "IoU.poster": 0.31659999847412107, + "IoU.stage": 0.1484000015258789, + "IoU.van": 0.39919998168945314, + "IoU.ship": 0.6211999893188477, + "IoU.fountain": 0.20010000228881836, + "IoU.conveyer belt": 0.653499984741211, + "IoU.canopy": 0.2325, + "IoU.washer": 0.689800033569336, + "IoU.plaything": 0.23920000076293946, + "IoU.swimming pool": 0.6462999725341797, + "IoU.stool": 0.32169998168945313, + "IoU.barrel": 0.3927000045776367, + "IoU.basket": 0.23370000839233399, + "IoU.waterfall": 0.5836999893188477, + "IoU.tent": 0.9297000122070312, + "IoU.bag": 0.15930000305175782, + "IoU.minibike": 0.6505999755859375, + "IoU.cradle": 0.7602999877929687, + "IoU.oven": 0.16899999618530273, + "IoU.ball": 0.44159999847412107, + "IoU.food": 0.5052000045776367, + "IoU.step": 0.12170000076293945, + "IoU.tank": 0.5097000122070312, + "IoU.trade name": 0.23020000457763673, + "IoU.microwave": 0.35060001373291017, + "IoU.pot": 0.354900016784668, + "IoU.animal": 0.6415000152587891, + "IoU.bicycle": 0.5395000076293945, + "IoU.lake": 0.5833000183105469, + "IoU.dishwasher": 0.5741999816894531, + "IoU.screen": 0.4943000030517578, + "IoU.blanket": 0.06190000057220459, + "IoU.sculpture": 0.5091999816894531, + "IoU.hood": 0.49540000915527344, + "IoU.sconce": 0.3559999847412109, + "IoU.vase": 0.28569999694824216, + "IoU.traffic light": 0.22709999084472657, + "IoU.tray": 0.03720000028610229, + "IoU.ashcan": 
0.337599983215332, + "IoU.fan": 0.48520000457763673, + "IoU.pier": 0.24360000610351562, + "IoU.crt screen": 0.03130000114440918, + "IoU.plate": 0.4688999938964844, + "IoU.monitor": 0.026500000953674316, + "IoU.bulletin board": 0.504000015258789, + "IoU.shower": 0.007300000190734863, + "IoU.radiator": 0.6097000122070313, + "IoU.glass": 0.10649999618530273, + "IoU.clock": 0.2610000038146973, + "IoU.flag": 0.35080001831054686, + "Acc.wall": 0.8669000244140626, + "Acc.building": 0.9215000152587891, + "Acc.sky": 0.9644999694824219, + "Acc.floor": 0.8937000274658203, + "Acc.tree": 0.8833999633789062, + "Acc.ceiling": 0.8970999908447266, + "Acc.road": 0.8927999877929688, + "Acc.bed ": 0.9616999816894531, + "Acc.windowpane": 0.7619000244140625, + "Acc.grass": 0.8072000122070313, + "Acc.cabinet": 0.7212999725341797, + "Acc.sidewalk": 0.8155999755859376, + "Acc.person": 0.9259999847412109, + "Acc.earth": 0.49459999084472656, + "Acc.door": 0.6458000183105469, + "Acc.table": 0.7205999755859375, + "Acc.mountain": 0.739800033569336, + "Acc.plant": 0.602599983215332, + "Acc.curtain": 0.8834999847412109, + "Acc.chair": 0.6938999938964844, + "Acc.car": 0.9151000213623047, + "Acc.water": 0.6533000183105468, + "Acc.painting": 0.8604000091552735, + "Acc.sofa": 0.8247000122070313, + "Acc.shelf": 0.6461000061035156, + "Acc.house": 0.6576999664306641, + "Acc.sea": 0.8341999816894531, + "Acc.mirror": 0.7551999664306641, + "Acc.rug": 0.7133999633789062, + "Acc.field": 0.5647000122070313, + "Acc.armchair": 0.6245999908447266, + "Acc.seat": 0.835999984741211, + "Acc.fence": 0.6102999877929688, + "Acc.desk": 0.7055999755859375, + "Acc.rock": 0.7069000244140625, + "Acc.wardrobe": 0.71, + "Acc.lamp": 0.7181999969482422, + "Acc.bathtub": 0.8341000366210938, + "Acc.railing": 0.5015000152587891, + "Acc.cushion": 0.6311999893188477, + "Acc.base": 0.47189998626708984, + "Acc.box": 0.29719999313354495, + "Acc.column": 0.6318000030517578, + "Acc.signboard": 0.4613999938964844, + "Acc.chest of drawers": 0.5956000137329102, + "Acc.counter": 0.4336999893188477, + "Acc.sand": 0.5475, + "Acc.sink": 0.7484999847412109, + "Acc.skyscraper": 0.7711000061035156, + "Acc.fireplace": 0.9230000305175782, + "Acc.refrigerator": 0.8587999725341797, + "Acc.grandstand": 0.7362000274658204, + "Acc.path": 0.3245000076293945, + "Acc.stairs": 0.40040000915527346, + "Acc.runway": 0.8681999969482422, + "Acc.case": 0.7723999786376953, + "Acc.pool table": 0.9784999847412109, + "Acc.pillow": 0.7027999877929687, + "Acc.screen door": 0.5658000183105468, + "Acc.stairway": 0.4240999984741211, + "Acc.river": 0.32779998779296876, + "Acc.bridge": 0.7697000122070312, + "Acc.bookcase": 0.5906000137329102, + "Acc.blind": 0.48830001831054687, + "Acc.coffee table": 0.8212999725341796, + "Acc.toilet": 0.8969999694824219, + "Acc.flower": 0.5145999908447265, + "Acc.book": 0.6252999877929688, + "Acc.hill": 0.2589999961853027, + "Acc.bench": 0.571500015258789, + "Acc.countertop": 0.6620999908447266, + "Acc.stove": 0.8230000305175781, + "Acc.palm": 0.7138999938964844, + "Acc.kitchen island": 0.7920999908447266, + "Acc.computer": 0.8694999694824219, + "Acc.swivel chair": 0.655, + "Acc.boat": 0.8197000122070313, + "Acc.bar": 0.7275, + "Acc.arcade machine": 0.6598000335693359, + "Acc.hovel": 0.6338999938964843, + "Acc.bus": 0.9394999694824219, + "Acc.towel": 0.7233999633789062, + "Acc.light": 0.46, + "Acc.truck": 0.48950000762939455, + "Acc.tower": 0.45790000915527346, + "Acc.chandelier": 0.7881999969482422, + "Acc.awning": 0.38709999084472657, + "Acc.streetlight": 
0.2844000053405762, + "Acc.booth": 0.5477000045776367, + "Acc.television receiver": 0.8451000213623047, + "Acc.airplane": 0.6587000274658203, + "Acc.dirt track": 0.18649999618530275, + "Acc.apparel": 0.4413000106811523, + "Acc.pole": 0.2880999946594238, + "Acc.land": 0.26379999160766604, + "Acc.bannister": 0.23569999694824217, + "Acc.escalator": 0.7051999664306641, + "Acc.ottoman": 0.5822000122070312, + "Acc.bottle": 0.256200008392334, + "Acc.buffet": 0.7669000244140625, + "Acc.poster": 0.40169998168945314, + "Acc.stage": 0.34119998931884765, + "Acc.van": 0.4652000045776367, + "Acc.ship": 0.6783000183105469, + "Acc.fountain": 0.20569999694824218, + "Acc.conveyer belt": 0.9473999786376953, + "Acc.canopy": 0.31020000457763675, + "Acc.washer": 0.6959999847412109, + "Acc.plaything": 0.3647999954223633, + "Acc.swimming pool": 0.824800033569336, + "Acc.stool": 0.4215999984741211, + "Acc.barrel": 0.6265999984741211, + "Acc.basket": 0.34150001525878904, + "Acc.waterfall": 0.6980999755859375, + "Acc.tent": 0.989800033569336, + "Acc.bag": 0.21149999618530274, + "Acc.minibike": 0.8140000152587891, + "Acc.cradle": 0.9690000152587891, + "Acc.oven": 0.43939998626708987, + "Acc.ball": 0.5979000091552734, + "Acc.food": 0.5570000076293945, + "Acc.step": 0.15939999580383302, + "Acc.tank": 0.629000015258789, + "Acc.trade name": 0.2585000038146973, + "Acc.microwave": 0.39930000305175783, + "Acc.pot": 0.42459999084472655, + "Acc.animal": 0.6836000061035157, + "Acc.bicycle": 0.7225, + "Acc.lake": 0.6716999816894531, + "Acc.dishwasher": 0.6754000091552734, + "Acc.screen": 0.7529000091552734, + "Acc.blanket": 0.06710000038146972, + "Acc.sculpture": 0.7144000244140625, + "Acc.hood": 0.6186999893188476, + "Acc.sconce": 0.4841999816894531, + "Acc.vase": 0.44720001220703126, + "Acc.traffic light": 0.41770000457763673, + "Acc.tray": 0.05860000133514404, + "Acc.ashcan": 0.48779998779296874, + "Acc.fan": 0.6926999664306641, + "Acc.pier": 0.4281000137329102, + "Acc.crt screen": 0.09819999694824219, + "Acc.plate": 0.6441999816894531, + "Acc.monitor": 0.02880000114440918, + "Acc.bulletin board": 0.6691000366210937, + "Acc.shower": 0.05880000114440918, + "Acc.radiator": 0.7534999847412109, + "Acc.glass": 0.12010000228881836, + "Acc.clock": 0.33490001678466796, + "Acc.flag": 0.3759000015258789 + } + }, + "64": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.825, + "mIoU": 0.465, + "mAcc": 0.5666, + "IoU.wall": 0.7605999755859375, + "IoU.building": 0.826500015258789, + "IoU.sky": 0.9380999755859375, + "IoU.floor": 0.8033999633789063, + "IoU.tree": 0.7380000305175781, + "IoU.ceiling": 0.8329000091552734, + "IoU.road": 0.8276000213623047, + "IoU.bed ": 0.8679000091552734, + "IoU.windowpane": 0.6140999984741211, + "IoU.grass": 0.6626000213623047, + "IoU.cabinet": 0.601599998474121, + "IoU.sidewalk": 0.6402999877929687, + "IoU.person": 0.7956999969482422, + "IoU.earth": 0.37959999084472656, + "IoU.door": 0.46740001678466797, + "IoU.table": 0.5988999938964844, + "IoU.mountain": 0.5791999816894531, + "IoU.plant": 0.5091999816894531, + "IoU.curtain": 0.7416999816894532, + "IoU.chair": 0.5379000091552735, + "IoU.car": 0.8230999755859375, + "IoU.water": 0.5602999877929687, + "IoU.painting": 0.7166999816894531, + "IoU.sofa": 0.6566000366210938, + "IoU.shelf": 0.4495000076293945, + "IoU.house": 0.4815999984741211, + "IoU.sea": 0.6327999877929688, + "IoU.mirror": 0.6377000045776368, + "IoU.rug": 0.6202999877929688, + "IoU.field": 
0.320099983215332, + "IoU.armchair": 0.41009998321533203, + "IoU.seat": 0.6558999633789062, + "IoU.fence": 0.39229999542236327, + "IoU.desk": 0.45810001373291015, + "IoU.rock": 0.47069999694824216, + "IoU.wardrobe": 0.5286000061035157, + "IoU.lamp": 0.5684000015258789, + "IoU.bathtub": 0.7434999847412109, + "IoU.railing": 0.3597999954223633, + "IoU.cushion": 0.5252999877929687, + "IoU.base": 0.31860000610351563, + "IoU.box": 0.25879999160766604, + "IoU.column": 0.4768000030517578, + "IoU.signboard": 0.3315000152587891, + "IoU.chest of drawers": 0.3175, + "IoU.counter": 0.2826000022888184, + "IoU.sand": 0.39849998474121096, + "IoU.sink": 0.6638999938964844, + "IoU.skyscraper": 0.609900016784668, + "IoU.fireplace": 0.7066000366210937, + "IoU.refrigerator": 0.7619999694824219, + "IoU.grandstand": 0.40419998168945315, + "IoU.path": 0.21569999694824218, + "IoU.stairs": 0.2905999946594238, + "IoU.runway": 0.6944999694824219, + "IoU.case": 0.5093999862670898, + "IoU.pool table": 0.9269999694824219, + "IoU.pillow": 0.5240999984741211, + "IoU.screen door": 0.49509998321533205, + "IoU.stairway": 0.35619998931884767, + "IoU.river": 0.136899995803833, + "IoU.bridge": 0.6627999877929688, + "IoU.bookcase": 0.35439998626708985, + "IoU.blind": 0.42119998931884767, + "IoU.coffee table": 0.6038999938964844, + "IoU.toilet": 0.8079000091552735, + "IoU.flower": 0.3265000152587891, + "IoU.book": 0.44369998931884763, + "IoU.hill": 0.11789999961853027, + "IoU.bench": 0.4784999847412109, + "IoU.countertop": 0.4531999969482422, + "IoU.stove": 0.7180000305175781, + "IoU.palm": 0.45689998626708983, + "IoU.kitchen island": 0.37099998474121093, + "IoU.computer": 0.7336000061035156, + "IoU.swivel chair": 0.40369998931884765, + "IoU.boat": 0.7269999694824218, + "IoU.bar": 0.5431999969482422, + "IoU.arcade machine": 0.4111000061035156, + "IoU.hovel": 0.49200000762939455, + "IoU.bus": 0.7804000091552734, + "IoU.towel": 0.579900016784668, + "IoU.light": 0.243700008392334, + "IoU.truck": 0.2929000091552734, + "IoU.tower": 0.33189998626708983, + "IoU.chandelier": 0.6077999877929687, + "IoU.awning": 0.2773999977111816, + "IoU.streetlight": 0.16209999084472657, + "IoU.booth": 0.34119998931884765, + "IoU.television receiver": 0.595, + "IoU.airplane": 0.5695999908447266, + "IoU.dirt track": 0.13949999809265137, + "IoU.apparel": 0.2820999908447266, + "IoU.pole": 0.19350000381469726, + "IoU.land": 0.022100000381469725, + "IoU.bannister": 0.06929999828338623, + "IoU.escalator": 0.36279998779296874, + "IoU.ottoman": 0.4393000030517578, + "IoU.bottle": 0.1934000015258789, + "IoU.buffet": 0.4615000152587891, + "IoU.poster": 0.2478000068664551, + "IoU.stage": 0.15939999580383302, + "IoU.van": 0.38540000915527345, + "IoU.ship": 0.18299999237060546, + "IoU.fountain": 0.20040000915527345, + "IoU.conveyer belt": 0.78, + "IoU.canopy": 0.27549999237060546, + "IoU.washer": 0.7322000122070312, + "IoU.plaything": 0.2684000015258789, + "IoU.swimming pool": 0.5968999862670898, + "IoU.stool": 0.29690000534057615, + "IoU.barrel": 0.585900001525879, + "IoU.basket": 0.23959999084472655, + "IoU.waterfall": 0.5868999862670898, + "IoU.tent": 0.9548999786376953, + "IoU.bag": 0.12409999847412109, + "IoU.minibike": 0.635, + "IoU.cradle": 0.7733000183105468, + "IoU.oven": 0.16969999313354492, + "IoU.ball": 0.46299999237060546, + "IoU.food": 0.49130001068115237, + "IoU.step": 0.08670000076293945, + "IoU.tank": 0.5, + "IoU.trade name": 0.149399995803833, + "IoU.microwave": 0.3195000076293945, + "IoU.pot": 0.3634999847412109, + "IoU.animal": 0.614900016784668, 
+ "IoU.bicycle": 0.5045999908447265, + "IoU.lake": 0.6215000152587891, + "IoU.dishwasher": 0.5625, + "IoU.screen": 0.6375999832153321, + "IoU.blanket": 0.09850000381469727, + "IoU.sculpture": 0.5036999893188476, + "IoU.hood": 0.4772999954223633, + "IoU.sconce": 0.2897999954223633, + "IoU.vase": 0.28950000762939454, + "IoU.traffic light": 0.21760000228881837, + "IoU.tray": 0.024800000190734865, + "IoU.ashcan": 0.3788999938964844, + "IoU.fan": 0.49400001525878906, + "IoU.pier": 0.28739999771118163, + "IoU.crt screen": 0.028499999046325684, + "IoU.plate": 0.46610000610351565, + "IoU.monitor": 0.10529999732971192, + "IoU.bulletin board": 0.5281999969482422, + "IoU.shower": 0.0044999998807907105, + "IoU.radiator": 0.5177999877929688, + "IoU.glass": 0.07639999866485596, + "IoU.clock": 0.22579999923706054, + "IoU.flag": 0.34049999237060546, + "Acc.wall": 0.8926000213623047, + "Acc.building": 0.9298000335693359, + "Acc.sky": 0.977699966430664, + "Acc.floor": 0.9127999877929688, + "Acc.tree": 0.8766999816894532, + "Acc.ceiling": 0.9, + "Acc.road": 0.9063999938964844, + "Acc.bed ": 0.9568000030517578, + "Acc.windowpane": 0.7437000274658203, + "Acc.grass": 0.7968000030517578, + "Acc.cabinet": 0.7468000030517579, + "Acc.sidewalk": 0.8026999664306641, + "Acc.person": 0.8963999938964844, + "Acc.earth": 0.5634000015258789, + "Acc.door": 0.6456999969482422, + "Acc.table": 0.7362999725341797, + "Acc.mountain": 0.7037000274658203, + "Acc.plant": 0.5847999954223633, + "Acc.curtain": 0.8458000183105469, + "Acc.chair": 0.672300033569336, + "Acc.car": 0.8898999786376953, + "Acc.water": 0.7001999664306641, + "Acc.painting": 0.8301999664306641, + "Acc.sofa": 0.8138999938964844, + "Acc.shelf": 0.6370999908447266, + "Acc.house": 0.6125, + "Acc.sea": 0.830999984741211, + "Acc.mirror": 0.7016999816894531, + "Acc.rug": 0.6552999877929687, + "Acc.field": 0.5368999862670898, + "Acc.armchair": 0.6229999923706054, + "Acc.seat": 0.8206999969482421, + "Acc.fence": 0.5288000106811523, + "Acc.desk": 0.6676999664306641, + "Acc.rock": 0.6697000122070312, + "Acc.wardrobe": 0.6938999938964844, + "Acc.lamp": 0.6736000061035157, + "Acc.bathtub": 0.7926000213623047, + "Acc.railing": 0.5104999923706055, + "Acc.cushion": 0.606500015258789, + "Acc.base": 0.500099983215332, + "Acc.box": 0.33599998474121096, + "Acc.column": 0.615999984741211, + "Acc.signboard": 0.4111000061035156, + "Acc.chest of drawers": 0.5745999908447266, + "Acc.counter": 0.37310001373291013, + "Acc.sand": 0.5163999938964844, + "Acc.sink": 0.6995999908447266, + "Acc.skyscraper": 0.6958000183105468, + "Acc.fireplace": 0.8956999969482422, + "Acc.refrigerator": 0.8401000213623047, + "Acc.grandstand": 0.7522000122070313, + "Acc.path": 0.28200000762939453, + "Acc.stairs": 0.3818000030517578, + "Acc.runway": 0.8284999847412109, + "Acc.case": 0.6862000274658203, + "Acc.pool table": 0.9618000030517578, + "Acc.pillow": 0.6018000030517578, + "Acc.screen door": 0.5815999984741211, + "Acc.stairway": 0.47709999084472654, + "Acc.river": 0.2934000015258789, + "Acc.bridge": 0.78, + "Acc.bookcase": 0.6086000061035156, + "Acc.blind": 0.4652000045776367, + "Acc.coffee table": 0.7943000030517579, + "Acc.toilet": 0.8793000030517578, + "Acc.flower": 0.43459999084472656, + "Acc.book": 0.5691999816894531, + "Acc.hill": 0.21959999084472656, + "Acc.bench": 0.547400016784668, + "Acc.countertop": 0.6143999862670898, + "Acc.stove": 0.790199966430664, + "Acc.palm": 0.587400016784668, + "Acc.kitchen island": 0.5620000076293945, + "Acc.computer": 0.8393000030517578, + "Acc.swivel chair": 
0.4961000061035156, + "Acc.boat": 0.8362000274658203, + "Acc.bar": 0.7180999755859375, + "Acc.arcade machine": 0.43740001678466794, + "Acc.hovel": 0.5790999984741211, + "Acc.bus": 0.9358000183105468, + "Acc.towel": 0.6544000244140625, + "Acc.light": 0.25489999771118166, + "Acc.truck": 0.39310001373291015, + "Acc.tower": 0.44299999237060544, + "Acc.chandelier": 0.7322000122070312, + "Acc.awning": 0.2961000061035156, + "Acc.streetlight": 0.18860000610351563, + "Acc.booth": 0.47389999389648435, + "Acc.television receiver": 0.7170999908447265, + "Acc.airplane": 0.6120000076293945, + "Acc.dirt track": 0.19360000610351563, + "Acc.apparel": 0.3990000152587891, + "Acc.pole": 0.2559000015258789, + "Acc.land": 0.03279999971389771, + "Acc.bannister": 0.09510000228881836, + "Acc.escalator": 0.44060001373291013, + "Acc.ottoman": 0.5540000152587891, + "Acc.bottle": 0.24459999084472656, + "Acc.buffet": 0.5356000137329101, + "Acc.poster": 0.33490001678466796, + "Acc.stage": 0.27430000305175783, + "Acc.van": 0.44610000610351563, + "Acc.ship": 0.18799999237060547, + "Acc.fountain": 0.2006999969482422, + "Acc.conveyer belt": 0.9222000122070313, + "Acc.canopy": 0.3572999954223633, + "Acc.washer": 0.7362999725341797, + "Acc.plaything": 0.4054000091552734, + "Acc.swimming pool": 0.6883999633789063, + "Acc.stool": 0.35369998931884766, + "Acc.barrel": 0.6195000076293945, + "Acc.basket": 0.3015999984741211, + "Acc.waterfall": 0.6393000030517578, + "Acc.tent": 0.9887999725341797, + "Acc.bag": 0.13600000381469726, + "Acc.minibike": 0.7369000244140625, + "Acc.cradle": 0.9561000061035156, + "Acc.oven": 0.4793000030517578, + "Acc.ball": 0.552400016784668, + "Acc.food": 0.5443999862670899, + "Acc.step": 0.10149999618530274, + "Acc.tank": 0.6041999816894531, + "Acc.trade name": 0.15689999580383301, + "Acc.microwave": 0.33669998168945314, + "Acc.pot": 0.4152000045776367, + "Acc.animal": 0.6420999908447266, + "Acc.bicycle": 0.6406999969482422, + "Acc.lake": 0.6819000244140625, + "Acc.dishwasher": 0.6568000030517578, + "Acc.screen": 0.8651000213623047, + "Acc.blanket": 0.105600004196167, + "Acc.sculpture": 0.7173000335693359, + "Acc.hood": 0.5145999908447265, + "Acc.sconce": 0.32990001678466796, + "Acc.vase": 0.3908000183105469, + "Acc.traffic light": 0.29489999771118164, + "Acc.tray": 0.030499999523162843, + "Acc.ashcan": 0.5640999984741211, + "Acc.fan": 0.5850999832153321, + "Acc.pier": 0.39169998168945314, + "Acc.crt screen": 0.06349999904632568, + "Acc.plate": 0.5758000183105468, + "Acc.monitor": 0.116899995803833, + "Acc.bulletin board": 0.6483000183105468, + "Acc.shower": 0.012000000476837159, + "Acc.radiator": 0.5684000015258789, + "Acc.glass": 0.0815999984741211, + "Acc.clock": 0.2770999908447266, + "Acc.flag": 0.3568000030517578 + } + }, + "65": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.828, + "mIoU": 0.4819, + "mAcc": 0.6118, + "IoU.wall": 0.7715000152587891, + "IoU.building": 0.8356999969482422, + "IoU.sky": 0.9348999786376954, + "IoU.floor": 0.8145999908447266, + "IoU.tree": 0.7386000061035156, + "IoU.ceiling": 0.8311000061035156, + "IoU.road": 0.8251000213623046, + "IoU.bed ": 0.8802999877929687, + "IoU.windowpane": 0.620099983215332, + "IoU.grass": 0.678499984741211, + "IoU.cabinet": 0.6091999816894531, + "IoU.sidewalk": 0.6358000183105469, + "IoU.person": 0.8020999908447266, + "IoU.earth": 0.3631999969482422, + "IoU.door": 0.48130001068115236, + "IoU.table": 0.5886000061035156, + "IoU.mountain": 
0.6291999816894531, + "IoU.plant": 0.5072999954223633, + "IoU.curtain": 0.735999984741211, + "IoU.chair": 0.5461999893188476, + "IoU.car": 0.845999984741211, + "IoU.water": 0.5079999923706054, + "IoU.painting": 0.7158999633789063, + "IoU.sofa": 0.6641000366210937, + "IoU.shelf": 0.44849998474121094, + "IoU.house": 0.5252999877929687, + "IoU.sea": 0.6705999755859375, + "IoU.mirror": 0.6413999938964844, + "IoU.rug": 0.7094000244140625, + "IoU.field": 0.30700000762939456, + "IoU.armchair": 0.43290000915527344, + "IoU.seat": 0.617599983215332, + "IoU.fence": 0.3990999984741211, + "IoU.desk": 0.4779000091552734, + "IoU.rock": 0.47509998321533203, + "IoU.wardrobe": 0.549099998474121, + "IoU.lamp": 0.5479999923706055, + "IoU.bathtub": 0.8319000244140625, + "IoU.railing": 0.33180000305175783, + "IoU.cushion": 0.5668000030517578, + "IoU.base": 0.37, + "IoU.box": 0.2670000076293945, + "IoU.column": 0.48119998931884767, + "IoU.signboard": 0.3504999923706055, + "IoU.chest of drawers": 0.34130001068115234, + "IoU.counter": 0.24299999237060546, + "IoU.sand": 0.4363000106811523, + "IoU.sink": 0.6804000091552734, + "IoU.skyscraper": 0.6268000030517578, + "IoU.fireplace": 0.7008000183105468, + "IoU.refrigerator": 0.71, + "IoU.grandstand": 0.42200000762939455, + "IoU.path": 0.25610000610351563, + "IoU.stairs": 0.23329999923706055, + "IoU.runway": 0.7245999908447266, + "IoU.case": 0.5381000137329102, + "IoU.pool table": 0.9181999969482422, + "IoU.pillow": 0.5320000076293945, + "IoU.screen door": 0.6576999664306641, + "IoU.stairway": 0.3240999984741211, + "IoU.river": 0.13340000152587891, + "IoU.bridge": 0.6338000106811523, + "IoU.bookcase": 0.3434000015258789, + "IoU.blind": 0.40330001831054685, + "IoU.coffee table": 0.5561000061035156, + "IoU.toilet": 0.8202999877929688, + "IoU.flower": 0.36779998779296874, + "IoU.book": 0.445099983215332, + "IoU.hill": 0.09850000381469727, + "IoU.bench": 0.4204999923706055, + "IoU.countertop": 0.5166999816894531, + "IoU.stove": 0.7394999694824219, + "IoU.palm": 0.48709999084472655, + "IoU.kitchen island": 0.4365999984741211, + "IoU.computer": 0.7366999816894532, + "IoU.swivel chair": 0.4975, + "IoU.boat": 0.6181999969482422, + "IoU.bar": 0.5181999969482421, + "IoU.arcade machine": 0.6881999969482422, + "IoU.hovel": 0.4443000030517578, + "IoU.bus": 0.8123000335693359, + "IoU.towel": 0.5952000045776367, + "IoU.light": 0.4077000045776367, + "IoU.truck": 0.3213999938964844, + "IoU.tower": 0.24209999084472655, + "IoU.chandelier": 0.6165000152587891, + "IoU.awning": 0.43090000152587893, + "IoU.streetlight": 0.19579999923706054, + "IoU.booth": 0.31239999771118165, + "IoU.television receiver": 0.6372999954223633, + "IoU.airplane": 0.587400016784668, + "IoU.dirt track": 0.0, + "IoU.apparel": 0.3743000030517578, + "IoU.pole": 0.15699999809265136, + "IoU.land": 0.05050000190734863, + "IoU.bannister": 0.10630000114440918, + "IoU.escalator": 0.4765999984741211, + "IoU.ottoman": 0.4540000152587891, + "IoU.bottle": 0.19559999465942382, + "IoU.buffet": 0.6172000122070312, + "IoU.poster": 0.20819999694824218, + "IoU.stage": 0.15550000190734864, + "IoU.van": 0.4243000030517578, + "IoU.ship": 0.37279998779296875, + "IoU.fountain": 0.2209000015258789, + "IoU.conveyer belt": 0.7320999908447265, + "IoU.canopy": 0.33490001678466796, + "IoU.washer": 0.6666000366210938, + "IoU.plaything": 0.34599998474121096, + "IoU.swimming pool": 0.6575, + "IoU.stool": 0.3465999984741211, + "IoU.barrel": 0.5656000137329101, + "IoU.basket": 0.22450000762939454, + "IoU.waterfall": 0.6231999969482422, + "IoU.tent": 
0.9225, + "IoU.bag": 0.1515999984741211, + "IoU.minibike": 0.6390999984741211, + "IoU.cradle": 0.8237000274658203, + "IoU.oven": 0.22760000228881835, + "IoU.ball": 0.5352999877929687, + "IoU.food": 0.5247000122070312, + "IoU.step": 0.12470000267028808, + "IoU.tank": 0.5670999908447265, + "IoU.trade name": 0.20559999465942383, + "IoU.microwave": 0.475, + "IoU.pot": 0.41380001068115235, + "IoU.animal": 0.6018000030517578, + "IoU.bicycle": 0.5316999816894531, + "IoU.lake": 0.4520999908447266, + "IoU.dishwasher": 0.5734999847412109, + "IoU.screen": 0.5622999954223633, + "IoU.blanket": 0.15, + "IoU.sculpture": 0.6454000091552734, + "IoU.hood": 0.49650001525878906, + "IoU.sconce": 0.24639999389648437, + "IoU.vase": 0.3377000045776367, + "IoU.traffic light": 0.22049999237060547, + "IoU.tray": 0.04239999771118164, + "IoU.ashcan": 0.4006999969482422, + "IoU.fan": 0.49029998779296874, + "IoU.pier": 0.22840000152587892, + "IoU.crt screen": 0.053299999237060545, + "IoU.plate": 0.47700000762939454, + "IoU.monitor": 0.03140000104904175, + "IoU.bulletin board": 0.4725, + "IoU.shower": 0.03390000104904175, + "IoU.radiator": 0.6284000015258789, + "IoU.glass": 0.12020000457763672, + "IoU.clock": 0.28579999923706056, + "IoU.flag": 0.31420000076293947, + "Acc.wall": 0.879000015258789, + "Acc.building": 0.9323000335693359, + "Acc.sky": 0.9591000366210938, + "Acc.floor": 0.8936000061035156, + "Acc.tree": 0.8887000274658203, + "Acc.ceiling": 0.8879000091552735, + "Acc.road": 0.8831999969482421, + "Acc.bed ": 0.9640000152587891, + "Acc.windowpane": 0.7623999786376953, + "Acc.grass": 0.8175, + "Acc.cabinet": 0.7423000335693359, + "Acc.sidewalk": 0.8158000183105468, + "Acc.person": 0.9165000152587891, + "Acc.earth": 0.5022999954223633, + "Acc.door": 0.6447000122070312, + "Acc.table": 0.7419000244140626, + "Acc.mountain": 0.7530000305175781, + "Acc.plant": 0.5938000106811523, + "Acc.curtain": 0.8726000213623046, + "Acc.chair": 0.6954000091552734, + "Acc.car": 0.9277999877929688, + "Acc.water": 0.6045000076293945, + "Acc.painting": 0.8487999725341797, + "Acc.sofa": 0.8033999633789063, + "Acc.shelf": 0.6247999954223633, + "Acc.house": 0.6862000274658203, + "Acc.sea": 0.8608000183105469, + "Acc.mirror": 0.7544000244140625, + "Acc.rug": 0.7977999877929688, + "Acc.field": 0.5113000106811524, + "Acc.armchair": 0.6331999969482421, + "Acc.seat": 0.8231999969482422, + "Acc.fence": 0.558499984741211, + "Acc.desk": 0.790199966430664, + "Acc.rock": 0.6356999969482422, + "Acc.wardrobe": 0.757300033569336, + "Acc.lamp": 0.7458999633789063, + "Acc.bathtub": 0.902300033569336, + "Acc.railing": 0.4545999908447266, + "Acc.cushion": 0.6822000122070313, + "Acc.base": 0.6318999862670899, + "Acc.box": 0.3308000183105469, + "Acc.column": 0.5591999816894532, + "Acc.signboard": 0.4508000183105469, + "Acc.chest of drawers": 0.6583999633789063, + "Acc.counter": 0.33240001678466796, + "Acc.sand": 0.6175, + "Acc.sink": 0.7491999816894531, + "Acc.skyscraper": 0.7340000152587891, + "Acc.fireplace": 0.9241999816894532, + "Acc.refrigerator": 0.8172000122070312, + "Acc.grandstand": 0.7154000091552735, + "Acc.path": 0.37349998474121093, + "Acc.stairs": 0.34200000762939453, + "Acc.runway": 0.970999984741211, + "Acc.case": 0.6856999969482422, + "Acc.pool table": 0.9734999847412109, + "Acc.pillow": 0.6288999938964843, + "Acc.screen door": 0.7698999786376953, + "Acc.stairway": 0.47709999084472654, + "Acc.river": 0.40119998931884765, + "Acc.bridge": 0.8141999816894532, + "Acc.bookcase": 0.5152999877929687, + "Acc.blind": 0.45430000305175783, + "Acc.coffee 
table": 0.8780000305175781, + "Acc.toilet": 0.9041999816894531, + "Acc.flower": 0.5191999816894531, + "Acc.book": 0.6279000091552734, + "Acc.hill": 0.24260000228881837, + "Acc.bench": 0.5025, + "Acc.countertop": 0.6561000061035156, + "Acc.stove": 0.829800033569336, + "Acc.palm": 0.680199966430664, + "Acc.kitchen island": 0.6629000091552735, + "Acc.computer": 0.9177999877929688, + "Acc.swivel chair": 0.6412999725341797, + "Acc.boat": 0.8716000366210938, + "Acc.bar": 0.7462000274658203, + "Acc.arcade machine": 0.7677999877929688, + "Acc.hovel": 0.532400016784668, + "Acc.bus": 0.9355999755859375, + "Acc.towel": 0.7669000244140625, + "Acc.light": 0.47509998321533203, + "Acc.truck": 0.42, + "Acc.tower": 0.3227000045776367, + "Acc.chandelier": 0.7866999816894531, + "Acc.awning": 0.5297000122070312, + "Acc.streetlight": 0.2744000053405762, + "Acc.booth": 0.45119998931884764, + "Acc.television receiver": 0.8245999908447266, + "Acc.airplane": 0.6575, + "Acc.dirt track": 0.0, + "Acc.apparel": 0.5463000106811523, + "Acc.pole": 0.19670000076293945, + "Acc.land": 0.11029999732971191, + "Acc.bannister": 0.15670000076293944, + "Acc.escalator": 0.5945000076293945, + "Acc.ottoman": 0.6705999755859375, + "Acc.bottle": 0.23760000228881836, + "Acc.buffet": 0.8666999816894532, + "Acc.poster": 0.2784000015258789, + "Acc.stage": 0.3963999938964844, + "Acc.van": 0.5272999954223633, + "Acc.ship": 0.37860000610351563, + "Acc.fountain": 0.22459999084472657, + "Acc.conveyer belt": 0.9322000122070313, + "Acc.canopy": 0.417400016784668, + "Acc.washer": 0.779000015258789, + "Acc.plaything": 0.5113000106811524, + "Acc.swimming pool": 0.8994000244140625, + "Acc.stool": 0.5115999984741211, + "Acc.barrel": 0.6475, + "Acc.basket": 0.33380001068115234, + "Acc.waterfall": 0.7444000244140625, + "Acc.tent": 0.9913999938964844, + "Acc.bag": 0.1681999969482422, + "Acc.minibike": 0.7458999633789063, + "Acc.cradle": 0.9733999633789062, + "Acc.oven": 0.5797999954223633, + "Acc.ball": 0.617599983215332, + "Acc.food": 0.5659000015258789, + "Acc.step": 0.15960000038146974, + "Acc.tank": 0.6561000061035156, + "Acc.trade name": 0.2168000030517578, + "Acc.microwave": 0.5343999862670898, + "Acc.pot": 0.47950000762939454, + "Acc.animal": 0.6263000106811524, + "Acc.bicycle": 0.7079000091552734, + "Acc.lake": 0.6468000030517578, + "Acc.dishwasher": 0.7038999938964844, + "Acc.screen": 0.855999984741211, + "Acc.blanket": 0.17149999618530273, + "Acc.sculpture": 0.8236000061035156, + "Acc.hood": 0.5900999832153321, + "Acc.sconce": 0.31940000534057617, + "Acc.vase": 0.5191999816894531, + "Acc.traffic light": 0.32040000915527345, + "Acc.tray": 0.05849999904632568, + "Acc.ashcan": 0.585, + "Acc.fan": 0.6968000030517578, + "Acc.pier": 0.45419998168945314, + "Acc.crt screen": 0.15489999771118165, + "Acc.plate": 0.6311999893188477, + "Acc.monitor": 0.03769999980926514, + "Acc.bulletin board": 0.6905999755859376, + "Acc.shower": 0.04960000038146973, + "Acc.radiator": 0.7486000061035156, + "Acc.glass": 0.13170000076293945, + "Acc.clock": 0.3363999938964844, + "Acc.flag": 0.35700000762939454 + } + }, + "66": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8276, + "mIoU": 0.4779, + "mAcc": 0.6131, + "IoU.wall": 0.7693000030517578, + "IoU.building": 0.8352999877929688, + "IoU.sky": 0.9395999908447266, + "IoU.floor": 0.8183999633789063, + "IoU.tree": 0.7433000183105469, + "IoU.ceiling": 0.8351999664306641, + "IoU.road": 0.8336000061035156, + "IoU.bed ": 
0.8863999938964844, + "IoU.windowpane": 0.6265000152587891, + "IoU.grass": 0.6637000274658204, + "IoU.cabinet": 0.6033000183105469, + "IoU.sidewalk": 0.6555000305175781, + "IoU.person": 0.8033999633789063, + "IoU.earth": 0.38110000610351563, + "IoU.door": 0.47880001068115235, + "IoU.table": 0.5602999877929687, + "IoU.mountain": 0.6361999893188477, + "IoU.plant": 0.5040999984741211, + "IoU.curtain": 0.727699966430664, + "IoU.chair": 0.5413000106811523, + "IoU.car": 0.8408999633789063, + "IoU.water": 0.5241999816894531, + "IoU.painting": 0.6869000244140625, + "IoU.sofa": 0.6433999633789063, + "IoU.shelf": 0.4509000015258789, + "IoU.house": 0.495, + "IoU.sea": 0.5997000122070313, + "IoU.mirror": 0.6275999832153321, + "IoU.rug": 0.6791000366210938, + "IoU.field": 0.3134000015258789, + "IoU.armchair": 0.3915000152587891, + "IoU.seat": 0.6363999938964844, + "IoU.fence": 0.38549999237060545, + "IoU.desk": 0.4458000183105469, + "IoU.rock": 0.4856000137329102, + "IoU.wardrobe": 0.5556000137329101, + "IoU.lamp": 0.5341999816894532, + "IoU.bathtub": 0.8201000213623046, + "IoU.railing": 0.3522999954223633, + "IoU.cushion": 0.550099983215332, + "IoU.base": 0.3766999816894531, + "IoU.box": 0.2603000068664551, + "IoU.column": 0.4679000091552734, + "IoU.signboard": 0.3672999954223633, + "IoU.chest of drawers": 0.34810001373291016, + "IoU.counter": 0.27469999313354493, + "IoU.sand": 0.447599983215332, + "IoU.sink": 0.6731999969482422, + "IoU.skyscraper": 0.6393999862670898, + "IoU.fireplace": 0.6643000030517578, + "IoU.refrigerator": 0.7104000091552735, + "IoU.grandstand": 0.46700000762939453, + "IoU.path": 0.2642000007629395, + "IoU.stairs": 0.2822999954223633, + "IoU.runway": 0.702699966430664, + "IoU.case": 0.5441999816894532, + "IoU.pool table": 0.9215000152587891, + "IoU.pillow": 0.5725, + "IoU.screen door": 0.5650999832153321, + "IoU.stairway": 0.43290000915527344, + "IoU.river": 0.18979999542236328, + "IoU.bridge": 0.6027000045776367, + "IoU.bookcase": 0.31790000915527344, + "IoU.blind": 0.4484000015258789, + "IoU.coffee table": 0.5281999969482422, + "IoU.toilet": 0.7677999877929688, + "IoU.flower": 0.3579999923706055, + "IoU.book": 0.45720001220703127, + "IoU.hill": 0.11859999656677246, + "IoU.bench": 0.40439998626708984, + "IoU.countertop": 0.5195999908447265, + "IoU.stove": 0.7062000274658203, + "IoU.palm": 0.4286000061035156, + "IoU.kitchen island": 0.32479999542236326, + "IoU.computer": 0.7119999694824218, + "IoU.swivel chair": 0.5159999847412109, + "IoU.boat": 0.6141999816894531, + "IoU.bar": 0.5129000091552735, + "IoU.arcade machine": 0.7225, + "IoU.hovel": 0.5645000076293946, + "IoU.bus": 0.8297000122070313, + "IoU.towel": 0.5922999954223633, + "IoU.light": 0.40630001068115235, + "IoU.truck": 0.3270999908447266, + "IoU.tower": 0.36439998626708986, + "IoU.chandelier": 0.6138000106811523, + "IoU.awning": 0.39180000305175783, + "IoU.streetlight": 0.20120000839233398, + "IoU.booth": 0.3616999816894531, + "IoU.television receiver": 0.6906999969482421, + "IoU.airplane": 0.5254999923706055, + "IoU.dirt track": 0.043400001525878903, + "IoU.apparel": 0.3766999816894531, + "IoU.pole": 0.13420000076293945, + "IoU.land": 0.024700000286102294, + "IoU.bannister": 0.08930000305175781, + "IoU.escalator": 0.3893000030517578, + "IoU.ottoman": 0.4672000122070312, + "IoU.bottle": 0.19920000076293945, + "IoU.buffet": 0.6236000061035156, + "IoU.poster": 0.2575, + "IoU.stage": 0.12529999732971192, + "IoU.van": 0.4256999969482422, + "IoU.ship": 0.31979999542236326, + "IoU.fountain": 0.21030000686645509, + 
"IoU.conveyer belt": 0.6819999694824219, + "IoU.canopy": 0.3125, + "IoU.washer": 0.7133999633789062, + "IoU.plaything": 0.38990001678466796, + "IoU.swimming pool": 0.7601000213623047, + "IoU.stool": 0.3118000030517578, + "IoU.barrel": 0.5666999816894531, + "IoU.basket": 0.18989999771118163, + "IoU.waterfall": 0.6011000061035157, + "IoU.tent": 0.8637000274658203, + "IoU.bag": 0.1015999984741211, + "IoU.minibike": 0.5940000152587891, + "IoU.cradle": 0.798499984741211, + "IoU.oven": 0.22510000228881835, + "IoU.ball": 0.5358000183105469, + "IoU.food": 0.5804999923706055, + "IoU.step": 0.07539999961853028, + "IoU.tank": 0.5002000045776367, + "IoU.trade name": 0.2675, + "IoU.microwave": 0.37470001220703125, + "IoU.pot": 0.41959999084472654, + "IoU.animal": 0.6648999786376953, + "IoU.bicycle": 0.49340000152587893, + "IoU.lake": 0.48450000762939455, + "IoU.dishwasher": 0.49290000915527343, + "IoU.screen": 0.514900016784668, + "IoU.blanket": 0.155, + "IoU.sculpture": 0.625099983215332, + "IoU.hood": 0.5004000091552734, + "IoU.sconce": 0.2459000015258789, + "IoU.vase": 0.31629999160766603, + "IoU.traffic light": 0.22969999313354492, + "IoU.tray": 0.052100000381469724, + "IoU.ashcan": 0.37209999084472656, + "IoU.fan": 0.48389999389648436, + "IoU.pier": 0.2545000076293945, + "IoU.crt screen": 0.03309999942779541, + "IoU.plate": 0.4366999816894531, + "IoU.monitor": 0.014600000381469726, + "IoU.bulletin board": 0.44400001525878907, + "IoU.shower": 0.008899999856948853, + "IoU.radiator": 0.6252000045776367, + "IoU.glass": 0.12359999656677247, + "IoU.clock": 0.251200008392334, + "IoU.flag": 0.3468000030517578, + "Acc.wall": 0.8723999786376954, + "Acc.building": 0.9294000244140626, + "Acc.sky": 0.9619000244140625, + "Acc.floor": 0.8894000244140625, + "Acc.tree": 0.8961000061035156, + "Acc.ceiling": 0.8929000091552735, + "Acc.road": 0.8948000335693359, + "Acc.bed ": 0.9611000061035156, + "Acc.windowpane": 0.7691000366210937, + "Acc.grass": 0.7938999938964844, + "Acc.cabinet": 0.7351999664306641, + "Acc.sidewalk": 0.8158999633789062, + "Acc.person": 0.9212000274658203, + "Acc.earth": 0.5211000061035156, + "Acc.door": 0.6462999725341797, + "Acc.table": 0.717699966430664, + "Acc.mountain": 0.7580000305175781, + "Acc.plant": 0.5884999847412109, + "Acc.curtain": 0.8675, + "Acc.chair": 0.6858000183105468, + "Acc.car": 0.930199966430664, + "Acc.water": 0.6648999786376953, + "Acc.painting": 0.8819000244140625, + "Acc.sofa": 0.8187999725341797, + "Acc.shelf": 0.6079999923706054, + "Acc.house": 0.6426000213623047, + "Acc.sea": 0.7534999847412109, + "Acc.mirror": 0.7529000091552734, + "Acc.rug": 0.8143000030517578, + "Acc.field": 0.5625, + "Acc.armchair": 0.5856999969482422, + "Acc.seat": 0.8543000030517578, + "Acc.fence": 0.5354000091552734, + "Acc.desk": 0.7608000183105469, + "Acc.rock": 0.6576999664306641, + "Acc.wardrobe": 0.7138999938964844, + "Acc.lamp": 0.7408000183105469, + "Acc.bathtub": 0.8681999969482422, + "Acc.railing": 0.45849998474121095, + "Acc.cushion": 0.6487000274658203, + "Acc.base": 0.6648999786376953, + "Acc.box": 0.3347999954223633, + "Acc.column": 0.5690000152587891, + "Acc.signboard": 0.4793000030517578, + "Acc.chest of drawers": 0.6234999847412109, + "Acc.counter": 0.3691999816894531, + "Acc.sand": 0.6711000061035156, + "Acc.sink": 0.7637999725341796, + "Acc.skyscraper": 0.7719000244140625, + "Acc.fireplace": 0.9133000183105469, + "Acc.refrigerator": 0.8676000213623047, + "Acc.grandstand": 0.7643000030517578, + "Acc.path": 0.3972999954223633, + "Acc.stairs": 0.42009998321533204, + 
"Acc.runway": 0.9098000335693359, + "Acc.case": 0.6416000366210938, + "Acc.pool table": 0.9756999969482422, + "Acc.pillow": 0.6808000183105469, + "Acc.screen door": 0.6681999969482422, + "Acc.stairway": 0.5816999816894531, + "Acc.river": 0.4563999938964844, + "Acc.bridge": 0.8752999877929688, + "Acc.bookcase": 0.46060001373291015, + "Acc.blind": 0.5159999847412109, + "Acc.coffee table": 0.8591999816894531, + "Acc.toilet": 0.9061000061035156, + "Acc.flower": 0.5304000091552734, + "Acc.book": 0.6641000366210937, + "Acc.hill": 0.26079999923706054, + "Acc.bench": 0.47299999237060547, + "Acc.countertop": 0.6483000183105468, + "Acc.stove": 0.8137000274658203, + "Acc.palm": 0.6741000366210937, + "Acc.kitchen island": 0.6494000244140625, + "Acc.computer": 0.8966000366210938, + "Acc.swivel chair": 0.6975, + "Acc.boat": 0.8745999908447266, + "Acc.bar": 0.6819000244140625, + "Acc.arcade machine": 0.8345999908447266, + "Acc.hovel": 0.6340999984741211, + "Acc.bus": 0.909000015258789, + "Acc.towel": 0.7748000335693359, + "Acc.light": 0.5045000076293945, + "Acc.truck": 0.474900016784668, + "Acc.tower": 0.5002999877929688, + "Acc.chandelier": 0.7859999847412109, + "Acc.awning": 0.5172000122070313, + "Acc.streetlight": 0.3128000068664551, + "Acc.booth": 0.44369998931884763, + "Acc.television receiver": 0.7916999816894531, + "Acc.airplane": 0.6543000030517578, + "Acc.dirt track": 0.06360000133514404, + "Acc.apparel": 0.5784000015258789, + "Acc.pole": 0.17450000762939452, + "Acc.land": 0.05739999771118164, + "Acc.bannister": 0.12739999771118163, + "Acc.escalator": 0.47709999084472654, + "Acc.ottoman": 0.6704000091552734, + "Acc.bottle": 0.2490999984741211, + "Acc.buffet": 0.8786000061035156, + "Acc.poster": 0.32369998931884764, + "Acc.stage": 0.2854000091552734, + "Acc.van": 0.5409999847412109, + "Acc.ship": 0.32560001373291014, + "Acc.fountain": 0.21989999771118163, + "Acc.conveyer belt": 0.9243000030517579, + "Acc.canopy": 0.37659999847412107, + "Acc.washer": 0.7462000274658203, + "Acc.plaything": 0.5736000061035156, + "Acc.swimming pool": 0.9030000305175782, + "Acc.stool": 0.46349998474121096, + "Acc.barrel": 0.649000015258789, + "Acc.basket": 0.27969999313354493, + "Acc.waterfall": 0.6827999877929688, + "Acc.tent": 0.9888999938964844, + "Acc.bag": 0.10789999961853028, + "Acc.minibike": 0.6951999664306641, + "Acc.cradle": 0.9636000061035156, + "Acc.oven": 0.5409000015258789, + "Acc.ball": 0.6231000137329101, + "Acc.food": 0.6902999877929688, + "Acc.step": 0.09310000419616699, + "Acc.tank": 0.590999984741211, + "Acc.trade name": 0.2936000061035156, + "Acc.microwave": 0.42689998626708986, + "Acc.pot": 0.4986000061035156, + "Acc.animal": 0.6933999633789063, + "Acc.bicycle": 0.727699966430664, + "Acc.lake": 0.6798999786376954, + "Acc.dishwasher": 0.7036000061035156, + "Acc.screen": 0.7944000244140625, + "Acc.blanket": 0.18420000076293946, + "Acc.sculpture": 0.822699966430664, + "Acc.hood": 0.6134000015258789, + "Acc.sconce": 0.3327000045776367, + "Acc.vase": 0.48270000457763673, + "Acc.traffic light": 0.4463000106811523, + "Acc.tray": 0.07449999809265137, + "Acc.ashcan": 0.514000015258789, + "Acc.fan": 0.6883999633789063, + "Acc.pier": 0.4786000061035156, + "Acc.crt screen": 0.11279999732971191, + "Acc.plate": 0.6462000274658203, + "Acc.monitor": 0.017200000286102295, + "Acc.bulletin board": 0.7519000244140625, + "Acc.shower": 0.043600001335144044, + "Acc.radiator": 0.7287999725341797, + "Acc.glass": 0.1397999954223633, + "Acc.clock": 0.28709999084472654, + "Acc.flag": 0.3995000076293945 + } + }, + "67": { + 
"config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8267, + "mIoU": 0.4705, + "mAcc": 0.6086, + "IoU.wall": 0.7676000213623047, + "IoU.building": 0.8326000213623047, + "IoU.sky": 0.9402999877929688, + "IoU.floor": 0.8208000183105468, + "IoU.tree": 0.7416000366210938, + "IoU.ceiling": 0.8355000305175782, + "IoU.road": 0.8333000183105469, + "IoU.bed ": 0.8845999908447265, + "IoU.windowpane": 0.6238999938964844, + "IoU.grass": 0.680199966430664, + "IoU.cabinet": 0.6059000015258789, + "IoU.sidewalk": 0.642300033569336, + "IoU.person": 0.8051000213623047, + "IoU.earth": 0.4020999908447266, + "IoU.door": 0.4741999816894531, + "IoU.table": 0.571500015258789, + "IoU.mountain": 0.6441000366210937, + "IoU.plant": 0.4915999984741211, + "IoU.curtain": 0.7337999725341797, + "IoU.chair": 0.5299000167846679, + "IoU.car": 0.827699966430664, + "IoU.water": 0.5229999923706055, + "IoU.painting": 0.686500015258789, + "IoU.sofa": 0.6547000122070312, + "IoU.shelf": 0.45220001220703127, + "IoU.house": 0.46450000762939453, + "IoU.sea": 0.5597999954223633, + "IoU.mirror": 0.6302999877929687, + "IoU.rug": 0.6833999633789063, + "IoU.field": 0.3238999938964844, + "IoU.armchair": 0.4102999877929687, + "IoU.seat": 0.6604000091552734, + "IoU.fence": 0.3708000183105469, + "IoU.desk": 0.4127999877929687, + "IoU.rock": 0.4702999877929688, + "IoU.wardrobe": 0.5445000076293945, + "IoU.lamp": 0.5443999862670899, + "IoU.bathtub": 0.8186000061035156, + "IoU.railing": 0.36189998626708986, + "IoU.cushion": 0.5322000122070313, + "IoU.base": 0.35009998321533203, + "IoU.box": 0.2706999969482422, + "IoU.column": 0.4922999954223633, + "IoU.signboard": 0.3706999969482422, + "IoU.chest of drawers": 0.306299991607666, + "IoU.counter": 0.27360000610351565, + "IoU.sand": 0.45490001678466796, + "IoU.sink": 0.6602999877929687, + "IoU.skyscraper": 0.6315000152587891, + "IoU.fireplace": 0.6536000061035157, + "IoU.refrigerator": 0.7208999633789063, + "IoU.grandstand": 0.46430000305175784, + "IoU.path": 0.2638999938964844, + "IoU.stairs": 0.293799991607666, + "IoU.runway": 0.7090000152587891, + "IoU.case": 0.5316999816894531, + "IoU.pool table": 0.9095999908447265, + "IoU.pillow": 0.5441999816894532, + "IoU.screen door": 0.5568000030517578, + "IoU.stairway": 0.41439998626708985, + "IoU.river": 0.11350000381469727, + "IoU.bridge": 0.5586999893188477, + "IoU.bookcase": 0.3225, + "IoU.blind": 0.41080001831054686, + "IoU.coffee table": 0.5461999893188476, + "IoU.toilet": 0.7593000030517578, + "IoU.flower": 0.34240001678466797, + "IoU.book": 0.44459999084472657, + "IoU.hill": 0.11699999809265137, + "IoU.bench": 0.4777000045776367, + "IoU.countertop": 0.5252000045776367, + "IoU.stove": 0.6837000274658203, + "IoU.palm": 0.44790000915527345, + "IoU.kitchen island": 0.37990001678466795, + "IoU.computer": 0.6377999877929688, + "IoU.swivel chair": 0.4754999923706055, + "IoU.boat": 0.6636000061035157, + "IoU.bar": 0.4704000091552734, + "IoU.arcade machine": 0.719800033569336, + "IoU.hovel": 0.5265999984741211, + "IoU.bus": 0.8247000122070313, + "IoU.towel": 0.589900016784668, + "IoU.light": 0.4065999984741211, + "IoU.truck": 0.275, + "IoU.tower": 0.44040000915527344, + "IoU.chandelier": 0.6054000091552735, + "IoU.awning": 0.4284999847412109, + "IoU.streetlight": 0.22110000610351563, + "IoU.booth": 0.2684000015258789, + "IoU.television receiver": 0.6501000213623047, + "IoU.airplane": 0.5665999984741211, + "IoU.dirt track": 0.19350000381469726, + "IoU.apparel": 
0.3181999969482422, + "IoU.pole": 0.1463000011444092, + "IoU.land": 0.03430000066757202, + "IoU.bannister": 0.12180000305175781, + "IoU.escalator": 0.45549999237060546, + "IoU.ottoman": 0.46380001068115234, + "IoU.bottle": 0.1815999984741211, + "IoU.buffet": 0.6208000183105469, + "IoU.poster": 0.19719999313354492, + "IoU.stage": 0.11590000152587891, + "IoU.van": 0.44240001678466795, + "IoU.ship": 0.1640999984741211, + "IoU.fountain": 0.20719999313354492, + "IoU.conveyer belt": 0.7383000183105469, + "IoU.canopy": 0.265, + "IoU.washer": 0.6861000061035156, + "IoU.plaything": 0.36080001831054687, + "IoU.swimming pool": 0.7198999786376953, + "IoU.stool": 0.3146999931335449, + "IoU.barrel": 0.10199999809265137, + "IoU.basket": 0.23780000686645508, + "IoU.waterfall": 0.6313999938964844, + "IoU.tent": 0.933499984741211, + "IoU.bag": 0.11909999847412109, + "IoU.minibike": 0.6345999908447265, + "IoU.cradle": 0.7609999847412109, + "IoU.oven": 0.20829999923706055, + "IoU.ball": 0.4859000015258789, + "IoU.food": 0.5616999816894531, + "IoU.step": 0.08289999961853027, + "IoU.tank": 0.49990001678466794, + "IoU.trade name": 0.20860000610351562, + "IoU.microwave": 0.3865999984741211, + "IoU.pot": 0.41819999694824217, + "IoU.animal": 0.6308000183105469, + "IoU.bicycle": 0.5165999984741211, + "IoU.lake": 0.4431999969482422, + "IoU.dishwasher": 0.5397999954223632, + "IoU.screen": 0.6004000091552735, + "IoU.blanket": 0.13590000152587892, + "IoU.sculpture": 0.5870999908447265, + "IoU.hood": 0.44959999084472657, + "IoU.sconce": 0.253700008392334, + "IoU.vase": 0.31309999465942384, + "IoU.traffic light": 0.21719999313354493, + "IoU.tray": 0.04760000228881836, + "IoU.ashcan": 0.3479000091552734, + "IoU.fan": 0.43520000457763675, + "IoU.pier": 0.26, + "IoU.crt screen": 0.03980000019073486, + "IoU.plate": 0.4502000045776367, + "IoU.monitor": 0.04070000171661377, + "IoU.bulletin board": 0.4256000137329102, + "IoU.shower": 0.010099999904632569, + "IoU.radiator": 0.605999984741211, + "IoU.glass": 0.09479999542236328, + "IoU.clock": 0.21770000457763672, + "IoU.flag": 0.3315000152587891, + "Acc.wall": 0.8662000274658204, + "Acc.building": 0.9311000061035156, + "Acc.sky": 0.9658000183105468, + "Acc.floor": 0.8958000183105469, + "Acc.tree": 0.8826999664306641, + "Acc.ceiling": 0.892300033569336, + "Acc.road": 0.8915000152587891, + "Acc.bed ": 0.9613999938964843, + "Acc.windowpane": 0.7659999847412109, + "Acc.grass": 0.8133000183105469, + "Acc.cabinet": 0.7275, + "Acc.sidewalk": 0.8211000061035156, + "Acc.person": 0.9205999755859375, + "Acc.earth": 0.5411000061035156, + "Acc.door": 0.6737000274658204, + "Acc.table": 0.7193000030517578, + "Acc.mountain": 0.7527999877929688, + "Acc.plant": 0.5741999816894531, + "Acc.curtain": 0.8762999725341797, + "Acc.chair": 0.6694999694824219, + "Acc.car": 0.9305000305175781, + "Acc.water": 0.6511000061035156, + "Acc.painting": 0.8909999847412109, + "Acc.sofa": 0.7913999938964844, + "Acc.shelf": 0.6286999893188476, + "Acc.house": 0.5866999816894531, + "Acc.sea": 0.8137000274658203, + "Acc.mirror": 0.7793000030517578, + "Acc.rug": 0.8029000091552735, + "Acc.field": 0.5547000122070312, + "Acc.armchair": 0.6733000183105469, + "Acc.seat": 0.8573999786376953, + "Acc.fence": 0.51, + "Acc.desk": 0.7559999847412109, + "Acc.rock": 0.7030000305175781, + "Acc.wardrobe": 0.7288999938964844, + "Acc.lamp": 0.7273000335693359, + "Acc.bathtub": 0.8770999908447266, + "Acc.railing": 0.48880001068115236, + "Acc.cushion": 0.6598000335693359, + "Acc.base": 0.5940999984741211, + "Acc.box": 0.34669998168945315, 
+ "Acc.column": 0.614900016784668, + "Acc.signboard": 0.5018000030517578, + "Acc.chest of drawers": 0.5543000030517579, + "Acc.counter": 0.38299999237060545, + "Acc.sand": 0.6422000122070313, + "Acc.sink": 0.74, + "Acc.skyscraper": 0.7733999633789063, + "Acc.fireplace": 0.9273999786376953, + "Acc.refrigerator": 0.8747000122070312, + "Acc.grandstand": 0.7319000244140625, + "Acc.path": 0.3606000137329102, + "Acc.stairs": 0.4168000030517578, + "Acc.runway": 0.9030000305175782, + "Acc.case": 0.6302999877929687, + "Acc.pool table": 0.9776000213623047, + "Acc.pillow": 0.6433999633789063, + "Acc.screen door": 0.6462999725341797, + "Acc.stairway": 0.5834999847412109, + "Acc.river": 0.2377000045776367, + "Acc.bridge": 0.8416000366210937, + "Acc.bookcase": 0.5277999877929688, + "Acc.blind": 0.46369998931884765, + "Acc.coffee table": 0.8702999877929688, + "Acc.toilet": 0.9073999786376953, + "Acc.flower": 0.5804000091552735, + "Acc.book": 0.649000015258789, + "Acc.hill": 0.26059999465942385, + "Acc.bench": 0.542599983215332, + "Acc.countertop": 0.6516999816894531, + "Acc.stove": 0.8297000122070313, + "Acc.palm": 0.7152999877929688, + "Acc.kitchen island": 0.6890000152587891, + "Acc.computer": 0.775199966430664, + "Acc.swivel chair": 0.6752999877929687, + "Acc.boat": 0.8616000366210937, + "Acc.bar": 0.6311000061035156, + "Acc.arcade machine": 0.8430000305175781, + "Acc.hovel": 0.585900001525879, + "Acc.bus": 0.9255000305175781, + "Acc.towel": 0.7429000091552734, + "Acc.light": 0.49139999389648437, + "Acc.truck": 0.41220001220703123, + "Acc.tower": 0.635099983215332, + "Acc.chandelier": 0.8206999969482421, + "Acc.awning": 0.5347999954223632, + "Acc.streetlight": 0.31809999465942385, + "Acc.booth": 0.4408000183105469, + "Acc.television receiver": 0.8091000366210938, + "Acc.airplane": 0.6791000366210938, + "Acc.dirt track": 0.2534000015258789, + "Acc.apparel": 0.4545999908447266, + "Acc.pole": 0.19049999237060547, + "Acc.land": 0.06519999980926514, + "Acc.bannister": 0.17670000076293946, + "Acc.escalator": 0.6462999725341797, + "Acc.ottoman": 0.6537999725341797, + "Acc.bottle": 0.23030000686645508, + "Acc.buffet": 0.8787000274658203, + "Acc.poster": 0.24670000076293946, + "Acc.stage": 0.231299991607666, + "Acc.van": 0.534000015258789, + "Acc.ship": 0.16510000228881835, + "Acc.fountain": 0.2115999984741211, + "Acc.conveyer belt": 0.9183000183105469, + "Acc.canopy": 0.35900001525878905, + "Acc.washer": 0.6990000152587891, + "Acc.plaything": 0.6045000076293945, + "Acc.swimming pool": 0.8701000213623047, + "Acc.stool": 0.44990001678466796, + "Acc.barrel": 0.6295000076293945, + "Acc.basket": 0.29889999389648436, + "Acc.waterfall": 0.6862000274658203, + "Acc.tent": 0.9901999664306641, + "Acc.bag": 0.12779999732971192, + "Acc.minibike": 0.7587999725341796, + "Acc.cradle": 0.9670999908447265, + "Acc.oven": 0.47959999084472654, + "Acc.ball": 0.5356000137329101, + "Acc.food": 0.6252000045776367, + "Acc.step": 0.10270000457763671, + "Acc.tank": 0.5552999877929687, + "Acc.trade name": 0.22760000228881835, + "Acc.microwave": 0.4306999969482422, + "Acc.pot": 0.4906000137329102, + "Acc.animal": 0.6809999847412109, + "Acc.bicycle": 0.6973000335693359, + "Acc.lake": 0.6295999908447265, + "Acc.dishwasher": 0.7386000061035156, + "Acc.screen": 0.8937999725341796, + "Acc.blanket": 0.15420000076293947, + "Acc.sculpture": 0.8320999908447265, + "Acc.hood": 0.5591999816894532, + "Acc.sconce": 0.32810001373291015, + "Acc.vase": 0.46669998168945315, + "Acc.traffic light": 0.4329999923706055, + "Acc.tray": 0.06579999923706055, + 
"Acc.ashcan": 0.5427000045776367, + "Acc.fan": 0.6808000183105469, + "Acc.pier": 0.46810001373291016, + "Acc.crt screen": 0.11779999732971191, + "Acc.plate": 0.6905999755859376, + "Acc.monitor": 0.06440000057220459, + "Acc.bulletin board": 0.6562000274658203, + "Acc.shower": 0.04960000038146973, + "Acc.radiator": 0.7026000213623047, + "Acc.glass": 0.10550000190734864, + "Acc.clock": 0.25420000076293947, + "Acc.flag": 0.39130001068115233 + } + }, + "68": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8283, + "mIoU": 0.4804, + "mAcc": 0.6153, + "IoU.wall": 0.7693000030517578, + "IoU.building": 0.8341999816894531, + "IoU.sky": 0.9405000305175781, + "IoU.floor": 0.825, + "IoU.tree": 0.7445999908447266, + "IoU.ceiling": 0.8387999725341797, + "IoU.road": 0.8255000305175781, + "IoU.bed ": 0.8748000335693359, + "IoU.windowpane": 0.6225, + "IoU.grass": 0.677300033569336, + "IoU.cabinet": 0.6141999816894531, + "IoU.sidewalk": 0.6448000335693359, + "IoU.person": 0.7983000183105469, + "IoU.earth": 0.3858000183105469, + "IoU.door": 0.49220001220703125, + "IoU.table": 0.5858000183105468, + "IoU.mountain": 0.6247999954223633, + "IoU.plant": 0.5061999893188477, + "IoU.curtain": 0.7244000244140625, + "IoU.chair": 0.5368000030517578, + "IoU.car": 0.82, + "IoU.water": 0.5143999862670898, + "IoU.painting": 0.6980000305175781, + "IoU.sofa": 0.6748000335693359, + "IoU.shelf": 0.4565000152587891, + "IoU.house": 0.49009998321533205, + "IoU.sea": 0.6081999969482422, + "IoU.mirror": 0.6309000015258789, + "IoU.rug": 0.6791999816894532, + "IoU.field": 0.33540000915527346, + "IoU.armchair": 0.4386999893188477, + "IoU.seat": 0.6405999755859375, + "IoU.fence": 0.4116999816894531, + "IoU.desk": 0.4468999862670898, + "IoU.rock": 0.44979999542236326, + "IoU.wardrobe": 0.566500015258789, + "IoU.lamp": 0.5561999893188476, + "IoU.bathtub": 0.8119000244140625, + "IoU.railing": 0.37090000152587893, + "IoU.cushion": 0.5497000122070312, + "IoU.base": 0.2840999984741211, + "IoU.box": 0.2610000038146973, + "IoU.column": 0.479900016784668, + "IoU.signboard": 0.3840000152587891, + "IoU.chest of drawers": 0.33810001373291015, + "IoU.counter": 0.3218999862670898, + "IoU.sand": 0.5368000030517578, + "IoU.sink": 0.6754000091552734, + "IoU.skyscraper": 0.6668000030517578, + "IoU.fireplace": 0.6866000366210937, + "IoU.refrigerator": 0.7187999725341797, + "IoU.grandstand": 0.47299999237060547, + "IoU.path": 0.221299991607666, + "IoU.stairs": 0.2836000061035156, + "IoU.runway": 0.6583000183105469, + "IoU.case": 0.5515999984741211, + "IoU.pool table": 0.9020999908447266, + "IoU.pillow": 0.563499984741211, + "IoU.screen door": 0.4722999954223633, + "IoU.stairway": 0.364900016784668, + "IoU.river": 0.13399999618530273, + "IoU.bridge": 0.6580999755859375, + "IoU.bookcase": 0.33470001220703127, + "IoU.blind": 0.4391999816894531, + "IoU.coffee table": 0.5695000076293946, + "IoU.toilet": 0.765199966430664, + "IoU.flower": 0.3438999938964844, + "IoU.book": 0.4406999969482422, + "IoU.hill": 0.10630000114440918, + "IoU.bench": 0.47150001525878904, + "IoU.countertop": 0.42470001220703124, + "IoU.stove": 0.6866999816894531, + "IoU.palm": 0.505099983215332, + "IoU.kitchen island": 0.3422999954223633, + "IoU.computer": 0.6741999816894532, + "IoU.swivel chair": 0.4784000015258789, + "IoU.boat": 0.7030000305175781, + "IoU.bar": 0.3843000030517578, + "IoU.arcade machine": 0.6918000030517578, + "IoU.hovel": 0.5220000076293946, + "IoU.bus": 0.7693000030517578, + 
"IoU.towel": 0.6234000015258789, + "IoU.light": 0.37310001373291013, + "IoU.truck": 0.32560001373291014, + "IoU.tower": 0.3252000045776367, + "IoU.chandelier": 0.6308000183105469, + "IoU.awning": 0.41220001220703123, + "IoU.streetlight": 0.2453000068664551, + "IoU.booth": 0.2781999969482422, + "IoU.television receiver": 0.6512999725341797, + "IoU.airplane": 0.7362999725341797, + "IoU.dirt track": 0.11, + "IoU.apparel": 0.3060000038146973, + "IoU.pole": 0.2688999938964844, + "IoU.land": 0.046799998283386234, + "IoU.bannister": 0.15130000114440917, + "IoU.escalator": 0.5113000106811524, + "IoU.ottoman": 0.4818000030517578, + "IoU.bottle": 0.19770000457763673, + "IoU.buffet": 0.5984000015258789, + "IoU.poster": 0.23690000534057618, + "IoU.stage": 0.12609999656677245, + "IoU.van": 0.4325, + "IoU.ship": 0.6213999938964844, + "IoU.fountain": 0.20350000381469727, + "IoU.conveyer belt": 0.697699966430664, + "IoU.canopy": 0.2368000030517578, + "IoU.washer": 0.7023999786376953, + "IoU.plaything": 0.32810001373291015, + "IoU.swimming pool": 0.705, + "IoU.stool": 0.31559999465942384, + "IoU.barrel": 0.4022000122070313, + "IoU.basket": 0.22229999542236328, + "IoU.waterfall": 0.7331999969482422, + "IoU.tent": 0.95, + "IoU.bag": 0.15550000190734864, + "IoU.minibike": 0.6081000137329101, + "IoU.cradle": 0.7693000030517578, + "IoU.oven": 0.18870000839233397, + "IoU.ball": 0.49770000457763675, + "IoU.food": 0.48069999694824217, + "IoU.step": 0.07739999771118164, + "IoU.tank": 0.5065999984741211, + "IoU.trade name": 0.2827000045776367, + "IoU.microwave": 0.37259998321533205, + "IoU.pot": 0.3843000030517578, + "IoU.animal": 0.64, + "IoU.bicycle": 0.51, + "IoU.lake": 0.5106999969482422, + "IoU.dishwasher": 0.5570999908447266, + "IoU.screen": 0.5595999908447266, + "IoU.blanket": 0.07900000095367432, + "IoU.sculpture": 0.5377000045776367, + "IoU.hood": 0.4679000091552734, + "IoU.sconce": 0.33549999237060546, + "IoU.vase": 0.2905999946594238, + "IoU.traffic light": 0.23049999237060548, + "IoU.tray": 0.028199999332427977, + "IoU.ashcan": 0.3579000091552734, + "IoU.fan": 0.47369998931884766, + "IoU.pier": 0.28350000381469725, + "IoU.crt screen": 0.0275, + "IoU.plate": 0.48139999389648436, + "IoU.monitor": 0.032400000095367434, + "IoU.bulletin board": 0.49290000915527343, + "IoU.shower": 0.0044999998807907105, + "IoU.radiator": 0.6158000183105469, + "IoU.glass": 0.11909999847412109, + "IoU.clock": 0.2818000030517578, + "IoU.flag": 0.33369998931884765, + "Acc.wall": 0.8708000183105469, + "Acc.building": 0.9290000152587891, + "Acc.sky": 0.9645999908447266, + "Acc.floor": 0.8959999847412109, + "Acc.tree": 0.8858000183105469, + "Acc.ceiling": 0.8972000122070313, + "Acc.road": 0.8954000091552734, + "Acc.bed ": 0.9620999908447265, + "Acc.windowpane": 0.7662999725341797, + "Acc.grass": 0.7954000091552734, + "Acc.cabinet": 0.725999984741211, + "Acc.sidewalk": 0.8129000091552734, + "Acc.person": 0.9291000366210938, + "Acc.earth": 0.5043999862670898, + "Acc.door": 0.6725, + "Acc.table": 0.7258999633789063, + "Acc.mountain": 0.7379000091552734, + "Acc.plant": 0.590900001525879, + "Acc.curtain": 0.8756999969482422, + "Acc.chair": 0.6876000213623047, + "Acc.car": 0.920199966430664, + "Acc.water": 0.6448000335693359, + "Acc.painting": 0.8694999694824219, + "Acc.sofa": 0.8294000244140625, + "Acc.shelf": 0.6655999755859375, + "Acc.house": 0.627400016784668, + "Acc.sea": 0.8936000061035156, + "Acc.mirror": 0.7611000061035156, + "Acc.rug": 0.726500015258789, + "Acc.field": 0.5829000091552734, + "Acc.armchair": 0.6704000091552734, + 
"Acc.seat": 0.8679000091552734, + "Acc.fence": 0.5958000183105469, + "Acc.desk": 0.7631999969482421, + "Acc.rock": 0.7137000274658203, + "Acc.wardrobe": 0.7338999938964844, + "Acc.lamp": 0.7191999816894531, + "Acc.bathtub": 0.8718000030517579, + "Acc.railing": 0.4922999954223633, + "Acc.cushion": 0.6441999816894531, + "Acc.base": 0.42880001068115237, + "Acc.box": 0.31260000228881835, + "Acc.column": 0.6263000106811524, + "Acc.signboard": 0.49700000762939456, + "Acc.chest of drawers": 0.6186999893188476, + "Acc.counter": 0.4434000015258789, + "Acc.sand": 0.7418000030517579, + "Acc.sink": 0.7565000152587891, + "Acc.skyscraper": 0.8130000305175781, + "Acc.fireplace": 0.9341999816894532, + "Acc.refrigerator": 0.8808999633789063, + "Acc.grandstand": 0.7426000213623047, + "Acc.path": 0.3021999931335449, + "Acc.stairs": 0.4047999954223633, + "Acc.runway": 0.9013999938964844, + "Acc.case": 0.7383000183105469, + "Acc.pool table": 0.9794000244140625, + "Acc.pillow": 0.7111000061035156, + "Acc.screen door": 0.5418999862670898, + "Acc.stairway": 0.49650001525878906, + "Acc.river": 0.2538999938964844, + "Acc.bridge": 0.8491000366210938, + "Acc.bookcase": 0.5979000091552734, + "Acc.blind": 0.5109000015258789, + "Acc.coffee table": 0.8401000213623047, + "Acc.toilet": 0.8995999908447265, + "Acc.flower": 0.5395000076293945, + "Acc.book": 0.6, + "Acc.hill": 0.25829999923706054, + "Acc.bench": 0.549000015258789, + "Acc.countertop": 0.6498000335693359, + "Acc.stove": 0.8323000335693359, + "Acc.palm": 0.7048999786376953, + "Acc.kitchen island": 0.6833999633789063, + "Acc.computer": 0.8029000091552735, + "Acc.swivel chair": 0.6665000152587891, + "Acc.boat": 0.8534999847412109, + "Acc.bar": 0.5302999877929687, + "Acc.arcade machine": 0.7470999908447266, + "Acc.hovel": 0.5629999923706055, + "Acc.bus": 0.9423999786376953, + "Acc.towel": 0.773499984741211, + "Acc.light": 0.43029998779296874, + "Acc.truck": 0.4847999954223633, + "Acc.tower": 0.4431999969482422, + "Acc.chandelier": 0.8023999786376953, + "Acc.awning": 0.5104999923706055, + "Acc.streetlight": 0.3213000106811523, + "Acc.booth": 0.4241999816894531, + "Acc.television receiver": 0.8290000152587891, + "Acc.airplane": 0.8209999847412109, + "Acc.dirt track": 0.178700008392334, + "Acc.apparel": 0.45970001220703127, + "Acc.pole": 0.4009000015258789, + "Acc.land": 0.13449999809265137, + "Acc.bannister": 0.23309999465942383, + "Acc.escalator": 0.7612999725341797, + "Acc.ottoman": 0.6279999923706054, + "Acc.bottle": 0.25809999465942385, + "Acc.buffet": 0.8533999633789062, + "Acc.poster": 0.3006999969482422, + "Acc.stage": 0.24770000457763672, + "Acc.van": 0.5218000030517578, + "Acc.ship": 0.6694999694824219, + "Acc.fountain": 0.21379999160766602, + "Acc.conveyer belt": 0.9191999816894532, + "Acc.canopy": 0.3313999938964844, + "Acc.washer": 0.7083000183105469, + "Acc.plaything": 0.5068000030517578, + "Acc.swimming pool": 0.885, + "Acc.stool": 0.44159999847412107, + "Acc.barrel": 0.638400001525879, + "Acc.basket": 0.335, + "Acc.waterfall": 0.8155000305175781, + "Acc.tent": 0.984800033569336, + "Acc.bag": 0.18879999160766603, + "Acc.minibike": 0.7430000305175781, + "Acc.cradle": 0.9769000244140625, + "Acc.oven": 0.47150001525878904, + "Acc.ball": 0.5615999984741211, + "Acc.food": 0.5677999877929687, + "Acc.step": 0.10850000381469727, + "Acc.tank": 0.5963999938964843, + "Acc.trade name": 0.3181999969482422, + "Acc.microwave": 0.4154000091552734, + "Acc.pot": 0.4488999938964844, + "Acc.animal": 0.670199966430664, + "Acc.bicycle": 0.7126000213623047, + "Acc.lake": 
0.6320999908447266, + "Acc.dishwasher": 0.6716000366210938, + "Acc.screen": 0.8969000244140625, + "Acc.blanket": 0.08829999923706054, + "Acc.sculpture": 0.8212999725341796, + "Acc.hood": 0.5990000152587891, + "Acc.sconce": 0.4615000152587891, + "Acc.vase": 0.45220001220703127, + "Acc.traffic light": 0.35970001220703124, + "Acc.tray": 0.04659999847412109, + "Acc.ashcan": 0.5175, + "Acc.fan": 0.6752999877929687, + "Acc.pier": 0.465, + "Acc.crt screen": 0.07820000171661377, + "Acc.plate": 0.6629000091552735, + "Acc.monitor": 0.05050000190734863, + "Acc.bulletin board": 0.6638999938964844, + "Acc.shower": 0.043600001335144044, + "Acc.radiator": 0.7530999755859376, + "Acc.glass": 0.13420000076293945, + "Acc.clock": 0.3504000091552734, + "Acc.flag": 0.3654999923706055 + } + }, + "69": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8289, + "mIoU": 0.47450000000000003, + "mAcc": 0.5754, + "IoU.wall": 0.7670999908447266, + "IoU.building": 0.831500015258789, + "IoU.sky": 0.9387999725341797, + "IoU.floor": 0.8166999816894531, + "IoU.tree": 0.7363999938964844, + "IoU.ceiling": 0.8343000030517578, + "IoU.road": 0.8318000030517578, + "IoU.bed ": 0.8738999938964844, + "IoU.windowpane": 0.6141999816894531, + "IoU.grass": 0.6790000152587891, + "IoU.cabinet": 0.6206000137329102, + "IoU.sidewalk": 0.6508999633789062, + "IoU.person": 0.7997000122070312, + "IoU.earth": 0.4093000030517578, + "IoU.door": 0.4754999923706055, + "IoU.table": 0.5838000106811524, + "IoU.mountain": 0.595999984741211, + "IoU.plant": 0.49459999084472656, + "IoU.curtain": 0.7316999816894532, + "IoU.chair": 0.5408000183105469, + "IoU.car": 0.8429000091552734, + "IoU.water": 0.5361999893188476, + "IoU.painting": 0.7104000091552735, + "IoU.sofa": 0.6680999755859375, + "IoU.shelf": 0.447599983215332, + "IoU.house": 0.4761000061035156, + "IoU.sea": 0.6309999847412109, + "IoU.mirror": 0.6340999984741211, + "IoU.rug": 0.6441999816894531, + "IoU.field": 0.32360000610351564, + "IoU.armchair": 0.42509998321533204, + "IoU.seat": 0.6491999816894531, + "IoU.fence": 0.4020999908447266, + "IoU.desk": 0.48459999084472655, + "IoU.rock": 0.45529998779296876, + "IoU.wardrobe": 0.5472000122070313, + "IoU.lamp": 0.5725, + "IoU.bathtub": 0.7454000091552735, + "IoU.railing": 0.35830001831054686, + "IoU.cushion": 0.5404999923706054, + "IoU.base": 0.3004999923706055, + "IoU.box": 0.27010000228881836, + "IoU.column": 0.48970001220703124, + "IoU.signboard": 0.359900016784668, + "IoU.chest of drawers": 0.33189998626708983, + "IoU.counter": 0.2979999923706055, + "IoU.sand": 0.45169998168945313, + "IoU.sink": 0.6683000183105469, + "IoU.skyscraper": 0.624900016784668, + "IoU.fireplace": 0.6961000061035156, + "IoU.refrigerator": 0.785, + "IoU.grandstand": 0.48369998931884767, + "IoU.path": 0.21389999389648437, + "IoU.stairs": 0.27170000076293943, + "IoU.runway": 0.6288999938964843, + "IoU.case": 0.5602999877929687, + "IoU.pool table": 0.9295999908447266, + "IoU.pillow": 0.56, + "IoU.screen door": 0.44869998931884764, + "IoU.stairway": 0.3806999969482422, + "IoU.river": 0.2006999969482422, + "IoU.bridge": 0.6925, + "IoU.bookcase": 0.33299999237060546, + "IoU.blind": 0.39310001373291015, + "IoU.coffee table": 0.5904000091552735, + "IoU.toilet": 0.7736000061035156, + "IoU.flower": 0.327599983215332, + "IoU.book": 0.4458000183105469, + "IoU.hill": 0.09930000305175782, + "IoU.bench": 0.4361000061035156, + "IoU.countertop": 0.4225, + "IoU.stove": 0.7173999786376953, + "IoU.palm": 
0.442599983215332, + "IoU.kitchen island": 0.37439998626708987, + "IoU.computer": 0.6647000122070312, + "IoU.swivel chair": 0.46299999237060546, + "IoU.boat": 0.7048999786376953, + "IoU.bar": 0.39810001373291015, + "IoU.arcade machine": 0.5156000137329102, + "IoU.hovel": 0.4256000137329102, + "IoU.bus": 0.8911000061035156, + "IoU.towel": 0.6341999816894531, + "IoU.light": 0.22840000152587892, + "IoU.truck": 0.32939998626708983, + "IoU.tower": 0.31739999771118166, + "IoU.chandelier": 0.6302999877929687, + "IoU.awning": 0.3868000030517578, + "IoU.streetlight": 0.18299999237060546, + "IoU.booth": 0.30889999389648437, + "IoU.television receiver": 0.6606999969482422, + "IoU.airplane": 0.6318000030517578, + "IoU.dirt track": 0.12260000228881836, + "IoU.apparel": 0.2944000053405762, + "IoU.pole": 0.24420000076293946, + "IoU.land": 0.009900000095367432, + "IoU.bannister": 0.11819999694824218, + "IoU.escalator": 0.5081000137329101, + "IoU.ottoman": 0.47580001831054686, + "IoU.bottle": 0.19670000076293945, + "IoU.buffet": 0.5072999954223633, + "IoU.poster": 0.1727000045776367, + "IoU.stage": 0.10470000267028809, + "IoU.van": 0.41200000762939454, + "IoU.ship": 0.21989999771118163, + "IoU.fountain": 0.20969999313354493, + "IoU.conveyer belt": 0.8163999938964843, + "IoU.canopy": 0.2725, + "IoU.washer": 0.7372000122070312, + "IoU.plaything": 0.34240001678466797, + "IoU.swimming pool": 0.689800033569336, + "IoU.stool": 0.32790000915527345, + "IoU.barrel": 0.6006999969482422, + "IoU.basket": 0.21719999313354493, + "IoU.waterfall": 0.7537000274658203, + "IoU.tent": 0.955999984741211, + "IoU.bag": 0.13550000190734862, + "IoU.minibike": 0.585, + "IoU.cradle": 0.7811000061035156, + "IoU.oven": 0.19690000534057617, + "IoU.ball": 0.46939998626708984, + "IoU.food": 0.47099998474121096, + "IoU.step": 0.07760000228881836, + "IoU.tank": 0.5399000167846679, + "IoU.trade name": 0.17709999084472655, + "IoU.microwave": 0.3793999862670898, + "IoU.pot": 0.3825, + "IoU.animal": 0.6277000045776367, + "IoU.bicycle": 0.4843000030517578, + "IoU.lake": 0.5943999862670899, + "IoU.dishwasher": 0.5613999938964844, + "IoU.screen": 0.6095999908447266, + "IoU.blanket": 0.09140000343322754, + "IoU.sculpture": 0.585900001525879, + "IoU.hood": 0.40439998626708984, + "IoU.sconce": 0.2588999938964844, + "IoU.vase": 0.31079999923706053, + "IoU.traffic light": 0.19280000686645507, + "IoU.tray": 0.020199999809265137, + "IoU.ashcan": 0.41139999389648435, + "IoU.fan": 0.47459999084472654, + "IoU.pier": 0.35139999389648435, + "IoU.crt screen": 0.029000000953674318, + "IoU.plate": 0.4684000015258789, + "IoU.monitor": 0.20059999465942382, + "IoU.bulletin board": 0.48529998779296873, + "IoU.shower": 0.0, + "IoU.radiator": 0.5295999908447265, + "IoU.glass": 0.08649999618530274, + "IoU.clock": 0.24819999694824219, + "IoU.flag": 0.34310001373291016, + "Acc.wall": 0.8970999908447266, + "Acc.building": 0.9376999664306641, + "Acc.sky": 0.9773999786376953, + "Acc.floor": 0.9176999664306641, + "Acc.tree": 0.875999984741211, + "Acc.ceiling": 0.9013999938964844, + "Acc.road": 0.9075, + "Acc.bed ": 0.954800033569336, + "Acc.windowpane": 0.7433999633789062, + "Acc.grass": 0.7977999877929688, + "Acc.cabinet": 0.7572000122070313, + "Acc.sidewalk": 0.8006999969482422, + "Acc.person": 0.8973999786376953, + "Acc.earth": 0.5966999816894532, + "Acc.door": 0.6658000183105469, + "Acc.table": 0.7343000030517578, + "Acc.mountain": 0.6901999664306641, + "Acc.plant": 0.5729999923706055, + "Acc.curtain": 0.8298999786376953, + "Acc.chair": 0.6704000091552734, + "Acc.car": 
0.9111000061035156, + "Acc.water": 0.669000015258789, + "Acc.painting": 0.8429000091552734, + "Acc.sofa": 0.8172000122070312, + "Acc.shelf": 0.6469999694824219, + "Acc.house": 0.5845999908447266, + "Acc.sea": 0.8886000061035156, + "Acc.mirror": 0.7108999633789063, + "Acc.rug": 0.6783000183105469, + "Acc.field": 0.5481000137329102, + "Acc.armchair": 0.6516999816894531, + "Acc.seat": 0.8330000305175781, + "Acc.fence": 0.5447000122070312, + "Acc.desk": 0.7030999755859375, + "Acc.rock": 0.6680999755859375, + "Acc.wardrobe": 0.6888999938964844, + "Acc.lamp": 0.6743000030517579, + "Acc.bathtub": 0.7862000274658203, + "Acc.railing": 0.5002000045776367, + "Acc.cushion": 0.6225, + "Acc.base": 0.4834000015258789, + "Acc.box": 0.34080001831054685, + "Acc.column": 0.6124000167846679, + "Acc.signboard": 0.43979999542236325, + "Acc.chest of drawers": 0.5984999847412109, + "Acc.counter": 0.37240001678466794, + "Acc.sand": 0.5829000091552734, + "Acc.sink": 0.7048999786376953, + "Acc.skyscraper": 0.7088999938964844, + "Acc.fireplace": 0.9130999755859375, + "Acc.refrigerator": 0.8669000244140626, + "Acc.grandstand": 0.7483999633789062, + "Acc.path": 0.2677000045776367, + "Acc.stairs": 0.3484000015258789, + "Acc.runway": 0.8087000274658203, + "Acc.case": 0.7143000030517578, + "Acc.pool table": 0.964800033569336, + "Acc.pillow": 0.6537000274658203, + "Acc.screen door": 0.5045000076293945, + "Acc.stairway": 0.5547000122070312, + "Acc.river": 0.34689998626708984, + "Acc.bridge": 0.8368000030517578, + "Acc.bookcase": 0.5761999893188476, + "Acc.blind": 0.4406999969482422, + "Acc.coffee table": 0.8133000183105469, + "Acc.toilet": 0.8841000366210937, + "Acc.flower": 0.4525, + "Acc.book": 0.5666999816894531, + "Acc.hill": 0.21290000915527343, + "Acc.bench": 0.49150001525878906, + "Acc.countertop": 0.6058000183105469, + "Acc.stove": 0.7929000091552735, + "Acc.palm": 0.5508000183105469, + "Acc.kitchen island": 0.5441999816894532, + "Acc.computer": 0.7580000305175781, + "Acc.swivel chair": 0.5754000091552735, + "Acc.boat": 0.8447000122070313, + "Acc.bar": 0.5418999862670898, + "Acc.arcade machine": 0.5522000122070313, + "Acc.hovel": 0.44779998779296876, + "Acc.bus": 0.9323999786376953, + "Acc.towel": 0.7058999633789063, + "Acc.light": 0.24010000228881836, + "Acc.truck": 0.4584000015258789, + "Acc.tower": 0.41700000762939454, + "Acc.chandelier": 0.7531999969482421, + "Acc.awning": 0.41119998931884766, + "Acc.streetlight": 0.2059000015258789, + "Acc.booth": 0.40700000762939453, + "Acc.television receiver": 0.7694000244140625, + "Acc.airplane": 0.6702999877929687, + "Acc.dirt track": 0.16969999313354492, + "Acc.apparel": 0.42580001831054687, + "Acc.pole": 0.3384000015258789, + "Acc.land": 0.016399999856948854, + "Acc.bannister": 0.145, + "Acc.escalator": 0.6179000091552734, + "Acc.ottoman": 0.5902999877929688, + "Acc.bottle": 0.24559999465942384, + "Acc.buffet": 0.6277999877929688, + "Acc.poster": 0.21579999923706056, + "Acc.stage": 0.16799999237060548, + "Acc.van": 0.48310001373291017, + "Acc.ship": 0.22209999084472656, + "Acc.fountain": 0.21110000610351562, + "Acc.conveyer belt": 0.8955999755859375, + "Acc.canopy": 0.39119998931884764, + "Acc.washer": 0.7423000335693359, + "Acc.plaything": 0.5268999862670899, + "Acc.swimming pool": 0.8026999664306641, + "Acc.stool": 0.40849998474121096, + "Acc.barrel": 0.6336999893188476, + "Acc.basket": 0.2893000030517578, + "Acc.waterfall": 0.797300033569336, + "Acc.tent": 0.9830999755859375, + "Acc.bag": 0.14460000038146972, + "Acc.minibike": 0.6633999633789063, + "Acc.cradle": 
0.9661000061035157, + "Acc.oven": 0.5245000076293945, + "Acc.ball": 0.4961999893188477, + "Acc.food": 0.5408000183105469, + "Acc.step": 0.09449999809265136, + "Acc.tank": 0.5858000183105468, + "Acc.trade name": 0.18440000534057618, + "Acc.microwave": 0.4015999984741211, + "Acc.pot": 0.41830001831054686, + "Acc.animal": 0.6504000091552734, + "Acc.bicycle": 0.6425, + "Acc.lake": 0.6272999954223633, + "Acc.dishwasher": 0.6497000122070312, + "Acc.screen": 0.8654000091552735, + "Acc.blanket": 0.10029999732971191, + "Acc.sculpture": 0.8136000061035156, + "Acc.hood": 0.46619998931884765, + "Acc.sconce": 0.3131999969482422, + "Acc.vase": 0.3952000045776367, + "Acc.traffic light": 0.23780000686645508, + "Acc.tray": 0.023299999237060547, + "Acc.ashcan": 0.5675, + "Acc.fan": 0.5586999893188477, + "Acc.pier": 0.4402000045776367, + "Acc.crt screen": 0.055300002098083494, + "Acc.plate": 0.5920999908447265, + "Acc.monitor": 0.3118000030517578, + "Acc.bulletin board": 0.5974000167846679, + "Acc.shower": 0.0, + "Acc.radiator": 0.5784999847412109, + "Acc.glass": 0.0934000015258789, + "Acc.clock": 0.3011000061035156, + "Acc.flag": 0.36369998931884767 + } + }, + "70": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8312, + "mIoU": 0.4844, + "mAcc": 0.614, + "IoU.wall": 0.7736000061035156, + "IoU.building": 0.8294999694824219, + "IoU.sky": 0.9387999725341797, + "IoU.floor": 0.8262000274658203, + "IoU.tree": 0.7398999786376953, + "IoU.ceiling": 0.835999984741211, + "IoU.road": 0.84, + "IoU.bed ": 0.8894999694824218, + "IoU.windowpane": 0.6281999969482421, + "IoU.grass": 0.686500015258789, + "IoU.cabinet": 0.6134000015258789, + "IoU.sidewalk": 0.6633000183105469, + "IoU.person": 0.8055999755859375, + "IoU.earth": 0.40360000610351565, + "IoU.door": 0.5041999816894531, + "IoU.table": 0.5788999938964844, + "IoU.mountain": 0.624900016784668, + "IoU.plant": 0.5215000152587891, + "IoU.curtain": 0.7236000061035156, + "IoU.chair": 0.5434000015258789, + "IoU.car": 0.8470999908447265, + "IoU.water": 0.5445000076293945, + "IoU.painting": 0.675, + "IoU.sofa": 0.6616999816894531, + "IoU.shelf": 0.425, + "IoU.house": 0.49270000457763674, + "IoU.sea": 0.6095999908447266, + "IoU.mirror": 0.653499984741211, + "IoU.rug": 0.6637000274658204, + "IoU.field": 0.31090000152587893, + "IoU.armchair": 0.40869998931884766, + "IoU.seat": 0.6644000244140625, + "IoU.fence": 0.45439998626708983, + "IoU.desk": 0.44779998779296876, + "IoU.rock": 0.5479999923706055, + "IoU.wardrobe": 0.6086999893188476, + "IoU.lamp": 0.5518000030517578, + "IoU.bathtub": 0.8001000213623047, + "IoU.railing": 0.3534000015258789, + "IoU.cushion": 0.5561000061035156, + "IoU.base": 0.30370000839233396, + "IoU.box": 0.24209999084472655, + "IoU.column": 0.4979999923706055, + "IoU.signboard": 0.35779998779296873, + "IoU.chest of drawers": 0.36150001525878905, + "IoU.counter": 0.37860000610351563, + "IoU.sand": 0.49509998321533205, + "IoU.sink": 0.6641999816894532, + "IoU.skyscraper": 0.5338999938964843, + "IoU.fireplace": 0.6808000183105469, + "IoU.refrigerator": 0.6833000183105469, + "IoU.grandstand": 0.4584000015258789, + "IoU.path": 0.2434000015258789, + "IoU.stairs": 0.2978000068664551, + "IoU.runway": 0.7533999633789062, + "IoU.case": 0.5645999908447266, + "IoU.pool table": 0.9266000366210938, + "IoU.pillow": 0.5606999969482422, + "IoU.screen door": 0.6106000137329102, + "IoU.stairway": 0.3736999893188477, + "IoU.river": 0.2325, + "IoU.bridge": 0.5672999954223633, + 
"IoU.bookcase": 0.30190000534057615, + "IoU.blind": 0.34950000762939454, + "IoU.coffee table": 0.5733000183105469, + "IoU.toilet": 0.7995999908447265, + "IoU.flower": 0.3756999969482422, + "IoU.book": 0.44990001678466796, + "IoU.hill": 0.1347999954223633, + "IoU.bench": 0.467599983215332, + "IoU.countertop": 0.5334000015258789, + "IoU.stove": 0.6901999664306641, + "IoU.palm": 0.4608000183105469, + "IoU.kitchen island": 0.35080001831054686, + "IoU.computer": 0.7355000305175782, + "IoU.swivel chair": 0.4336999893188477, + "IoU.boat": 0.6043000030517578, + "IoU.bar": 0.5770999908447265, + "IoU.arcade machine": 0.6848999786376954, + "IoU.hovel": 0.5302000045776367, + "IoU.bus": 0.8844000244140625, + "IoU.towel": 0.6145000076293945, + "IoU.light": 0.42060001373291017, + "IoU.truck": 0.3347999954223633, + "IoU.tower": 0.3347999954223633, + "IoU.chandelier": 0.6063000106811524, + "IoU.awning": 0.27780000686645506, + "IoU.streetlight": 0.18260000228881837, + "IoU.booth": 0.3597999954223633, + "IoU.television receiver": 0.6565000152587891, + "IoU.airplane": 0.5602999877929687, + "IoU.dirt track": 0.029600000381469725, + "IoU.apparel": 0.37240001678466794, + "IoU.pole": 0.15390000343322754, + "IoU.land": 0.04820000171661377, + "IoU.bannister": 0.08609999656677246, + "IoU.escalator": 0.45, + "IoU.ottoman": 0.4829999923706055, + "IoU.bottle": 0.3007999992370605, + "IoU.buffet": 0.5934000015258789, + "IoU.poster": 0.16139999389648438, + "IoU.stage": 0.12680000305175781, + "IoU.van": 0.4388000106811523, + "IoU.ship": 0.7023000335693359, + "IoU.fountain": 0.1990999984741211, + "IoU.conveyer belt": 0.7147000122070313, + "IoU.canopy": 0.23780000686645508, + "IoU.washer": 0.7052999877929688, + "IoU.plaything": 0.4231000137329102, + "IoU.swimming pool": 0.7208999633789063, + "IoU.stool": 0.3340999984741211, + "IoU.barrel": 0.40619998931884765, + "IoU.basket": 0.2338999938964844, + "IoU.waterfall": 0.5711999893188476, + "IoU.tent": 0.9087000274658203, + "IoU.bag": 0.10640000343322754, + "IoU.minibike": 0.6547000122070312, + "IoU.cradle": 0.8108000183105468, + "IoU.oven": 0.17459999084472655, + "IoU.ball": 0.5404000091552734, + "IoU.food": 0.575, + "IoU.step": 0.09140000343322754, + "IoU.tank": 0.49970001220703125, + "IoU.trade name": 0.293700008392334, + "IoU.microwave": 0.4127999877929687, + "IoU.pot": 0.4897999954223633, + "IoU.animal": 0.6516000366210938, + "IoU.bicycle": 0.5463999938964844, + "IoU.lake": 0.3079999923706055, + "IoU.dishwasher": 0.46610000610351565, + "IoU.screen": 0.6063000106811524, + "IoU.blanket": 0.14399999618530274, + "IoU.sculpture": 0.5920999908447265, + "IoU.hood": 0.5070999908447266, + "IoU.sconce": 0.2901000022888184, + "IoU.vase": 0.31010000228881834, + "IoU.traffic light": 0.24139999389648437, + "IoU.tray": 0.04570000171661377, + "IoU.ashcan": 0.3833000183105469, + "IoU.fan": 0.4972999954223633, + "IoU.pier": 0.23899999618530274, + "IoU.crt screen": 0.04880000114440918, + "IoU.plate": 0.4438000106811523, + "IoU.monitor": 0.17799999237060546, + "IoU.bulletin board": 0.40599998474121096, + "IoU.shower": 0.024100000858306884, + "IoU.radiator": 0.605099983215332, + "IoU.glass": 0.12439999580383301, + "IoU.clock": 0.24360000610351562, + "IoU.flag": 0.3806999969482422, + "Acc.wall": 0.8705999755859375, + "Acc.building": 0.9322000122070313, + "Acc.sky": 0.962699966430664, + "Acc.floor": 0.8970999908447266, + "Acc.tree": 0.8876000213623046, + "Acc.ceiling": 0.8947000122070312, + "Acc.road": 0.8980999755859375, + "Acc.bed ": 0.9633999633789062, + "Acc.windowpane": 0.7822000122070313, + 
"Acc.grass": 0.8069999694824219, + "Acc.cabinet": 0.7487000274658203, + "Acc.sidewalk": 0.8237000274658203, + "Acc.person": 0.9241000366210937, + "Acc.earth": 0.5727999877929687, + "Acc.door": 0.6751000213623047, + "Acc.table": 0.7436000061035156, + "Acc.mountain": 0.772300033569336, + "Acc.plant": 0.6077000045776367, + "Acc.curtain": 0.8751000213623047, + "Acc.chair": 0.6890000152587891, + "Acc.car": 0.9295999908447266, + "Acc.water": 0.6848999786376954, + "Acc.painting": 0.8788999938964843, + "Acc.sofa": 0.8469000244140625, + "Acc.shelf": 0.5638000106811524, + "Acc.house": 0.6411000061035156, + "Acc.sea": 0.7662999725341797, + "Acc.mirror": 0.7865000152587891, + "Acc.rug": 0.79, + "Acc.field": 0.515, + "Acc.armchair": 0.5925, + "Acc.seat": 0.8648999786376953, + "Acc.fence": 0.6498999786376953, + "Acc.desk": 0.7434999847412109, + "Acc.rock": 0.6772000122070313, + "Acc.wardrobe": 0.7440000152587891, + "Acc.lamp": 0.7363999938964844, + "Acc.bathtub": 0.8591000366210938, + "Acc.railing": 0.4672999954223633, + "Acc.cushion": 0.6731999969482422, + "Acc.base": 0.5602999877929687, + "Acc.box": 0.3122999954223633, + "Acc.column": 0.5870000076293945, + "Acc.signboard": 0.4609000015258789, + "Acc.chest of drawers": 0.6177000045776367, + "Acc.counter": 0.5029999923706054, + "Acc.sand": 0.7662999725341797, + "Acc.sink": 0.7455999755859375, + "Acc.skyscraper": 0.6576000213623047, + "Acc.fireplace": 0.8969000244140625, + "Acc.refrigerator": 0.7737999725341796, + "Acc.grandstand": 0.7133000183105469, + "Acc.path": 0.365, + "Acc.stairs": 0.43340000152587893, + "Acc.runway": 0.9698999786376953, + "Acc.case": 0.6826000213623047, + "Acc.pool table": 0.9734999847412109, + "Acc.pillow": 0.6512999725341797, + "Acc.screen door": 0.7609999847412109, + "Acc.stairway": 0.5022000122070313, + "Acc.river": 0.47650001525878904, + "Acc.bridge": 0.6591000366210937, + "Acc.bookcase": 0.4384000015258789, + "Acc.blind": 0.3908000183105469, + "Acc.coffee table": 0.8551000213623047, + "Acc.toilet": 0.9075, + "Acc.flower": 0.539000015258789, + "Acc.book": 0.6952999877929688, + "Acc.hill": 0.2596999931335449, + "Acc.bench": 0.5668000030517578, + "Acc.countertop": 0.6779000091552735, + "Acc.stove": 0.8316999816894531, + "Acc.palm": 0.6765000152587891, + "Acc.kitchen island": 0.6295999908447265, + "Acc.computer": 0.8712999725341797, + "Acc.swivel chair": 0.6075, + "Acc.boat": 0.8493000030517578, + "Acc.bar": 0.6955999755859374, + "Acc.arcade machine": 0.821500015258789, + "Acc.hovel": 0.592599983215332, + "Acc.bus": 0.9372000122070312, + "Acc.towel": 0.7818000030517578, + "Acc.light": 0.5231999969482422, + "Acc.truck": 0.47580001831054686, + "Acc.tower": 0.48439998626708985, + "Acc.chandelier": 0.7537000274658203, + "Acc.awning": 0.31790000915527344, + "Acc.streetlight": 0.2640999984741211, + "Acc.booth": 0.3997000122070313, + "Acc.television receiver": 0.8102999877929687, + "Acc.airplane": 0.6463999938964844, + "Acc.dirt track": 0.08859999656677246, + "Acc.apparel": 0.5461000061035156, + "Acc.pole": 0.19940000534057617, + "Acc.land": 0.08020000457763672, + "Acc.bannister": 0.14149999618530273, + "Acc.escalator": 0.6066999816894532, + "Acc.ottoman": 0.6626000213623047, + "Acc.bottle": 0.4416999816894531, + "Acc.buffet": 0.8644999694824219, + "Acc.poster": 0.20540000915527343, + "Acc.stage": 0.2734000015258789, + "Acc.van": 0.5434000015258789, + "Acc.ship": 0.7119999694824218, + "Acc.fountain": 0.2272999954223633, + "Acc.conveyer belt": 0.919000015258789, + "Acc.canopy": 0.2990999984741211, + "Acc.washer": 0.7119000244140625, + 
"Acc.plaything": 0.5065000152587891, + "Acc.swimming pool": 0.8883999633789063, + "Acc.stool": 0.4290999984741211, + "Acc.barrel": 0.6512000274658203, + "Acc.basket": 0.30450000762939455, + "Acc.waterfall": 0.7919000244140625, + "Acc.tent": 0.9876000213623047, + "Acc.bag": 0.111899995803833, + "Acc.minibike": 0.7665000152587891, + "Acc.cradle": 0.9769999694824218, + "Acc.oven": 0.48130001068115236, + "Acc.ball": 0.6268000030517578, + "Acc.food": 0.6473999786376953, + "Acc.step": 0.11270000457763672, + "Acc.tank": 0.5897999954223633, + "Acc.trade name": 0.32040000915527345, + "Acc.microwave": 0.47450000762939454, + "Acc.pot": 0.5850999832153321, + "Acc.animal": 0.6948999786376953, + "Acc.bicycle": 0.7083000183105469, + "Acc.lake": 0.5329000091552735, + "Acc.dishwasher": 0.5820999908447265, + "Acc.screen": 0.7433000183105469, + "Acc.blanket": 0.16139999389648438, + "Acc.sculpture": 0.7720999908447266, + "Acc.hood": 0.6102999877929688, + "Acc.sconce": 0.3759000015258789, + "Acc.vase": 0.46880001068115235, + "Acc.traffic light": 0.39689998626708983, + "Acc.tray": 0.06579999923706055, + "Acc.ashcan": 0.5668000030517578, + "Acc.fan": 0.6775, + "Acc.pier": 0.4816999816894531, + "Acc.crt screen": 0.12699999809265136, + "Acc.plate": 0.6545999908447265, + "Acc.monitor": 0.243700008392334, + "Acc.bulletin board": 0.7387000274658203, + "Acc.shower": 0.04099999904632568, + "Acc.radiator": 0.7202999877929688, + "Acc.glass": 0.136899995803833, + "Acc.clock": 0.29649999618530276, + "Acc.flag": 0.41369998931884766 + } + }, + "71": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8305, + "mIoU": 0.4802, + "mAcc": 0.6151, + "IoU.wall": 0.7729000091552735, + "IoU.building": 0.8327999877929687, + "IoU.sky": 0.9395999908447266, + "IoU.floor": 0.8291999816894531, + "IoU.tree": 0.7434999847412109, + "IoU.ceiling": 0.832699966430664, + "IoU.road": 0.8362999725341796, + "IoU.bed ": 0.8855999755859375, + "IoU.windowpane": 0.6272000122070313, + "IoU.grass": 0.7023000335693359, + "IoU.cabinet": 0.612599983215332, + "IoU.sidewalk": 0.6537999725341797, + "IoU.person": 0.808499984741211, + "IoU.earth": 0.3920999908447266, + "IoU.door": 0.5020999908447266, + "IoU.table": 0.5795000076293946, + "IoU.mountain": 0.605999984741211, + "IoU.plant": 0.5058000183105469, + "IoU.curtain": 0.7201999664306641, + "IoU.chair": 0.5415999984741211, + "IoU.car": 0.8380000305175781, + "IoU.water": 0.5633000183105469, + "IoU.painting": 0.6845999908447266, + "IoU.sofa": 0.6708000183105469, + "IoU.shelf": 0.41950000762939454, + "IoU.house": 0.5045999908447265, + "IoU.sea": 0.6406999969482422, + "IoU.mirror": 0.6611000061035156, + "IoU.rug": 0.673499984741211, + "IoU.field": 0.3152000045776367, + "IoU.armchair": 0.41810001373291017, + "IoU.seat": 0.6838999938964844, + "IoU.fence": 0.42270000457763673, + "IoU.desk": 0.42380001068115236, + "IoU.rock": 0.5222999954223633, + "IoU.wardrobe": 0.5852999877929688, + "IoU.lamp": 0.5454999923706054, + "IoU.bathtub": 0.8095999908447266, + "IoU.railing": 0.3843999862670898, + "IoU.cushion": 0.566500015258789, + "IoU.base": 0.32040000915527345, + "IoU.box": 0.263700008392334, + "IoU.column": 0.5031000137329101, + "IoU.signboard": 0.3618000030517578, + "IoU.chest of drawers": 0.35939998626708985, + "IoU.counter": 0.36389999389648436, + "IoU.sand": 0.4822999954223633, + "IoU.sink": 0.6731999969482422, + "IoU.skyscraper": 0.539099998474121, + "IoU.fireplace": 0.6769000244140625, + "IoU.refrigerator": 
0.7073999786376953, + "IoU.grandstand": 0.47189998626708984, + "IoU.path": 0.23760000228881836, + "IoU.stairs": 0.3031999969482422, + "IoU.runway": 0.7387000274658203, + "IoU.case": 0.5702000045776368, + "IoU.pool table": 0.9252999877929687, + "IoU.pillow": 0.5565000152587891, + "IoU.screen door": 0.6408999633789062, + "IoU.stairway": 0.3575, + "IoU.river": 0.14880000114440917, + "IoU.bridge": 0.5033000183105468, + "IoU.bookcase": 0.30309999465942383, + "IoU.blind": 0.36220001220703124, + "IoU.coffee table": 0.5765999984741211, + "IoU.toilet": 0.7887000274658204, + "IoU.flower": 0.3609999847412109, + "IoU.book": 0.44880001068115233, + "IoU.hill": 0.13550000190734862, + "IoU.bench": 0.53, + "IoU.countertop": 0.545099983215332, + "IoU.stove": 0.6761000061035156, + "IoU.palm": 0.47439998626708985, + "IoU.kitchen island": 0.36369998931884767, + "IoU.computer": 0.6225, + "IoU.swivel chair": 0.40950000762939454, + "IoU.boat": 0.6719000244140625, + "IoU.bar": 0.5379000091552735, + "IoU.arcade machine": 0.6631999969482422, + "IoU.hovel": 0.5470000076293945, + "IoU.bus": 0.8912000274658203, + "IoU.towel": 0.6086000061035156, + "IoU.light": 0.42080001831054686, + "IoU.truck": 0.29670000076293945, + "IoU.tower": 0.35720001220703124, + "IoU.chandelier": 0.6279000091552734, + "IoU.awning": 0.36270000457763674, + "IoU.streetlight": 0.18549999237060547, + "IoU.booth": 0.29700000762939455, + "IoU.television receiver": 0.6708999633789062, + "IoU.airplane": 0.5808000183105468, + "IoU.dirt track": 0.09949999809265136, + "IoU.apparel": 0.340099983215332, + "IoU.pole": 0.15640000343322755, + "IoU.land": 0.0640999984741211, + "IoU.bannister": 0.12779999732971192, + "IoU.escalator": 0.5415999984741211, + "IoU.ottoman": 0.45369998931884764, + "IoU.bottle": 0.193799991607666, + "IoU.buffet": 0.5056000137329102, + "IoU.poster": 0.18569999694824219, + "IoU.stage": 0.1452999973297119, + "IoU.van": 0.3718000030517578, + "IoU.ship": 0.6256000137329102, + "IoU.fountain": 0.21319999694824218, + "IoU.conveyer belt": 0.6783999633789063, + "IoU.canopy": 0.20700000762939452, + "IoU.washer": 0.6715000152587891, + "IoU.plaything": 0.3920000076293945, + "IoU.swimming pool": 0.6680999755859375, + "IoU.stool": 0.2928000068664551, + "IoU.barrel": 0.168799991607666, + "IoU.basket": 0.2564999961853027, + "IoU.waterfall": 0.660199966430664, + "IoU.tent": 0.9065000152587891, + "IoU.bag": 0.1281999969482422, + "IoU.minibike": 0.6644999694824218, + "IoU.cradle": 0.8237000274658203, + "IoU.oven": 0.23010000228881836, + "IoU.ball": 0.4765999984741211, + "IoU.food": 0.5783000183105469, + "IoU.step": 0.08130000114440918, + "IoU.tank": 0.48189998626708985, + "IoU.trade name": 0.24319999694824218, + "IoU.microwave": 0.5175999832153321, + "IoU.pot": 0.4538999938964844, + "IoU.animal": 0.611500015258789, + "IoU.bicycle": 0.5465999984741211, + "IoU.lake": 0.3356999969482422, + "IoU.dishwasher": 0.4766999816894531, + "IoU.screen": 0.5963999938964843, + "IoU.blanket": 0.13630000114440918, + "IoU.sculpture": 0.6231000137329101, + "IoU.hood": 0.45720001220703127, + "IoU.sconce": 0.27860000610351565, + "IoU.vase": 0.3170000076293945, + "IoU.traffic light": 0.2175, + "IoU.tray": 0.044499998092651365, + "IoU.ashcan": 0.3358000183105469, + "IoU.fan": 0.47509998321533203, + "IoU.pier": 0.25299999237060544, + "IoU.crt screen": 0.07639999866485596, + "IoU.plate": 0.4715999984741211, + "IoU.monitor": 0.15319999694824218, + "IoU.bulletin board": 0.43560001373291013, + "IoU.shower": 0.034000000953674316, + "IoU.radiator": 0.5816999816894531, + "IoU.glass": 
0.10260000228881835, + "IoU.clock": 0.223799991607666, + "IoU.flag": 0.36720001220703125, + "Acc.wall": 0.8673999786376954, + "Acc.building": 0.9312000274658203, + "Acc.sky": 0.9662000274658203, + "Acc.floor": 0.9016000366210938, + "Acc.tree": 0.8794000244140625, + "Acc.ceiling": 0.8880000305175781, + "Acc.road": 0.8848000335693359, + "Acc.bed ": 0.9598999786376953, + "Acc.windowpane": 0.7813999938964844, + "Acc.grass": 0.8245999908447266, + "Acc.cabinet": 0.7362999725341797, + "Acc.sidewalk": 0.8362999725341796, + "Acc.person": 0.9248999786376954, + "Acc.earth": 0.5763999938964843, + "Acc.door": 0.6937000274658203, + "Acc.table": 0.7375, + "Acc.mountain": 0.7145999908447266, + "Acc.plant": 0.5977000045776367, + "Acc.curtain": 0.8808000183105469, + "Acc.chair": 0.6862999725341797, + "Acc.car": 0.9345999908447266, + "Acc.water": 0.7244999694824219, + "Acc.painting": 0.8897000122070312, + "Acc.sofa": 0.8163999938964843, + "Acc.shelf": 0.5740999984741211, + "Acc.house": 0.6691999816894532, + "Acc.sea": 0.7894000244140625, + "Acc.mirror": 0.8154000091552734, + "Acc.rug": 0.7798999786376953, + "Acc.field": 0.4906000137329102, + "Acc.armchair": 0.6444999694824218, + "Acc.seat": 0.8579000091552734, + "Acc.fence": 0.5970999908447265, + "Acc.desk": 0.7680999755859375, + "Acc.rock": 0.7451000213623047, + "Acc.wardrobe": 0.7619000244140625, + "Acc.lamp": 0.7269999694824218, + "Acc.bathtub": 0.8708999633789063, + "Acc.railing": 0.5177000045776368, + "Acc.cushion": 0.6911000061035156, + "Acc.base": 0.595999984741211, + "Acc.box": 0.3181999969482422, + "Acc.column": 0.6195000076293945, + "Acc.signboard": 0.47709999084472654, + "Acc.chest of drawers": 0.6041999816894531, + "Acc.counter": 0.47509998321533203, + "Acc.sand": 0.7091000366210938, + "Acc.sink": 0.7352999877929688, + "Acc.skyscraper": 0.6804000091552734, + "Acc.fireplace": 0.8962999725341797, + "Acc.refrigerator": 0.8066999816894531, + "Acc.grandstand": 0.7351999664306641, + "Acc.path": 0.322599983215332, + "Acc.stairs": 0.44799999237060545, + "Acc.runway": 0.9411000061035156, + "Acc.case": 0.6852999877929687, + "Acc.pool table": 0.9741999816894531, + "Acc.pillow": 0.6619000244140625, + "Acc.screen door": 0.7569000244140625, + "Acc.stairway": 0.4533000183105469, + "Acc.river": 0.2909000015258789, + "Acc.bridge": 0.7113999938964843, + "Acc.bookcase": 0.5238000106811523, + "Acc.blind": 0.40240001678466797, + "Acc.coffee table": 0.857699966430664, + "Acc.toilet": 0.9058999633789062, + "Acc.flower": 0.6029000091552734, + "Acc.book": 0.6697000122070312, + "Acc.hill": 0.256200008392334, + "Acc.bench": 0.6163999938964844, + "Acc.countertop": 0.7101000213623047, + "Acc.stove": 0.8223999786376953, + "Acc.palm": 0.7076000213623047, + "Acc.kitchen island": 0.6234999847412109, + "Acc.computer": 0.7755999755859375, + "Acc.swivel chair": 0.6061999893188477, + "Acc.boat": 0.8358999633789063, + "Acc.bar": 0.6695999908447265, + "Acc.arcade machine": 0.7687999725341796, + "Acc.hovel": 0.6129999923706054, + "Acc.bus": 0.9469999694824218, + "Acc.towel": 0.7508999633789063, + "Acc.light": 0.5218999862670899, + "Acc.truck": 0.43020000457763674, + "Acc.tower": 0.5347000122070312, + "Acc.chandelier": 0.8144999694824219, + "Acc.awning": 0.43990001678466795, + "Acc.streetlight": 0.2698999977111816, + "Acc.booth": 0.42580001831054687, + "Acc.television receiver": 0.846500015258789, + "Acc.airplane": 0.6843000030517579, + "Acc.dirt track": 0.4190999984741211, + "Acc.apparel": 0.4854000091552734, + "Acc.pole": 0.2084000015258789, + "Acc.land": 0.10270000457763671, + 
"Acc.bannister": 0.18829999923706053, + "Acc.escalator": 0.7712000274658203, + "Acc.ottoman": 0.6088999938964844, + "Acc.bottle": 0.2402000045776367, + "Acc.buffet": 0.7356999969482422, + "Acc.poster": 0.24469999313354493, + "Acc.stage": 0.2596999931335449, + "Acc.van": 0.45939998626708983, + "Acc.ship": 0.6343999862670898, + "Acc.fountain": 0.22690000534057617, + "Acc.conveyer belt": 0.8236000061035156, + "Acc.canopy": 0.30040000915527343, + "Acc.washer": 0.6793000030517579, + "Acc.plaything": 0.5820000076293945, + "Acc.swimming pool": 0.8269000244140625, + "Acc.stool": 0.3641999816894531, + "Acc.barrel": 0.6479000091552735, + "Acc.basket": 0.32860000610351564, + "Acc.waterfall": 0.8779000091552734, + "Acc.tent": 0.9894000244140625, + "Acc.bag": 0.138100004196167, + "Acc.minibike": 0.7887000274658204, + "Acc.cradle": 0.9683000183105469, + "Acc.oven": 0.5427999877929688, + "Acc.ball": 0.5402000045776367, + "Acc.food": 0.640199966430664, + "Acc.step": 0.10619999885559082, + "Acc.tank": 0.5722000122070312, + "Acc.trade name": 0.2581999969482422, + "Acc.microwave": 0.585, + "Acc.pot": 0.5361000061035156, + "Acc.animal": 0.7170999908447265, + "Acc.bicycle": 0.6973000335693359, + "Acc.lake": 0.47779998779296873, + "Acc.dishwasher": 0.6336999893188476, + "Acc.screen": 0.7151000213623047, + "Acc.blanket": 0.14869999885559082, + "Acc.sculpture": 0.8019999694824219, + "Acc.hood": 0.5661999893188476, + "Acc.sconce": 0.36, + "Acc.vase": 0.4583000183105469, + "Acc.traffic light": 0.35130001068115235, + "Acc.tray": 0.06309999942779541, + "Acc.ashcan": 0.5204000091552734, + "Acc.fan": 0.700999984741211, + "Acc.pier": 0.4677000045776367, + "Acc.crt screen": 0.196299991607666, + "Acc.plate": 0.7019000244140625, + "Acc.monitor": 0.2595000076293945, + "Acc.bulletin board": 0.7437999725341797, + "Acc.shower": 0.03859999895095825, + "Acc.radiator": 0.7058000183105468, + "Acc.glass": 0.11569999694824219, + "Acc.clock": 0.27510000228881837, + "Acc.flag": 0.40310001373291016 + } + }, + "72": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8317, + "mIoU": 0.4866, + "mAcc": 0.6165999999999999, + "IoU.wall": 0.7736000061035156, + "IoU.building": 0.8276000213623047, + "IoU.sky": 0.9395999908447266, + "IoU.floor": 0.829800033569336, + "IoU.tree": 0.7498000335693359, + "IoU.ceiling": 0.8383999633789062, + "IoU.road": 0.8312999725341796, + "IoU.bed ": 0.8795999908447265, + "IoU.windowpane": 0.6316999816894531, + "IoU.grass": 0.7008000183105468, + "IoU.cabinet": 0.6186999893188476, + "IoU.sidewalk": 0.6558000183105469, + "IoU.person": 0.8022000122070313, + "IoU.earth": 0.39610000610351564, + "IoU.door": 0.5075, + "IoU.table": 0.5895000076293946, + "IoU.mountain": 0.6120999908447265, + "IoU.plant": 0.5159999847412109, + "IoU.curtain": 0.7163999938964843, + "IoU.chair": 0.5529999923706055, + "IoU.car": 0.8308999633789063, + "IoU.water": 0.544000015258789, + "IoU.painting": 0.6833999633789063, + "IoU.sofa": 0.6912000274658203, + "IoU.shelf": 0.4433000183105469, + "IoU.house": 0.48869998931884767, + "IoU.sea": 0.6354999923706055, + "IoU.mirror": 0.6691000366210937, + "IoU.rug": 0.6726999664306641, + "IoU.field": 0.340099983215332, + "IoU.armchair": 0.4533000183105469, + "IoU.seat": 0.6666000366210938, + "IoU.fence": 0.44400001525878907, + "IoU.desk": 0.41869998931884767, + "IoU.rock": 0.513499984741211, + "IoU.wardrobe": 0.6002999877929688, + "IoU.lamp": 0.5622999954223633, + "IoU.bathtub": 0.783499984741211, + "IoU.railing": 
0.3902000045776367, + "IoU.cushion": 0.5627999877929688, + "IoU.base": 0.29940000534057615, + "IoU.box": 0.23700000762939452, + "IoU.column": 0.48650001525878905, + "IoU.signboard": 0.3675, + "IoU.chest of drawers": 0.3606000137329102, + "IoU.counter": 0.4147999954223633, + "IoU.sand": 0.5684000015258789, + "IoU.sink": 0.6719000244140625, + "IoU.skyscraper": 0.5315999984741211, + "IoU.fireplace": 0.7034999847412109, + "IoU.refrigerator": 0.7493000030517578, + "IoU.grandstand": 0.46290000915527346, + "IoU.path": 0.23860000610351562, + "IoU.stairs": 0.30709999084472656, + "IoU.runway": 0.7173000335693359, + "IoU.case": 0.6211999893188477, + "IoU.pool table": 0.9152999877929687, + "IoU.pillow": 0.5815999984741211, + "IoU.screen door": 0.4729000091552734, + "IoU.stairway": 0.36470001220703124, + "IoU.river": 0.17659999847412108, + "IoU.bridge": 0.6138000106811523, + "IoU.bookcase": 0.32459999084472657, + "IoU.blind": 0.4152000045776367, + "IoU.coffee table": 0.6184000015258789, + "IoU.toilet": 0.8066000366210937, + "IoU.flower": 0.37090000152587893, + "IoU.book": 0.4366999816894531, + "IoU.hill": 0.11869999885559082, + "IoU.bench": 0.5079999923706054, + "IoU.countertop": 0.507400016784668, + "IoU.stove": 0.680199966430664, + "IoU.palm": 0.4740999984741211, + "IoU.kitchen island": 0.35700000762939454, + "IoU.computer": 0.6570999908447266, + "IoU.swivel chair": 0.48220001220703124, + "IoU.boat": 0.7080000305175781, + "IoU.bar": 0.5813000106811523, + "IoU.arcade machine": 0.5141999816894531, + "IoU.hovel": 0.547599983215332, + "IoU.bus": 0.9001000213623047, + "IoU.towel": 0.638400001525879, + "IoU.light": 0.4077000045776367, + "IoU.truck": 0.3032999992370605, + "IoU.tower": 0.2738999938964844, + "IoU.chandelier": 0.6345999908447265, + "IoU.awning": 0.27799999237060546, + "IoU.streetlight": 0.20469999313354492, + "IoU.booth": 0.33060001373291015, + "IoU.television receiver": 0.6515000152587891, + "IoU.airplane": 0.6787999725341797, + "IoU.dirt track": 0.0509000015258789, + "IoU.apparel": 0.2745999908447266, + "IoU.pole": 0.2336000061035156, + "IoU.land": 0.07579999923706054, + "IoU.bannister": 0.1409000015258789, + "IoU.escalator": 0.5356999969482422, + "IoU.ottoman": 0.4841999816894531, + "IoU.bottle": 0.1946999931335449, + "IoU.buffet": 0.5163000106811524, + "IoU.poster": 0.20629999160766602, + "IoU.stage": 0.1409000015258789, + "IoU.van": 0.3383000183105469, + "IoU.ship": 0.7880999755859375, + "IoU.fountain": 0.19809999465942382, + "IoU.conveyer belt": 0.6769000244140625, + "IoU.canopy": 0.17319999694824217, + "IoU.washer": 0.7094999694824219, + "IoU.plaything": 0.286200008392334, + "IoU.swimming pool": 0.669000015258789, + "IoU.stool": 0.34509998321533203, + "IoU.barrel": 0.41630001068115235, + "IoU.basket": 0.22700000762939454, + "IoU.waterfall": 0.6288999938964843, + "IoU.tent": 0.9512000274658203, + "IoU.bag": 0.18040000915527343, + "IoU.minibike": 0.6448000335693359, + "IoU.cradle": 0.8159999847412109, + "IoU.oven": 0.233799991607666, + "IoU.ball": 0.5777000045776367, + "IoU.food": 0.5533000183105469, + "IoU.step": 0.07639999866485596, + "IoU.tank": 0.5358000183105469, + "IoU.trade name": 0.24319999694824218, + "IoU.microwave": 0.41560001373291017, + "IoU.pot": 0.3784000015258789, + "IoU.animal": 0.685199966430664, + "IoU.bicycle": 0.5488000106811524, + "IoU.lake": 0.5438000106811524, + "IoU.dishwasher": 0.5288000106811523, + "IoU.screen": 0.4906000137329102, + "IoU.blanket": 0.12170000076293945, + "IoU.sculpture": 0.6202999877929688, + "IoU.hood": 0.4702999877929688, + "IoU.sconce": 
0.3290000152587891, + "IoU.vase": 0.3103000068664551, + "IoU.traffic light": 0.24100000381469727, + "IoU.tray": 0.018899999856948853, + "IoU.ashcan": 0.3383000183105469, + "IoU.fan": 0.4972999954223633, + "IoU.pier": 0.30270000457763674, + "IoU.crt screen": 0.014600000381469726, + "IoU.plate": 0.45599998474121095, + "IoU.monitor": 0.10470000267028809, + "IoU.bulletin board": 0.47630001068115235, + "IoU.shower": 0.004199999868869781, + "IoU.radiator": 0.5577000045776367, + "IoU.glass": 0.11560000419616699, + "IoU.clock": 0.27829999923706056, + "IoU.flag": 0.3783000183105469, + "Acc.wall": 0.87, + "Acc.building": 0.9245999908447265, + "Acc.sky": 0.9631999969482422, + "Acc.floor": 0.9018000030517578, + "Acc.tree": 0.8827999877929688, + "Acc.ceiling": 0.8969000244140625, + "Acc.road": 0.8918000030517578, + "Acc.bed ": 0.9638999938964844, + "Acc.windowpane": 0.7865000152587891, + "Acc.grass": 0.7956999969482422, + "Acc.cabinet": 0.737699966430664, + "Acc.sidewalk": 0.8201000213623046, + "Acc.person": 0.9316999816894531, + "Acc.earth": 0.5736000061035156, + "Acc.door": 0.6966999816894531, + "Acc.table": 0.7362999725341797, + "Acc.mountain": 0.7372000122070312, + "Acc.plant": 0.6104000091552735, + "Acc.curtain": 0.8730000305175781, + "Acc.chair": 0.6970999908447265, + "Acc.car": 0.9363999938964844, + "Acc.water": 0.7069000244140625, + "Acc.painting": 0.8701000213623047, + "Acc.sofa": 0.8436000061035156, + "Acc.shelf": 0.639900016784668, + "Acc.house": 0.6783999633789063, + "Acc.sea": 0.8268000030517578, + "Acc.mirror": 0.7981999969482422, + "Acc.rug": 0.7156999969482422, + "Acc.field": 0.5433000183105469, + "Acc.armchair": 0.6816000366210937, + "Acc.seat": 0.8665000152587891, + "Acc.fence": 0.6619999694824219, + "Acc.desk": 0.7197000122070313, + "Acc.rock": 0.7769999694824219, + "Acc.wardrobe": 0.7548999786376953, + "Acc.lamp": 0.7162000274658203, + "Acc.bathtub": 0.8429000091552734, + "Acc.railing": 0.5106999969482422, + "Acc.cushion": 0.6620999908447266, + "Acc.base": 0.46, + "Acc.box": 0.2826000022888184, + "Acc.column": 0.6168999862670899, + "Acc.signboard": 0.4875, + "Acc.chest of drawers": 0.6122999954223632, + "Acc.counter": 0.5297000122070312, + "Acc.sand": 0.7530000305175781, + "Acc.sink": 0.7391000366210938, + "Acc.skyscraper": 0.6656999969482422, + "Acc.fireplace": 0.9077999877929688, + "Acc.refrigerator": 0.8766999816894532, + "Acc.grandstand": 0.721500015258789, + "Acc.path": 0.3270999908447266, + "Acc.stairs": 0.44290000915527344, + "Acc.runway": 0.9488999938964844, + "Acc.case": 0.8201000213623046, + "Acc.pool table": 0.9779000091552734, + "Acc.pillow": 0.7161000061035157, + "Acc.screen door": 0.5779000091552734, + "Acc.stairway": 0.43689998626708987, + "Acc.river": 0.33169998168945314, + "Acc.bridge": 0.7816000366210938, + "Acc.bookcase": 0.5890999984741211, + "Acc.blind": 0.462599983215332, + "Acc.coffee table": 0.8336000061035156, + "Acc.toilet": 0.9001000213623047, + "Acc.flower": 0.5545999908447266, + "Acc.book": 0.6163000106811524, + "Acc.hill": 0.22530000686645507, + "Acc.bench": 0.6031000137329101, + "Acc.countertop": 0.6776000213623047, + "Acc.stove": 0.8272000122070312, + "Acc.palm": 0.6973999786376953, + "Acc.kitchen island": 0.6747000122070312, + "Acc.computer": 0.7861000061035156, + "Acc.swivel chair": 0.6930999755859375, + "Acc.boat": 0.8331999969482422, + "Acc.bar": 0.7669999694824219, + "Acc.arcade machine": 0.565, + "Acc.hovel": 0.6309999847412109, + "Acc.bus": 0.9541999816894531, + "Acc.towel": 0.7633999633789063, + "Acc.light": 0.4772999954223633, + "Acc.truck": 
0.47720001220703123, + "Acc.tower": 0.4125, + "Acc.chandelier": 0.7945999908447265, + "Acc.awning": 0.33119998931884764, + "Acc.streetlight": 0.26489999771118167, + "Acc.booth": 0.4734999847412109, + "Acc.television receiver": 0.8616000366210937, + "Acc.airplane": 0.7530000305175781, + "Acc.dirt track": 0.22, + "Acc.apparel": 0.4086000061035156, + "Acc.pole": 0.35369998931884766, + "Acc.land": 0.15279999732971192, + "Acc.bannister": 0.22170000076293944, + "Acc.escalator": 0.8125, + "Acc.ottoman": 0.6409999847412109, + "Acc.bottle": 0.25, + "Acc.buffet": 0.6956999969482421, + "Acc.poster": 0.26930000305175783, + "Acc.stage": 0.2705999946594238, + "Acc.van": 0.42630001068115236, + "Acc.ship": 0.8687999725341797, + "Acc.fountain": 0.22459999084472657, + "Acc.conveyer belt": 0.9208999633789062, + "Acc.canopy": 0.22489999771118163, + "Acc.washer": 0.7131999969482422, + "Acc.plaything": 0.35580001831054686, + "Acc.swimming pool": 0.8638999938964844, + "Acc.stool": 0.4140999984741211, + "Acc.barrel": 0.6498999786376953, + "Acc.basket": 0.32360000610351564, + "Acc.waterfall": 0.8293000030517578, + "Acc.tent": 0.9843000030517578, + "Acc.bag": 0.21639999389648437, + "Acc.minibike": 0.8019000244140625, + "Acc.cradle": 0.9775, + "Acc.oven": 0.5972999954223632, + "Acc.ball": 0.6343999862670898, + "Acc.food": 0.6304000091552734, + "Acc.step": 0.09949999809265136, + "Acc.tank": 0.6325, + "Acc.trade name": 0.2645000076293945, + "Acc.microwave": 0.45669998168945314, + "Acc.pot": 0.4493999862670898, + "Acc.animal": 0.7406999969482422, + "Acc.bicycle": 0.7116999816894531, + "Acc.lake": 0.6437000274658203, + "Acc.dishwasher": 0.6254999923706055, + "Acc.screen": 0.7344999694824219, + "Acc.blanket": 0.12949999809265136, + "Acc.sculpture": 0.7708000183105469, + "Acc.hood": 0.6113000106811524, + "Acc.sconce": 0.44470001220703126, + "Acc.vase": 0.455, + "Acc.traffic light": 0.3413999938964844, + "Acc.tray": 0.03009999990463257, + "Acc.ashcan": 0.49450000762939456, + "Acc.fan": 0.6863999938964844, + "Acc.pier": 0.46439998626708984, + "Acc.crt screen": 0.040500001907348634, + "Acc.plate": 0.6045000076293945, + "Acc.monitor": 0.18639999389648437, + "Acc.bulletin board": 0.6733000183105469, + "Acc.shower": 0.03460000038146973, + "Acc.radiator": 0.7730000305175782, + "Acc.glass": 0.12729999542236328, + "Acc.clock": 0.3443000030517578, + "Acc.flag": 0.4104000091552734 + } + }, + "73": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8337, + "mIoU": 0.48350000000000004, + "mAcc": 0.5814, + "IoU.wall": 0.7716999816894531, + "IoU.building": 0.8298999786376953, + "IoU.sky": 0.9390000152587891, + "IoU.floor": 0.8252999877929688, + "IoU.tree": 0.7425, + "IoU.ceiling": 0.8366999816894531, + "IoU.road": 0.8337999725341797, + "IoU.bed ": 0.8836000061035156, + "IoU.windowpane": 0.6243000030517578, + "IoU.grass": 0.7087000274658203, + "IoU.cabinet": 0.6218999862670899, + "IoU.sidewalk": 0.6587000274658203, + "IoU.person": 0.8058999633789062, + "IoU.earth": 0.40950000762939454, + "IoU.door": 0.49959999084472656, + "IoU.table": 0.6058000183105469, + "IoU.mountain": 0.5981000137329101, + "IoU.plant": 0.5045000076293945, + "IoU.curtain": 0.7304000091552735, + "IoU.chair": 0.5599000167846679, + "IoU.car": 0.8390000152587891, + "IoU.water": 0.5609999847412109, + "IoU.painting": 0.6855999755859375, + "IoU.sofa": 0.6873999786376953, + "IoU.shelf": 0.4308000183105469, + "IoU.house": 0.502400016784668, + "IoU.sea": 0.6458999633789062, + 
"IoU.mirror": 0.6729000091552735, + "IoU.rug": 0.6481999969482422, + "IoU.field": 0.32740001678466796, + "IoU.armchair": 0.46869998931884765, + "IoU.seat": 0.6716999816894531, + "IoU.fence": 0.45299999237060545, + "IoU.desk": 0.46849998474121096, + "IoU.rock": 0.5372000122070313, + "IoU.wardrobe": 0.5665999984741211, + "IoU.lamp": 0.558499984741211, + "IoU.bathtub": 0.7608999633789062, + "IoU.railing": 0.39299999237060546, + "IoU.cushion": 0.564900016784668, + "IoU.base": 0.3215999984741211, + "IoU.box": 0.24139999389648437, + "IoU.column": 0.48770000457763674, + "IoU.signboard": 0.34630001068115235, + "IoU.chest of drawers": 0.3593000030517578, + "IoU.counter": 0.3845000076293945, + "IoU.sand": 0.48720001220703124, + "IoU.sink": 0.6608999633789062, + "IoU.skyscraper": 0.5368999862670898, + "IoU.fireplace": 0.7023999786376953, + "IoU.refrigerator": 0.7508000183105469, + "IoU.grandstand": 0.49009998321533205, + "IoU.path": 0.21700000762939453, + "IoU.stairs": 0.3078000068664551, + "IoU.runway": 0.6695999908447265, + "IoU.case": 0.6473999786376953, + "IoU.pool table": 0.9287999725341797, + "IoU.pillow": 0.5522999954223633, + "IoU.screen door": 0.6533000183105468, + "IoU.stairway": 0.387599983215332, + "IoU.river": 0.21350000381469728, + "IoU.bridge": 0.6791000366210938, + "IoU.bookcase": 0.34130001068115234, + "IoU.blind": 0.37270000457763675, + "IoU.coffee table": 0.6397999954223633, + "IoU.toilet": 0.774000015258789, + "IoU.flower": 0.36950000762939456, + "IoU.book": 0.4463999938964844, + "IoU.hill": 0.1175, + "IoU.bench": 0.4997999954223633, + "IoU.countertop": 0.4754000091552734, + "IoU.stove": 0.7080999755859375, + "IoU.palm": 0.46060001373291015, + "IoU.kitchen island": 0.42330001831054687, + "IoU.computer": 0.6544000244140625, + "IoU.swivel chair": 0.4990999984741211, + "IoU.boat": 0.7113999938964843, + "IoU.bar": 0.6013000106811524, + "IoU.arcade machine": 0.32939998626708983, + "IoU.hovel": 0.48380001068115236, + "IoU.bus": 0.9136000061035157, + "IoU.towel": 0.625999984741211, + "IoU.light": 0.27549999237060546, + "IoU.truck": 0.33599998474121096, + "IoU.tower": 0.3128000068664551, + "IoU.chandelier": 0.6286000061035156, + "IoU.awning": 0.19600000381469726, + "IoU.streetlight": 0.14890000343322754, + "IoU.booth": 0.3347999954223633, + "IoU.television receiver": 0.6711000061035156, + "IoU.airplane": 0.6224000167846679, + "IoU.dirt track": 0.02559999942779541, + "IoU.apparel": 0.2815999984741211, + "IoU.pole": 0.21399999618530274, + "IoU.land": 0.01759999990463257, + "IoU.bannister": 0.09930000305175782, + "IoU.escalator": 0.5911999893188477, + "IoU.ottoman": 0.4559000015258789, + "IoU.bottle": 0.24870000839233397, + "IoU.buffet": 0.3736000061035156, + "IoU.poster": 0.20600000381469727, + "IoU.stage": 0.11420000076293946, + "IoU.van": 0.3085000038146973, + "IoU.ship": 0.677300033569336, + "IoU.fountain": 0.20479999542236327, + "IoU.conveyer belt": 0.8063999938964844, + "IoU.canopy": 0.23709999084472655, + "IoU.washer": 0.7322000122070312, + "IoU.plaything": 0.28739999771118163, + "IoU.swimming pool": 0.6748999786376954, + "IoU.stool": 0.3160000038146973, + "IoU.barrel": 0.5790000152587891, + "IoU.basket": 0.22170000076293944, + "IoU.waterfall": 0.6477999877929688, + "IoU.tent": 0.9527999877929687, + "IoU.bag": 0.11890000343322754, + "IoU.minibike": 0.6591000366210937, + "IoU.cradle": 0.811500015258789, + "IoU.oven": 0.21649999618530275, + "IoU.ball": 0.35310001373291017, + "IoU.food": 0.537599983215332, + "IoU.step": 0.0884000015258789, + "IoU.tank": 0.5084000015258789, + "IoU.trade 
name": 0.15649999618530275, + "IoU.microwave": 0.36509998321533205, + "IoU.pot": 0.39540000915527346, + "IoU.animal": 0.6468000030517578, + "IoU.bicycle": 0.532400016784668, + "IoU.lake": 0.5881000137329102, + "IoU.dishwasher": 0.5611999893188476, + "IoU.screen": 0.567400016784668, + "IoU.blanket": 0.11869999885559082, + "IoU.sculpture": 0.5877000045776367, + "IoU.hood": 0.43720001220703125, + "IoU.sconce": 0.2427000045776367, + "IoU.vase": 0.3127000045776367, + "IoU.traffic light": 0.14300000190734863, + "IoU.tray": 0.002199999988079071, + "IoU.ashcan": 0.38610000610351564, + "IoU.fan": 0.45810001373291015, + "IoU.pier": 0.32310001373291014, + "IoU.crt screen": 0.04179999828338623, + "IoU.plate": 0.4793000030517578, + "IoU.monitor": 0.41150001525878904, + "IoU.bulletin board": 0.48529998779296873, + "IoU.shower": 0.0, + "IoU.radiator": 0.6054999923706055, + "IoU.glass": 0.0825, + "IoU.clock": 0.23200000762939454, + "IoU.flag": 0.3615999984741211, + "Acc.wall": 0.8958000183105469, + "Acc.building": 0.9369000244140625, + "Acc.sky": 0.9775, + "Acc.floor": 0.9211000061035156, + "Acc.tree": 0.8752999877929688, + "Acc.ceiling": 0.9019999694824219, + "Acc.road": 0.9012000274658203, + "Acc.bed ": 0.9580999755859375, + "Acc.windowpane": 0.7643000030517578, + "Acc.grass": 0.8008999633789062, + "Acc.cabinet": 0.7648000335693359, + "Acc.sidewalk": 0.810199966430664, + "Acc.person": 0.9038999938964843, + "Acc.earth": 0.658499984741211, + "Acc.door": 0.6819000244140625, + "Acc.table": 0.7559999847412109, + "Acc.mountain": 0.7075, + "Acc.plant": 0.592599983215332, + "Acc.curtain": 0.8379000091552734, + "Acc.chair": 0.6783999633789063, + "Acc.car": 0.9116000366210938, + "Acc.water": 0.7223999786376953, + "Acc.painting": 0.8404000091552735, + "Acc.sofa": 0.8311000061035156, + "Acc.shelf": 0.600099983215332, + "Acc.house": 0.6411000061035156, + "Acc.sea": 0.7948999786376953, + "Acc.mirror": 0.7455000305175781, + "Acc.rug": 0.6793000030517579, + "Acc.field": 0.4916999816894531, + "Acc.armchair": 0.6937999725341797, + "Acc.seat": 0.8280999755859375, + "Acc.fence": 0.6256999969482422, + "Acc.desk": 0.6863999938964844, + "Acc.rock": 0.7262999725341797, + "Acc.wardrobe": 0.7069999694824218, + "Acc.lamp": 0.6562000274658203, + "Acc.bathtub": 0.8070999908447266, + "Acc.railing": 0.5354999923706054, + "Acc.cushion": 0.6395999908447265, + "Acc.base": 0.5190999984741211, + "Acc.box": 0.3135000038146973, + "Acc.column": 0.5979000091552734, + "Acc.signboard": 0.4336000061035156, + "Acc.chest of drawers": 0.6018999862670898, + "Acc.counter": 0.455, + "Acc.sand": 0.6084999847412109, + "Acc.sink": 0.6919000244140625, + "Acc.skyscraper": 0.6375999832153321, + "Acc.fireplace": 0.8748999786376953, + "Acc.refrigerator": 0.7945999908447265, + "Acc.grandstand": 0.7445999908447266, + "Acc.path": 0.2834000015258789, + "Acc.stairs": 0.42029998779296873, + "Acc.runway": 0.8529000091552734, + "Acc.case": 0.8072000122070313, + "Acc.pool table": 0.9569000244140625, + "Acc.pillow": 0.6405000305175781, + "Acc.screen door": 0.7384999847412109, + "Acc.stairway": 0.46919998168945315, + "Acc.river": 0.43709999084472656, + "Acc.bridge": 0.7698999786376953, + "Acc.bookcase": 0.6041999816894531, + "Acc.blind": 0.4075, + "Acc.coffee table": 0.7954000091552734, + "Acc.toilet": 0.8758999633789063, + "Acc.flower": 0.48450000762939455, + "Acc.book": 0.5829999923706055, + "Acc.hill": 0.2006999969482422, + "Acc.bench": 0.5813999938964843, + "Acc.countertop": 0.6356000137329102, + "Acc.stove": 0.797300033569336, + "Acc.palm": 0.5822000122070312, + 
"Acc.kitchen island": 0.5879000091552734, + "Acc.computer": 0.7555000305175781, + "Acc.swivel chair": 0.6304000091552734, + "Acc.boat": 0.8348999786376953, + "Acc.bar": 0.7665000152587891, + "Acc.arcade machine": 0.3554999923706055, + "Acc.hovel": 0.5309999847412109, + "Acc.bus": 0.9441000366210938, + "Acc.towel": 0.6966999816894531, + "Acc.light": 0.29200000762939454, + "Acc.truck": 0.47060001373291016, + "Acc.tower": 0.42020000457763673, + "Acc.chandelier": 0.7429000091552734, + "Acc.awning": 0.20459999084472658, + "Acc.streetlight": 0.16940000534057617, + "Acc.booth": 0.41880001068115236, + "Acc.television receiver": 0.8088999938964844, + "Acc.airplane": 0.6605000305175781, + "Acc.dirt track": 0.10649999618530273, + "Acc.apparel": 0.37740001678466795, + "Acc.pole": 0.29469999313354495, + "Acc.land": 0.023900001049041747, + "Acc.bannister": 0.13100000381469726, + "Acc.escalator": 0.7908000183105469, + "Acc.ottoman": 0.5936000061035156, + "Acc.bottle": 0.3327000045776367, + "Acc.buffet": 0.44869998931884764, + "Acc.poster": 0.2756999969482422, + "Acc.stage": 0.17239999771118164, + "Acc.van": 0.3743000030517578, + "Acc.ship": 0.7030999755859375, + "Acc.fountain": 0.21260000228881837, + "Acc.conveyer belt": 0.9098000335693359, + "Acc.canopy": 0.31670000076293947, + "Acc.washer": 0.7405000305175782, + "Acc.plaything": 0.35639999389648436, + "Acc.swimming pool": 0.7169000244140625, + "Acc.stool": 0.35069999694824217, + "Acc.barrel": 0.6444000244140625, + "Acc.basket": 0.29209999084472654, + "Acc.waterfall": 0.6980000305175781, + "Acc.tent": 0.9812999725341797, + "Acc.bag": 0.12649999618530272, + "Acc.minibike": 0.7404000091552735, + "Acc.cradle": 0.9619000244140625, + "Acc.oven": 0.6158000183105469, + "Acc.ball": 0.3731999969482422, + "Acc.food": 0.604900016784668, + "Acc.step": 0.10170000076293945, + "Acc.tank": 0.572400016784668, + "Acc.trade name": 0.16329999923706054, + "Acc.microwave": 0.38560001373291014, + "Acc.pot": 0.44529998779296875, + "Acc.animal": 0.6763999938964844, + "Acc.bicycle": 0.6491000366210937, + "Acc.lake": 0.6111999893188477, + "Acc.dishwasher": 0.6263000106811524, + "Acc.screen": 0.6808999633789062, + "Acc.blanket": 0.12729999542236328, + "Acc.sculpture": 0.7222000122070312, + "Acc.hood": 0.4481999969482422, + "Acc.sconce": 0.29270000457763673, + "Acc.vase": 0.387599983215332, + "Acc.traffic light": 0.17360000610351561, + "Acc.tray": 0.0025999999046325685, + "Acc.ashcan": 0.5361999893188476, + "Acc.fan": 0.5656000137329101, + "Acc.pier": 0.43270000457763674, + "Acc.crt screen": 0.06539999961853027, + "Acc.plate": 0.5995999908447266, + "Acc.monitor": 0.6438999938964843, + "Acc.bulletin board": 0.6245000076293945, + "Acc.shower": 0.0, + "Acc.radiator": 0.6745999908447265, + "Acc.glass": 0.0859000015258789, + "Acc.clock": 0.2943000030517578, + "Acc.flag": 0.3791999816894531 + } + }, + "74": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8320000000000001, + "mIoU": 0.4718, + "mAcc": 0.5987, + "IoU.wall": 0.7762999725341797, + "IoU.building": 0.8263999938964843, + "IoU.sky": 0.9405000305175781, + "IoU.floor": 0.8220999908447265, + "IoU.tree": 0.7544000244140625, + "IoU.ceiling": 0.8397000122070313, + "IoU.road": 0.8268000030517578, + "IoU.bed ": 0.8941999816894531, + "IoU.windowpane": 0.6288000106811523, + "IoU.grass": 0.722699966430664, + "IoU.cabinet": 0.6302000045776367, + "IoU.sidewalk": 0.6411000061035156, + "IoU.person": 0.8077999877929688, + "IoU.earth": 
0.3965999984741211, + "IoU.door": 0.5138999938964843, + "IoU.table": 0.5897000122070313, + "IoU.mountain": 0.6056999969482422, + "IoU.plant": 0.5286000061035157, + "IoU.curtain": 0.7287000274658203, + "IoU.chair": 0.5595000076293946, + "IoU.car": 0.8318000030517578, + "IoU.water": 0.5697999954223633, + "IoU.painting": 0.7, + "IoU.sofa": 0.675, + "IoU.shelf": 0.4122999954223633, + "IoU.house": 0.46700000762939453, + "IoU.sea": 0.6727999877929688, + "IoU.mirror": 0.6680999755859375, + "IoU.rug": 0.6577999877929688, + "IoU.field": 0.32279998779296876, + "IoU.armchair": 0.42450000762939455, + "IoU.seat": 0.6529000091552735, + "IoU.fence": 0.4316999816894531, + "IoU.desk": 0.4540999984741211, + "IoU.rock": 0.49939998626708987, + "IoU.wardrobe": 0.552400016784668, + "IoU.lamp": 0.5513000106811523, + "IoU.bathtub": 0.8436000061035156, + "IoU.railing": 0.35900001525878905, + "IoU.cushion": 0.5791999816894531, + "IoU.base": 0.3277000045776367, + "IoU.box": 0.2427000045776367, + "IoU.column": 0.485, + "IoU.signboard": 0.37779998779296875, + "IoU.chest of drawers": 0.38150001525878907, + "IoU.counter": 0.36200000762939455, + "IoU.sand": 0.44919998168945313, + "IoU.sink": 0.6437999725341796, + "IoU.skyscraper": 0.45599998474121095, + "IoU.fireplace": 0.6730999755859375, + "IoU.refrigerator": 0.7319000244140625, + "IoU.grandstand": 0.49189998626708986, + "IoU.path": 0.1697999954223633, + "IoU.stairs": 0.29639999389648436, + "IoU.runway": 0.7187000274658203, + "IoU.case": 0.49259998321533205, + "IoU.pool table": 0.9202999877929687, + "IoU.pillow": 0.5556000137329101, + "IoU.screen door": 0.5797999954223633, + "IoU.stairway": 0.32459999084472657, + "IoU.river": 0.17600000381469727, + "IoU.bridge": 0.5865999984741211, + "IoU.bookcase": 0.31260000228881835, + "IoU.blind": 0.37200000762939456, + "IoU.coffee table": 0.5934999847412109, + "IoU.toilet": 0.7477999877929687, + "IoU.flower": 0.39290000915527346, + "IoU.book": 0.450099983215332, + "IoU.hill": 0.14850000381469727, + "IoU.bench": 0.46680000305175784, + "IoU.countertop": 0.5666999816894531, + "IoU.stove": 0.7025, + "IoU.palm": 0.5125999832153321, + "IoU.kitchen island": 0.4840999984741211, + "IoU.computer": 0.638499984741211, + "IoU.swivel chair": 0.47209999084472654, + "IoU.boat": 0.5972999954223632, + "IoU.bar": 0.49970001220703125, + "IoU.arcade machine": 0.7233000183105469, + "IoU.hovel": 0.5922999954223633, + "IoU.bus": 0.8770999908447266, + "IoU.towel": 0.6265000152587891, + "IoU.light": 0.42439998626708986, + "IoU.truck": 0.2975, + "IoU.tower": 0.27760000228881837, + "IoU.chandelier": 0.6040000152587891, + "IoU.awning": 0.28420000076293944, + "IoU.streetlight": 0.17399999618530274, + "IoU.booth": 0.34330001831054685, + "IoU.television receiver": 0.6958000183105468, + "IoU.airplane": 0.6081999969482422, + "IoU.dirt track": 0.08310000419616699, + "IoU.apparel": 0.30450000762939455, + "IoU.pole": 0.12899999618530272, + "IoU.land": 0.01899999976158142, + "IoU.bannister": 0.12869999885559083, + "IoU.escalator": 0.5133000183105468, + "IoU.ottoman": 0.48270000457763673, + "IoU.bottle": 0.2425, + "IoU.buffet": 0.469900016784668, + "IoU.poster": 0.21049999237060546, + "IoU.stage": 0.14319999694824218, + "IoU.van": 0.342599983215332, + "IoU.ship": 0.07639999866485596, + "IoU.fountain": 0.21770000457763672, + "IoU.conveyer belt": 0.724800033569336, + "IoU.canopy": 0.20239999771118164, + "IoU.washer": 0.5638999938964844, + "IoU.plaything": 0.414900016784668, + "IoU.swimming pool": 0.6143999862670898, + "IoU.stool": 0.31209999084472656, + "IoU.barrel": 
0.2618000030517578, + "IoU.basket": 0.2528000068664551, + "IoU.waterfall": 0.5761999893188476, + "IoU.tent": 0.9261000061035156, + "IoU.bag": 0.14890000343322754, + "IoU.minibike": 0.6433999633789063, + "IoU.cradle": 0.7963999938964844, + "IoU.oven": 0.28780000686645507, + "IoU.ball": 0.46669998168945315, + "IoU.food": 0.5931999969482422, + "IoU.step": 0.07380000114440918, + "IoU.tank": 0.567400016784668, + "IoU.trade name": 0.26979999542236327, + "IoU.microwave": 0.4508000183105469, + "IoU.pot": 0.40119998931884765, + "IoU.animal": 0.5943999862670899, + "IoU.bicycle": 0.527400016784668, + "IoU.lake": 0.027200000286102297, + "IoU.dishwasher": 0.44709999084472657, + "IoU.screen": 0.53, + "IoU.blanket": 0.12210000038146973, + "IoU.sculpture": 0.5761999893188476, + "IoU.hood": 0.5077000045776368, + "IoU.sconce": 0.32419998168945313, + "IoU.vase": 0.30440000534057615, + "IoU.traffic light": 0.23260000228881836, + "IoU.tray": 0.030399999618530273, + "IoU.ashcan": 0.3218999862670898, + "IoU.fan": 0.5047000122070312, + "IoU.pier": 0.2911000061035156, + "IoU.crt screen": 0.023399999141693117, + "IoU.plate": 0.47819999694824217, + "IoU.monitor": 0.168799991607666, + "IoU.bulletin board": 0.40599998474121096, + "IoU.shower": 0.011699999570846558, + "IoU.radiator": 0.5581000137329102, + "IoU.glass": 0.09279999732971192, + "IoU.clock": 0.19850000381469726, + "IoU.flag": 0.283799991607666, + "Acc.wall": 0.8730999755859375, + "Acc.building": 0.9308000183105469, + "Acc.sky": 0.9690000152587891, + "Acc.floor": 0.9018000030517578, + "Acc.tree": 0.8772000122070313, + "Acc.ceiling": 0.9027999877929688, + "Acc.road": 0.8822000122070313, + "Acc.bed ": 0.9651000213623047, + "Acc.windowpane": 0.7848000335693359, + "Acc.grass": 0.8511000061035157, + "Acc.cabinet": 0.7587000274658203, + "Acc.sidewalk": 0.8456999969482422, + "Acc.person": 0.9219999694824219, + "Acc.earth": 0.5940000152587891, + "Acc.door": 0.6919999694824219, + "Acc.table": 0.7327999877929687, + "Acc.mountain": 0.7451999664306641, + "Acc.plant": 0.6222000122070312, + "Acc.curtain": 0.8745999908447266, + "Acc.chair": 0.7222000122070312, + "Acc.car": 0.9322000122070313, + "Acc.water": 0.745999984741211, + "Acc.painting": 0.8743000030517578, + "Acc.sofa": 0.8141000366210938, + "Acc.shelf": 0.5502000045776367, + "Acc.house": 0.6354999923706055, + "Acc.sea": 0.870999984741211, + "Acc.mirror": 0.7995999908447265, + "Acc.rug": 0.7579000091552734, + "Acc.field": 0.4408000183105469, + "Acc.armchair": 0.6676000213623047, + "Acc.seat": 0.8479000091552734, + "Acc.fence": 0.61, + "Acc.desk": 0.7633999633789063, + "Acc.rock": 0.6958999633789062, + "Acc.wardrobe": 0.7301999664306641, + "Acc.lamp": 0.7244999694824219, + "Acc.bathtub": 0.9013999938964844, + "Acc.railing": 0.4620999908447266, + "Acc.cushion": 0.7079000091552734, + "Acc.base": 0.6295999908447265, + "Acc.box": 0.31, + "Acc.column": 0.605999984741211, + "Acc.signboard": 0.48970001220703124, + "Acc.chest of drawers": 0.5825, + "Acc.counter": 0.46, + "Acc.sand": 0.6370000076293946, + "Acc.sink": 0.7173999786376953, + "Acc.skyscraper": 0.5834000015258789, + "Acc.fireplace": 0.8902999877929687, + "Acc.refrigerator": 0.8569999694824219, + "Acc.grandstand": 0.7330000305175781, + "Acc.path": 0.20760000228881836, + "Acc.stairs": 0.4102999877929687, + "Acc.runway": 0.9226000213623047, + "Acc.case": 0.5652000045776367, + "Acc.pool table": 0.9775, + "Acc.pillow": 0.6551000213623047, + "Acc.screen door": 0.7130999755859375, + "Acc.stairway": 0.4077999877929688, + "Acc.river": 0.2684000015258789, + "Acc.bridge": 
0.7394000244140625, + "Acc.bookcase": 0.5025999832153321, + "Acc.blind": 0.402599983215332, + "Acc.coffee table": 0.8673999786376954, + "Acc.toilet": 0.9170999908447266, + "Acc.flower": 0.5545000076293946, + "Acc.book": 0.6393999862670898, + "Acc.hill": 0.2610000038146973, + "Acc.bench": 0.5345999908447265, + "Acc.countertop": 0.7286000061035156, + "Acc.stove": 0.8462999725341797, + "Acc.palm": 0.7087000274658203, + "Acc.kitchen island": 0.7898000335693359, + "Acc.computer": 0.7904000091552734, + "Acc.swivel chair": 0.6090999984741211, + "Acc.boat": 0.830999984741211, + "Acc.bar": 0.6359999847412109, + "Acc.arcade machine": 0.8980000305175782, + "Acc.hovel": 0.6476999664306641, + "Acc.bus": 0.9179000091552735, + "Acc.towel": 0.7891000366210937, + "Acc.light": 0.5022000122070313, + "Acc.truck": 0.43939998626708987, + "Acc.tower": 0.40279998779296877, + "Acc.chandelier": 0.7605000305175781, + "Acc.awning": 0.3415999984741211, + "Acc.streetlight": 0.2459000015258789, + "Acc.booth": 0.42639999389648436, + "Acc.television receiver": 0.8458999633789063, + "Acc.airplane": 0.6951000213623046, + "Acc.dirt track": 0.24979999542236328, + "Acc.apparel": 0.4291999816894531, + "Acc.pole": 0.16649999618530273, + "Acc.land": 0.030899999141693116, + "Acc.bannister": 0.16860000610351564, + "Acc.escalator": 0.7862999725341797, + "Acc.ottoman": 0.678499984741211, + "Acc.bottle": 0.320099983215332, + "Acc.buffet": 0.615999984741211, + "Acc.poster": 0.24459999084472656, + "Acc.stage": 0.2581999969482422, + "Acc.van": 0.41069999694824216, + "Acc.ship": 0.07889999866485596, + "Acc.fountain": 0.22190000534057616, + "Acc.conveyer belt": 0.9247000122070312, + "Acc.canopy": 0.2322999954223633, + "Acc.washer": 0.6816999816894531, + "Acc.plaything": 0.6420999908447266, + "Acc.swimming pool": 0.8434999847412109, + "Acc.stool": 0.3972999954223633, + "Acc.barrel": 0.6427999877929688, + "Acc.basket": 0.2929999923706055, + "Acc.waterfall": 0.7748000335693359, + "Acc.tent": 0.989800033569336, + "Acc.bag": 0.1634000015258789, + "Acc.minibike": 0.7326000213623047, + "Acc.cradle": 0.9718000030517578, + "Acc.oven": 0.54, + "Acc.ball": 0.5054000091552734, + "Acc.food": 0.6843000030517579, + "Acc.step": 0.103100004196167, + "Acc.tank": 0.624000015258789, + "Acc.trade name": 0.291200008392334, + "Acc.microwave": 0.49770000457763675, + "Acc.pot": 0.46099998474121096, + "Acc.animal": 0.632400016784668, + "Acc.bicycle": 0.7047000122070313, + "Acc.lake": 0.03059999942779541, + "Acc.dishwasher": 0.5929999923706055, + "Acc.screen": 0.767300033569336, + "Acc.blanket": 0.13300000190734862, + "Acc.sculpture": 0.7854000091552734, + "Acc.hood": 0.6008000183105469, + "Acc.sconce": 0.41950000762939454, + "Acc.vase": 0.4718000030517578, + "Acc.traffic light": 0.34959999084472654, + "Acc.tray": 0.041500000953674315, + "Acc.ashcan": 0.5168000030517578, + "Acc.fan": 0.7493000030517578, + "Acc.pier": 0.46130001068115234, + "Acc.crt screen": 0.05599999904632568, + "Acc.plate": 0.7026000213623047, + "Acc.monitor": 0.2765999984741211, + "Acc.bulletin board": 0.6641999816894532, + "Acc.shower": 0.05, + "Acc.radiator": 0.6211000061035157, + "Acc.glass": 0.10170000076293945, + "Acc.clock": 0.22479999542236329, + "Acc.flag": 0.3181999969482422 + } + }, + "75": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.833, + "mIoU": 0.4824, + "mAcc": 0.6086, + "IoU.wall": 0.7791999816894531, + "IoU.building": 0.8263999938964843, + "IoU.sky": 0.9408000183105468, + 
"IoU.floor": 0.8236000061035156, + "IoU.tree": 0.7554000091552734, + "IoU.ceiling": 0.8412000274658203, + "IoU.road": 0.8308999633789063, + "IoU.bed ": 0.8894000244140625, + "IoU.windowpane": 0.6302999877929687, + "IoU.grass": 0.7062999725341796, + "IoU.cabinet": 0.6297999954223633, + "IoU.sidewalk": 0.655199966430664, + "IoU.person": 0.802300033569336, + "IoU.earth": 0.38240001678466795, + "IoU.door": 0.5311000061035156, + "IoU.table": 0.6011999893188477, + "IoU.mountain": 0.5931000137329101, + "IoU.plant": 0.53, + "IoU.curtain": 0.7244999694824219, + "IoU.chair": 0.5647000122070313, + "IoU.car": 0.8330000305175781, + "IoU.water": 0.5491999816894532, + "IoU.painting": 0.6975, + "IoU.sofa": 0.6856999969482422, + "IoU.shelf": 0.4456999969482422, + "IoU.house": 0.4929999923706055, + "IoU.sea": 0.6498999786376953, + "IoU.mirror": 0.6786000061035157, + "IoU.rug": 0.6551000213623047, + "IoU.field": 0.357400016784668, + "IoU.armchair": 0.4427000045776367, + "IoU.seat": 0.6479000091552735, + "IoU.fence": 0.46290000915527346, + "IoU.desk": 0.44159999847412107, + "IoU.rock": 0.5165999984741211, + "IoU.wardrobe": 0.5704000091552734, + "IoU.lamp": 0.5722999954223633, + "IoU.bathtub": 0.8330000305175781, + "IoU.railing": 0.39099998474121095, + "IoU.cushion": 0.5727000045776367, + "IoU.base": 0.3245000076293945, + "IoU.box": 0.23450000762939452, + "IoU.column": 0.47869998931884766, + "IoU.signboard": 0.382599983215332, + "IoU.chest of drawers": 0.37259998321533205, + "IoU.counter": 0.34380001068115235, + "IoU.sand": 0.42700000762939455, + "IoU.sink": 0.6623999786376953, + "IoU.skyscraper": 0.4829999923706055, + "IoU.fireplace": 0.6918000030517578, + "IoU.refrigerator": 0.6934999847412109, + "IoU.grandstand": 0.512400016784668, + "IoU.path": 0.18709999084472656, + "IoU.stairs": 0.3206999969482422, + "IoU.runway": 0.7125, + "IoU.case": 0.5786000061035156, + "IoU.pool table": 0.9154000091552734, + "IoU.pillow": 0.5670000076293945, + "IoU.screen door": 0.5593000030517579, + "IoU.stairway": 0.345, + "IoU.river": 0.1752000045776367, + "IoU.bridge": 0.6036000061035156, + "IoU.bookcase": 0.32939998626708983, + "IoU.blind": 0.4318000030517578, + "IoU.coffee table": 0.6018000030517578, + "IoU.toilet": 0.8081999969482422, + "IoU.flower": 0.3761999893188477, + "IoU.book": 0.44610000610351563, + "IoU.hill": 0.1347000026702881, + "IoU.bench": 0.40369998931884765, + "IoU.countertop": 0.5308000183105469, + "IoU.stove": 0.6968000030517578, + "IoU.palm": 0.514900016784668, + "IoU.kitchen island": 0.4893000030517578, + "IoU.computer": 0.6691999816894532, + "IoU.swivel chair": 0.5281999969482422, + "IoU.boat": 0.6470999908447266, + "IoU.bar": 0.5404999923706054, + "IoU.arcade machine": 0.7908000183105469, + "IoU.hovel": 0.5429000091552735, + "IoU.bus": 0.8241999816894531, + "IoU.towel": 0.6181999969482422, + "IoU.light": 0.4256999969482422, + "IoU.truck": 0.2231999969482422, + "IoU.tower": 0.2502000045776367, + "IoU.chandelier": 0.6286999893188476, + "IoU.awning": 0.27010000228881836, + "IoU.streetlight": 0.19850000381469726, + "IoU.booth": 0.445, + "IoU.television receiver": 0.6502999877929687, + "IoU.airplane": 0.6444999694824218, + "IoU.dirt track": 0.007300000190734863, + "IoU.apparel": 0.32360000610351564, + "IoU.pole": 0.22059999465942381, + "IoU.land": 0.04949999809265137, + "IoU.bannister": 0.13039999961853027, + "IoU.escalator": 0.525, + "IoU.ottoman": 0.4893000030517578, + "IoU.bottle": 0.2527000045776367, + "IoU.buffet": 0.47139999389648435, + "IoU.poster": 0.19840000152587892, + "IoU.stage": 
0.09579999923706055, + "IoU.van": 0.35009998321533203, + "IoU.ship": 0.12670000076293944, + "IoU.fountain": 0.20659999847412108, + "IoU.conveyer belt": 0.6937000274658203, + "IoU.canopy": 0.17790000915527343, + "IoU.washer": 0.6898999786376954, + "IoU.plaything": 0.3375, + "IoU.swimming pool": 0.6925, + "IoU.stool": 0.3040999984741211, + "IoU.barrel": 0.3559999847412109, + "IoU.basket": 0.2720999908447266, + "IoU.waterfall": 0.5079999923706054, + "IoU.tent": 0.9468000030517578, + "IoU.bag": 0.17969999313354493, + "IoU.minibike": 0.6712999725341797, + "IoU.cradle": 0.8166999816894531, + "IoU.oven": 0.2720000076293945, + "IoU.ball": 0.5195999908447265, + "IoU.food": 0.567599983215332, + "IoU.step": 0.07849999904632568, + "IoU.tank": 0.5627000045776367, + "IoU.trade name": 0.2585000038146973, + "IoU.microwave": 0.4331999969482422, + "IoU.pot": 0.3915999984741211, + "IoU.animal": 0.6526000213623047, + "IoU.bicycle": 0.5404999923706054, + "IoU.lake": 0.49290000915527343, + "IoU.dishwasher": 0.5070000076293946, + "IoU.screen": 0.48770000457763674, + "IoU.blanket": 0.0975, + "IoU.sculpture": 0.6233000183105468, + "IoU.hood": 0.5138999938964843, + "IoU.sconce": 0.3477000045776367, + "IoU.vase": 0.28780000686645507, + "IoU.traffic light": 0.2520000076293945, + "IoU.tray": 0.025799999237060545, + "IoU.ashcan": 0.34819999694824216, + "IoU.fan": 0.515099983215332, + "IoU.pier": 0.37810001373291013, + "IoU.crt screen": 0.0014000000059604645, + "IoU.plate": 0.48330001831054686, + "IoU.monitor": 0.09930000305175782, + "IoU.bulletin board": 0.4290999984741211, + "IoU.shower": 0.007900000214576722, + "IoU.radiator": 0.6020999908447265, + "IoU.glass": 0.11899999618530273, + "IoU.clock": 0.2631999969482422, + "IoU.flag": 0.3461000061035156, + "Acc.wall": 0.8755999755859375, + "Acc.building": 0.9216999816894531, + "Acc.sky": 0.9662999725341797, + "Acc.floor": 0.9019000244140625, + "Acc.tree": 0.8813999938964844, + "Acc.ceiling": 0.9055999755859375, + "Acc.road": 0.895, + "Acc.bed ": 0.9643000030517578, + "Acc.windowpane": 0.7794999694824218, + "Acc.grass": 0.8116999816894531, + "Acc.cabinet": 0.749000015258789, + "Acc.sidewalk": 0.8168000030517578, + "Acc.person": 0.9337999725341797, + "Acc.earth": 0.5593000030517579, + "Acc.door": 0.7147000122070313, + "Acc.table": 0.7434999847412109, + "Acc.mountain": 0.7355000305175782, + "Acc.plant": 0.6236999893188476, + "Acc.curtain": 0.8686000061035156, + "Acc.chair": 0.7175, + "Acc.car": 0.9298000335693359, + "Acc.water": 0.7133000183105469, + "Acc.painting": 0.8683000183105469, + "Acc.sofa": 0.8333000183105469, + "Acc.shelf": 0.6156999969482422, + "Acc.house": 0.7237000274658203, + "Acc.sea": 0.8433000183105469, + "Acc.mirror": 0.8116000366210937, + "Acc.rug": 0.7181999969482422, + "Acc.field": 0.5418999862670898, + "Acc.armchair": 0.6856999969482422, + "Acc.seat": 0.8520999908447265, + "Acc.fence": 0.695, + "Acc.desk": 0.7422000122070312, + "Acc.rock": 0.7290000152587891, + "Acc.wardrobe": 0.734800033569336, + "Acc.lamp": 0.7311000061035157, + "Acc.bathtub": 0.88, + "Acc.railing": 0.49840000152587893, + "Acc.cushion": 0.6905000305175781, + "Acc.base": 0.5372999954223633, + "Acc.box": 0.27719999313354493, + "Acc.column": 0.6165999984741211, + "Acc.signboard": 0.5063000106811524, + "Acc.chest of drawers": 0.5877999877929687, + "Acc.counter": 0.47880001068115235, + "Acc.sand": 0.6563999938964844, + "Acc.sink": 0.7281999969482422, + "Acc.skyscraper": 0.620999984741211, + "Acc.fireplace": 0.9019000244140625, + "Acc.refrigerator": 0.8876999664306641, + "Acc.grandstand": 
0.7701000213623047, + "Acc.path": 0.24540000915527344, + "Acc.stairs": 0.4236000061035156, + "Acc.runway": 0.9469000244140625, + "Acc.case": 0.7484999847412109, + "Acc.pool table": 0.9808000183105469, + "Acc.pillow": 0.6980999755859375, + "Acc.screen door": 0.657300033569336, + "Acc.stairway": 0.4222999954223633, + "Acc.river": 0.32419998168945313, + "Acc.bridge": 0.6973000335693359, + "Acc.bookcase": 0.6029000091552734, + "Acc.blind": 0.48939998626708986, + "Acc.coffee table": 0.8370999908447265, + "Acc.toilet": 0.9083999633789063, + "Acc.flower": 0.5433000183105469, + "Acc.book": 0.6102000045776367, + "Acc.hill": 0.25559999465942385, + "Acc.bench": 0.46110000610351565, + "Acc.countertop": 0.7136000061035156, + "Acc.stove": 0.8340000152587891, + "Acc.palm": 0.722699966430664, + "Acc.kitchen island": 0.773499984741211, + "Acc.computer": 0.795199966430664, + "Acc.swivel chair": 0.6769999694824219, + "Acc.boat": 0.8220999908447265, + "Acc.bar": 0.7318000030517579, + "Acc.arcade machine": 0.8526000213623047, + "Acc.hovel": 0.5929999923706055, + "Acc.bus": 0.9402999877929688, + "Acc.towel": 0.764000015258789, + "Acc.light": 0.48529998779296873, + "Acc.truck": 0.33630001068115234, + "Acc.tower": 0.36020000457763673, + "Acc.chandelier": 0.7901000213623047, + "Acc.awning": 0.32610000610351564, + "Acc.streetlight": 0.266200008392334, + "Acc.booth": 0.5186000061035156, + "Acc.television receiver": 0.8669999694824219, + "Acc.airplane": 0.7312000274658204, + "Acc.dirt track": 0.02809999942779541, + "Acc.apparel": 0.44549999237060545, + "Acc.pole": 0.31979999542236326, + "Acc.land": 0.12329999923706055, + "Acc.bannister": 0.2015999984741211, + "Acc.escalator": 0.8258000183105468, + "Acc.ottoman": 0.6709999847412109, + "Acc.bottle": 0.3420999908447266, + "Acc.buffet": 0.6261999893188477, + "Acc.poster": 0.2352000045776367, + "Acc.stage": 0.19370000839233398, + "Acc.van": 0.42779998779296874, + "Acc.ship": 0.1356999969482422, + "Acc.fountain": 0.21540000915527344, + "Acc.conveyer belt": 0.9287000274658204, + "Acc.canopy": 0.20200000762939452, + "Acc.washer": 0.7002999877929688, + "Acc.plaything": 0.5611000061035156, + "Acc.swimming pool": 0.8573999786376953, + "Acc.stool": 0.3983000183105469, + "Acc.barrel": 0.6463999938964844, + "Acc.basket": 0.3258000183105469, + "Acc.waterfall": 0.6798000335693359, + "Acc.tent": 0.9833999633789062, + "Acc.bag": 0.21360000610351562, + "Acc.minibike": 0.7669999694824219, + "Acc.cradle": 0.9751999664306641, + "Acc.oven": 0.6013000106811524, + "Acc.ball": 0.5858000183105468, + "Acc.food": 0.635, + "Acc.step": 0.1090999984741211, + "Acc.tank": 0.6527999877929688, + "Acc.trade name": 0.2854000091552734, + "Acc.microwave": 0.46799999237060547, + "Acc.pot": 0.45560001373291015, + "Acc.animal": 0.7013999938964843, + "Acc.bicycle": 0.6859999847412109, + "Acc.lake": 0.5470999908447266, + "Acc.dishwasher": 0.6066999816894532, + "Acc.screen": 0.7922000122070313, + "Acc.blanket": 0.10689999580383301, + "Acc.sculpture": 0.794000015258789, + "Acc.hood": 0.625999984741211, + "Acc.sconce": 0.44549999237060545, + "Acc.vase": 0.42540000915527343, + "Acc.traffic light": 0.35439998626708985, + "Acc.tray": 0.031700000762939454, + "Acc.ashcan": 0.5131000137329101, + "Acc.fan": 0.6866000366210937, + "Acc.pier": 0.6648000335693359, + "Acc.crt screen": 0.0037000000476837156, + "Acc.plate": 0.6629000091552735, + "Acc.monitor": 0.16200000762939454, + "Acc.bulletin board": 0.5565000152587891, + "Acc.shower": 0.0509000015258789, + "Acc.radiator": 0.7208999633789063, + "Acc.glass": 
0.12760000228881835, + "Acc.clock": 0.3040999984741211, + "Acc.flag": 0.3711999893188477 + } + }, + "76": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8341, + "mIoU": 0.4806, + "mAcc": 0.5764, + "IoU.wall": 0.7772000122070313, + "IoU.building": 0.8286000061035156, + "IoU.sky": 0.9401000213623046, + "IoU.floor": 0.8177999877929687, + "IoU.tree": 0.7498000335693359, + "IoU.ceiling": 0.8370999908447265, + "IoU.road": 0.8279000091552734, + "IoU.bed ": 0.8906999969482422, + "IoU.windowpane": 0.6211000061035157, + "IoU.grass": 0.7161000061035157, + "IoU.cabinet": 0.6243999862670898, + "IoU.sidewalk": 0.653499984741211, + "IoU.person": 0.8052999877929687, + "IoU.earth": 0.4025, + "IoU.door": 0.5141999816894531, + "IoU.table": 0.6002999877929688, + "IoU.mountain": 0.5740000152587891, + "IoU.plant": 0.52, + "IoU.curtain": 0.7379000091552734, + "IoU.chair": 0.5581999969482422, + "IoU.car": 0.8393000030517578, + "IoU.water": 0.5945999908447266, + "IoU.painting": 0.7037000274658203, + "IoU.sofa": 0.6805000305175781, + "IoU.shelf": 0.42619998931884767, + "IoU.house": 0.4879999923706055, + "IoU.sea": 0.6786000061035157, + "IoU.mirror": 0.6676000213623047, + "IoU.rug": 0.6029000091552734, + "IoU.field": 0.3452999877929688, + "IoU.armchair": 0.4458000183105469, + "IoU.seat": 0.6691000366210937, + "IoU.fence": 0.49509998321533205, + "IoU.desk": 0.4866999816894531, + "IoU.rock": 0.5120999908447266, + "IoU.wardrobe": 0.5568999862670898, + "IoU.lamp": 0.5565000152587891, + "IoU.bathtub": 0.8172000122070312, + "IoU.railing": 0.39810001373291015, + "IoU.cushion": 0.5727999877929687, + "IoU.base": 0.33299999237060546, + "IoU.box": 0.23309999465942383, + "IoU.column": 0.48209999084472654, + "IoU.signboard": 0.3618000030517578, + "IoU.chest of drawers": 0.34549999237060547, + "IoU.counter": 0.34450000762939453, + "IoU.sand": 0.46450000762939453, + "IoU.sink": 0.6566000366210938, + "IoU.skyscraper": 0.495, + "IoU.fireplace": 0.6841000366210938, + "IoU.refrigerator": 0.7656999969482422, + "IoU.grandstand": 0.5013000106811524, + "IoU.path": 0.17280000686645508, + "IoU.stairs": 0.3113999938964844, + "IoU.runway": 0.6122999954223632, + "IoU.case": 0.5738999938964844, + "IoU.pool table": 0.9315000152587891, + "IoU.pillow": 0.5290999984741211, + "IoU.screen door": 0.6281999969482421, + "IoU.stairway": 0.37540000915527344, + "IoU.river": 0.22649999618530273, + "IoU.bridge": 0.539099998474121, + "IoU.bookcase": 0.33029998779296876, + "IoU.blind": 0.37709999084472656, + "IoU.coffee table": 0.6445999908447265, + "IoU.toilet": 0.7870999908447266, + "IoU.flower": 0.38560001373291014, + "IoU.book": 0.4463000106811523, + "IoU.hill": 0.1427999973297119, + "IoU.bench": 0.4383000183105469, + "IoU.countertop": 0.5052999877929687, + "IoU.stove": 0.7476000213623046, + "IoU.palm": 0.4766999816894531, + "IoU.kitchen island": 0.44360000610351563, + "IoU.computer": 0.7363999938964844, + "IoU.swivel chair": 0.48450000762939455, + "IoU.boat": 0.6837999725341797, + "IoU.bar": 0.489900016784668, + "IoU.arcade machine": 0.7619000244140625, + "IoU.hovel": 0.4911000061035156, + "IoU.bus": 0.8925, + "IoU.towel": 0.6448999786376953, + "IoU.light": 0.2879000091552734, + "IoU.truck": 0.24829999923706056, + "IoU.tower": 0.26260000228881836, + "IoU.chandelier": 0.6297000122070312, + "IoU.awning": 0.19020000457763672, + "IoU.streetlight": 0.15229999542236328, + "IoU.booth": 0.4463999938964844, + "IoU.television receiver": 0.7152999877929688, + 
"IoU.airplane": 0.635, + "IoU.dirt track": 0.0, + "IoU.apparel": 0.32610000610351564, + "IoU.pole": 0.2384000015258789, + "IoU.land": 0.01059999942779541, + "IoU.bannister": 0.125, + "IoU.escalator": 0.564900016784668, + "IoU.ottoman": 0.49770000457763675, + "IoU.bottle": 0.35900001525878905, + "IoU.buffet": 0.4154000091552734, + "IoU.poster": 0.20299999237060548, + "IoU.stage": 0.09069999694824218, + "IoU.van": 0.2894000053405762, + "IoU.ship": 0.10050000190734863, + "IoU.fountain": 0.19940000534057617, + "IoU.conveyer belt": 0.8119999694824219, + "IoU.canopy": 0.2281999969482422, + "IoU.washer": 0.7272000122070312, + "IoU.plaything": 0.28219999313354494, + "IoU.swimming pool": 0.7273999786376953, + "IoU.stool": 0.2628000068664551, + "IoU.barrel": 0.5977999877929687, + "IoU.basket": 0.2535000038146973, + "IoU.waterfall": 0.5436999893188477, + "IoU.tent": 0.9508999633789063, + "IoU.bag": 0.120600004196167, + "IoU.minibike": 0.6143999862670898, + "IoU.cradle": 0.8141999816894532, + "IoU.oven": 0.35, + "IoU.ball": 0.38970001220703127, + "IoU.food": 0.5084000015258789, + "IoU.step": 0.07530000209808349, + "IoU.tank": 0.6045999908447266, + "IoU.trade name": 0.13399999618530273, + "IoU.microwave": 0.6908999633789062, + "IoU.pot": 0.395, + "IoU.animal": 0.6081000137329101, + "IoU.bicycle": 0.5033000183105468, + "IoU.lake": 0.13, + "IoU.dishwasher": 0.4940999984741211, + "IoU.screen": 0.5843999862670899, + "IoU.blanket": 0.1059000015258789, + "IoU.sculpture": 0.5620999908447266, + "IoU.hood": 0.5133000183105468, + "IoU.sconce": 0.28700000762939454, + "IoU.vase": 0.30120000839233396, + "IoU.traffic light": 0.21440000534057618, + "IoU.tray": 0.006200000047683716, + "IoU.ashcan": 0.3743000030517578, + "IoU.fan": 0.4865999984741211, + "IoU.pier": 0.3384000015258789, + "IoU.crt screen": 9.999999776482581e-05, + "IoU.plate": 0.47150001525878904, + "IoU.monitor": 0.4590000152587891, + "IoU.bulletin board": 0.3356999969482422, + "IoU.shower": 0.029100000858306885, + "IoU.radiator": 0.540099983215332, + "IoU.glass": 0.08789999961853028, + "IoU.clock": 0.26549999237060545, + "IoU.flag": 0.3043000030517578, + "Acc.wall": 0.8991000366210937, + "Acc.building": 0.9313999938964844, + "Acc.sky": 0.9762999725341797, + "Acc.floor": 0.9181999969482422, + "Acc.tree": 0.8788999938964843, + "Acc.ceiling": 0.9091999816894532, + "Acc.road": 0.9041000366210937, + "Acc.bed ": 0.9573000335693359, + "Acc.windowpane": 0.7508000183105469, + "Acc.grass": 0.8145999908447266, + "Acc.cabinet": 0.7662000274658203, + "Acc.sidewalk": 0.8062999725341797, + "Acc.person": 0.9075, + "Acc.earth": 0.647300033569336, + "Acc.door": 0.7025, + "Acc.table": 0.7593000030517578, + "Acc.mountain": 0.7011000061035156, + "Acc.plant": 0.6074000167846679, + "Acc.curtain": 0.844800033569336, + "Acc.chair": 0.6902999877929688, + "Acc.car": 0.9094000244140625, + "Acc.water": 0.7718000030517578, + "Acc.painting": 0.8431999969482422, + "Acc.sofa": 0.8133000183105469, + "Acc.shelf": 0.571500015258789, + "Acc.house": 0.6941999816894531, + "Acc.sea": 0.8225, + "Acc.mirror": 0.7377999877929687, + "Acc.rug": 0.6501000213623047, + "Acc.field": 0.48639999389648436, + "Acc.armchair": 0.6812999725341797, + "Acc.seat": 0.8302999877929688, + "Acc.fence": 0.677300033569336, + "Acc.desk": 0.7070999908447265, + "Acc.rock": 0.6994000244140625, + "Acc.wardrobe": 0.706500015258789, + "Acc.lamp": 0.6538999938964843, + "Acc.bathtub": 0.8554000091552735, + "Acc.railing": 0.5338000106811523, + "Acc.cushion": 0.6726000213623047, + "Acc.base": 0.5591999816894532, + "Acc.box": 
0.3015999984741211, + "Acc.column": 0.5970999908447265, + "Acc.signboard": 0.4452000045776367, + "Acc.chest of drawers": 0.5518999862670898, + "Acc.counter": 0.43779998779296875, + "Acc.sand": 0.6008000183105469, + "Acc.sink": 0.6904000091552734, + "Acc.skyscraper": 0.5934000015258789, + "Acc.fireplace": 0.8755000305175781, + "Acc.refrigerator": 0.8438999938964844, + "Acc.grandstand": 0.7552999877929687, + "Acc.path": 0.211200008392334, + "Acc.stairs": 0.40439998626708984, + "Acc.runway": 0.7812000274658203, + "Acc.case": 0.739800033569336, + "Acc.pool table": 0.9555000305175781, + "Acc.pillow": 0.6091999816894531, + "Acc.screen door": 0.7106999969482422, + "Acc.stairway": 0.4465000152587891, + "Acc.river": 0.3888999938964844, + "Acc.bridge": 0.6315000152587891, + "Acc.bookcase": 0.5954999923706055, + "Acc.blind": 0.4, + "Acc.coffee table": 0.7897000122070312, + "Acc.toilet": 0.8912999725341797, + "Acc.flower": 0.5191999816894531, + "Acc.book": 0.5734000015258789, + "Acc.hill": 0.2440999984741211, + "Acc.bench": 0.49689998626708987, + "Acc.countertop": 0.6712000274658203, + "Acc.stove": 0.8105000305175781, + "Acc.palm": 0.5920999908447265, + "Acc.kitchen island": 0.5627999877929688, + "Acc.computer": 0.8405999755859375, + "Acc.swivel chair": 0.5925, + "Acc.boat": 0.8273999786376953, + "Acc.bar": 0.6518000030517578, + "Acc.arcade machine": 0.8131999969482422, + "Acc.hovel": 0.5302000045776367, + "Acc.bus": 0.9302999877929687, + "Acc.towel": 0.7362999725341797, + "Acc.light": 0.3015999984741211, + "Acc.truck": 0.330099983215332, + "Acc.tower": 0.34889999389648435, + "Acc.chandelier": 0.7368000030517579, + "Acc.awning": 0.20110000610351564, + "Acc.streetlight": 0.17299999237060548, + "Acc.booth": 0.49759998321533205, + "Acc.television receiver": 0.8212000274658203, + "Acc.airplane": 0.6786000061035157, + "Acc.dirt track": 0.0, + "Acc.apparel": 0.41529998779296873, + "Acc.pole": 0.3290000152587891, + "Acc.land": 0.01809999942779541, + "Acc.bannister": 0.15479999542236328, + "Acc.escalator": 0.8231999969482422, + "Acc.ottoman": 0.6544000244140625, + "Acc.bottle": 0.5275, + "Acc.buffet": 0.4988999938964844, + "Acc.poster": 0.26610000610351564, + "Acc.stage": 0.15319999694824218, + "Acc.van": 0.3477000045776367, + "Acc.ship": 0.10260000228881835, + "Acc.fountain": 0.21329999923706056, + "Acc.conveyer belt": 0.9133000183105469, + "Acc.canopy": 0.2794000053405762, + "Acc.washer": 0.7388999938964844, + "Acc.plaything": 0.445, + "Acc.swimming pool": 0.77, + "Acc.stool": 0.31260000228881835, + "Acc.barrel": 0.6379999923706055, + "Acc.basket": 0.2947999954223633, + "Acc.waterfall": 0.635099983215332, + "Acc.tent": 0.9790000152587891, + "Acc.bag": 0.12670000076293944, + "Acc.minibike": 0.6612000274658203, + "Acc.cradle": 0.9605999755859375, + "Acc.oven": 0.6172000122070312, + "Acc.ball": 0.4034000015258789, + "Acc.food": 0.5695999908447266, + "Acc.step": 0.09770000457763672, + "Acc.tank": 0.6341999816894531, + "Acc.trade name": 0.13960000038146972, + "Acc.microwave": 0.7225, + "Acc.pot": 0.4413000106811523, + "Acc.animal": 0.6359999847412109, + "Acc.bicycle": 0.6426999664306641, + "Acc.lake": 0.1372999954223633, + "Acc.dishwasher": 0.5833000183105469, + "Acc.screen": 0.7094999694824219, + "Acc.blanket": 0.11539999961853027, + "Acc.sculpture": 0.7529000091552734, + "Acc.hood": 0.5288999938964843, + "Acc.sconce": 0.33490001678466796, + "Acc.vase": 0.3915999984741211, + "Acc.traffic light": 0.26190000534057617, + "Acc.tray": 0.006600000262260437, + "Acc.ashcan": 0.5145999908447265, + "Acc.fan": 0.5675, + 
"Acc.pier": 0.4256999969482422, + "Acc.crt screen": 9.999999776482581e-05, + "Acc.plate": 0.5991999816894531, + "Acc.monitor": 0.6084999847412109, + "Acc.bulletin board": 0.42389999389648436, + "Acc.shower": 0.03539999961853027, + "Acc.radiator": 0.5879999923706055, + "Acc.glass": 0.09119999885559082, + "Acc.clock": 0.32549999237060545, + "Acc.flag": 0.3196999931335449 + } + }, + "77": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8344, + "mIoU": 0.4876, + "mAcc": 0.6069, + "IoU.wall": 0.7812000274658203, + "IoU.building": 0.8291999816894531, + "IoU.sky": 0.9398999786376954, + "IoU.floor": 0.8244000244140625, + "IoU.tree": 0.7483000183105468, + "IoU.ceiling": 0.8433000183105469, + "IoU.road": 0.8272000122070312, + "IoU.bed ": 0.8948999786376953, + "IoU.windowpane": 0.6195999908447266, + "IoU.grass": 0.6898999786376954, + "IoU.cabinet": 0.644000015258789, + "IoU.sidewalk": 0.654000015258789, + "IoU.person": 0.8030999755859375, + "IoU.earth": 0.40310001373291016, + "IoU.door": 0.5106000137329102, + "IoU.table": 0.6127000045776367, + "IoU.mountain": 0.5995999908447266, + "IoU.plant": 0.5186000061035156, + "IoU.curtain": 0.7308999633789063, + "IoU.chair": 0.5788000106811524, + "IoU.car": 0.8434999847412109, + "IoU.water": 0.545099983215332, + "IoU.painting": 0.7108000183105468, + "IoU.sofa": 0.6837999725341797, + "IoU.shelf": 0.4409000015258789, + "IoU.house": 0.5034000015258789, + "IoU.sea": 0.6655000305175781, + "IoU.mirror": 0.6984999847412109, + "IoU.rug": 0.6354000091552734, + "IoU.field": 0.37799999237060544, + "IoU.armchair": 0.4386000061035156, + "IoU.seat": 0.6577999877929688, + "IoU.fence": 0.43400001525878906, + "IoU.desk": 0.4652000045776367, + "IoU.rock": 0.4986999893188477, + "IoU.wardrobe": 0.5622000122070312, + "IoU.lamp": 0.5725, + "IoU.bathtub": 0.8512000274658204, + "IoU.railing": 0.3752000045776367, + "IoU.cushion": 0.5924000167846679, + "IoU.base": 0.3263999938964844, + "IoU.box": 0.23340000152587892, + "IoU.column": 0.48009998321533204, + "IoU.signboard": 0.36529998779296874, + "IoU.chest of drawers": 0.354900016784668, + "IoU.counter": 0.37139999389648437, + "IoU.sand": 0.5372999954223633, + "IoU.sink": 0.6980999755859375, + "IoU.skyscraper": 0.49290000915527343, + "IoU.fireplace": 0.7216000366210937, + "IoU.refrigerator": 0.7343000030517578, + "IoU.grandstand": 0.48520000457763673, + "IoU.path": 0.19350000381469726, + "IoU.stairs": 0.29959999084472655, + "IoU.runway": 0.7018000030517578, + "IoU.case": 0.5734999847412109, + "IoU.pool table": 0.924000015258789, + "IoU.pillow": 0.5679000091552734, + "IoU.screen door": 0.5622999954223633, + "IoU.stairway": 0.42, + "IoU.river": 0.13760000228881836, + "IoU.bridge": 0.580999984741211, + "IoU.bookcase": 0.34200000762939453, + "IoU.blind": 0.3629999923706055, + "IoU.coffee table": 0.6325999832153321, + "IoU.toilet": 0.8390000152587891, + "IoU.flower": 0.3656000137329102, + "IoU.book": 0.41869998931884767, + "IoU.hill": 0.13199999809265137, + "IoU.bench": 0.43439998626708987, + "IoU.countertop": 0.5781000137329102, + "IoU.stove": 0.7381999969482422, + "IoU.palm": 0.5013000106811524, + "IoU.kitchen island": 0.44360000610351563, + "IoU.computer": 0.6708999633789062, + "IoU.swivel chair": 0.5377999877929688, + "IoU.boat": 0.7241999816894531, + "IoU.bar": 0.5159000015258789, + "IoU.arcade machine": 0.7030999755859375, + "IoU.hovel": 0.28819999694824217, + "IoU.bus": 0.9079000091552735, + "IoU.towel": 0.5822000122070312, + "IoU.light": 
0.43560001373291013, + "IoU.truck": 0.2855999946594238, + "IoU.tower": 0.22180000305175782, + "IoU.chandelier": 0.649000015258789, + "IoU.awning": 0.2125, + "IoU.streetlight": 0.19790000915527345, + "IoU.booth": 0.38990001678466796, + "IoU.television receiver": 0.7202999877929688, + "IoU.airplane": 0.6265999984741211, + "IoU.dirt track": 0.04710000038146973, + "IoU.apparel": 0.29989999771118164, + "IoU.pole": 0.21569999694824218, + "IoU.land": 0.08520000457763671, + "IoU.bannister": 0.09590000152587891, + "IoU.escalator": 0.5497000122070312, + "IoU.ottoman": 0.49700000762939456, + "IoU.bottle": 0.20829999923706055, + "IoU.buffet": 0.5588000106811524, + "IoU.poster": 0.19959999084472657, + "IoU.stage": 0.11289999961853027, + "IoU.van": 0.3295999908447266, + "IoU.ship": 0.4608000183105469, + "IoU.fountain": 0.12729999542236328, + "IoU.conveyer belt": 0.6993000030517578, + "IoU.canopy": 0.23549999237060548, + "IoU.washer": 0.6025, + "IoU.plaything": 0.35639999389648436, + "IoU.swimming pool": 0.699800033569336, + "IoU.stool": 0.3909000015258789, + "IoU.barrel": 0.48459999084472655, + "IoU.basket": 0.24489999771118165, + "IoU.waterfall": 0.5245000076293945, + "IoU.tent": 0.9505999755859375, + "IoU.bag": 0.1690999984741211, + "IoU.minibike": 0.6845999908447266, + "IoU.cradle": 0.8238999938964844, + "IoU.oven": 0.28389999389648435, + "IoU.ball": 0.5138000106811523, + "IoU.food": 0.577400016784668, + "IoU.step": 0.08649999618530274, + "IoU.tank": 0.5520999908447266, + "IoU.trade name": 0.2645000076293945, + "IoU.microwave": 0.5856000137329102, + "IoU.pot": 0.40950000762939454, + "IoU.animal": 0.581500015258789, + "IoU.bicycle": 0.515, + "IoU.lake": 0.5386999893188477, + "IoU.dishwasher": 0.5381999969482422, + "IoU.screen": 0.4602000045776367, + "IoU.blanket": 0.11880000114440918, + "IoU.sculpture": 0.6512999725341797, + "IoU.hood": 0.45360000610351564, + "IoU.sconce": 0.3265999984741211, + "IoU.vase": 0.29149999618530276, + "IoU.traffic light": 0.24569999694824218, + "IoU.tray": 0.04809999942779541, + "IoU.ashcan": 0.33, + "IoU.fan": 0.535, + "IoU.pier": 0.29959999084472655, + "IoU.crt screen": 0.0005000000074505806, + "IoU.plate": 0.5088999938964843, + "IoU.monitor": 0.06889999866485595, + "IoU.bulletin board": 0.4670999908447266, + "IoU.shower": 0.005899999737739563, + "IoU.radiator": 0.5684999847412109, + "IoU.glass": 0.15369999885559082, + "IoU.clock": 0.29170000076293945, + "IoU.flag": 0.39310001373291015, + "Acc.wall": 0.882300033569336, + "Acc.building": 0.9262000274658203, + "Acc.sky": 0.9673999786376953, + "Acc.floor": 0.9022000122070313, + "Acc.tree": 0.8808999633789063, + "Acc.ceiling": 0.913499984741211, + "Acc.road": 0.9005999755859375, + "Acc.bed ": 0.965, + "Acc.windowpane": 0.7855000305175781, + "Acc.grass": 0.7837999725341797, + "Acc.cabinet": 0.7712999725341797, + "Acc.sidewalk": 0.8058000183105469, + "Acc.person": 0.9383999633789063, + "Acc.earth": 0.5681999969482422, + "Acc.door": 0.6748000335693359, + "Acc.table": 0.7501999664306641, + "Acc.mountain": 0.7383999633789062, + "Acc.plant": 0.6288000106811523, + "Acc.curtain": 0.8706999969482422, + "Acc.chair": 0.7322000122070312, + "Acc.car": 0.9422000122070312, + "Acc.water": 0.6861000061035156, + "Acc.painting": 0.8620999908447265, + "Acc.sofa": 0.845, + "Acc.shelf": 0.5990999984741211, + "Acc.house": 0.7081999969482422, + "Acc.sea": 0.8916000366210938, + "Acc.mirror": 0.8151999664306641, + "Acc.rug": 0.7054000091552735, + "Acc.field": 0.6291999816894531, + "Acc.armchair": 0.6304000091552734, + "Acc.seat": 0.8701999664306641, + 
"Acc.fence": 0.6356000137329102, + "Acc.desk": 0.7322000122070312, + "Acc.rock": 0.7156999969482422, + "Acc.wardrobe": 0.7012000274658203, + "Acc.lamp": 0.7120999908447265, + "Acc.bathtub": 0.8922000122070313, + "Acc.railing": 0.4791999816894531, + "Acc.cushion": 0.715, + "Acc.base": 0.5204999923706055, + "Acc.box": 0.268700008392334, + "Acc.column": 0.6009000015258789, + "Acc.signboard": 0.47900001525878905, + "Acc.chest of drawers": 0.5658000183105468, + "Acc.counter": 0.5093000030517578, + "Acc.sand": 0.6929000091552734, + "Acc.sink": 0.7695999908447265, + "Acc.skyscraper": 0.629000015258789, + "Acc.fireplace": 0.9111000061035156, + "Acc.refrigerator": 0.8605999755859375, + "Acc.grandstand": 0.7190000152587891, + "Acc.path": 0.2690999984741211, + "Acc.stairs": 0.4047999954223633, + "Acc.runway": 0.9326000213623047, + "Acc.case": 0.7251000213623047, + "Acc.pool table": 0.9804000091552735, + "Acc.pillow": 0.6966000366210937, + "Acc.screen door": 0.610999984741211, + "Acc.stairway": 0.5475, + "Acc.river": 0.28719999313354494, + "Acc.bridge": 0.6638999938964844, + "Acc.bookcase": 0.6254999923706055, + "Acc.blind": 0.39430000305175783, + "Acc.coffee table": 0.8095999908447266, + "Acc.toilet": 0.9030000305175782, + "Acc.flower": 0.5318000030517578, + "Acc.book": 0.5647999954223633, + "Acc.hill": 0.21899999618530275, + "Acc.bench": 0.5086999893188476, + "Acc.countertop": 0.7522000122070313, + "Acc.stove": 0.8219000244140625, + "Acc.palm": 0.6869000244140625, + "Acc.kitchen island": 0.6413999938964844, + "Acc.computer": 0.7904000091552734, + "Acc.swivel chair": 0.6869000244140625, + "Acc.boat": 0.7951000213623047, + "Acc.bar": 0.6405000305175781, + "Acc.arcade machine": 0.7577999877929688, + "Acc.hovel": 0.32099998474121094, + "Acc.bus": 0.9518000030517578, + "Acc.towel": 0.7715000152587891, + "Acc.light": 0.518400001525879, + "Acc.truck": 0.412400016784668, + "Acc.tower": 0.32459999084472657, + "Acc.chandelier": 0.8051000213623047, + "Acc.awning": 0.283700008392334, + "Acc.streetlight": 0.23909999847412108, + "Acc.booth": 0.49380001068115237, + "Acc.television receiver": 0.8562000274658204, + "Acc.airplane": 0.697699966430664, + "Acc.dirt track": 0.22059999465942381, + "Acc.apparel": 0.40240001678466797, + "Acc.pole": 0.32970001220703127, + "Acc.land": 0.19329999923706054, + "Acc.bannister": 0.14050000190734863, + "Acc.escalator": 0.7886000061035157, + "Acc.ottoman": 0.6918000030517578, + "Acc.bottle": 0.26930000305175783, + "Acc.buffet": 0.7738999938964843, + "Acc.poster": 0.256200008392334, + "Acc.stage": 0.19989999771118164, + "Acc.van": 0.39169998168945314, + "Acc.ship": 0.4759999847412109, + "Acc.fountain": 0.12979999542236328, + "Acc.conveyer belt": 0.9306999969482422, + "Acc.canopy": 0.25690000534057617, + "Acc.washer": 0.68, + "Acc.plaything": 0.610099983215332, + "Acc.swimming pool": 0.867699966430664, + "Acc.stool": 0.4841999816894531, + "Acc.barrel": 0.6512000274658203, + "Acc.basket": 0.330099983215332, + "Acc.waterfall": 0.6975, + "Acc.tent": 0.9762000274658204, + "Acc.bag": 0.20860000610351562, + "Acc.minibike": 0.8072000122070313, + "Acc.cradle": 0.9652999877929688, + "Acc.oven": 0.49720001220703125, + "Acc.ball": 0.5661999893188476, + "Acc.food": 0.6718000030517578, + "Acc.step": 0.1125, + "Acc.tank": 0.6497000122070312, + "Acc.trade name": 0.28899999618530275, + "Acc.microwave": 0.6455999755859375, + "Acc.pot": 0.46849998474121096, + "Acc.animal": 0.609900016784668, + "Acc.bicycle": 0.5993000030517578, + "Acc.lake": 0.6937000274658203, + "Acc.dishwasher": 0.648499984741211, + 
"Acc.screen": 0.7230000305175781, + "Acc.blanket": 0.13109999656677246, + "Acc.sculpture": 0.7977999877929688, + "Acc.hood": 0.5681000137329102, + "Acc.sconce": 0.4381999969482422, + "Acc.vase": 0.48220001220703124, + "Acc.traffic light": 0.39, + "Acc.tray": 0.06, + "Acc.ashcan": 0.505099983215332, + "Acc.fan": 0.6548999786376953, + "Acc.pier": 0.43200000762939456, + "Acc.crt screen": 0.0015000000596046448, + "Acc.plate": 0.6919999694824219, + "Acc.monitor": 0.10789999961853028, + "Acc.bulletin board": 0.5536000061035157, + "Acc.shower": 0.05, + "Acc.radiator": 0.7516999816894532, + "Acc.glass": 0.1718000030517578, + "Acc.clock": 0.3215000152587891, + "Acc.flag": 0.4109000015258789 + } + }, + "78": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8349, + "mIoU": 0.48350000000000004, + "mAcc": 0.5746, + "IoU.wall": 0.7758000183105469, + "IoU.building": 0.8304000091552735, + "IoU.sky": 0.9386000061035156, + "IoU.floor": 0.8172000122070312, + "IoU.tree": 0.7405999755859375, + "IoU.ceiling": 0.8362000274658203, + "IoU.road": 0.8279000091552734, + "IoU.bed ": 0.8952999877929687, + "IoU.windowpane": 0.6140999984741211, + "IoU.grass": 0.6969999694824218, + "IoU.cabinet": 0.634000015258789, + "IoU.sidewalk": 0.6502999877929687, + "IoU.person": 0.8095999908447266, + "IoU.earth": 0.42119998931884767, + "IoU.door": 0.5208000183105469, + "IoU.table": 0.6129999923706054, + "IoU.mountain": 0.5827000045776367, + "IoU.plant": 0.5081999969482421, + "IoU.curtain": 0.7352999877929688, + "IoU.chair": 0.5709999847412109, + "IoU.car": 0.8416999816894531, + "IoU.water": 0.5618999862670898, + "IoU.painting": 0.7190000152587891, + "IoU.sofa": 0.6769999694824219, + "IoU.shelf": 0.42389999389648436, + "IoU.house": 0.48970001220703124, + "IoU.sea": 0.6405999755859375, + "IoU.mirror": 0.6825, + "IoU.rug": 0.590900001525879, + "IoU.field": 0.36950000762939456, + "IoU.armchair": 0.44959999084472657, + "IoU.seat": 0.6744000244140625, + "IoU.fence": 0.4677000045776367, + "IoU.desk": 0.48689998626708986, + "IoU.rock": 0.5427999877929688, + "IoU.wardrobe": 0.5611000061035156, + "IoU.lamp": 0.5456999969482422, + "IoU.bathtub": 0.8462000274658203, + "IoU.railing": 0.38779998779296876, + "IoU.cushion": 0.5934000015258789, + "IoU.base": 0.311299991607666, + "IoU.box": 0.2347999954223633, + "IoU.column": 0.47099998474121096, + "IoU.signboard": 0.34950000762939454, + "IoU.chest of drawers": 0.34580001831054685, + "IoU.counter": 0.36520000457763674, + "IoU.sand": 0.47319999694824216, + "IoU.sink": 0.6911000061035156, + "IoU.skyscraper": 0.48950000762939455, + "IoU.fireplace": 0.7144999694824219, + "IoU.refrigerator": 0.7488999938964844, + "IoU.grandstand": 0.4779000091552734, + "IoU.path": 0.18520000457763672, + "IoU.stairs": 0.3208000183105469, + "IoU.runway": 0.6313000106811524, + "IoU.case": 0.5756999969482421, + "IoU.pool table": 0.9286000061035157, + "IoU.pillow": 0.5431000137329102, + "IoU.screen door": 0.683499984741211, + "IoU.stairway": 0.4340999984741211, + "IoU.river": 0.18930000305175781, + "IoU.bridge": 0.5877000045776367, + "IoU.bookcase": 0.34150001525878904, + "IoU.blind": 0.36840000152587893, + "IoU.coffee table": 0.6569999694824219, + "IoU.toilet": 0.813499984741211, + "IoU.flower": 0.4102000045776367, + "IoU.book": 0.4109000015258789, + "IoU.hill": 0.13789999961853028, + "IoU.bench": 0.4675, + "IoU.countertop": 0.5561000061035156, + "IoU.stove": 0.7558000183105469, + "IoU.palm": 0.4656999969482422, + "IoU.kitchen 
island": 0.39610000610351564, + "IoU.computer": 0.7401999664306641, + "IoU.swivel chair": 0.4997999954223633, + "IoU.boat": 0.7218000030517578, + "IoU.bar": 0.49389999389648437, + "IoU.arcade machine": 0.3970999908447266, + "IoU.hovel": 0.19180000305175782, + "IoU.bus": 0.9066000366210938, + "IoU.towel": 0.6134999847412109, + "IoU.light": 0.19989999771118164, + "IoU.truck": 0.27860000610351565, + "IoU.tower": 0.23940000534057618, + "IoU.chandelier": 0.6190000152587891, + "IoU.awning": 0.176200008392334, + "IoU.streetlight": 0.15020000457763671, + "IoU.booth": 0.3945000076293945, + "IoU.television receiver": 0.6987999725341797, + "IoU.airplane": 0.6147000122070313, + "IoU.dirt track": 0.010499999523162842, + "IoU.apparel": 0.2763999938964844, + "IoU.pole": 0.20879999160766602, + "IoU.land": 0.009200000166893006, + "IoU.bannister": 0.06119999885559082, + "IoU.escalator": 0.605099983215332, + "IoU.ottoman": 0.5072000122070313, + "IoU.bottle": 0.2159000015258789, + "IoU.buffet": 0.44880001068115233, + "IoU.poster": 0.28829999923706057, + "IoU.stage": 0.12539999961853027, + "IoU.van": 0.278700008392334, + "IoU.ship": 0.5309999847412109, + "IoU.fountain": 0.22899999618530273, + "IoU.conveyer belt": 0.7791999816894531, + "IoU.canopy": 0.27899999618530275, + "IoU.washer": 0.6969000244140625, + "IoU.plaything": 0.34700000762939454, + "IoU.swimming pool": 0.7340000152587891, + "IoU.stool": 0.369900016784668, + "IoU.barrel": 0.6020000076293945, + "IoU.basket": 0.24649999618530274, + "IoU.waterfall": 0.57, + "IoU.tent": 0.9551000213623047, + "IoU.bag": 0.149399995803833, + "IoU.minibike": 0.7084999847412109, + "IoU.cradle": 0.739800033569336, + "IoU.oven": 0.4075, + "IoU.ball": 0.3243000030517578, + "IoU.food": 0.5383000183105469, + "IoU.step": 0.081899995803833, + "IoU.tank": 0.5752000045776368, + "IoU.trade name": 0.15279999732971192, + "IoU.microwave": 0.7484999847412109, + "IoU.pot": 0.45919998168945314, + "IoU.animal": 0.6430999755859375, + "IoU.bicycle": 0.5077000045776368, + "IoU.lake": 0.6052000045776367, + "IoU.dishwasher": 0.5515000152587891, + "IoU.screen": 0.5538000106811524, + "IoU.blanket": 0.12789999961853027, + "IoU.sculpture": 0.5904000091552735, + "IoU.hood": 0.38599998474121094, + "IoU.sconce": 0.2322999954223633, + "IoU.vase": 0.30790000915527344, + "IoU.traffic light": 0.18219999313354493, + "IoU.tray": 0.007599999904632568, + "IoU.ashcan": 0.3438999938964844, + "IoU.fan": 0.38610000610351564, + "IoU.pier": 0.3345999908447266, + "IoU.crt screen": 0.0, + "IoU.plate": 0.5061000061035156, + "IoU.monitor": 0.2578000068664551, + "IoU.bulletin board": 0.3833000183105469, + "IoU.shower": 0.03119999885559082, + "IoU.radiator": 0.5650999832153321, + "IoU.glass": 0.09439999580383301, + "IoU.clock": 0.2797999954223633, + "IoU.flag": 0.35209999084472654, + "Acc.wall": 0.9070999908447266, + "Acc.building": 0.9375, + "Acc.sky": 0.9766999816894532, + "Acc.floor": 0.9125, + "Acc.tree": 0.8813999938964844, + "Acc.ceiling": 0.9120999908447266, + "Acc.road": 0.9094999694824218, + "Acc.bed ": 0.9583000183105469, + "Acc.windowpane": 0.7441999816894531, + "Acc.grass": 0.793499984741211, + "Acc.cabinet": 0.7851000213623047, + "Acc.sidewalk": 0.795, + "Acc.person": 0.9102999877929687, + "Acc.earth": 0.6516999816894531, + "Acc.door": 0.6951000213623046, + "Acc.table": 0.7725, + "Acc.mountain": 0.7063999938964843, + "Acc.plant": 0.6061000061035157, + "Acc.curtain": 0.8298999786376953, + "Acc.chair": 0.691500015258789, + "Acc.car": 0.910199966430664, + "Acc.water": 0.6954000091552734, + "Acc.painting": 
0.8183999633789063, + "Acc.sofa": 0.819800033569336, + "Acc.shelf": 0.5784999847412109, + "Acc.house": 0.6462000274658203, + "Acc.sea": 0.8387000274658203, + "Acc.mirror": 0.7447000122070313, + "Acc.rug": 0.6427999877929688, + "Acc.field": 0.5579999923706055, + "Acc.armchair": 0.6629000091552735, + "Acc.seat": 0.8436000061035156, + "Acc.fence": 0.6188000106811523, + "Acc.desk": 0.6894999694824219, + "Acc.rock": 0.6987999725341797, + "Acc.wardrobe": 0.6955000305175781, + "Acc.lamp": 0.6204999923706055, + "Acc.bathtub": 0.8813999938964844, + "Acc.railing": 0.4956000137329102, + "Acc.cushion": 0.6825, + "Acc.base": 0.5061999893188477, + "Acc.box": 0.2855999946594238, + "Acc.column": 0.5718999862670898, + "Acc.signboard": 0.4354999923706055, + "Acc.chest of drawers": 0.5354000091552734, + "Acc.counter": 0.47319999694824216, + "Acc.sand": 0.5647999954223633, + "Acc.sink": 0.7258999633789063, + "Acc.skyscraper": 0.5940000152587891, + "Acc.fireplace": 0.8929000091552735, + "Acc.refrigerator": 0.795199966430664, + "Acc.grandstand": 0.7555999755859375, + "Acc.path": 0.233799991607666, + "Acc.stairs": 0.4366999816894531, + "Acc.runway": 0.8044000244140626, + "Acc.case": 0.7168000030517578, + "Acc.pool table": 0.9487999725341797, + "Acc.pillow": 0.6365000152587891, + "Acc.screen door": 0.7480999755859375, + "Acc.stairway": 0.4918000030517578, + "Acc.river": 0.39040000915527345, + "Acc.bridge": 0.6706999969482422, + "Acc.bookcase": 0.6088999938964844, + "Acc.blind": 0.39310001373291015, + "Acc.coffee table": 0.7711000061035156, + "Acc.toilet": 0.895, + "Acc.flower": 0.5202999877929687, + "Acc.book": 0.5181999969482421, + "Acc.hill": 0.2290999984741211, + "Acc.bench": 0.5511000061035156, + "Acc.countertop": 0.7462999725341797, + "Acc.stove": 0.8062999725341797, + "Acc.palm": 0.5529000091552735, + "Acc.kitchen island": 0.48759998321533204, + "Acc.computer": 0.8433000183105469, + "Acc.swivel chair": 0.6125, + "Acc.boat": 0.8083999633789063, + "Acc.bar": 0.6011999893188477, + "Acc.arcade machine": 0.41900001525878905, + "Acc.hovel": 0.2022999954223633, + "Acc.bus": 0.9355000305175781, + "Acc.towel": 0.7301000213623047, + "Acc.light": 0.20670000076293946, + "Acc.truck": 0.3661000061035156, + "Acc.tower": 0.30920000076293946, + "Acc.chandelier": 0.7104000091552735, + "Acc.awning": 0.17989999771118165, + "Acc.streetlight": 0.17100000381469727, + "Acc.booth": 0.475, + "Acc.television receiver": 0.7683000183105468, + "Acc.airplane": 0.6491999816894531, + "Acc.dirt track": 0.037100000381469725, + "Acc.apparel": 0.3377000045776367, + "Acc.pole": 0.29850000381469727, + "Acc.land": 0.012200000286102295, + "Acc.bannister": 0.07440000057220458, + "Acc.escalator": 0.7733999633789063, + "Acc.ottoman": 0.6744999694824219, + "Acc.bottle": 0.2671999931335449, + "Acc.buffet": 0.5136999893188476, + "Acc.poster": 0.41259998321533203, + "Acc.stage": 0.1743000030517578, + "Acc.van": 0.3270000076293945, + "Acc.ship": 0.5438000106811524, + "Acc.fountain": 0.23149999618530273, + "Acc.conveyer belt": 0.9151000213623047, + "Acc.canopy": 0.30590000152587893, + "Acc.washer": 0.7255000305175782, + "Acc.plaything": 0.5861000061035156, + "Acc.swimming pool": 0.8168000030517578, + "Acc.stool": 0.43349998474121093, + "Acc.barrel": 0.6405999755859375, + "Acc.basket": 0.2981999969482422, + "Acc.waterfall": 0.6338000106811523, + "Acc.tent": 0.9698999786376953, + "Acc.bag": 0.16370000839233398, + "Acc.minibike": 0.7825, + "Acc.cradle": 0.9462000274658203, + "Acc.oven": 0.5943000030517578, + "Acc.ball": 0.35310001373291017, + "Acc.food": 
0.6104000091552735, + "Acc.step": 0.10699999809265137, + "Acc.tank": 0.629000015258789, + "Acc.trade name": 0.15880000114440918, + "Acc.microwave": 0.7783999633789063, + "Acc.pot": 0.5122000122070313, + "Acc.animal": 0.6681999969482422, + "Acc.bicycle": 0.5738999938964844, + "Acc.lake": 0.6858000183105468, + "Acc.dishwasher": 0.6215000152587891, + "Acc.screen": 0.6808999633789062, + "Acc.blanket": 0.13880000114440919, + "Acc.sculpture": 0.7616999816894531, + "Acc.hood": 0.44599998474121094, + "Acc.sconce": 0.2811000061035156, + "Acc.vase": 0.4052000045776367, + "Acc.traffic light": 0.21729999542236328, + "Acc.tray": 0.008199999928474427, + "Acc.ashcan": 0.5420000076293945, + "Acc.fan": 0.4234999847412109, + "Acc.pier": 0.41720001220703123, + "Acc.crt screen": 0.0, + "Acc.plate": 0.6566999816894531, + "Acc.monitor": 0.32119998931884763, + "Acc.bulletin board": 0.467599983215332, + "Acc.shower": 0.04110000133514404, + "Acc.radiator": 0.634000015258789, + "Acc.glass": 0.096899995803833, + "Acc.clock": 0.30950000762939456, + "Acc.flag": 0.3584000015258789 + } + }, + "79": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8329000000000001, + "mIoU": 0.4715, + "mAcc": 0.5523, + "IoU.wall": 0.7697000122070312, + "IoU.building": 0.8323000335693359, + "IoU.sky": 0.9354000091552734, + "IoU.floor": 0.8125, + "IoU.tree": 0.7333999633789062, + "IoU.ceiling": 0.8337999725341797, + "IoU.road": 0.8280000305175781, + "IoU.bed ": 0.8862000274658203, + "IoU.windowpane": 0.6052000045776367, + "IoU.grass": 0.6947000122070313, + "IoU.cabinet": 0.635, + "IoU.sidewalk": 0.6493000030517578, + "IoU.person": 0.8129000091552734, + "IoU.earth": 0.41310001373291017, + "IoU.door": 0.5118999862670899, + "IoU.table": 0.6188999938964844, + "IoU.mountain": 0.5895000076293946, + "IoU.plant": 0.5104000091552734, + "IoU.curtain": 0.7461000061035157, + "IoU.chair": 0.5549000167846679, + "IoU.car": 0.8334999847412109, + "IoU.water": 0.5838999938964844, + "IoU.painting": 0.7158000183105468, + "IoU.sofa": 0.6881999969482422, + "IoU.shelf": 0.4293000030517578, + "IoU.house": 0.4743000030517578, + "IoU.sea": 0.6830999755859375, + "IoU.mirror": 0.6873999786376953, + "IoU.rug": 0.5645000076293946, + "IoU.field": 0.35060001373291017, + "IoU.armchair": 0.45310001373291015, + "IoU.seat": 0.6811000061035156, + "IoU.fence": 0.4306999969482422, + "IoU.desk": 0.49720001220703125, + "IoU.rock": 0.5102000045776367, + "IoU.wardrobe": 0.5277000045776368, + "IoU.lamp": 0.5438999938964844, + "IoU.bathtub": 0.8284999847412109, + "IoU.railing": 0.3843999862670898, + "IoU.cushion": 0.590900001525879, + "IoU.base": 0.29469999313354495, + "IoU.box": 0.2502000045776367, + "IoU.column": 0.4704000091552734, + "IoU.signboard": 0.33669998168945314, + "IoU.chest of drawers": 0.36790000915527343, + "IoU.counter": 0.3843999862670898, + "IoU.sand": 0.43130001068115237, + "IoU.sink": 0.6812999725341797, + "IoU.skyscraper": 0.49849998474121093, + "IoU.fireplace": 0.6823000335693359, + "IoU.refrigerator": 0.7390000152587891, + "IoU.grandstand": 0.49130001068115237, + "IoU.path": 0.19620000839233398, + "IoU.stairs": 0.21899999618530275, + "IoU.runway": 0.6616000366210938, + "IoU.case": 0.5654000091552734, + "IoU.pool table": 0.9280000305175782, + "IoU.pillow": 0.4909000015258789, + "IoU.screen door": 0.7030000305175781, + "IoU.stairway": 0.32470001220703126, + "IoU.river": 0.15989999771118163, + "IoU.bridge": 0.5113000106811524, + "IoU.bookcase": 0.35139999389648435, + 
"IoU.blind": 0.349900016784668, + "IoU.coffee table": 0.6552999877929687, + "IoU.toilet": 0.8461000061035157, + "IoU.flower": 0.3486000061035156, + "IoU.book": 0.4481999969482422, + "IoU.hill": 0.1390999984741211, + "IoU.bench": 0.45529998779296876, + "IoU.countertop": 0.5597000122070312, + "IoU.stove": 0.7362000274658204, + "IoU.palm": 0.4231999969482422, + "IoU.kitchen island": 0.3110000038146973, + "IoU.computer": 0.6302000045776367, + "IoU.swivel chair": 0.42380001068115236, + "IoU.boat": 0.6848000335693359, + "IoU.bar": 0.5408000183105469, + "IoU.arcade machine": 0.5922000122070312, + "IoU.hovel": 0.25989999771118166, + "IoU.bus": 0.8958000183105469, + "IoU.towel": 0.6329999923706054, + "IoU.light": 0.1468000030517578, + "IoU.truck": 0.21860000610351563, + "IoU.tower": 0.18670000076293947, + "IoU.chandelier": 0.6033000183105469, + "IoU.awning": 0.1386999988555908, + "IoU.streetlight": 0.12260000228881836, + "IoU.booth": 0.35450000762939454, + "IoU.television receiver": 0.65, + "IoU.airplane": 0.5804000091552735, + "IoU.dirt track": 0.05409999847412109, + "IoU.apparel": 0.316200008392334, + "IoU.pole": 0.16840000152587892, + "IoU.land": 0.011799999475479127, + "IoU.bannister": 0.07980000019073487, + "IoU.escalator": 0.6270000076293946, + "IoU.ottoman": 0.5086999893188476, + "IoU.bottle": 0.22010000228881835, + "IoU.buffet": 0.33790000915527346, + "IoU.poster": 0.1840999984741211, + "IoU.stage": 0.09069999694824218, + "IoU.van": 0.22600000381469726, + "IoU.ship": 0.0696999979019165, + "IoU.fountain": 0.19190000534057616, + "IoU.conveyer belt": 0.7802999877929687, + "IoU.canopy": 0.19049999237060547, + "IoU.washer": 0.7204000091552735, + "IoU.plaything": 0.4179000091552734, + "IoU.swimming pool": 0.7543000030517578, + "IoU.stool": 0.3781999969482422, + "IoU.barrel": 0.59, + "IoU.basket": 0.2427000045776367, + "IoU.waterfall": 0.4759999847412109, + "IoU.tent": 0.9523000335693359, + "IoU.bag": 0.14479999542236327, + "IoU.minibike": 0.6819000244140625, + "IoU.cradle": 0.8037999725341797, + "IoU.oven": 0.21760000228881837, + "IoU.ball": 0.5418999862670898, + "IoU.food": 0.5906999969482422, + "IoU.step": 0.08869999885559082, + "IoU.tank": 0.450099983215332, + "IoU.trade name": 0.09180000305175781, + "IoU.microwave": 0.43709999084472656, + "IoU.pot": 0.4293000030517578, + "IoU.animal": 0.6056999969482422, + "IoU.bicycle": 0.5281000137329102, + "IoU.lake": 0.5691999816894531, + "IoU.dishwasher": 0.6513999938964844, + "IoU.screen": 0.5816999816894531, + "IoU.blanket": 0.11710000038146973, + "IoU.sculpture": 0.6897000122070313, + "IoU.hood": 0.38790000915527345, + "IoU.sconce": 0.2272999954223633, + "IoU.vase": 0.3384000015258789, + "IoU.traffic light": 0.1909000015258789, + "IoU.tray": 0.03119999885559082, + "IoU.ashcan": 0.36220001220703124, + "IoU.fan": 0.39930000305175783, + "IoU.pier": 0.33060001373291015, + "IoU.crt screen": 0.07510000228881836, + "IoU.plate": 0.4993000030517578, + "IoU.monitor": 0.44529998779296875, + "IoU.bulletin board": 0.3784000015258789, + "IoU.shower": 0.01059999942779541, + "IoU.radiator": 0.5636000061035156, + "IoU.glass": 0.061999998092651366, + "IoU.clock": 0.22959999084472657, + "IoU.flag": 0.28049999237060547, + "Acc.wall": 0.9219999694824219, + "Acc.building": 0.9429000091552734, + "Acc.sky": 0.9780999755859375, + "Acc.floor": 0.9201000213623047, + "Acc.tree": 0.8848999786376953, + "Acc.ceiling": 0.9129000091552735, + "Acc.road": 0.9205999755859375, + "Acc.bed ": 0.9536000061035156, + "Acc.windowpane": 0.716500015258789, + "Acc.grass": 0.8011000061035156, + 
"Acc.cabinet": 0.7818000030517578, + "Acc.sidewalk": 0.7862000274658203, + "Acc.person": 0.908499984741211, + "Acc.earth": 0.6327000045776368, + "Acc.door": 0.669000015258789, + "Acc.table": 0.7643000030517578, + "Acc.mountain": 0.6991000366210938, + "Acc.plant": 0.6041999816894531, + "Acc.curtain": 0.8266999816894531, + "Acc.chair": 0.6612999725341797, + "Acc.car": 0.900199966430664, + "Acc.water": 0.726500015258789, + "Acc.painting": 0.8098000335693359, + "Acc.sofa": 0.847699966430664, + "Acc.shelf": 0.594900016784668, + "Acc.house": 0.5681000137329102, + "Acc.sea": 0.8405999755859375, + "Acc.mirror": 0.7370999908447265, + "Acc.rug": 0.6154999923706055, + "Acc.field": 0.5236999893188476, + "Acc.armchair": 0.6609999847412109, + "Acc.seat": 0.8358000183105468, + "Acc.fence": 0.5875, + "Acc.desk": 0.688499984741211, + "Acc.rock": 0.6554000091552734, + "Acc.wardrobe": 0.6502999877929687, + "Acc.lamp": 0.6075, + "Acc.bathtub": 0.8641999816894531, + "Acc.railing": 0.49080001831054687, + "Acc.cushion": 0.6537000274658203, + "Acc.base": 0.4613999938964844, + "Acc.box": 0.3034000015258789, + "Acc.column": 0.5515999984741211, + "Acc.signboard": 0.39470001220703127, + "Acc.chest of drawers": 0.5147000122070312, + "Acc.counter": 0.5104000091552734, + "Acc.sand": 0.5697999954223633, + "Acc.sink": 0.7166999816894531, + "Acc.skyscraper": 0.5675, + "Acc.fireplace": 0.7659999847412109, + "Acc.refrigerator": 0.7736000061035156, + "Acc.grandstand": 0.7402999877929688, + "Acc.path": 0.24170000076293946, + "Acc.stairs": 0.2763999938964844, + "Acc.runway": 0.8552999877929688, + "Acc.case": 0.7052999877929688, + "Acc.pool table": 0.9498999786376953, + "Acc.pillow": 0.5518000030517578, + "Acc.screen door": 0.727699966430664, + "Acc.stairway": 0.4615999984741211, + "Acc.river": 0.342599983215332, + "Acc.bridge": 0.5706000137329101, + "Acc.bookcase": 0.5654999923706054, + "Acc.blind": 0.3668000030517578, + "Acc.coffee table": 0.7337000274658203, + "Acc.toilet": 0.8912999725341797, + "Acc.flower": 0.4136000061035156, + "Acc.book": 0.5693000030517578, + "Acc.hill": 0.21629999160766603, + "Acc.bench": 0.5095999908447265, + "Acc.countertop": 0.7404000091552735, + "Acc.stove": 0.7695999908447265, + "Acc.palm": 0.47810001373291017, + "Acc.kitchen island": 0.37560001373291013, + "Acc.computer": 0.6940000152587891, + "Acc.swivel chair": 0.5090999984741211, + "Acc.boat": 0.7248999786376953, + "Acc.bar": 0.6245000076293945, + "Acc.arcade machine": 0.619900016784668, + "Acc.hovel": 0.26920000076293943, + "Acc.bus": 0.9094999694824218, + "Acc.towel": 0.719800033569336, + "Acc.light": 0.1493000030517578, + "Acc.truck": 0.27549999237060546, + "Acc.tower": 0.23350000381469727, + "Acc.chandelier": 0.6919000244140625, + "Acc.awning": 0.14229999542236327, + "Acc.streetlight": 0.13859999656677247, + "Acc.booth": 0.3683000183105469, + "Acc.television receiver": 0.6959999847412109, + "Acc.airplane": 0.6190000152587891, + "Acc.dirt track": 0.1634000015258789, + "Acc.apparel": 0.40549999237060547, + "Acc.pole": 0.203700008392334, + "Acc.land": 0.01399999976158142, + "Acc.bannister": 0.09420000076293945, + "Acc.escalator": 0.7858999633789062, + "Acc.ottoman": 0.667300033569336, + "Acc.bottle": 0.25239999771118166, + "Acc.buffet": 0.3895999908447266, + "Acc.poster": 0.2893000030517578, + "Acc.stage": 0.1125, + "Acc.van": 0.2545000076293945, + "Acc.ship": 0.07400000095367432, + "Acc.fountain": 0.1925, + "Acc.conveyer belt": 0.9166999816894531, + "Acc.canopy": 0.19690000534057617, + "Acc.washer": 0.74, + "Acc.plaything": 0.5425, + 
"Acc.swimming pool": 0.8030999755859375, + "Acc.stool": 0.429900016784668, + "Acc.barrel": 0.6377999877929688, + "Acc.basket": 0.2734000015258789, + "Acc.waterfall": 0.5597000122070312, + "Acc.tent": 0.9616999816894531, + "Acc.bag": 0.15609999656677245, + "Acc.minibike": 0.732699966430664, + "Acc.cradle": 0.9469000244140625, + "Acc.oven": 0.5541999816894532, + "Acc.ball": 0.5802999877929688, + "Acc.food": 0.674000015258789, + "Acc.step": 0.10779999732971192, + "Acc.tank": 0.5063999938964844, + "Acc.trade name": 0.09470000267028808, + "Acc.microwave": 0.45430000305175783, + "Acc.pot": 0.46279998779296877, + "Acc.animal": 0.629000015258789, + "Acc.bicycle": 0.5902000045776368, + "Acc.lake": 0.6727999877929688, + "Acc.dishwasher": 0.6726000213623047, + "Acc.screen": 0.6733999633789063, + "Acc.blanket": 0.13289999961853027, + "Acc.sculpture": 0.7877999877929688, + "Acc.hood": 0.44459999084472657, + "Acc.sconce": 0.26059999465942385, + "Acc.vase": 0.42369998931884767, + "Acc.traffic light": 0.22100000381469725, + "Acc.tray": 0.034200000762939456, + "Acc.ashcan": 0.5125999832153321, + "Acc.fan": 0.45229999542236327, + "Acc.pier": 0.38799999237060545, + "Acc.crt screen": 0.10689999580383301, + "Acc.plate": 0.6066999816894532, + "Acc.monitor": 0.6677999877929688, + "Acc.bulletin board": 0.42439998626708986, + "Acc.shower": 0.015399999618530273, + "Acc.radiator": 0.5983000183105469, + "Acc.glass": 0.06300000190734863, + "Acc.clock": 0.24110000610351562, + "Acc.flag": 0.28420000076293944 + } + }, + "80": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8162999999999999, + "mIoU": 0.4484, + "mAcc": 0.5646, + "IoU.wall": 0.7572000122070313, + "IoU.building": 0.8205000305175781, + "IoU.sky": 0.9366000366210937, + "IoU.floor": 0.8012000274658203, + "IoU.tree": 0.7322000122070312, + "IoU.ceiling": 0.8270999908447265, + "IoU.road": 0.8194000244140625, + "IoU.bed ": 0.8630000305175781, + "IoU.windowpane": 0.6045999908447266, + "IoU.grass": 0.6544000244140625, + "IoU.cabinet": 0.5820999908447265, + "IoU.sidewalk": 0.6252999877929688, + "IoU.person": 0.7641999816894531, + "IoU.earth": 0.3295999908447266, + "IoU.door": 0.4490999984741211, + "IoU.table": 0.5454000091552734, + "IoU.mountain": 0.5640000152587891, + "IoU.plant": 0.49450000762939456, + "IoU.curtain": 0.7244999694824219, + "IoU.chair": 0.5029000091552734, + "IoU.car": 0.8137000274658203, + "IoU.water": 0.5133000183105468, + "IoU.painting": 0.6762999725341797, + "IoU.sofa": 0.6359999847412109, + "IoU.shelf": 0.4052999877929688, + "IoU.house": 0.5356999969482422, + "IoU.sea": 0.6020000076293945, + "IoU.mirror": 0.630099983215332, + "IoU.rug": 0.6676999664306641, + "IoU.field": 0.32060001373291014, + "IoU.armchair": 0.4370000076293945, + "IoU.seat": 0.5886999893188477, + "IoU.fence": 0.38119998931884763, + "IoU.desk": 0.45610000610351564, + "IoU.rock": 0.37790000915527344, + "IoU.wardrobe": 0.4490999984741211, + "IoU.lamp": 0.5329000091552735, + "IoU.bathtub": 0.7341999816894531, + "IoU.railing": 0.29719999313354495, + "IoU.cushion": 0.527400016784668, + "IoU.base": 0.24649999618530274, + "IoU.box": 0.2175, + "IoU.column": 0.44049999237060544, + "IoU.signboard": 0.32380001068115233, + "IoU.chest of drawers": 0.33180000305175783, + "IoU.counter": 0.2727000045776367, + "IoU.sand": 0.38529998779296876, + "IoU.sink": 0.6675, + "IoU.skyscraper": 0.5108000183105469, + "IoU.fireplace": 0.6868000030517578, + "IoU.refrigerator": 0.7611000061035156, + 
"IoU.grandstand": 0.4552000045776367, + "IoU.path": 0.2231999969482422, + "IoU.stairs": 0.2593000030517578, + "IoU.runway": 0.6573999786376953, + "IoU.case": 0.48150001525878905, + "IoU.pool table": 0.9123999786376953, + "IoU.pillow": 0.5272000122070313, + "IoU.screen door": 0.5554000091552734, + "IoU.stairway": 0.2528000068664551, + "IoU.river": 0.2531999969482422, + "IoU.bridge": 0.6822000122070313, + "IoU.bookcase": 0.34689998626708984, + "IoU.blind": 0.38479999542236326, + "IoU.coffee table": 0.574900016784668, + "IoU.toilet": 0.8162999725341797, + "IoU.flower": 0.34509998321533203, + "IoU.book": 0.41700000762939454, + "IoU.hill": 0.0784000015258789, + "IoU.bench": 0.4229000091552734, + "IoU.countertop": 0.5581000137329102, + "IoU.stove": 0.6852999877929687, + "IoU.palm": 0.4441999816894531, + "IoU.kitchen island": 0.35830001831054686, + "IoU.computer": 0.6377000045776368, + "IoU.swivel chair": 0.42069999694824217, + "IoU.boat": 0.6908999633789062, + "IoU.bar": 0.4986999893188477, + "IoU.arcade machine": 0.4136000061035156, + "IoU.hovel": 0.5309000015258789, + "IoU.bus": 0.7576000213623046, + "IoU.towel": 0.5422999954223633, + "IoU.light": 0.32229999542236326, + "IoU.truck": 0.1615999984741211, + "IoU.tower": 0.22719999313354491, + "IoU.chandelier": 0.5827000045776367, + "IoU.awning": 0.3120000076293945, + "IoU.streetlight": 0.14539999961853028, + "IoU.booth": 0.31790000915527344, + "IoU.television receiver": 0.6240999984741211, + "IoU.airplane": 0.6154999923706055, + "IoU.dirt track": 0.09840000152587891, + "IoU.apparel": 0.37909999847412107, + "IoU.pole": 0.185, + "IoU.land": 0.03559999942779541, + "IoU.bannister": 0.09640000343322754, + "IoU.escalator": 0.27899999618530275, + "IoU.ottoman": 0.47330001831054686, + "IoU.bottle": 0.32990001678466796, + "IoU.buffet": 0.3883000183105469, + "IoU.poster": 0.15989999771118163, + "IoU.stage": 0.20420000076293945, + "IoU.van": 0.41069999694824216, + "IoU.ship": 0.6677999877929688, + "IoU.fountain": 0.19540000915527345, + "IoU.conveyer belt": 0.5427000045776367, + "IoU.canopy": 0.22120000839233397, + "IoU.washer": 0.7043000030517578, + "IoU.plaything": 0.24469999313354493, + "IoU.swimming pool": 0.6124000167846679, + "IoU.stool": 0.2034000015258789, + "IoU.barrel": 0.4622000122070313, + "IoU.basket": 0.24489999771118165, + "IoU.waterfall": 0.6826000213623047, + "IoU.tent": 0.9216000366210938, + "IoU.bag": 0.1034000015258789, + "IoU.minibike": 0.543499984741211, + "IoU.cradle": 0.7408999633789063, + "IoU.oven": 0.17149999618530273, + "IoU.ball": 0.3813999938964844, + "IoU.food": 0.5441999816894532, + "IoU.step": 0.12710000038146974, + "IoU.tank": 0.49509998321533205, + "IoU.trade name": 0.22790000915527345, + "IoU.microwave": 0.35580001831054686, + "IoU.pot": 0.33689998626708983, + "IoU.animal": 0.5943999862670899, + "IoU.bicycle": 0.4634000015258789, + "IoU.lake": 0.03609999895095825, + "IoU.dishwasher": 0.4972999954223633, + "IoU.screen": 0.5940999984741211, + "IoU.blanket": 0.07820000171661377, + "IoU.sculpture": 0.48509998321533204, + "IoU.hood": 0.47630001068115235, + "IoU.sconce": 0.35139999389648435, + "IoU.vase": 0.22829999923706054, + "IoU.traffic light": 0.256200008392334, + "IoU.tray": 0.02700000047683716, + "IoU.ashcan": 0.32110000610351563, + "IoU.fan": 0.5027999877929688, + "IoU.pier": 0.29920000076293946, + "IoU.crt screen": 0.0, + "IoU.plate": 0.4070999908447266, + "IoU.monitor": 0.04369999885559082, + "IoU.bulletin board": 0.33189998626708983, + "IoU.shower": 0.001899999976158142, + "IoU.radiator": 0.5547000122070312, + 
"IoU.glass": 0.05559999942779541, + "IoU.clock": 0.2325, + "IoU.flag": 0.33220001220703127, + "Acc.wall": 0.8779000091552734, + "Acc.building": 0.9229000091552735, + "Acc.sky": 0.9762000274658204, + "Acc.floor": 0.9008999633789062, + "Acc.tree": 0.8619000244140625, + "Acc.ceiling": 0.9145999908447265, + "Acc.road": 0.8962000274658203, + "Acc.bed ": 0.9466999816894531, + "Acc.windowpane": 0.7679000091552735, + "Acc.grass": 0.8012000274658203, + "Acc.cabinet": 0.6911000061035156, + "Acc.sidewalk": 0.7713999938964844, + "Acc.person": 0.9130000305175782, + "Acc.earth": 0.4559000015258789, + "Acc.door": 0.6109000015258789, + "Acc.table": 0.7083000183105469, + "Acc.mountain": 0.7051000213623047, + "Acc.plant": 0.6166999816894532, + "Acc.curtain": 0.8354000091552735, + "Acc.chair": 0.6388999938964843, + "Acc.car": 0.9037999725341797, + "Acc.water": 0.6633000183105469, + "Acc.painting": 0.8433999633789062, + "Acc.sofa": 0.7945999908447265, + "Acc.shelf": 0.625, + "Acc.house": 0.6587000274658203, + "Acc.sea": 0.8858000183105469, + "Acc.mirror": 0.7191999816894531, + "Acc.rug": 0.7423000335693359, + "Acc.field": 0.5654999923706054, + "Acc.armchair": 0.6333000183105468, + "Acc.seat": 0.770199966430664, + "Acc.fence": 0.5143000030517578, + "Acc.desk": 0.6841000366210938, + "Acc.rock": 0.6216999816894532, + "Acc.wardrobe": 0.5904999923706055, + "Acc.lamp": 0.6626999664306641, + "Acc.bathtub": 0.8094999694824219, + "Acc.railing": 0.4670999908447266, + "Acc.cushion": 0.6691000366210937, + "Acc.base": 0.4118000030517578, + "Acc.box": 0.29940000534057615, + "Acc.column": 0.5565999984741211, + "Acc.signboard": 0.4261000061035156, + "Acc.chest of drawers": 0.5552999877929687, + "Acc.counter": 0.3890000152587891, + "Acc.sand": 0.5265999984741211, + "Acc.sink": 0.754000015258789, + "Acc.skyscraper": 0.6011000061035157, + "Acc.fireplace": 0.8904000091552734, + "Acc.refrigerator": 0.8408999633789063, + "Acc.grandstand": 0.7080000305175781, + "Acc.path": 0.31909999847412107, + "Acc.stairs": 0.33680000305175783, + "Acc.runway": 0.8126999664306641, + "Acc.case": 0.6018999862670898, + "Acc.pool table": 0.9593000030517578, + "Acc.pillow": 0.6138000106811523, + "Acc.screen door": 0.6502999877929687, + "Acc.stairway": 0.3672999954223633, + "Acc.river": 0.4086000061035156, + "Acc.bridge": 0.8131999969482422, + "Acc.bookcase": 0.5677999877929687, + "Acc.blind": 0.4229999923706055, + "Acc.coffee table": 0.7844999694824218, + "Acc.toilet": 0.8780999755859376, + "Acc.flower": 0.5318000030517578, + "Acc.book": 0.5763000106811523, + "Acc.hill": 0.154399995803833, + "Acc.bench": 0.5086999893188476, + "Acc.countertop": 0.696500015258789, + "Acc.stove": 0.7705000305175781, + "Acc.palm": 0.6202000045776367, + "Acc.kitchen island": 0.7213999938964843, + "Acc.computer": 0.7863999938964844, + "Acc.swivel chair": 0.5449000167846679, + "Acc.boat": 0.8433000183105469, + "Acc.bar": 0.6498999786376953, + "Acc.arcade machine": 0.44740001678466795, + "Acc.hovel": 0.6004000091552735, + "Acc.bus": 0.9091999816894532, + "Acc.towel": 0.7173000335693359, + "Acc.light": 0.35009998321533203, + "Acc.truck": 0.22739999771118163, + "Acc.tower": 0.3136000061035156, + "Acc.chandelier": 0.7370999908447265, + "Acc.awning": 0.36849998474121093, + "Acc.streetlight": 0.16030000686645507, + "Acc.booth": 0.3990999984741211, + "Acc.television receiver": 0.7320999908447265, + "Acc.airplane": 0.6877999877929688, + "Acc.dirt track": 0.15689999580383301, + "Acc.apparel": 0.530099983215332, + "Acc.pole": 0.23639999389648436, + "Acc.land": 0.059600000381469724, + 
"Acc.bannister": 0.12800000190734864, + "Acc.escalator": 0.32560001373291014, + "Acc.ottoman": 0.5945999908447266, + "Acc.bottle": 0.5634000015258789, + "Acc.buffet": 0.43259998321533205, + "Acc.poster": 0.20090000152587892, + "Acc.stage": 0.33369998931884765, + "Acc.van": 0.5179999923706055, + "Acc.ship": 0.745, + "Acc.fountain": 0.20700000762939452, + "Acc.conveyer belt": 0.7395999908447266, + "Acc.canopy": 0.32970001220703127, + "Acc.washer": 0.7138999938964844, + "Acc.plaything": 0.4638999938964844, + "Acc.swimming pool": 0.8388999938964844, + "Acc.stool": 0.27360000610351565, + "Acc.barrel": 0.5740999984741211, + "Acc.basket": 0.30260000228881834, + "Acc.waterfall": 0.7826999664306641, + "Acc.tent": 0.9912999725341797, + "Acc.bag": 0.11979999542236328, + "Acc.minibike": 0.6630999755859375, + "Acc.cradle": 0.9673999786376953, + "Acc.oven": 0.4665999984741211, + "Acc.ball": 0.46110000610351565, + "Acc.food": 0.709800033569336, + "Acc.step": 0.15640000343322755, + "Acc.tank": 0.6179000091552734, + "Acc.trade name": 0.26549999237060545, + "Acc.microwave": 0.3997999954223633, + "Acc.pot": 0.39220001220703127, + "Acc.animal": 0.6405000305175781, + "Acc.bicycle": 0.6916999816894531, + "Acc.lake": 0.03609999895095825, + "Acc.dishwasher": 0.5747999954223633, + "Acc.screen": 0.9286000061035157, + "Acc.blanket": 0.08760000228881835, + "Acc.sculpture": 0.6359999847412109, + "Acc.hood": 0.5279000091552735, + "Acc.sconce": 0.4259999847412109, + "Acc.vase": 0.29450000762939454, + "Acc.traffic light": 0.42220001220703124, + "Acc.tray": 0.037799999713897706, + "Acc.ashcan": 0.455, + "Acc.fan": 0.6536000061035157, + "Acc.pier": 0.4677000045776367, + "Acc.crt screen": 0.0, + "Acc.plate": 0.5161999893188477, + "Acc.monitor": 0.05760000228881836, + "Acc.bulletin board": 0.4552000045776367, + "Acc.shower": 0.010099999904632569, + "Acc.radiator": 0.5988000106811523, + "Acc.glass": 0.05809999942779541, + "Acc.clock": 0.258700008392334, + "Acc.flag": 0.40799999237060547 + } + }, + "81": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8172, + "mIoU": 0.45520000000000005, + "mAcc": 0.5733, + "IoU.wall": 0.7568000030517578, + "IoU.building": 0.8180999755859375, + "IoU.sky": 0.936500015258789, + "IoU.floor": 0.7973999786376953, + "IoU.tree": 0.7311000061035157, + "IoU.ceiling": 0.8269999694824218, + "IoU.road": 0.8233000183105469, + "IoU.bed ": 0.8691000366210937, + "IoU.windowpane": 0.6063000106811524, + "IoU.grass": 0.6416000366210938, + "IoU.cabinet": 0.58, + "IoU.sidewalk": 0.6311000061035156, + "IoU.person": 0.7698999786376953, + "IoU.earth": 0.33810001373291015, + "IoU.door": 0.4679000091552734, + "IoU.table": 0.5604000091552734, + "IoU.mountain": 0.5568999862670898, + "IoU.plant": 0.49810001373291013, + "IoU.curtain": 0.7301000213623047, + "IoU.chair": 0.5088000106811523, + "IoU.car": 0.8144000244140625, + "IoU.water": 0.5268999862670899, + "IoU.painting": 0.6731999969482422, + "IoU.sofa": 0.6220000076293946, + "IoU.shelf": 0.40869998931884766, + "IoU.house": 0.4929999923706055, + "IoU.sea": 0.6170999908447266, + "IoU.mirror": 0.6580999755859375, + "IoU.rug": 0.6648999786376953, + "IoU.field": 0.28889999389648435, + "IoU.armchair": 0.43099998474121093, + "IoU.seat": 0.6033000183105469, + "IoU.fence": 0.40380001068115234, + "IoU.desk": 0.44009998321533206, + "IoU.rock": 0.39540000915527346, + "IoU.wardrobe": 0.47569999694824217, + "IoU.lamp": 0.5384000015258789, + "IoU.bathtub": 0.7237000274658203, + 
"IoU.railing": 0.30540000915527343, + "IoU.cushion": 0.5229999923706055, + "IoU.base": 0.24829999923706056, + "IoU.box": 0.2181999969482422, + "IoU.column": 0.4534000015258789, + "IoU.signboard": 0.322599983215332, + "IoU.chest of drawers": 0.3060000038146973, + "IoU.counter": 0.24829999923706056, + "IoU.sand": 0.42119998931884767, + "IoU.sink": 0.6548999786376953, + "IoU.skyscraper": 0.5066999816894531, + "IoU.fireplace": 0.7358000183105469, + "IoU.refrigerator": 0.7525, + "IoU.grandstand": 0.405, + "IoU.path": 0.23, + "IoU.stairs": 0.26059999465942385, + "IoU.runway": 0.6683999633789063, + "IoU.case": 0.5272000122070313, + "IoU.pool table": 0.9070999908447266, + "IoU.pillow": 0.558499984741211, + "IoU.screen door": 0.6406999969482422, + "IoU.stairway": 0.3125, + "IoU.river": 0.2134000015258789, + "IoU.bridge": 0.6722000122070313, + "IoU.bookcase": 0.3443000030517578, + "IoU.blind": 0.4077000045776367, + "IoU.coffee table": 0.5852000045776368, + "IoU.toilet": 0.8151999664306641, + "IoU.flower": 0.3213999938964844, + "IoU.book": 0.4340999984741211, + "IoU.hill": 0.08939999580383301, + "IoU.bench": 0.425, + "IoU.countertop": 0.5508000183105469, + "IoU.stove": 0.691500015258789, + "IoU.palm": 0.46110000610351565, + "IoU.kitchen island": 0.36840000152587893, + "IoU.computer": 0.7170999908447265, + "IoU.swivel chair": 0.4152000045776367, + "IoU.boat": 0.709800033569336, + "IoU.bar": 0.510099983215332, + "IoU.arcade machine": 0.4072000122070312, + "IoU.hovel": 0.4793000030517578, + "IoU.bus": 0.7677999877929688, + "IoU.towel": 0.5540999984741211, + "IoU.light": 0.31489999771118166, + "IoU.truck": 0.24860000610351562, + "IoU.tower": 0.3240000152587891, + "IoU.chandelier": 0.5991999816894531, + "IoU.awning": 0.3, + "IoU.streetlight": 0.15510000228881837, + "IoU.booth": 0.31479999542236325, + "IoU.television receiver": 0.5979000091552734, + "IoU.airplane": 0.584900016784668, + "IoU.dirt track": 0.11829999923706054, + "IoU.apparel": 0.3256999969482422, + "IoU.pole": 0.18760000228881835, + "IoU.land": 0.013700000047683715, + "IoU.bannister": 0.105, + "IoU.escalator": 0.3290000152587891, + "IoU.ottoman": 0.44369998931884763, + "IoU.bottle": 0.3122999954223633, + "IoU.buffet": 0.4040999984741211, + "IoU.poster": 0.12979999542236328, + "IoU.stage": 0.17909999847412109, + "IoU.van": 0.4222999954223633, + "IoU.ship": 0.7183000183105469, + "IoU.fountain": 0.1896999931335449, + "IoU.conveyer belt": 0.6633999633789063, + "IoU.canopy": 0.19930000305175782, + "IoU.washer": 0.6975, + "IoU.plaything": 0.23270000457763673, + "IoU.swimming pool": 0.6352999877929687, + "IoU.stool": 0.24760000228881837, + "IoU.barrel": 0.5463000106811523, + "IoU.basket": 0.2240999984741211, + "IoU.waterfall": 0.6666999816894531, + "IoU.tent": 0.8898999786376953, + "IoU.bag": 0.09399999618530273, + "IoU.minibike": 0.5202000045776367, + "IoU.cradle": 0.7655999755859375, + "IoU.oven": 0.1777000045776367, + "IoU.ball": 0.37849998474121094, + "IoU.food": 0.5484000015258789, + "IoU.step": 0.1409000015258789, + "IoU.tank": 0.5047999954223633, + "IoU.trade name": 0.23149999618530273, + "IoU.microwave": 0.3825, + "IoU.pot": 0.34189998626708984, + "IoU.animal": 0.5961000061035157, + "IoU.bicycle": 0.4672000122070312, + "IoU.lake": 0.5668000030517578, + "IoU.dishwasher": 0.4979999923706055, + "IoU.screen": 0.5302999877929687, + "IoU.blanket": 0.10199999809265137, + "IoU.sculpture": 0.5016999816894532, + "IoU.hood": 0.467599983215332, + "IoU.sconce": 0.34900001525878904, + "IoU.vase": 0.22780000686645507, + "IoU.traffic light": 
0.2560000038146973, + "IoU.tray": 0.015099999904632568, + "IoU.ashcan": 0.31040000915527344, + "IoU.fan": 0.48529998779296873, + "IoU.pier": 0.2795999908447266, + "IoU.crt screen": 0.0, + "IoU.plate": 0.40799999237060547, + "IoU.monitor": 0.020099999904632567, + "IoU.bulletin board": 0.3120000076293945, + "IoU.shower": 0.004199999868869781, + "IoU.radiator": 0.46419998168945314, + "IoU.glass": 0.05599999904632568, + "IoU.clock": 0.2343000030517578, + "IoU.flag": 0.35529998779296873, + "Acc.wall": 0.8781999969482421, + "Acc.building": 0.920199966430664, + "Acc.sky": 0.9761000061035157, + "Acc.floor": 0.8983000183105468, + "Acc.tree": 0.8618000030517579, + "Acc.ceiling": 0.9119999694824219, + "Acc.road": 0.8951000213623047, + "Acc.bed ": 0.9504000091552735, + "Acc.windowpane": 0.7627999877929688, + "Acc.grass": 0.7905999755859375, + "Acc.cabinet": 0.6855999755859375, + "Acc.sidewalk": 0.7716000366210938, + "Acc.person": 0.9162999725341797, + "Acc.earth": 0.46799999237060547, + "Acc.door": 0.629000015258789, + "Acc.table": 0.7180000305175781, + "Acc.mountain": 0.7087999725341797, + "Acc.plant": 0.622599983215332, + "Acc.curtain": 0.8295999908447266, + "Acc.chair": 0.6423999786376953, + "Acc.car": 0.9029000091552735, + "Acc.water": 0.6705999755859375, + "Acc.painting": 0.8405000305175782, + "Acc.sofa": 0.7787000274658203, + "Acc.shelf": 0.6168999862670899, + "Acc.house": 0.6102999877929688, + "Acc.sea": 0.8761000061035156, + "Acc.mirror": 0.7477999877929687, + "Acc.rug": 0.7333000183105469, + "Acc.field": 0.5106000137329102, + "Acc.armchair": 0.6470999908447266, + "Acc.seat": 0.778499984741211, + "Acc.fence": 0.5445999908447265, + "Acc.desk": 0.6487000274658203, + "Acc.rock": 0.6363000106811524, + "Acc.wardrobe": 0.609900016784668, + "Acc.lamp": 0.6636000061035157, + "Acc.bathtub": 0.8026000213623047, + "Acc.railing": 0.4602000045776367, + "Acc.cushion": 0.6683999633789063, + "Acc.base": 0.41119998931884766, + "Acc.box": 0.295, + "Acc.column": 0.5745000076293946, + "Acc.signboard": 0.4186000061035156, + "Acc.chest of drawers": 0.555099983215332, + "Acc.counter": 0.3486000061035156, + "Acc.sand": 0.5747999954223633, + "Acc.sink": 0.7529000091552734, + "Acc.skyscraper": 0.5936999893188477, + "Acc.fireplace": 0.8913999938964844, + "Acc.refrigerator": 0.8538999938964844, + "Acc.grandstand": 0.7201000213623047, + "Acc.path": 0.31799999237060544, + "Acc.stairs": 0.3375, + "Acc.runway": 0.8683000183105469, + "Acc.case": 0.6798000335693359, + "Acc.pool table": 0.9594999694824219, + "Acc.pillow": 0.6608000183105469, + "Acc.screen door": 0.75, + "Acc.stairway": 0.45060001373291014, + "Acc.river": 0.37270000457763675, + "Acc.bridge": 0.811500015258789, + "Acc.bookcase": 0.5695000076293946, + "Acc.blind": 0.45279998779296876, + "Acc.coffee table": 0.7876000213623047, + "Acc.toilet": 0.8816999816894531, + "Acc.flower": 0.5361000061035156, + "Acc.book": 0.5934000015258789, + "Acc.hill": 0.1722999954223633, + "Acc.bench": 0.513400001525879, + "Acc.countertop": 0.6805000305175781, + "Acc.stove": 0.7916000366210938, + "Acc.palm": 0.6211000061035157, + "Acc.kitchen island": 0.7058999633789063, + "Acc.computer": 0.8723999786376954, + "Acc.swivel chair": 0.5525, + "Acc.boat": 0.8506999969482422, + "Acc.bar": 0.6875, + "Acc.arcade machine": 0.44709999084472657, + "Acc.hovel": 0.5366999816894531, + "Acc.bus": 0.9083000183105469, + "Acc.towel": 0.7137000274658203, + "Acc.light": 0.3459000015258789, + "Acc.truck": 0.3491999816894531, + "Acc.tower": 0.45110000610351564, + "Acc.chandelier": 0.759000015258789, + 
"Acc.awning": 0.3554000091552734, + "Acc.streetlight": 0.17309999465942383, + "Acc.booth": 0.39419998168945314, + "Acc.television receiver": 0.7163999938964843, + "Acc.airplane": 0.6530000305175782, + "Acc.dirt track": 0.19100000381469726, + "Acc.apparel": 0.4856000137329102, + "Acc.pole": 0.23559999465942383, + "Acc.land": 0.022300000190734862, + "Acc.bannister": 0.14359999656677247, + "Acc.escalator": 0.40049999237060546, + "Acc.ottoman": 0.592599983215332, + "Acc.bottle": 0.4988000106811523, + "Acc.buffet": 0.4493000030517578, + "Acc.poster": 0.19059999465942382, + "Acc.stage": 0.3661000061035156, + "Acc.van": 0.5320000076293945, + "Acc.ship": 0.8197000122070313, + "Acc.fountain": 0.2075, + "Acc.conveyer belt": 0.7751000213623047, + "Acc.canopy": 0.31879999160766603, + "Acc.washer": 0.7125, + "Acc.plaything": 0.435, + "Acc.swimming pool": 0.8355999755859375, + "Acc.stool": 0.33130001068115233, + "Acc.barrel": 0.6281000137329101, + "Acc.basket": 0.2825, + "Acc.waterfall": 0.7375, + "Acc.tent": 0.9918000030517579, + "Acc.bag": 0.11300000190734863, + "Acc.minibike": 0.6318000030517578, + "Acc.cradle": 0.9758000183105469, + "Acc.oven": 0.47700000762939454, + "Acc.ball": 0.46540000915527346, + "Acc.food": 0.705, + "Acc.step": 0.1725, + "Acc.tank": 0.6476999664306641, + "Acc.trade name": 0.27280000686645506, + "Acc.microwave": 0.4306999969482422, + "Acc.pot": 0.40580001831054685, + "Acc.animal": 0.6388000106811523, + "Acc.bicycle": 0.6926000213623047, + "Acc.lake": 0.609900016784668, + "Acc.dishwasher": 0.5713999938964843, + "Acc.screen": 0.9230000305175782, + "Acc.blanket": 0.11130000114440917, + "Acc.sculpture": 0.6361000061035156, + "Acc.hood": 0.5193999862670898, + "Acc.sconce": 0.42259998321533204, + "Acc.vase": 0.2943000030517578, + "Acc.traffic light": 0.4234999847412109, + "Acc.tray": 0.020699999332427978, + "Acc.ashcan": 0.4490000152587891, + "Acc.fan": 0.6340999984741211, + "Acc.pier": 0.45060001373291014, + "Acc.crt screen": 0.0, + "Acc.plate": 0.5102000045776367, + "Acc.monitor": 0.021600000858306885, + "Acc.bulletin board": 0.4193000030517578, + "Acc.shower": 0.013700000047683715, + "Acc.radiator": 0.5111999893188477, + "Acc.glass": 0.05849999904632568, + "Acc.clock": 0.26680000305175783, + "Acc.flag": 0.45470001220703127 + } + }, + "82": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8216, + "mIoU": 0.465, + "mAcc": 0.5834, + "IoU.wall": 0.7602999877929687, + "IoU.building": 0.8244000244140625, + "IoU.sky": 0.938499984741211, + "IoU.floor": 0.8038999938964844, + "IoU.tree": 0.7395999908447266, + "IoU.ceiling": 0.8329000091552734, + "IoU.road": 0.8244000244140625, + "IoU.bed ": 0.8688999938964844, + "IoU.windowpane": 0.5959000015258789, + "IoU.grass": 0.6643000030517578, + "IoU.cabinet": 0.5911999893188477, + "IoU.sidewalk": 0.6391999816894531, + "IoU.person": 0.7777999877929688, + "IoU.earth": 0.3536000061035156, + "IoU.door": 0.48069999694824217, + "IoU.table": 0.5638999938964844, + "IoU.mountain": 0.5747999954223633, + "IoU.plant": 0.5093000030517578, + "IoU.curtain": 0.7287000274658203, + "IoU.chair": 0.5220999908447266, + "IoU.car": 0.8137999725341797, + "IoU.water": 0.5720999908447265, + "IoU.painting": 0.688499984741211, + "IoU.sofa": 0.6222000122070312, + "IoU.shelf": 0.4216999816894531, + "IoU.house": 0.4784000015258789, + "IoU.sea": 0.627400016784668, + "IoU.mirror": 0.625999984741211, + "IoU.rug": 0.6576000213623047, + "IoU.field": 0.30920000076293946, + "IoU.armchair": 
0.3809000015258789, + "IoU.seat": 0.6093000030517578, + "IoU.fence": 0.42209999084472655, + "IoU.desk": 0.4527000045776367, + "IoU.rock": 0.43209999084472656, + "IoU.wardrobe": 0.518400001525879, + "IoU.lamp": 0.5468999862670898, + "IoU.bathtub": 0.7351000213623047, + "IoU.railing": 0.29569999694824217, + "IoU.cushion": 0.5318000030517578, + "IoU.base": 0.2545000076293945, + "IoU.box": 0.24379999160766602, + "IoU.column": 0.472400016784668, + "IoU.signboard": 0.327599983215332, + "IoU.chest of drawers": 0.31739999771118166, + "IoU.counter": 0.2747999954223633, + "IoU.sand": 0.3908000183105469, + "IoU.sink": 0.6701000213623047, + "IoU.skyscraper": 0.5790999984741211, + "IoU.fireplace": 0.7091999816894531, + "IoU.refrigerator": 0.765199966430664, + "IoU.grandstand": 0.4734999847412109, + "IoU.path": 0.23600000381469727, + "IoU.stairs": 0.27, + "IoU.runway": 0.6197000122070313, + "IoU.case": 0.4986000061035156, + "IoU.pool table": 0.9087000274658203, + "IoU.pillow": 0.553499984741211, + "IoU.screen door": 0.5563000106811523, + "IoU.stairway": 0.2865999984741211, + "IoU.river": 0.1990999984741211, + "IoU.bridge": 0.6770999908447266, + "IoU.bookcase": 0.34080001831054685, + "IoU.blind": 0.41310001373291017, + "IoU.coffee table": 0.5709000015258789, + "IoU.toilet": 0.8154000091552734, + "IoU.flower": 0.3397000122070313, + "IoU.book": 0.4420000076293945, + "IoU.hill": 0.10350000381469726, + "IoU.bench": 0.42889999389648437, + "IoU.countertop": 0.5084000015258789, + "IoU.stove": 0.7123999786376953, + "IoU.palm": 0.47779998779296873, + "IoU.kitchen island": 0.36779998779296874, + "IoU.computer": 0.715999984741211, + "IoU.swivel chair": 0.4993000030517578, + "IoU.boat": 0.6891999816894532, + "IoU.bar": 0.5531999969482422, + "IoU.arcade machine": 0.43759998321533206, + "IoU.hovel": 0.5725, + "IoU.bus": 0.7340000152587891, + "IoU.towel": 0.5493999862670899, + "IoU.light": 0.3171999931335449, + "IoU.truck": 0.1997999954223633, + "IoU.tower": 0.25739999771118166, + "IoU.chandelier": 0.6020000076293945, + "IoU.awning": 0.40669998168945315, + "IoU.streetlight": 0.16579999923706054, + "IoU.booth": 0.265, + "IoU.television receiver": 0.6097999954223633, + "IoU.airplane": 0.5866999816894531, + "IoU.dirt track": 0.0703000020980835, + "IoU.apparel": 0.38810001373291014, + "IoU.pole": 0.17, + "IoU.land": 0.013899999856948852, + "IoU.bannister": 0.11350000381469727, + "IoU.escalator": 0.32290000915527345, + "IoU.ottoman": 0.46810001373291016, + "IoU.bottle": 0.3365000152587891, + "IoU.buffet": 0.40279998779296877, + "IoU.poster": 0.26680000305175783, + "IoU.stage": 0.15859999656677246, + "IoU.van": 0.39869998931884765, + "IoU.ship": 0.5790000152587891, + "IoU.fountain": 0.20059999465942382, + "IoU.conveyer belt": 0.6998999786376953, + "IoU.canopy": 0.22139999389648438, + "IoU.washer": 0.6802999877929687, + "IoU.plaything": 0.23219999313354492, + "IoU.swimming pool": 0.5972000122070312, + "IoU.stool": 0.28700000762939454, + "IoU.barrel": 0.5552999877929687, + "IoU.basket": 0.2581999969482422, + "IoU.waterfall": 0.7190000152587891, + "IoU.tent": 0.9233000183105469, + "IoU.bag": 0.08350000381469727, + "IoU.minibike": 0.6304999923706055, + "IoU.cradle": 0.8179000091552734, + "IoU.oven": 0.18600000381469728, + "IoU.ball": 0.3643000030517578, + "IoU.food": 0.5634000015258789, + "IoU.step": 0.13109999656677246, + "IoU.tank": 0.5, + "IoU.trade name": 0.22680000305175782, + "IoU.microwave": 0.359900016784668, + "IoU.pot": 0.36119998931884767, + "IoU.animal": 0.6444000244140625, + "IoU.bicycle": 0.5311000061035156, + 
"IoU.lake": 0.5618999862670898, + "IoU.dishwasher": 0.6033000183105469, + "IoU.screen": 0.5777999877929687, + "IoU.blanket": 0.12529999732971192, + "IoU.sculpture": 0.4904999923706055, + "IoU.hood": 0.495, + "IoU.sconce": 0.34439998626708984, + "IoU.vase": 0.21440000534057618, + "IoU.traffic light": 0.2681999969482422, + "IoU.tray": 0.025799999237060545, + "IoU.ashcan": 0.3609999847412109, + "IoU.fan": 0.5065000152587891, + "IoU.pier": 0.3959000015258789, + "IoU.crt screen": 0.00019999999552965163, + "IoU.plate": 0.4615000152587891, + "IoU.monitor": 0.04190000057220459, + "IoU.bulletin board": 0.33599998474121096, + "IoU.shower": 0.00699999988079071, + "IoU.radiator": 0.4993000030517578, + "IoU.glass": 0.0528000020980835, + "IoU.clock": 0.23120000839233398, + "IoU.flag": 0.38990001678466796, + "Acc.wall": 0.8776999664306641, + "Acc.building": 0.9198999786376953, + "Acc.sky": 0.9768000030517578, + "Acc.floor": 0.9013999938964844, + "Acc.tree": 0.8672000122070312, + "Acc.ceiling": 0.9227999877929688, + "Acc.road": 0.9019000244140625, + "Acc.bed ": 0.9534999847412109, + "Acc.windowpane": 0.7577999877929688, + "Acc.grass": 0.8038999938964844, + "Acc.cabinet": 0.7002999877929688, + "Acc.sidewalk": 0.7809999847412109, + "Acc.person": 0.9154000091552734, + "Acc.earth": 0.4961000061035156, + "Acc.door": 0.6408999633789062, + "Acc.table": 0.7143000030517578, + "Acc.mountain": 0.7255999755859375, + "Acc.plant": 0.6343000030517578, + "Acc.curtain": 0.8304000091552735, + "Acc.chair": 0.6558000183105469, + "Acc.car": 0.9008000183105469, + "Acc.water": 0.7112000274658203, + "Acc.painting": 0.8533999633789062, + "Acc.sofa": 0.78, + "Acc.shelf": 0.6165999984741211, + "Acc.house": 0.6011999893188477, + "Acc.sea": 0.8441999816894531, + "Acc.mirror": 0.705999984741211, + "Acc.rug": 0.7408000183105469, + "Acc.field": 0.5265999984741211, + "Acc.armchair": 0.6020999908447265, + "Acc.seat": 0.8094999694824219, + "Acc.fence": 0.5734999847412109, + "Acc.desk": 0.6736000061035157, + "Acc.rock": 0.6480999755859375, + "Acc.wardrobe": 0.644000015258789, + "Acc.lamp": 0.669000015258789, + "Acc.bathtub": 0.7968000030517578, + "Acc.railing": 0.457599983215332, + "Acc.cushion": 0.6719999694824219, + "Acc.base": 0.4218000030517578, + "Acc.box": 0.3315000152587891, + "Acc.column": 0.5968999862670898, + "Acc.signboard": 0.4265999984741211, + "Acc.chest of drawers": 0.5588999938964844, + "Acc.counter": 0.3816999816894531, + "Acc.sand": 0.5370000076293945, + "Acc.sink": 0.7495999908447266, + "Acc.skyscraper": 0.7194999694824219, + "Acc.fireplace": 0.8819000244140625, + "Acc.refrigerator": 0.8601000213623047, + "Acc.grandstand": 0.7333000183105469, + "Acc.path": 0.30690000534057615, + "Acc.stairs": 0.35650001525878905, + "Acc.runway": 0.82, + "Acc.case": 0.6586000061035157, + "Acc.pool table": 0.9698999786376953, + "Acc.pillow": 0.659000015258789, + "Acc.screen door": 0.7087000274658203, + "Acc.stairway": 0.39169998168945314, + "Acc.river": 0.38029998779296875, + "Acc.bridge": 0.821500015258789, + "Acc.bookcase": 0.5754999923706055, + "Acc.blind": 0.4575, + "Acc.coffee table": 0.7926999664306641, + "Acc.toilet": 0.8902999877929687, + "Acc.flower": 0.5318999862670899, + "Acc.book": 0.6095999908447266, + "Acc.hill": 0.1828000068664551, + "Acc.bench": 0.505, + "Acc.countertop": 0.6508999633789062, + "Acc.stove": 0.7998999786376954, + "Acc.palm": 0.6629000091552735, + "Acc.kitchen island": 0.6719000244140625, + "Acc.computer": 0.8616999816894532, + "Acc.swivel chair": 0.6388000106811523, + "Acc.boat": 0.8366999816894531, + 
"Acc.bar": 0.7572000122070313, + "Acc.arcade machine": 0.4672000122070312, + "Acc.hovel": 0.6491000366210937, + "Acc.bus": 0.9079000091552735, + "Acc.towel": 0.6791999816894532, + "Acc.light": 0.34119998931884765, + "Acc.truck": 0.28200000762939453, + "Acc.tower": 0.3802000045776367, + "Acc.chandelier": 0.7615000152587891, + "Acc.awning": 0.47630001068115235, + "Acc.streetlight": 0.19370000839233398, + "Acc.booth": 0.39779998779296877, + "Acc.television receiver": 0.7394999694824219, + "Acc.airplane": 0.6583000183105469, + "Acc.dirt track": 0.10029999732971191, + "Acc.apparel": 0.5354000091552734, + "Acc.pole": 0.21629999160766603, + "Acc.land": 0.02059999942779541, + "Acc.bannister": 0.16200000762939454, + "Acc.escalator": 0.38529998779296876, + "Acc.ottoman": 0.5702000045776368, + "Acc.bottle": 0.5681999969482422, + "Acc.buffet": 0.4602000045776367, + "Acc.poster": 0.35310001373291017, + "Acc.stage": 0.33220001220703127, + "Acc.van": 0.48970001220703124, + "Acc.ship": 0.7022000122070312, + "Acc.fountain": 0.20760000228881836, + "Acc.conveyer belt": 0.8808000183105469, + "Acc.canopy": 0.3127000045776367, + "Acc.washer": 0.6829000091552735, + "Acc.plaything": 0.39470001220703127, + "Acc.swimming pool": 0.7819999694824219, + "Acc.stool": 0.37689998626708987, + "Acc.barrel": 0.6302000045776367, + "Acc.basket": 0.31290000915527344, + "Acc.waterfall": 0.7991999816894532, + "Acc.tent": 0.9919000244140626, + "Acc.bag": 0.10090000152587891, + "Acc.minibike": 0.7601000213623047, + "Acc.cradle": 0.9722000122070312, + "Acc.oven": 0.5120000076293946, + "Acc.ball": 0.4331000137329102, + "Acc.food": 0.6991999816894531, + "Acc.step": 0.15229999542236328, + "Acc.tank": 0.634000015258789, + "Acc.trade name": 0.26209999084472657, + "Acc.microwave": 0.40330001831054685, + "Acc.pot": 0.4266999816894531, + "Acc.animal": 0.688499984741211, + "Acc.bicycle": 0.701500015258789, + "Acc.lake": 0.6481999969482422, + "Acc.dishwasher": 0.6905999755859376, + "Acc.screen": 0.9058000183105469, + "Acc.blanket": 0.13550000190734862, + "Acc.sculpture": 0.6422000122070313, + "Acc.hood": 0.5615999984741211, + "Acc.sconce": 0.4179999923706055, + "Acc.vase": 0.27920000076293944, + "Acc.traffic light": 0.44790000915527345, + "Acc.tray": 0.036600000858306884, + "Acc.ashcan": 0.504900016784668, + "Acc.fan": 0.6622000122070313, + "Acc.pier": 0.6969999694824218, + "Acc.crt screen": 0.00039999999105930326, + "Acc.plate": 0.5729999923706055, + "Acc.monitor": 0.04409999847412109, + "Acc.bulletin board": 0.44349998474121094, + "Acc.shower": 0.02740000009536743, + "Acc.radiator": 0.5434000015258789, + "Acc.glass": 0.05449999809265137, + "Acc.clock": 0.2543000030517578, + "Acc.flag": 0.4879999923706055 + } + }, + "83": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8240000000000001, + "mIoU": 0.469, + "mAcc": 0.5855, + "IoU.wall": 0.7613999938964844, + "IoU.building": 0.827699966430664, + "IoU.sky": 0.9375, + "IoU.floor": 0.8025, + "IoU.tree": 0.740999984741211, + "IoU.ceiling": 0.8295999908447266, + "IoU.road": 0.827699966430664, + "IoU.bed ": 0.8718000030517579, + "IoU.windowpane": 0.610099983215332, + "IoU.grass": 0.6594999694824218, + "IoU.cabinet": 0.605099983215332, + "IoU.sidewalk": 0.6338000106811523, + "IoU.person": 0.7883000183105469, + "IoU.earth": 0.3516999816894531, + "IoU.door": 0.47439998626708985, + "IoU.table": 0.6002999877929688, + "IoU.mountain": 0.575999984741211, + "IoU.plant": 0.5216999816894531, + "IoU.curtain": 
0.7408000183105469, + "IoU.chair": 0.5259999847412109, + "IoU.car": 0.8191999816894531, + "IoU.water": 0.5529000091552735, + "IoU.painting": 0.7047000122070313, + "IoU.sofa": 0.6447000122070312, + "IoU.shelf": 0.44360000610351563, + "IoU.house": 0.5072999954223633, + "IoU.sea": 0.6116999816894532, + "IoU.mirror": 0.6416999816894531, + "IoU.rug": 0.6619000244140625, + "IoU.field": 0.303799991607666, + "IoU.armchair": 0.39290000915527346, + "IoU.seat": 0.6320999908447266, + "IoU.fence": 0.40099998474121096, + "IoU.desk": 0.46540000915527346, + "IoU.rock": 0.47439998626708985, + "IoU.wardrobe": 0.5227000045776368, + "IoU.lamp": 0.5740000152587891, + "IoU.bathtub": 0.7598000335693359, + "IoU.railing": 0.3452000045776367, + "IoU.cushion": 0.5645000076293946, + "IoU.base": 0.31, + "IoU.box": 0.2564999961853027, + "IoU.column": 0.4859999847412109, + "IoU.signboard": 0.3427000045776367, + "IoU.chest of drawers": 0.3365000152587891, + "IoU.counter": 0.3156999969482422, + "IoU.sand": 0.40380001068115234, + "IoU.sink": 0.6637999725341797, + "IoU.skyscraper": 0.6268999862670899, + "IoU.fireplace": 0.7190000152587891, + "IoU.refrigerator": 0.7551000213623047, + "IoU.grandstand": 0.40819999694824216, + "IoU.path": 0.22309999465942382, + "IoU.stairs": 0.27420000076293943, + "IoU.runway": 0.6762999725341797, + "IoU.case": 0.545, + "IoU.pool table": 0.9136000061035157, + "IoU.pillow": 0.5633000183105469, + "IoU.screen door": 0.41049999237060547, + "IoU.stairway": 0.35310001373291017, + "IoU.river": 0.13640000343322753, + "IoU.bridge": 0.6573999786376953, + "IoU.bookcase": 0.3591999816894531, + "IoU.blind": 0.4279999923706055, + "IoU.coffee table": 0.6063999938964844, + "IoU.toilet": 0.8187999725341797, + "IoU.flower": 0.3497999954223633, + "IoU.book": 0.45439998626708983, + "IoU.hill": 0.12420000076293945, + "IoU.bench": 0.46540000915527346, + "IoU.countertop": 0.47330001831054686, + "IoU.stove": 0.6988999938964844, + "IoU.palm": 0.48369998931884767, + "IoU.kitchen island": 0.4034000015258789, + "IoU.computer": 0.7283999633789062, + "IoU.swivel chair": 0.4072999954223633, + "IoU.boat": 0.5220000076293946, + "IoU.bar": 0.5520999908447266, + "IoU.arcade machine": 0.35450000762939454, + "IoU.hovel": 0.5436999893188477, + "IoU.bus": 0.7351000213623047, + "IoU.towel": 0.5936999893188477, + "IoU.light": 0.3315999984741211, + "IoU.truck": 0.25379999160766603, + "IoU.tower": 0.2909000015258789, + "IoU.chandelier": 0.6222000122070312, + "IoU.awning": 0.4377000045776367, + "IoU.streetlight": 0.16760000228881836, + "IoU.booth": 0.2968000030517578, + "IoU.television receiver": 0.6558999633789062, + "IoU.airplane": 0.571500015258789, + "IoU.dirt track": 0.11720000267028809, + "IoU.apparel": 0.3397000122070313, + "IoU.pole": 0.15279999732971192, + "IoU.land": 0.05190000057220459, + "IoU.bannister": 0.11010000228881836, + "IoU.escalator": 0.26, + "IoU.ottoman": 0.43209999084472656, + "IoU.bottle": 0.20909999847412108, + "IoU.buffet": 0.4333000183105469, + "IoU.poster": 0.28850000381469726, + "IoU.stage": 0.17020000457763673, + "IoU.van": 0.43459999084472656, + "IoU.ship": 0.24010000228881836, + "IoU.fountain": 0.1965999984741211, + "IoU.conveyer belt": 0.7523999786376954, + "IoU.canopy": 0.26209999084472657, + "IoU.washer": 0.6981999969482422, + "IoU.plaything": 0.2602000045776367, + "IoU.swimming pool": 0.6768000030517578, + "IoU.stool": 0.2929000091552734, + "IoU.barrel": 0.5536999893188477, + "IoU.basket": 0.25129999160766603, + "IoU.waterfall": 0.6926000213623047, + "IoU.tent": 0.9354000091552734, + "IoU.bag": 
0.13130000114440918, + "IoU.minibike": 0.6455000305175781, + "IoU.cradle": 0.8068000030517578, + "IoU.oven": 0.17559999465942383, + "IoU.ball": 0.46349998474121096, + "IoU.food": 0.509900016784668, + "IoU.step": 0.08869999885559082, + "IoU.tank": 0.5545000076293946, + "IoU.trade name": 0.22809999465942382, + "IoU.microwave": 0.35369998931884766, + "IoU.pot": 0.3884000015258789, + "IoU.animal": 0.6270000076293946, + "IoU.bicycle": 0.5352000045776367, + "IoU.lake": 0.5941999816894531, + "IoU.dishwasher": 0.5318000030517578, + "IoU.screen": 0.5813999938964843, + "IoU.blanket": 0.14050000190734863, + "IoU.sculpture": 0.4891999816894531, + "IoU.hood": 0.5470000076293945, + "IoU.sconce": 0.3629999923706055, + "IoU.vase": 0.27549999237060546, + "IoU.traffic light": 0.2509000015258789, + "IoU.tray": 0.03559999942779541, + "IoU.ashcan": 0.37490001678466794, + "IoU.fan": 0.5122999954223633, + "IoU.pier": 0.2352000045776367, + "IoU.crt screen": 0.019700000286102293, + "IoU.plate": 0.48209999084472654, + "IoU.monitor": 0.06239999771118164, + "IoU.bulletin board": 0.4463000106811523, + "IoU.shower": 0.002199999988079071, + "IoU.radiator": 0.5311000061035156, + "IoU.glass": 0.06559999942779542, + "IoU.clock": 0.2613999938964844, + "IoU.flag": 0.40849998474121096, + "Acc.wall": 0.8758999633789063, + "Acc.building": 0.9226000213623047, + "Acc.sky": 0.9762000274658204, + "Acc.floor": 0.9037000274658203, + "Acc.tree": 0.8690000152587891, + "Acc.ceiling": 0.9191999816894532, + "Acc.road": 0.905199966430664, + "Acc.bed ": 0.9536000061035156, + "Acc.windowpane": 0.7637000274658203, + "Acc.grass": 0.8070999908447266, + "Acc.cabinet": 0.7173999786376953, + "Acc.sidewalk": 0.7752999877929687, + "Acc.person": 0.9148000335693359, + "Acc.earth": 0.4906000137329102, + "Acc.door": 0.6469999694824219, + "Acc.table": 0.7362999725341797, + "Acc.mountain": 0.7156999969482422, + "Acc.plant": 0.6313999938964844, + "Acc.curtain": 0.8625, + "Acc.chair": 0.6581999969482422, + "Acc.car": 0.9012000274658203, + "Acc.water": 0.7041000366210938, + "Acc.painting": 0.8488999938964844, + "Acc.sofa": 0.803499984741211, + "Acc.shelf": 0.6631999969482422, + "Acc.house": 0.6498000335693359, + "Acc.sea": 0.8405000305175782, + "Acc.mirror": 0.7204000091552735, + "Acc.rug": 0.7202999877929688, + "Acc.field": 0.5461999893188476, + "Acc.armchair": 0.6022000122070312, + "Acc.seat": 0.8158999633789062, + "Acc.fence": 0.5463000106811523, + "Acc.desk": 0.6751000213623047, + "Acc.rock": 0.7048999786376953, + "Acc.wardrobe": 0.6504000091552734, + "Acc.lamp": 0.7093000030517578, + "Acc.bathtub": 0.8420999908447265, + "Acc.railing": 0.5052000045776367, + "Acc.cushion": 0.6898999786376954, + "Acc.base": 0.4538999938964844, + "Acc.box": 0.34349998474121096, + "Acc.column": 0.6181999969482422, + "Acc.signboard": 0.4393000030517578, + "Acc.chest of drawers": 0.6025, + "Acc.counter": 0.4602000045776367, + "Acc.sand": 0.55, + "Acc.sink": 0.7437000274658203, + "Acc.skyscraper": 0.7191999816894531, + "Acc.fireplace": 0.9012999725341797, + "Acc.refrigerator": 0.845999984741211, + "Acc.grandstand": 0.7627999877929688, + "Acc.path": 0.30170000076293946, + "Acc.stairs": 0.35009998321533203, + "Acc.runway": 0.8109999847412109, + "Acc.case": 0.7244000244140625, + "Acc.pool table": 0.9701999664306641, + "Acc.pillow": 0.653499984741211, + "Acc.screen door": 0.5072999954223633, + "Acc.stairway": 0.4804000091552734, + "Acc.river": 0.2640999984741211, + "Acc.bridge": 0.795, + "Acc.bookcase": 0.5913000106811523, + "Acc.blind": 0.4972999954223633, + "Acc.coffee table": 
0.8094999694824219, + "Acc.toilet": 0.8930999755859375, + "Acc.flower": 0.535, + "Acc.book": 0.6277999877929688, + "Acc.hill": 0.23329999923706055, + "Acc.bench": 0.5449000167846679, + "Acc.countertop": 0.624900016784668, + "Acc.stove": 0.7962000274658203, + "Acc.palm": 0.675199966430664, + "Acc.kitchen island": 0.714800033569336, + "Acc.computer": 0.8683000183105469, + "Acc.swivel chair": 0.5288999938964843, + "Acc.boat": 0.6451000213623047, + "Acc.bar": 0.7130000305175781, + "Acc.arcade machine": 0.37650001525878907, + "Acc.hovel": 0.6343000030517578, + "Acc.bus": 0.9312999725341797, + "Acc.towel": 0.7305999755859375, + "Acc.light": 0.35880001068115236, + "Acc.truck": 0.3454000091552734, + "Acc.tower": 0.43849998474121094, + "Acc.chandelier": 0.7848999786376953, + "Acc.awning": 0.5293000030517578, + "Acc.streetlight": 0.20469999313354492, + "Acc.booth": 0.40349998474121096, + "Acc.television receiver": 0.7777999877929688, + "Acc.airplane": 0.6448000335693359, + "Acc.dirt track": 0.19, + "Acc.apparel": 0.46549999237060546, + "Acc.pole": 0.1931999969482422, + "Acc.land": 0.07929999828338623, + "Acc.bannister": 0.16139999389648438, + "Acc.escalator": 0.30690000534057615, + "Acc.ottoman": 0.5647999954223633, + "Acc.bottle": 0.27850000381469725, + "Acc.buffet": 0.4861000061035156, + "Acc.poster": 0.3758000183105469, + "Acc.stage": 0.31510000228881835, + "Acc.van": 0.53, + "Acc.ship": 0.31979999542236326, + "Acc.fountain": 0.20440000534057617, + "Acc.conveyer belt": 0.9220999908447266, + "Acc.canopy": 0.36650001525878906, + "Acc.washer": 0.7027999877929687, + "Acc.plaything": 0.43709999084472656, + "Acc.swimming pool": 0.783499984741211, + "Acc.stool": 0.36259998321533204, + "Acc.barrel": 0.6259000015258789, + "Acc.basket": 0.3302000045776367, + "Acc.waterfall": 0.7780000305175782, + "Acc.tent": 0.9906999969482422, + "Acc.bag": 0.16639999389648438, + "Acc.minibike": 0.7780000305175782, + "Acc.cradle": 0.9730000305175781, + "Acc.oven": 0.4809999847412109, + "Acc.ball": 0.550099983215332, + "Acc.food": 0.6165000152587891, + "Acc.step": 0.11770000457763671, + "Acc.tank": 0.642300033569336, + "Acc.trade name": 0.25809999465942385, + "Acc.microwave": 0.3965000152587891, + "Acc.pot": 0.46240001678466797, + "Acc.animal": 0.6848999786376954, + "Acc.bicycle": 0.7106999969482422, + "Acc.lake": 0.6420999908447266, + "Acc.dishwasher": 0.6245999908447266, + "Acc.screen": 0.9016000366210938, + "Acc.blanket": 0.15270000457763672, + "Acc.sculpture": 0.6425, + "Acc.hood": 0.6104999923706055, + "Acc.sconce": 0.4311999893188477, + "Acc.vase": 0.3625, + "Acc.traffic light": 0.42150001525878905, + "Acc.tray": 0.048899998664855955, + "Acc.ashcan": 0.4936000061035156, + "Acc.fan": 0.6633000183105469, + "Acc.pier": 0.4418000030517578, + "Acc.crt screen": 0.048899998664855955, + "Acc.plate": 0.64, + "Acc.monitor": 0.06800000190734863, + "Acc.bulletin board": 0.6247000122070312, + "Acc.shower": 0.018300000429153442, + "Acc.radiator": 0.5986999893188476, + "Acc.glass": 0.0678000020980835, + "Acc.clock": 0.29809999465942383, + "Acc.flag": 0.46880001068115235 + } + }, + "84": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8278, + "mIoU": 0.4789, + "mAcc": 0.598, + "IoU.wall": 0.7659999847412109, + "IoU.building": 0.8333999633789062, + "IoU.sky": 0.9386000061035156, + "IoU.floor": 0.8180999755859375, + "IoU.tree": 0.7413999938964844, + "IoU.ceiling": 0.8330000305175781, + "IoU.road": 0.8336000061035156, + "IoU.bed ": 
0.8787000274658203, + "IoU.windowpane": 0.6141999816894531, + "IoU.grass": 0.6626999664306641, + "IoU.cabinet": 0.6183000183105469, + "IoU.sidewalk": 0.6511000061035156, + "IoU.person": 0.7962999725341797, + "IoU.earth": 0.3781999969482422, + "IoU.door": 0.4809000015258789, + "IoU.table": 0.5768999862670898, + "IoU.mountain": 0.5856000137329102, + "IoU.plant": 0.5065000152587891, + "IoU.curtain": 0.7436000061035156, + "IoU.chair": 0.5372999954223633, + "IoU.car": 0.8340000152587891, + "IoU.water": 0.5202999877929687, + "IoU.painting": 0.6961000061035156, + "IoU.sofa": 0.6497000122070312, + "IoU.shelf": 0.452599983215332, + "IoU.house": 0.5045000076293945, + "IoU.sea": 0.6427999877929688, + "IoU.mirror": 0.6318999862670899, + "IoU.rug": 0.6777999877929688, + "IoU.field": 0.30840000152587893, + "IoU.armchair": 0.39599998474121095, + "IoU.seat": 0.6605999755859375, + "IoU.fence": 0.4102000045776367, + "IoU.desk": 0.49840000152587893, + "IoU.rock": 0.43650001525878906, + "IoU.wardrobe": 0.53, + "IoU.lamp": 0.5700999832153321, + "IoU.bathtub": 0.8111000061035156, + "IoU.railing": 0.3604000091552734, + "IoU.cushion": 0.5629000091552734, + "IoU.base": 0.3213999938964844, + "IoU.box": 0.26680000305175783, + "IoU.column": 0.4865999984741211, + "IoU.signboard": 0.35759998321533204, + "IoU.chest of drawers": 0.337599983215332, + "IoU.counter": 0.3295999908447266, + "IoU.sand": 0.5202000045776367, + "IoU.sink": 0.6741000366210937, + "IoU.skyscraper": 0.6379999923706055, + "IoU.fireplace": 0.6775, + "IoU.refrigerator": 0.7679000091552735, + "IoU.grandstand": 0.45680000305175783, + "IoU.path": 0.21700000762939453, + "IoU.stairs": 0.27610000610351565, + "IoU.runway": 0.5877999877929687, + "IoU.case": 0.5661000061035156, + "IoU.pool table": 0.9186000061035157, + "IoU.pillow": 0.5675, + "IoU.screen door": 0.4520999908447266, + "IoU.stairway": 0.3884000015258789, + "IoU.river": 0.13010000228881835, + "IoU.bridge": 0.6905999755859376, + "IoU.bookcase": 0.332599983215332, + "IoU.blind": 0.38, + "IoU.coffee table": 0.5656000137329101, + "IoU.toilet": 0.762300033569336, + "IoU.flower": 0.3295000076293945, + "IoU.book": 0.46169998168945314, + "IoU.hill": 0.11920000076293945, + "IoU.bench": 0.47049999237060547, + "IoU.countertop": 0.46419998168945314, + "IoU.stove": 0.7041000366210938, + "IoU.palm": 0.4829000091552734, + "IoU.kitchen island": 0.3634000015258789, + "IoU.computer": 0.7573999786376953, + "IoU.swivel chair": 0.4966999816894531, + "IoU.boat": 0.7056999969482421, + "IoU.bar": 0.45279998779296876, + "IoU.arcade machine": 0.4388999938964844, + "IoU.hovel": 0.5365999984741211, + "IoU.bus": 0.841500015258789, + "IoU.towel": 0.6006999969482422, + "IoU.light": 0.31, + "IoU.truck": 0.3215000152587891, + "IoU.tower": 0.3029999923706055, + "IoU.chandelier": 0.6175, + "IoU.awning": 0.4008000183105469, + "IoU.streetlight": 0.19290000915527344, + "IoU.booth": 0.3096999931335449, + "IoU.television receiver": 0.6786000061035157, + "IoU.airplane": 0.6365000152587891, + "IoU.dirt track": 0.125, + "IoU.apparel": 0.3665999984741211, + "IoU.pole": 0.18069999694824218, + "IoU.land": 0.02430000066757202, + "IoU.bannister": 0.1256999969482422, + "IoU.escalator": 0.34869998931884766, + "IoU.ottoman": 0.4402000045776367, + "IoU.bottle": 0.30840000152587893, + "IoU.buffet": 0.4890999984741211, + "IoU.poster": 0.21510000228881837, + "IoU.stage": 0.1584000015258789, + "IoU.van": 0.43209999084472656, + "IoU.ship": 0.4559000015258789, + "IoU.fountain": 0.203700008392334, + "IoU.conveyer belt": 0.7813999938964844, + "IoU.canopy": 
0.25629999160766603, + "IoU.washer": 0.7161000061035157, + "IoU.plaything": 0.3331999969482422, + "IoU.swimming pool": 0.7555000305175781, + "IoU.stool": 0.3452000045776367, + "IoU.barrel": 0.4718000030517578, + "IoU.basket": 0.23059999465942382, + "IoU.waterfall": 0.7480999755859375, + "IoU.tent": 0.9506999969482421, + "IoU.bag": 0.13029999732971193, + "IoU.minibike": 0.5420000076293945, + "IoU.cradle": 0.7733999633789063, + "IoU.oven": 0.21709999084472656, + "IoU.ball": 0.5091999816894531, + "IoU.food": 0.46650001525878904, + "IoU.step": 0.09100000381469726, + "IoU.tank": 0.5702999877929688, + "IoU.trade name": 0.26079999923706054, + "IoU.microwave": 0.42520000457763674, + "IoU.pot": 0.400099983215332, + "IoU.animal": 0.6202000045776367, + "IoU.bicycle": 0.485, + "IoU.lake": 0.5861999893188476, + "IoU.dishwasher": 0.5329999923706055, + "IoU.screen": 0.5843999862670899, + "IoU.blanket": 0.1325, + "IoU.sculpture": 0.4986000061035156, + "IoU.hood": 0.4961999893188477, + "IoU.sconce": 0.3234000015258789, + "IoU.vase": 0.29850000381469727, + "IoU.traffic light": 0.25540000915527344, + "IoU.tray": 0.025299999713897705, + "IoU.ashcan": 0.41380001068115235, + "IoU.fan": 0.5327999877929688, + "IoU.pier": 0.2740999984741211, + "IoU.crt screen": 0.0325, + "IoU.plate": 0.4581999969482422, + "IoU.monitor": 0.09789999961853027, + "IoU.bulletin board": 0.42529998779296874, + "IoU.shower": 0.0005999999865889549, + "IoU.radiator": 0.5606999969482422, + "IoU.glass": 0.08390000343322754, + "IoU.clock": 0.27049999237060546, + "IoU.flag": 0.39049999237060545, + "Acc.wall": 0.8797000122070312, + "Acc.building": 0.928499984741211, + "Acc.sky": 0.9762999725341797, + "Acc.floor": 0.9094000244140625, + "Acc.tree": 0.8718000030517579, + "Acc.ceiling": 0.9170999908447266, + "Acc.road": 0.9012999725341797, + "Acc.bed ": 0.9516999816894531, + "Acc.windowpane": 0.7655999755859375, + "Acc.grass": 0.7947000122070312, + "Acc.cabinet": 0.719800033569336, + "Acc.sidewalk": 0.7902999877929687, + "Acc.person": 0.9212999725341797, + "Acc.earth": 0.5209999847412109, + "Acc.door": 0.6661000061035156, + "Acc.table": 0.7294000244140625, + "Acc.mountain": 0.6845999908447266, + "Acc.plant": 0.6197999954223633, + "Acc.curtain": 0.8587000274658203, + "Acc.chair": 0.6606999969482422, + "Acc.car": 0.9212999725341797, + "Acc.water": 0.6669999694824219, + "Acc.painting": 0.8586000061035156, + "Acc.sofa": 0.8081999969482422, + "Acc.shelf": 0.6823000335693359, + "Acc.house": 0.6552999877929687, + "Acc.sea": 0.8991000366210937, + "Acc.mirror": 0.7291000366210938, + "Acc.rug": 0.7455000305175781, + "Acc.field": 0.5661999893188476, + "Acc.armchair": 0.6131999969482422, + "Acc.seat": 0.8413999938964843, + "Acc.fence": 0.5436999893188477, + "Acc.desk": 0.704800033569336, + "Acc.rock": 0.6941000366210938, + "Acc.wardrobe": 0.6211999893188477, + "Acc.lamp": 0.7045999908447266, + "Acc.bathtub": 0.8758999633789063, + "Acc.railing": 0.5191999816894531, + "Acc.cushion": 0.6991000366210938, + "Acc.base": 0.46169998168945314, + "Acc.box": 0.3486000061035156, + "Acc.column": 0.6195000076293945, + "Acc.signboard": 0.474900016784668, + "Acc.chest of drawers": 0.6088000106811523, + "Acc.counter": 0.4418000030517578, + "Acc.sand": 0.6945999908447266, + "Acc.sink": 0.7587000274658203, + "Acc.skyscraper": 0.7233999633789062, + "Acc.fireplace": 0.9248000335693359, + "Acc.refrigerator": 0.8626000213623047, + "Acc.grandstand": 0.75, + "Acc.path": 0.2780999946594238, + "Acc.stairs": 0.3579000091552734, + "Acc.runway": 0.7633000183105468, + "Acc.case": 
0.7251999664306641, + "Acc.pool table": 0.9711000061035157, + "Acc.pillow": 0.6726999664306641, + "Acc.screen door": 0.542400016784668, + "Acc.stairway": 0.5579999923706055, + "Acc.river": 0.23920000076293946, + "Acc.bridge": 0.8302999877929688, + "Acc.bookcase": 0.5466999816894531, + "Acc.blind": 0.43849998474121094, + "Acc.coffee table": 0.8205000305175781, + "Acc.toilet": 0.8888999938964843, + "Acc.flower": 0.5459000015258789, + "Acc.book": 0.6445999908447265, + "Acc.hill": 0.23829999923706055, + "Acc.bench": 0.542599983215332, + "Acc.countertop": 0.5984999847412109, + "Acc.stove": 0.8072000122070313, + "Acc.palm": 0.6637999725341797, + "Acc.kitchen island": 0.6077999877929687, + "Acc.computer": 0.900199966430664, + "Acc.swivel chair": 0.6475, + "Acc.boat": 0.8555000305175782, + "Acc.bar": 0.6004000091552735, + "Acc.arcade machine": 0.46919998168945315, + "Acc.hovel": 0.5881999969482422, + "Acc.bus": 0.9369999694824219, + "Acc.towel": 0.7388999938964844, + "Acc.light": 0.33439998626708983, + "Acc.truck": 0.45279998779296876, + "Acc.tower": 0.4197999954223633, + "Acc.chandelier": 0.7963999938964844, + "Acc.awning": 0.5070000076293946, + "Acc.streetlight": 0.22649999618530273, + "Acc.booth": 0.395099983215332, + "Acc.television receiver": 0.8054000091552734, + "Acc.airplane": 0.7116000366210937, + "Acc.dirt track": 0.18510000228881837, + "Acc.apparel": 0.5384000015258789, + "Acc.pole": 0.235, + "Acc.land": 0.0352999997138977, + "Acc.bannister": 0.16969999313354492, + "Acc.escalator": 0.397599983215332, + "Acc.ottoman": 0.5884999847412109, + "Acc.bottle": 0.4311999893188477, + "Acc.buffet": 0.6002000045776367, + "Acc.poster": 0.2613999938964844, + "Acc.stage": 0.255, + "Acc.van": 0.520099983215332, + "Acc.ship": 0.5018000030517578, + "Acc.fountain": 0.2134000015258789, + "Acc.conveyer belt": 0.8945999908447265, + "Acc.canopy": 0.3975, + "Acc.washer": 0.7218000030517578, + "Acc.plaything": 0.6022999954223632, + "Acc.swimming pool": 0.8351999664306641, + "Acc.stool": 0.45630001068115233, + "Acc.barrel": 0.635, + "Acc.basket": 0.3295999908447266, + "Acc.waterfall": 0.8156999969482421, + "Acc.tent": 0.9848999786376953, + "Acc.bag": 0.15270000457763672, + "Acc.minibike": 0.6554000091552734, + "Acc.cradle": 0.9787000274658203, + "Acc.oven": 0.5608000183105468, + "Acc.ball": 0.5611000061035156, + "Acc.food": 0.5740000152587891, + "Acc.step": 0.12159999847412109, + "Acc.tank": 0.6431999969482421, + "Acc.trade name": 0.31120000839233397, + "Acc.microwave": 0.48319999694824217, + "Acc.pot": 0.45549999237060546, + "Acc.animal": 0.6591999816894532, + "Acc.bicycle": 0.7240000152587891, + "Acc.lake": 0.6288999938964843, + "Acc.dishwasher": 0.6686000061035157, + "Acc.screen": 0.9141000366210937, + "Acc.blanket": 0.14229999542236327, + "Acc.sculpture": 0.7116000366210937, + "Acc.hood": 0.572400016784668, + "Acc.sconce": 0.41509998321533204, + "Acc.vase": 0.385099983215332, + "Acc.traffic light": 0.397599983215332, + "Acc.tray": 0.034200000762939456, + "Acc.ashcan": 0.5268000030517578, + "Acc.fan": 0.6595999908447265, + "Acc.pier": 0.48380001068115236, + "Acc.crt screen": 0.08060000419616699, + "Acc.plate": 0.6361999893188477, + "Acc.monitor": 0.12130000114440918, + "Acc.bulletin board": 0.6063999938964844, + "Acc.shower": 0.005299999713897705, + "Acc.radiator": 0.6052000045776367, + "Acc.glass": 0.0884000015258789, + "Acc.clock": 0.33049999237060546, + "Acc.flag": 0.44979999542236326 + } + }, + "85": { + "config": 
"configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8320000000000001, + "mIoU": 0.489, + "mAcc": 0.6031, + "IoU.wall": 0.7698000335693359, + "IoU.building": 0.8312000274658203, + "IoU.sky": 0.9383999633789063, + "IoU.floor": 0.8234999847412109, + "IoU.tree": 0.7445999908447266, + "IoU.ceiling": 0.8379000091552734, + "IoU.road": 0.8362000274658203, + "IoU.bed ": 0.8898999786376953, + "IoU.windowpane": 0.6266999816894532, + "IoU.grass": 0.6901000213623046, + "IoU.cabinet": 0.6225, + "IoU.sidewalk": 0.655, + "IoU.person": 0.800199966430664, + "IoU.earth": 0.39, + "IoU.door": 0.49700000762939456, + "IoU.table": 0.5979000091552734, + "IoU.mountain": 0.5761000061035156, + "IoU.plant": 0.5186999893188476, + "IoU.curtain": 0.7251000213623047, + "IoU.chair": 0.544099998474121, + "IoU.car": 0.8373999786376953, + "IoU.water": 0.5399000167846679, + "IoU.painting": 0.6875, + "IoU.sofa": 0.6809999847412109, + "IoU.shelf": 0.4438000106811523, + "IoU.house": 0.5009000015258789, + "IoU.sea": 0.5959000015258789, + "IoU.mirror": 0.6791000366210938, + "IoU.rug": 0.6708999633789062, + "IoU.field": 0.32540000915527345, + "IoU.armchair": 0.4484000015258789, + "IoU.seat": 0.6652999877929687, + "IoU.fence": 0.43099998474121093, + "IoU.desk": 0.4702999877929688, + "IoU.rock": 0.48529998779296873, + "IoU.wardrobe": 0.5741999816894531, + "IoU.lamp": 0.5627999877929688, + "IoU.bathtub": 0.7987000274658204, + "IoU.railing": 0.37970001220703126, + "IoU.cushion": 0.567599983215332, + "IoU.base": 0.3004999923706055, + "IoU.box": 0.2265999984741211, + "IoU.column": 0.4856999969482422, + "IoU.signboard": 0.35310001373291017, + "IoU.chest of drawers": 0.37189998626708987, + "IoU.counter": 0.4234999847412109, + "IoU.sand": 0.5690000152587891, + "IoU.sink": 0.6709999847412109, + "IoU.skyscraper": 0.5470000076293945, + "IoU.fireplace": 0.6994000244140625, + "IoU.refrigerator": 0.7573999786376953, + "IoU.grandstand": 0.47259998321533203, + "IoU.path": 0.22540000915527345, + "IoU.stairs": 0.2957999992370606, + "IoU.runway": 0.6675, + "IoU.case": 0.6719000244140625, + "IoU.pool table": 0.9309999847412109, + "IoU.pillow": 0.5886999893188477, + "IoU.screen door": 0.5161999893188477, + "IoU.stairway": 0.35810001373291017, + "IoU.river": 0.19840000152587892, + "IoU.bridge": 0.6754000091552734, + "IoU.bookcase": 0.33619998931884765, + "IoU.blind": 0.40970001220703123, + "IoU.coffee table": 0.6202000045776367, + "IoU.toilet": 0.7830999755859375, + "IoU.flower": 0.35520000457763673, + "IoU.book": 0.44720001220703126, + "IoU.hill": 0.13039999961853027, + "IoU.bench": 0.48509998321533204, + "IoU.countertop": 0.47139999389648435, + "IoU.stove": 0.6868000030517578, + "IoU.palm": 0.47119998931884766, + "IoU.kitchen island": 0.42270000457763673, + "IoU.computer": 0.7423000335693359, + "IoU.swivel chair": 0.4406999969482422, + "IoU.boat": 0.696500015258789, + "IoU.bar": 0.6075, + "IoU.arcade machine": 0.34330001831054685, + "IoU.hovel": 0.5061999893188477, + "IoU.bus": 0.9030999755859375, + "IoU.towel": 0.6243999862670898, + "IoU.light": 0.34369998931884765, + "IoU.truck": 0.3164999961853027, + "IoU.tower": 0.31, + "IoU.chandelier": 0.6268000030517578, + "IoU.awning": 0.2581999969482422, + "IoU.streetlight": 0.15310000419616698, + "IoU.booth": 0.33299999237060546, + "IoU.television receiver": 0.7023000335693359, + "IoU.airplane": 0.6188000106811523, + "IoU.dirt track": 0.0027000001072883606, + "IoU.apparel": 0.32, + "IoU.pole": 0.17549999237060546, + "IoU.land": 
0.022699999809265136, + "IoU.bannister": 0.11949999809265137, + "IoU.escalator": 0.5595999908447266, + "IoU.ottoman": 0.4333000183105469, + "IoU.bottle": 0.28950000762939454, + "IoU.buffet": 0.3656999969482422, + "IoU.poster": 0.22540000915527345, + "IoU.stage": 0.15800000190734864, + "IoU.van": 0.3718000030517578, + "IoU.ship": 0.7643000030517578, + "IoU.fountain": 0.1803000068664551, + "IoU.conveyer belt": 0.7933000183105469, + "IoU.canopy": 0.20399999618530273, + "IoU.washer": 0.7230999755859375, + "IoU.plaything": 0.263700008392334, + "IoU.swimming pool": 0.7381999969482422, + "IoU.stool": 0.34060001373291016, + "IoU.barrel": 0.5465999984741211, + "IoU.basket": 0.24309999465942383, + "IoU.waterfall": 0.6648000335693359, + "IoU.tent": 0.8445999908447266, + "IoU.bag": 0.130600004196167, + "IoU.minibike": 0.6569000244140625, + "IoU.cradle": 0.8219000244140625, + "IoU.oven": 0.22940000534057617, + "IoU.ball": 0.43990001678466795, + "IoU.food": 0.5659000015258789, + "IoU.step": 0.09010000228881836, + "IoU.tank": 0.5563000106811523, + "IoU.trade name": 0.273700008392334, + "IoU.microwave": 0.4534000015258789, + "IoU.pot": 0.422400016784668, + "IoU.animal": 0.6583999633789063, + "IoU.bicycle": 0.5552999877929687, + "IoU.lake": 0.585, + "IoU.dishwasher": 0.5470000076293945, + "IoU.screen": 0.5695999908447266, + "IoU.blanket": 0.12859999656677246, + "IoU.sculpture": 0.547400016784668, + "IoU.hood": 0.552599983215332, + "IoU.sconce": 0.31170000076293947, + "IoU.vase": 0.3104999923706055, + "IoU.traffic light": 0.24209999084472655, + "IoU.tray": 0.019500000476837157, + "IoU.ashcan": 0.3640999984741211, + "IoU.fan": 0.5095999908447265, + "IoU.pier": 0.3168000030517578, + "IoU.crt screen": 0.057800002098083496, + "IoU.plate": 0.4941999816894531, + "IoU.monitor": 0.34700000762939454, + "IoU.bulletin board": 0.42970001220703125, + "IoU.shower": 0.0010000000149011613, + "IoU.radiator": 0.6122000122070312, + "IoU.glass": 0.08310000419616699, + "IoU.clock": 0.2752000045776367, + "IoU.flag": 0.4272999954223633, + "Acc.wall": 0.877300033569336, + "Acc.building": 0.9288999938964844, + "Acc.sky": 0.9766000366210937, + "Acc.floor": 0.9131999969482422, + "Acc.tree": 0.8666000366210938, + "Acc.ceiling": 0.9206999969482422, + "Acc.road": 0.8955999755859375, + "Acc.bed ": 0.9558999633789063, + "Acc.windowpane": 0.7815000152587891, + "Acc.grass": 0.8048999786376954, + "Acc.cabinet": 0.7304000091552735, + "Acc.sidewalk": 0.800199966430664, + "Acc.person": 0.92, + "Acc.earth": 0.5697999954223633, + "Acc.door": 0.6827999877929688, + "Acc.table": 0.7491000366210937, + "Acc.mountain": 0.7055999755859375, + "Acc.plant": 0.6416000366210938, + "Acc.curtain": 0.8556999969482422, + "Acc.chair": 0.6615000152587891, + "Acc.car": 0.925199966430664, + "Acc.water": 0.7169000244140625, + "Acc.painting": 0.8651999664306641, + "Acc.sofa": 0.8370999908447265, + "Acc.shelf": 0.6494000244140625, + "Acc.house": 0.6770999908447266, + "Acc.sea": 0.7848999786376953, + "Acc.mirror": 0.7691000366210937, + "Acc.rug": 0.7486000061035156, + "Acc.field": 0.5259999847412109, + "Acc.armchair": 0.6705000305175781, + "Acc.seat": 0.8379000091552734, + "Acc.fence": 0.589900016784668, + "Acc.desk": 0.6587000274658203, + "Acc.rock": 0.7448999786376953, + "Acc.wardrobe": 0.6845999908447266, + "Acc.lamp": 0.6951000213623046, + "Acc.bathtub": 0.8562000274658204, + "Acc.railing": 0.5479999923706055, + "Acc.cushion": 0.7030999755859375, + "Acc.base": 0.4534000015258789, + "Acc.box": 0.30489999771118165, + "Acc.column": 0.6022999954223632, + 
"Acc.signboard": 0.46290000915527346, + "Acc.chest of drawers": 0.6243999862670898, + "Acc.counter": 0.5215000152587891, + "Acc.sand": 0.7380000305175781, + "Acc.sink": 0.74, + "Acc.skyscraper": 0.6254999923706055, + "Acc.fireplace": 0.8888999938964843, + "Acc.refrigerator": 0.8109999847412109, + "Acc.grandstand": 0.721500015258789, + "Acc.path": 0.3081999969482422, + "Acc.stairs": 0.3913999938964844, + "Acc.runway": 0.8483000183105469, + "Acc.case": 0.8295999908447266, + "Acc.pool table": 0.9712999725341797, + "Acc.pillow": 0.6848999786376954, + "Acc.screen door": 0.5977999877929687, + "Acc.stairway": 0.46040000915527346, + "Acc.river": 0.3345999908447266, + "Acc.bridge": 0.8125, + "Acc.bookcase": 0.5581000137329102, + "Acc.blind": 0.45689998626708983, + "Acc.coffee table": 0.8086000061035157, + "Acc.toilet": 0.8894000244140625, + "Acc.flower": 0.540099983215332, + "Acc.book": 0.6554000091552734, + "Acc.hill": 0.23079999923706054, + "Acc.bench": 0.5745999908447266, + "Acc.countertop": 0.6243000030517578, + "Acc.stove": 0.7998000335693359, + "Acc.palm": 0.6762999725341797, + "Acc.kitchen island": 0.6805000305175781, + "Acc.computer": 0.8755999755859375, + "Acc.swivel chair": 0.622599983215332, + "Acc.boat": 0.8506999969482422, + "Acc.bar": 0.7602999877929687, + "Acc.arcade machine": 0.37240001678466794, + "Acc.hovel": 0.5829000091552734, + "Acc.bus": 0.9455999755859374, + "Acc.towel": 0.7563999938964844, + "Acc.light": 0.37740001678466795, + "Acc.truck": 0.4570999908447266, + "Acc.tower": 0.4125, + "Acc.chandelier": 0.7869999694824219, + "Acc.awning": 0.2986000061035156, + "Acc.streetlight": 0.17790000915527343, + "Acc.booth": 0.41439998626708985, + "Acc.television receiver": 0.8293000030517578, + "Acc.airplane": 0.6870999908447266, + "Acc.dirt track": 0.011200000047683716, + "Acc.apparel": 0.46580001831054685, + "Acc.pole": 0.23049999237060548, + "Acc.land": 0.03009999990463257, + "Acc.bannister": 0.16540000915527345, + "Acc.escalator": 0.7738999938964843, + "Acc.ottoman": 0.5720999908447265, + "Acc.bottle": 0.4072000122070312, + "Acc.buffet": 0.4333000183105469, + "Acc.poster": 0.306299991607666, + "Acc.stage": 0.23209999084472657, + "Acc.van": 0.48209999084472654, + "Acc.ship": 0.8313999938964843, + "Acc.fountain": 0.21549999237060546, + "Acc.conveyer belt": 0.9141000366210937, + "Acc.canopy": 0.32599998474121095, + "Acc.washer": 0.7283999633789062, + "Acc.plaything": 0.38860000610351564, + "Acc.swimming pool": 0.8119000244140625, + "Acc.stool": 0.40380001068115234, + "Acc.barrel": 0.6470999908447266, + "Acc.basket": 0.3277000045776367, + "Acc.waterfall": 0.7726000213623047, + "Acc.tent": 0.9844999694824219, + "Acc.bag": 0.149399995803833, + "Acc.minibike": 0.7822000122070313, + "Acc.cradle": 0.9701000213623047, + "Acc.oven": 0.5861999893188476, + "Acc.ball": 0.46540000915527346, + "Acc.food": 0.6952999877929688, + "Acc.step": 0.12380000114440919, + "Acc.tank": 0.645, + "Acc.trade name": 0.3129999923706055, + "Acc.microwave": 0.5034000015258789, + "Acc.pot": 0.49450000762939456, + "Acc.animal": 0.7061000061035156, + "Acc.bicycle": 0.7312999725341797, + "Acc.lake": 0.6122000122070312, + "Acc.dishwasher": 0.6481999969482422, + "Acc.screen": 0.7405000305175782, + "Acc.blanket": 0.13960000038146972, + "Acc.sculpture": 0.6763999938964844, + "Acc.hood": 0.6063999938964844, + "Acc.sconce": 0.4031999969482422, + "Acc.vase": 0.387599983215332, + "Acc.traffic light": 0.355, + "Acc.tray": 0.025099999904632568, + "Acc.ashcan": 0.4906999969482422, + "Acc.fan": 0.6609999847412109, + "Acc.pier": 
0.4718000030517578, + "Acc.crt screen": 0.1084000015258789, + "Acc.plate": 0.6531999969482422, + "Acc.monitor": 0.48020000457763673, + "Acc.bulletin board": 0.6163999938964844, + "Acc.shower": 0.008199999928474427, + "Acc.radiator": 0.6929000091552734, + "Acc.glass": 0.08550000190734863, + "Acc.clock": 0.3370999908447266, + "Acc.flag": 0.47369998931884766 + } + }, + "86": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8338, + "mIoU": 0.4894, + "mAcc": 0.6014, + "IoU.wall": 0.7756999969482422, + "IoU.building": 0.8273999786376953, + "IoU.sky": 0.94, + "IoU.floor": 0.8213999938964843, + "IoU.tree": 0.7544999694824219, + "IoU.ceiling": 0.839800033569336, + "IoU.road": 0.8294999694824219, + "IoU.bed ": 0.8906999969482422, + "IoU.windowpane": 0.6236999893188476, + "IoU.grass": 0.7041999816894531, + "IoU.cabinet": 0.6231000137329101, + "IoU.sidewalk": 0.6487000274658203, + "IoU.person": 0.8056999969482422, + "IoU.earth": 0.3793999862670898, + "IoU.door": 0.5084999847412109, + "IoU.table": 0.6025, + "IoU.mountain": 0.5709999847412109, + "IoU.plant": 0.5386999893188477, + "IoU.curtain": 0.7320999908447265, + "IoU.chair": 0.5613999938964844, + "IoU.car": 0.8343000030517578, + "IoU.water": 0.5800999832153321, + "IoU.painting": 0.6961000061035156, + "IoU.sofa": 0.6779000091552735, + "IoU.shelf": 0.4316999816894531, + "IoU.house": 0.47189998626708984, + "IoU.sea": 0.6930000305175781, + "IoU.mirror": 0.6731999969482422, + "IoU.rug": 0.6377999877929688, + "IoU.field": 0.4047999954223633, + "IoU.armchair": 0.4420000076293945, + "IoU.seat": 0.6651000213623047, + "IoU.fence": 0.47869998931884766, + "IoU.desk": 0.492400016784668, + "IoU.rock": 0.4859000015258789, + "IoU.wardrobe": 0.5631000137329102, + "IoU.lamp": 0.5608000183105468, + "IoU.bathtub": 0.8380000305175781, + "IoU.railing": 0.38279998779296875, + "IoU.cushion": 0.5786999893188477, + "IoU.base": 0.33560001373291015, + "IoU.box": 0.2352000045776367, + "IoU.column": 0.4704000091552734, + "IoU.signboard": 0.3572999954223633, + "IoU.chest of drawers": 0.349900016784668, + "IoU.counter": 0.40889999389648435, + "IoU.sand": 0.4672999954223633, + "IoU.sink": 0.6737000274658204, + "IoU.skyscraper": 0.5072000122070313, + "IoU.fireplace": 0.6995999908447266, + "IoU.refrigerator": 0.7295999908447266, + "IoU.grandstand": 0.5106999969482422, + "IoU.path": 0.18920000076293944, + "IoU.stairs": 0.293700008392334, + "IoU.runway": 0.6381000137329101, + "IoU.case": 0.6465000152587891, + "IoU.pool table": 0.930199966430664, + "IoU.pillow": 0.5970999908447265, + "IoU.screen door": 0.6070999908447265, + "IoU.stairway": 0.3683000183105469, + "IoU.river": 0.17780000686645508, + "IoU.bridge": 0.5988999938964844, + "IoU.bookcase": 0.32619998931884764, + "IoU.blind": 0.4054000091552734, + "IoU.coffee table": 0.6218000030517579, + "IoU.toilet": 0.7661000061035156, + "IoU.flower": 0.3766999816894531, + "IoU.book": 0.4483000183105469, + "IoU.hill": 0.1534000015258789, + "IoU.bench": 0.4527000045776367, + "IoU.countertop": 0.5468999862670898, + "IoU.stove": 0.7340000152587891, + "IoU.palm": 0.49520000457763674, + "IoU.kitchen island": 0.482400016784668, + "IoU.computer": 0.7506999969482422, + "IoU.swivel chair": 0.5202999877929687, + "IoU.boat": 0.6468000030517578, + "IoU.bar": 0.552599983215332, + "IoU.arcade machine": 0.7138999938964844, + "IoU.hovel": 0.5538999938964844, + "IoU.bus": 0.8904000091552734, + "IoU.towel": 0.6341999816894531, + "IoU.light": 0.33779998779296877, + 
"IoU.truck": 0.22799999237060548, + "IoU.tower": 0.2723999977111816, + "IoU.chandelier": 0.6313999938964844, + "IoU.awning": 0.31020000457763675, + "IoU.streetlight": 0.16170000076293944, + "IoU.booth": 0.43, + "IoU.television receiver": 0.7066000366210937, + "IoU.airplane": 0.6227999877929687, + "IoU.dirt track": 0.0033000001311302186, + "IoU.apparel": 0.3413999938964844, + "IoU.pole": 0.19040000915527344, + "IoU.land": 0.01190000057220459, + "IoU.bannister": 0.10199999809265137, + "IoU.escalator": 0.5411000061035156, + "IoU.ottoman": 0.4911999893188477, + "IoU.bottle": 0.3606999969482422, + "IoU.buffet": 0.3659000015258789, + "IoU.poster": 0.24709999084472656, + "IoU.stage": 0.13859999656677247, + "IoU.van": 0.3766999816894531, + "IoU.ship": 0.11670000076293946, + "IoU.fountain": 0.17879999160766602, + "IoU.conveyer belt": 0.7975, + "IoU.canopy": 0.20959999084472655, + "IoU.washer": 0.6787999725341797, + "IoU.plaything": 0.38029998779296875, + "IoU.swimming pool": 0.7390000152587891, + "IoU.stool": 0.30739999771118165, + "IoU.barrel": 0.5709000015258789, + "IoU.basket": 0.31860000610351563, + "IoU.waterfall": 0.5359000015258789, + "IoU.tent": 0.9402999877929688, + "IoU.bag": 0.1234000015258789, + "IoU.minibike": 0.640199966430664, + "IoU.cradle": 0.8462000274658203, + "IoU.oven": 0.4765999984741211, + "IoU.ball": 0.251200008392334, + "IoU.food": 0.5511999893188476, + "IoU.step": 0.08979999542236328, + "IoU.tank": 0.5754999923706055, + "IoU.trade name": 0.27350000381469725, + "IoU.microwave": 0.798499984741211, + "IoU.pot": 0.44290000915527344, + "IoU.animal": 0.6225, + "IoU.bicycle": 0.5243000030517578, + "IoU.lake": 0.06860000133514405, + "IoU.dishwasher": 0.515099983215332, + "IoU.screen": 0.5141999816894531, + "IoU.blanket": 0.13760000228881836, + "IoU.sculpture": 0.5438999938964844, + "IoU.hood": 0.5595000076293946, + "IoU.sconce": 0.32939998626708983, + "IoU.vase": 0.31940000534057617, + "IoU.traffic light": 0.2613999938964844, + "IoU.tray": 0.019700000286102293, + "IoU.ashcan": 0.37490001678466794, + "IoU.fan": 0.4958000183105469, + "IoU.pier": 0.3945000076293945, + "IoU.crt screen": 0.0475, + "IoU.plate": 0.47759998321533204, + "IoU.monitor": 0.2531999969482422, + "IoU.bulletin board": 0.349900016784668, + "IoU.shower": 0.009200000166893006, + "IoU.radiator": 0.5452000045776367, + "IoU.glass": 0.1015999984741211, + "IoU.clock": 0.2556999969482422, + "IoU.flag": 0.5488999938964844, + "Acc.wall": 0.8816000366210938, + "Acc.building": 0.9226999664306641, + "Acc.sky": 0.9762999725341797, + "Acc.floor": 0.9126000213623047, + "Acc.tree": 0.8688999938964844, + "Acc.ceiling": 0.9266000366210938, + "Acc.road": 0.8944999694824218, + "Acc.bed ": 0.9518000030517578, + "Acc.windowpane": 0.7752999877929687, + "Acc.grass": 0.8316999816894531, + "Acc.cabinet": 0.7318000030517579, + "Acc.sidewalk": 0.7931999969482422, + "Acc.person": 0.9216999816894531, + "Acc.earth": 0.5611000061035156, + "Acc.door": 0.6973999786376953, + "Acc.table": 0.7633000183105468, + "Acc.mountain": 0.6993000030517578, + "Acc.plant": 0.6636000061035157, + "Acc.curtain": 0.8562000274658204, + "Acc.chair": 0.6893000030517578, + "Acc.car": 0.919000015258789, + "Acc.water": 0.7705999755859375, + "Acc.painting": 0.8586000061035156, + "Acc.sofa": 0.8176000213623047, + "Acc.shelf": 0.6043000030517578, + "Acc.house": 0.715, + "Acc.sea": 0.8758999633789063, + "Acc.mirror": 0.7661000061035156, + "Acc.rug": 0.7219000244140625, + "Acc.field": 0.5856000137329102, + "Acc.armchair": 0.6769999694824219, + "Acc.seat": 0.8258000183105468, + 
"Acc.fence": 0.6594999694824218, + "Acc.desk": 0.6858999633789062, + "Acc.rock": 0.7173000335693359, + "Acc.wardrobe": 0.6863999938964844, + "Acc.lamp": 0.6940000152587891, + "Acc.bathtub": 0.8898000335693359, + "Acc.railing": 0.5286000061035157, + "Acc.cushion": 0.7280999755859375, + "Acc.base": 0.5402999877929687, + "Acc.box": 0.32990001678466796, + "Acc.column": 0.6074000167846679, + "Acc.signboard": 0.4620999908447266, + "Acc.chest of drawers": 0.5688000106811524, + "Acc.counter": 0.5111999893188477, + "Acc.sand": 0.6309999847412109, + "Acc.sink": 0.7444999694824219, + "Acc.skyscraper": 0.5918000030517578, + "Acc.fireplace": 0.8936000061035156, + "Acc.refrigerator": 0.8080000305175781, + "Acc.grandstand": 0.759000015258789, + "Acc.path": 0.24950000762939453, + "Acc.stairs": 0.3783000183105469, + "Acc.runway": 0.8126000213623047, + "Acc.case": 0.8141999816894532, + "Acc.pool table": 0.9679000091552734, + "Acc.pillow": 0.7016000366210937, + "Acc.screen door": 0.6987000274658203, + "Acc.stairway": 0.4588999938964844, + "Acc.river": 0.2777000045776367, + "Acc.bridge": 0.7188999938964844, + "Acc.bookcase": 0.5672999954223633, + "Acc.blind": 0.44, + "Acc.coffee table": 0.8008999633789062, + "Acc.toilet": 0.8981999969482422, + "Acc.flower": 0.5706000137329101, + "Acc.book": 0.6286999893188476, + "Acc.hill": 0.25629999160766603, + "Acc.bench": 0.5272000122070313, + "Acc.countertop": 0.6875, + "Acc.stove": 0.8229000091552734, + "Acc.palm": 0.6980999755859375, + "Acc.kitchen island": 0.739800033569336, + "Acc.computer": 0.8937000274658203, + "Acc.swivel chair": 0.6676000213623047, + "Acc.boat": 0.834800033569336, + "Acc.bar": 0.6809999847412109, + "Acc.arcade machine": 0.7661000061035156, + "Acc.hovel": 0.6113000106811524, + "Acc.bus": 0.9338999938964844, + "Acc.towel": 0.7695999908447265, + "Acc.light": 0.3622999954223633, + "Acc.truck": 0.3243000030517578, + "Acc.tower": 0.369900016784668, + "Acc.chandelier": 0.7868000030517578, + "Acc.awning": 0.3806999969482422, + "Acc.streetlight": 0.18899999618530272, + "Acc.booth": 0.48220001220703124, + "Acc.television receiver": 0.8227999877929687, + "Acc.airplane": 0.7005999755859375, + "Acc.dirt track": 0.013899999856948852, + "Acc.apparel": 0.4625, + "Acc.pole": 0.25190000534057616, + "Acc.land": 0.019500000476837157, + "Acc.bannister": 0.1559000015258789, + "Acc.escalator": 0.8062000274658203, + "Acc.ottoman": 0.659000015258789, + "Acc.bottle": 0.6309999847412109, + "Acc.buffet": 0.44369998931884763, + "Acc.poster": 0.3135000038146973, + "Acc.stage": 0.2538999938964844, + "Acc.van": 0.47130001068115235, + "Acc.ship": 0.12789999961853027, + "Acc.fountain": 0.2125, + "Acc.conveyer belt": 0.9177999877929688, + "Acc.canopy": 0.29270000457763673, + "Acc.washer": 0.7340000152587891, + "Acc.plaything": 0.5709999847412109, + "Acc.swimming pool": 0.8252999877929688, + "Acc.stool": 0.37759998321533206, + "Acc.barrel": 0.643499984741211, + "Acc.basket": 0.38220001220703126, + "Acc.waterfall": 0.63, + "Acc.tent": 0.9837999725341797, + "Acc.bag": 0.13800000190734862, + "Acc.minibike": 0.7038999938964844, + "Acc.cradle": 0.9684999847412109, + "Acc.oven": 0.6630999755859375, + "Acc.ball": 0.2640999984741211, + "Acc.food": 0.6313000106811524, + "Acc.step": 0.11920000076293945, + "Acc.tank": 0.655, + "Acc.trade name": 0.3096999931335449, + "Acc.microwave": 0.8683000183105469, + "Acc.pot": 0.5243999862670898, + "Acc.animal": 0.6615000152587891, + "Acc.bicycle": 0.72, + "Acc.lake": 0.0728000020980835, + "Acc.dishwasher": 0.6229000091552734, + "Acc.screen": 
0.7301000213623047, + "Acc.blanket": 0.14960000038146973, + "Acc.sculpture": 0.6894999694824219, + "Acc.hood": 0.6395999908447265, + "Acc.sconce": 0.41189998626708985, + "Acc.vase": 0.40430000305175784, + "Acc.traffic light": 0.3738000106811523, + "Acc.tray": 0.02190000057220459, + "Acc.ashcan": 0.492400016784668, + "Acc.fan": 0.590900001525879, + "Acc.pier": 0.5472000122070313, + "Acc.crt screen": 0.11229999542236328, + "Acc.plate": 0.6561000061035156, + "Acc.monitor": 0.31420000076293947, + "Acc.bulletin board": 0.515, + "Acc.shower": 0.05, + "Acc.radiator": 0.5990000152587891, + "Acc.glass": 0.10579999923706054, + "Acc.clock": 0.3104999923706055, + "Acc.flag": 0.6052999877929688 + } + }, + "87": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8347, + "mIoU": 0.49119999999999997, + "mAcc": 0.601, + "IoU.wall": 0.7773999786376953, + "IoU.building": 0.8304000091552735, + "IoU.sky": 0.9379000091552734, + "IoU.floor": 0.8183999633789063, + "IoU.tree": 0.7444999694824219, + "IoU.ceiling": 0.8394999694824219, + "IoU.road": 0.8291999816894531, + "IoU.bed ": 0.8958000183105469, + "IoU.windowpane": 0.6204000091552735, + "IoU.grass": 0.6926000213623047, + "IoU.cabinet": 0.6366999816894531, + "IoU.sidewalk": 0.6512000274658203, + "IoU.person": 0.8081999969482422, + "IoU.earth": 0.37779998779296875, + "IoU.door": 0.5163999938964844, + "IoU.table": 0.6081000137329101, + "IoU.mountain": 0.5813999938964843, + "IoU.plant": 0.524900016784668, + "IoU.curtain": 0.725, + "IoU.chair": 0.567400016784668, + "IoU.car": 0.842699966430664, + "IoU.water": 0.5720000076293945, + "IoU.painting": 0.7070999908447265, + "IoU.sofa": 0.6887000274658203, + "IoU.shelf": 0.42770000457763674, + "IoU.house": 0.4816999816894531, + "IoU.sea": 0.6769999694824219, + "IoU.mirror": 0.6901000213623046, + "IoU.rug": 0.6404000091552734, + "IoU.field": 0.4022999954223633, + "IoU.armchair": 0.4418000030517578, + "IoU.seat": 0.67, + "IoU.fence": 0.46610000610351565, + "IoU.desk": 0.4906000137329102, + "IoU.rock": 0.5011999893188477, + "IoU.wardrobe": 0.5663000106811523, + "IoU.lamp": 0.5613999938964844, + "IoU.bathtub": 0.8516999816894532, + "IoU.railing": 0.3838999938964844, + "IoU.cushion": 0.5920999908447265, + "IoU.base": 0.30590000152587893, + "IoU.box": 0.2384000015258789, + "IoU.column": 0.48830001831054687, + "IoU.signboard": 0.35439998626708985, + "IoU.chest of drawers": 0.34240001678466797, + "IoU.counter": 0.39240001678466796, + "IoU.sand": 0.4722999954223633, + "IoU.sink": 0.6997000122070313, + "IoU.skyscraper": 0.5197000122070312, + "IoU.fireplace": 0.715, + "IoU.refrigerator": 0.7495999908447266, + "IoU.grandstand": 0.47189998626708984, + "IoU.path": 0.196299991607666, + "IoU.stairs": 0.27030000686645506, + "IoU.runway": 0.6002999877929688, + "IoU.case": 0.6113999938964844, + "IoU.pool table": 0.9326999664306641, + "IoU.pillow": 0.5772000122070312, + "IoU.screen door": 0.5611999893188476, + "IoU.stairway": 0.4275, + "IoU.river": 0.1793000030517578, + "IoU.bridge": 0.6252000045776367, + "IoU.bookcase": 0.345, + "IoU.blind": 0.40099998474121096, + "IoU.coffee table": 0.6363000106811524, + "IoU.toilet": 0.8386000061035156, + "IoU.flower": 0.41970001220703124, + "IoU.book": 0.42830001831054687, + "IoU.hill": 0.13989999771118164, + "IoU.bench": 0.45689998626708983, + "IoU.countertop": 0.5693000030517578, + "IoU.stove": 0.7679000091552735, + "IoU.palm": 0.4911000061035156, + "IoU.kitchen island": 0.465099983215332, + "IoU.computer": 
0.7416000366210938, + "IoU.swivel chair": 0.5141999816894531, + "IoU.boat": 0.6583999633789063, + "IoU.bar": 0.5308000183105469, + "IoU.arcade machine": 0.5061000061035156, + "IoU.hovel": 0.21149999618530274, + "IoU.bus": 0.9031999969482422, + "IoU.towel": 0.6140000152587891, + "IoU.light": 0.29149999618530276, + "IoU.truck": 0.23909999847412108, + "IoU.tower": 0.2297999954223633, + "IoU.chandelier": 0.6347999954223633, + "IoU.awning": 0.24729999542236328, + "IoU.streetlight": 0.17040000915527342, + "IoU.booth": 0.3575, + "IoU.television receiver": 0.7068000030517578, + "IoU.airplane": 0.6158000183105469, + "IoU.dirt track": 0.03069999933242798, + "IoU.apparel": 0.3270000076293945, + "IoU.pole": 0.1725, + "IoU.land": 0.07730000019073487, + "IoU.bannister": 0.06010000228881836, + "IoU.escalator": 0.5429999923706055, + "IoU.ottoman": 0.504000015258789, + "IoU.bottle": 0.369900016784668, + "IoU.buffet": 0.4306999969482422, + "IoU.poster": 0.33869998931884765, + "IoU.stage": 0.15529999732971192, + "IoU.van": 0.31260000228881835, + "IoU.ship": 0.7666000366210938, + "IoU.fountain": 0.2134000015258789, + "IoU.conveyer belt": 0.7705999755859375, + "IoU.canopy": 0.27799999237060546, + "IoU.washer": 0.6234999847412109, + "IoU.plaything": 0.37349998474121093, + "IoU.swimming pool": 0.7677999877929688, + "IoU.stool": 0.3818000030517578, + "IoU.barrel": 0.2193000030517578, + "IoU.basket": 0.2628000068664551, + "IoU.waterfall": 0.5088000106811523, + "IoU.tent": 0.9527999877929687, + "IoU.bag": 0.15130000114440917, + "IoU.minibike": 0.7131999969482422, + "IoU.cradle": 0.7591999816894531, + "IoU.oven": 0.4290999984741211, + "IoU.ball": 0.3358000183105469, + "IoU.food": 0.5325999832153321, + "IoU.step": 0.07949999809265136, + "IoU.tank": 0.5922000122070312, + "IoU.trade name": 0.2525, + "IoU.microwave": 0.8087999725341797, + "IoU.pot": 0.49200000762939455, + "IoU.animal": 0.6261000061035156, + "IoU.bicycle": 0.5643999862670899, + "IoU.lake": 0.6588999938964843, + "IoU.dishwasher": 0.6116999816894532, + "IoU.screen": 0.5186999893188476, + "IoU.blanket": 0.1463000011444092, + "IoU.sculpture": 0.6277000045776367, + "IoU.hood": 0.4866999816894531, + "IoU.sconce": 0.27430000305175783, + "IoU.vase": 0.31510000228881835, + "IoU.traffic light": 0.24819999694824219, + "IoU.tray": 0.015499999523162842, + "IoU.ashcan": 0.32369998931884764, + "IoU.fan": 0.4311999893188477, + "IoU.pier": 0.29350000381469726, + "IoU.crt screen": 0.030199999809265136, + "IoU.plate": 0.5004999923706055, + "IoU.monitor": 0.19600000381469726, + "IoU.bulletin board": 0.35150001525878904, + "IoU.shower": 0.008100000023841859, + "IoU.radiator": 0.5331000137329102, + "IoU.glass": 0.10090000152587891, + "IoU.clock": 0.278700008392334, + "IoU.flag": 0.44610000610351563, + "Acc.wall": 0.8883000183105468, + "Acc.building": 0.9259999847412109, + "Acc.sky": 0.9769999694824218, + "Acc.floor": 0.9075, + "Acc.tree": 0.8706999969482422, + "Acc.ceiling": 0.9295999908447266, + "Acc.road": 0.899000015258789, + "Acc.bed ": 0.9566999816894531, + "Acc.windowpane": 0.779000015258789, + "Acc.grass": 0.810199966430664, + "Acc.cabinet": 0.7516999816894532, + "Acc.sidewalk": 0.7918000030517578, + "Acc.person": 0.92, + "Acc.earth": 0.543499984741211, + "Acc.door": 0.6943000030517578, + "Acc.table": 0.768499984741211, + "Acc.mountain": 0.7173999786376953, + "Acc.plant": 0.6633999633789063, + "Acc.curtain": 0.8419000244140625, + "Acc.chair": 0.696500015258789, + "Acc.car": 0.9315000152587891, + "Acc.water": 0.727699966430664, + "Acc.painting": 0.8369999694824218, + 
"Acc.sofa": 0.8412000274658203, + "Acc.shelf": 0.6095999908447266, + "Acc.house": 0.6994999694824219, + "Acc.sea": 0.8919999694824219, + "Acc.mirror": 0.772300033569336, + "Acc.rug": 0.7266000366210937, + "Acc.field": 0.5936000061035156, + "Acc.armchair": 0.6279999923706054, + "Acc.seat": 0.8445999908447266, + "Acc.fence": 0.6288000106811523, + "Acc.desk": 0.6741999816894532, + "Acc.rock": 0.7269000244140625, + "Acc.wardrobe": 0.677300033569336, + "Acc.lamp": 0.6704000091552734, + "Acc.bathtub": 0.8925, + "Acc.railing": 0.5161999893188477, + "Acc.cushion": 0.7481999969482422, + "Acc.base": 0.5084999847412109, + "Acc.box": 0.31040000915527344, + "Acc.column": 0.6031999969482422, + "Acc.signboard": 0.4684000015258789, + "Acc.chest of drawers": 0.56, + "Acc.counter": 0.5215000152587891, + "Acc.sand": 0.6218000030517579, + "Acc.sink": 0.7713999938964844, + "Acc.skyscraper": 0.6134000015258789, + "Acc.fireplace": 0.9061000061035156, + "Acc.refrigerator": 0.8105000305175781, + "Acc.grandstand": 0.7404000091552735, + "Acc.path": 0.2618000030517578, + "Acc.stairs": 0.35509998321533204, + "Acc.runway": 0.7638999938964843, + "Acc.case": 0.7562999725341797, + "Acc.pool table": 0.9626000213623047, + "Acc.pillow": 0.6601000213623047, + "Acc.screen door": 0.6427999877929688, + "Acc.stairway": 0.552400016784668, + "Acc.river": 0.3185000038146973, + "Acc.bridge": 0.7227999877929687, + "Acc.bookcase": 0.5918000030517578, + "Acc.blind": 0.43689998626708987, + "Acc.coffee table": 0.7798999786376953, + "Acc.toilet": 0.8955999755859375, + "Acc.flower": 0.5981000137329101, + "Acc.book": 0.5911000061035157, + "Acc.hill": 0.24059999465942383, + "Acc.bench": 0.5504999923706054, + "Acc.countertop": 0.7161000061035157, + "Acc.stove": 0.8362999725341796, + "Acc.palm": 0.6716000366210938, + "Acc.kitchen island": 0.6202999877929688, + "Acc.computer": 0.8837999725341796, + "Acc.swivel chair": 0.6365999984741211, + "Acc.boat": 0.829800033569336, + "Acc.bar": 0.6316999816894531, + "Acc.arcade machine": 0.5409999847412109, + "Acc.hovel": 0.23090000152587892, + "Acc.bus": 0.9458999633789062, + "Acc.towel": 0.7654000091552734, + "Acc.light": 0.3061000061035156, + "Acc.truck": 0.332599983215332, + "Acc.tower": 0.36540000915527343, + "Acc.chandelier": 0.7719000244140625, + "Acc.awning": 0.29670000076293945, + "Acc.streetlight": 0.19889999389648438, + "Acc.booth": 0.46360000610351565, + "Acc.television receiver": 0.7956999969482422, + "Acc.airplane": 0.6881999969482422, + "Acc.dirt track": 0.1361999988555908, + "Acc.apparel": 0.44240001678466795, + "Acc.pole": 0.21940000534057616, + "Acc.land": 0.10189999580383301, + "Acc.bannister": 0.07800000190734863, + "Acc.escalator": 0.6966000366210937, + "Acc.ottoman": 0.6609999847412109, + "Acc.bottle": 0.582599983215332, + "Acc.buffet": 0.4954999923706055, + "Acc.poster": 0.5172000122070313, + "Acc.stage": 0.23940000534057618, + "Acc.van": 0.3747999954223633, + "Acc.ship": 0.8156999969482421, + "Acc.fountain": 0.21739999771118165, + "Acc.conveyer belt": 0.9155000305175781, + "Acc.canopy": 0.3375, + "Acc.washer": 0.7258000183105469, + "Acc.plaything": 0.5593000030517579, + "Acc.swimming pool": 0.8825, + "Acc.stool": 0.4463999938964844, + "Acc.barrel": 0.6441000366210937, + "Acc.basket": 0.335, + "Acc.waterfall": 0.6006999969482422, + "Acc.tent": 0.977699966430664, + "Acc.bag": 0.17450000762939452, + "Acc.minibike": 0.8106999969482422, + "Acc.cradle": 0.96, + "Acc.oven": 0.5120000076293946, + "Acc.ball": 0.37220001220703125, + "Acc.food": 0.620099983215332, + "Acc.step": 
0.10439999580383301, + "Acc.tank": 0.6497000122070312, + "Acc.trade name": 0.27790000915527346, + "Acc.microwave": 0.899000015258789, + "Acc.pot": 0.5829999923706055, + "Acc.animal": 0.6644000244140625, + "Acc.bicycle": 0.6873999786376953, + "Acc.lake": 0.6856999969482422, + "Acc.dishwasher": 0.7255000305175782, + "Acc.screen": 0.7318000030517579, + "Acc.blanket": 0.16399999618530273, + "Acc.sculpture": 0.7745999908447265, + "Acc.hood": 0.6161999893188477, + "Acc.sconce": 0.34599998474121096, + "Acc.vase": 0.41759998321533204, + "Acc.traffic light": 0.3593000030517578, + "Acc.tray": 0.01740000009536743, + "Acc.ashcan": 0.47080001831054685, + "Acc.fan": 0.48130001068115236, + "Acc.pier": 0.4388999938964844, + "Acc.crt screen": 0.07179999828338623, + "Acc.plate": 0.6712000274658203, + "Acc.monitor": 0.24209999084472655, + "Acc.bulletin board": 0.4702000045776367, + "Acc.shower": 0.0496999979019165, + "Acc.radiator": 0.5938000106811523, + "Acc.glass": 0.10420000076293945, + "Acc.clock": 0.30370000839233396, + "Acc.flag": 0.48529998779296873 + } + }, + "88": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8344, + "mIoU": 0.4889, + "mAcc": 0.588, + "IoU.wall": 0.7738999938964843, + "IoU.building": 0.8301999664306641, + "IoU.sky": 0.9361000061035156, + "IoU.floor": 0.8183999633789063, + "IoU.tree": 0.7401999664306641, + "IoU.ceiling": 0.8366999816894531, + "IoU.road": 0.83, + "IoU.bed ": 0.8941000366210937, + "IoU.windowpane": 0.6238000106811523, + "IoU.grass": 0.6758999633789062, + "IoU.cabinet": 0.6365000152587891, + "IoU.sidewalk": 0.655199966430664, + "IoU.person": 0.8094000244140624, + "IoU.earth": 0.37979999542236326, + "IoU.door": 0.5177999877929688, + "IoU.table": 0.6075, + "IoU.mountain": 0.5804000091552735, + "IoU.plant": 0.5288000106811523, + "IoU.curtain": 0.7333000183105469, + "IoU.chair": 0.5618999862670898, + "IoU.car": 0.8441999816894531, + "IoU.water": 0.5770000076293945, + "IoU.painting": 0.7245999908447266, + "IoU.sofa": 0.6891000366210938, + "IoU.shelf": 0.43290000915527344, + "IoU.house": 0.4520000076293945, + "IoU.sea": 0.6722000122070313, + "IoU.mirror": 0.7095999908447266, + "IoU.rug": 0.6504000091552734, + "IoU.field": 0.37020000457763674, + "IoU.armchair": 0.4325, + "IoU.seat": 0.6698999786376953, + "IoU.fence": 0.43389999389648437, + "IoU.desk": 0.49770000457763675, + "IoU.rock": 0.49630001068115237, + "IoU.wardrobe": 0.5736000061035156, + "IoU.lamp": 0.5691999816894531, + "IoU.bathtub": 0.8555999755859375, + "IoU.railing": 0.39430000305175783, + "IoU.cushion": 0.6002000045776367, + "IoU.base": 0.31940000534057617, + "IoU.box": 0.24469999313354493, + "IoU.column": 0.4918000030517578, + "IoU.signboard": 0.3679999923706055, + "IoU.chest of drawers": 0.34720001220703123, + "IoU.counter": 0.3863999938964844, + "IoU.sand": 0.5163000106811524, + "IoU.sink": 0.7080000305175781, + "IoU.skyscraper": 0.5122000122070313, + "IoU.fireplace": 0.725, + "IoU.refrigerator": 0.7568000030517578, + "IoU.grandstand": 0.4988000106811523, + "IoU.path": 0.20139999389648439, + "IoU.stairs": 0.22680000305175782, + "IoU.runway": 0.64, + "IoU.case": 0.5708000183105468, + "IoU.pool table": 0.9323999786376953, + "IoU.pillow": 0.5608000183105468, + "IoU.screen door": 0.6840000152587891, + "IoU.stairway": 0.3377000045776367, + "IoU.river": 0.17540000915527343, + "IoU.bridge": 0.4995000076293945, + "IoU.bookcase": 0.32939998626708983, + "IoU.blind": 0.39869998931884765, + "IoU.coffee table": 
0.6451000213623047, + "IoU.toilet": 0.8455999755859375, + "IoU.flower": 0.36950000762939456, + "IoU.book": 0.4565000152587891, + "IoU.hill": 0.14449999809265138, + "IoU.bench": 0.42189998626708985, + "IoU.countertop": 0.5779000091552734, + "IoU.stove": 0.7408999633789063, + "IoU.palm": 0.48270000457763673, + "IoU.kitchen island": 0.47619998931884766, + "IoU.computer": 0.7094000244140625, + "IoU.swivel chair": 0.47939998626708985, + "IoU.boat": 0.747699966430664, + "IoU.bar": 0.5643000030517578, + "IoU.arcade machine": 0.4509000015258789, + "IoU.hovel": 0.37200000762939456, + "IoU.bus": 0.8998999786376953, + "IoU.towel": 0.6502999877929687, + "IoU.light": 0.31610000610351563, + "IoU.truck": 0.22229999542236328, + "IoU.tower": 0.1711000061035156, + "IoU.chandelier": 0.6386000061035156, + "IoU.awning": 0.23059999465942382, + "IoU.streetlight": 0.16120000839233398, + "IoU.booth": 0.3593000030517578, + "IoU.television receiver": 0.6995999908447266, + "IoU.airplane": 0.5834999847412109, + "IoU.dirt track": 0.0784000015258789, + "IoU.apparel": 0.3497999954223633, + "IoU.pole": 0.14420000076293946, + "IoU.land": 0.04510000228881836, + "IoU.bannister": 0.08109999656677246, + "IoU.escalator": 0.5956999969482422, + "IoU.ottoman": 0.48869998931884767, + "IoU.bottle": 0.31190000534057616, + "IoU.buffet": 0.37560001373291013, + "IoU.poster": 0.2596999931335449, + "IoU.stage": 0.15010000228881837, + "IoU.van": 0.3763000106811523, + "IoU.ship": 0.12770000457763672, + "IoU.fountain": 0.14039999961853028, + "IoU.conveyer belt": 0.734800033569336, + "IoU.canopy": 0.22809999465942382, + "IoU.washer": 0.7256999969482422, + "IoU.plaything": 0.2986000061035156, + "IoU.swimming pool": 0.7906999969482422, + "IoU.stool": 0.4013999938964844, + "IoU.barrel": 0.5795000076293946, + "IoU.basket": 0.33990001678466797, + "IoU.waterfall": 0.46130001068115234, + "IoU.tent": 0.9530000305175781, + "IoU.bag": 0.16860000610351564, + "IoU.minibike": 0.704800033569336, + "IoU.cradle": 0.8345999908447266, + "IoU.oven": 0.21489999771118165, + "IoU.ball": 0.5681999969482422, + "IoU.food": 0.49970001220703125, + "IoU.step": 0.08550000190734863, + "IoU.tank": 0.5675, + "IoU.trade name": 0.2669000053405762, + "IoU.microwave": 0.5163999938964844, + "IoU.pot": 0.5033000183105468, + "IoU.animal": 0.5866999816894531, + "IoU.bicycle": 0.5736000061035156, + "IoU.lake": 0.4266999816894531, + "IoU.dishwasher": 0.6612999725341797, + "IoU.screen": 0.571500015258789, + "IoU.blanket": 0.14869999885559082, + "IoU.sculpture": 0.6476000213623047, + "IoU.hood": 0.5016999816894532, + "IoU.sconce": 0.28889999389648435, + "IoU.vase": 0.33299999237060546, + "IoU.traffic light": 0.25489999771118166, + "IoU.tray": 0.030999999046325683, + "IoU.ashcan": 0.38110000610351563, + "IoU.fan": 0.4259999847412109, + "IoU.pier": 0.276200008392334, + "IoU.crt screen": 0.07510000228881836, + "IoU.plate": 0.5313999938964844, + "IoU.monitor": 0.5277999877929688, + "IoU.bulletin board": 0.3915000152587891, + "IoU.shower": 0.017999999523162842, + "IoU.radiator": 0.5427999877929688, + "IoU.glass": 0.05190000057220459, + "IoU.clock": 0.2631999969482422, + "IoU.flag": 0.385099983215332, + "Acc.wall": 0.8987999725341796, + "Acc.building": 0.9308000183105469, + "Acc.sky": 0.976500015258789, + "Acc.floor": 0.9163999938964844, + "Acc.tree": 0.8711000061035157, + "Acc.ceiling": 0.9313999938964844, + "Acc.road": 0.9069999694824219, + "Acc.bed ": 0.954800033569336, + "Acc.windowpane": 0.7741999816894531, + "Acc.grass": 0.8006999969482422, + "Acc.cabinet": 0.7454000091552735, + 
"Acc.sidewalk": 0.7833000183105469, + "Acc.person": 0.9175, + "Acc.earth": 0.5370000076293945, + "Acc.door": 0.6708999633789062, + "Acc.table": 0.7655999755859375, + "Acc.mountain": 0.7205999755859375, + "Acc.plant": 0.6565000152587891, + "Acc.curtain": 0.8473000335693359, + "Acc.chair": 0.6712000274658203, + "Acc.car": 0.9262999725341797, + "Acc.water": 0.7437000274658203, + "Acc.painting": 0.8473999786376953, + "Acc.sofa": 0.8825, + "Acc.shelf": 0.6256999969482422, + "Acc.house": 0.6090999984741211, + "Acc.sea": 0.8698999786376953, + "Acc.mirror": 0.78, + "Acc.rug": 0.7379000091552734, + "Acc.field": 0.5997999954223633, + "Acc.armchair": 0.5986000061035156, + "Acc.seat": 0.8393000030517578, + "Acc.fence": 0.5761999893188476, + "Acc.desk": 0.6837000274658203, + "Acc.rock": 0.686500015258789, + "Acc.wardrobe": 0.6606999969482422, + "Acc.lamp": 0.663499984741211, + "Acc.bathtub": 0.8948999786376953, + "Acc.railing": 0.5295000076293945, + "Acc.cushion": 0.7288999938964844, + "Acc.base": 0.4940999984741211, + "Acc.box": 0.3170000076293945, + "Acc.column": 0.5972999954223632, + "Acc.signboard": 0.45939998626708983, + "Acc.chest of drawers": 0.5425, + "Acc.counter": 0.4954999923706055, + "Acc.sand": 0.6986000061035156, + "Acc.sink": 0.7722000122070313, + "Acc.skyscraper": 0.5872999954223633, + "Acc.fireplace": 0.857699966430664, + "Acc.refrigerator": 0.8076000213623047, + "Acc.grandstand": 0.7555999755859375, + "Acc.path": 0.2738999938964844, + "Acc.stairs": 0.3032999992370605, + "Acc.runway": 0.8179000091552734, + "Acc.case": 0.7519999694824219, + "Acc.pool table": 0.9668000030517578, + "Acc.pillow": 0.6359000015258789, + "Acc.screen door": 0.7580999755859374, + "Acc.stairway": 0.47720001220703123, + "Acc.river": 0.3135000038146973, + "Acc.bridge": 0.5654000091552734, + "Acc.bookcase": 0.5652999877929688, + "Acc.blind": 0.427400016784668, + "Acc.coffee table": 0.7944000244140625, + "Acc.toilet": 0.8866999816894531, + "Acc.flower": 0.5004999923706055, + "Acc.book": 0.6038999938964844, + "Acc.hill": 0.23370000839233399, + "Acc.bench": 0.48650001525878905, + "Acc.countertop": 0.7043000030517578, + "Acc.stove": 0.8216000366210937, + "Acc.palm": 0.634900016784668, + "Acc.kitchen island": 0.6197999954223633, + "Acc.computer": 0.8062999725341797, + "Acc.swivel chair": 0.5920999908447265, + "Acc.boat": 0.8047000122070312, + "Acc.bar": 0.6698000335693359, + "Acc.arcade machine": 0.47639999389648435, + "Acc.hovel": 0.39490001678466796, + "Acc.bus": 0.9279000091552735, + "Acc.towel": 0.7515000152587891, + "Acc.light": 0.33529998779296877, + "Acc.truck": 0.29170000076293945, + "Acc.tower": 0.2670000076293945, + "Acc.chandelier": 0.7854000091552734, + "Acc.awning": 0.26440000534057617, + "Acc.streetlight": 0.19530000686645507, + "Acc.booth": 0.3784000015258789, + "Acc.television receiver": 0.7761000061035156, + "Acc.airplane": 0.6356000137329102, + "Acc.dirt track": 0.320099983215332, + "Acc.apparel": 0.4752000045776367, + "Acc.pole": 0.17649999618530274, + "Acc.land": 0.0571999979019165, + "Acc.bannister": 0.09779999732971191, + "Acc.escalator": 0.7638999938964843, + "Acc.ottoman": 0.6340999984741211, + "Acc.bottle": 0.4275, + "Acc.buffet": 0.4475, + "Acc.poster": 0.47049999237060547, + "Acc.stage": 0.20729999542236327, + "Acc.van": 0.4534000015258789, + "Acc.ship": 0.13510000228881835, + "Acc.fountain": 0.14140000343322753, + "Acc.conveyer belt": 0.9155000305175781, + "Acc.canopy": 0.24770000457763672, + "Acc.washer": 0.7437999725341797, + "Acc.plaything": 0.385, + "Acc.swimming pool": 
0.9037000274658203, + "Acc.stool": 0.48770000457763674, + "Acc.barrel": 0.6441000366210937, + "Acc.basket": 0.41900001525878905, + "Acc.waterfall": 0.5688000106811524, + "Acc.tent": 0.9713999938964843, + "Acc.bag": 0.19430000305175782, + "Acc.minibike": 0.7973999786376953, + "Acc.cradle": 0.955, + "Acc.oven": 0.4909999847412109, + "Acc.ball": 0.6486000061035156, + "Acc.food": 0.5711000061035156, + "Acc.step": 0.10939999580383301, + "Acc.tank": 0.6505999755859375, + "Acc.trade name": 0.2911000061035156, + "Acc.microwave": 0.5647999954223633, + "Acc.pot": 0.5711999893188476, + "Acc.animal": 0.61, + "Acc.bicycle": 0.6765000152587891, + "Acc.lake": 0.5259000015258789, + "Acc.dishwasher": 0.6980000305175781, + "Acc.screen": 0.7054000091552735, + "Acc.blanket": 0.16190000534057616, + "Acc.sculpture": 0.7230000305175781, + "Acc.hood": 0.6272000122070313, + "Acc.sconce": 0.3502000045776367, + "Acc.vase": 0.4022999954223633, + "Acc.traffic light": 0.34669998168945315, + "Acc.tray": 0.03539999961853027, + "Acc.ashcan": 0.5331000137329102, + "Acc.fan": 0.4797999954223633, + "Acc.pier": 0.4216999816894531, + "Acc.crt screen": 0.10850000381469727, + "Acc.plate": 0.6741999816894532, + "Acc.monitor": 0.6930999755859375, + "Acc.bulletin board": 0.46110000610351565, + "Acc.shower": 0.04309999942779541, + "Acc.radiator": 0.5831999969482422, + "Acc.glass": 0.0528000020980835, + "Acc.clock": 0.27700000762939453, + "Acc.flag": 0.42580001831054687 + } + }, + "89": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8323, + "mIoU": 0.479, + "mAcc": 0.5689, + "IoU.wall": 0.7725, + "IoU.building": 0.8262999725341796, + "IoU.sky": 0.928499984741211, + "IoU.floor": 0.8113999938964844, + "IoU.tree": 0.7229000091552734, + "IoU.ceiling": 0.8358000183105468, + "IoU.road": 0.8256999969482421, + "IoU.bed ": 0.8887999725341796, + "IoU.windowpane": 0.6184999847412109, + "IoU.grass": 0.6719999694824219, + "IoU.cabinet": 0.645199966430664, + "IoU.sidewalk": 0.6472000122070313, + "IoU.person": 0.8094999694824219, + "IoU.earth": 0.37990001678466795, + "IoU.door": 0.5059000015258789, + "IoU.table": 0.6134999847412109, + "IoU.mountain": 0.5770999908447265, + "IoU.plant": 0.5240999984741211, + "IoU.curtain": 0.7337000274658203, + "IoU.chair": 0.5559000015258789, + "IoU.car": 0.8384999847412109, + "IoU.water": 0.587400016784668, + "IoU.painting": 0.7134999847412109, + "IoU.sofa": 0.6763999938964844, + "IoU.shelf": 0.43470001220703125, + "IoU.house": 0.45669998168945314, + "IoU.sea": 0.685, + "IoU.mirror": 0.6908000183105468, + "IoU.rug": 0.6356000137329102, + "IoU.field": 0.3541999816894531, + "IoU.armchair": 0.3945000076293945, + "IoU.seat": 0.6751000213623047, + "IoU.fence": 0.427400016784668, + "IoU.desk": 0.4865999984741211, + "IoU.rock": 0.5097999954223633, + "IoU.wardrobe": 0.5729000091552734, + "IoU.lamp": 0.5472000122070313, + "IoU.bathtub": 0.8370999908447265, + "IoU.railing": 0.39299999237060546, + "IoU.cushion": 0.5893000030517578, + "IoU.base": 0.2996999931335449, + "IoU.box": 0.26040000915527345, + "IoU.column": 0.4702999877929688, + "IoU.signboard": 0.34299999237060547, + "IoU.chest of drawers": 0.359900016784668, + "IoU.counter": 0.3816999816894531, + "IoU.sand": 0.5011999893188477, + "IoU.sink": 0.7020999908447265, + "IoU.skyscraper": 0.4866999816894531, + "IoU.fireplace": 0.7151000213623047, + "IoU.refrigerator": 0.7597000122070312, + "IoU.grandstand": 0.48080001831054686, + "IoU.path": 0.22049999237060547, + "IoU.stairs": 
0.16809999465942382, + "IoU.runway": 0.6537999725341797, + "IoU.case": 0.5311000061035156, + "IoU.pool table": 0.9283999633789063, + "IoU.pillow": 0.5009000015258789, + "IoU.screen door": 0.7062999725341796, + "IoU.stairway": 0.31790000915527344, + "IoU.river": 0.185, + "IoU.bridge": 0.5877000045776367, + "IoU.bookcase": 0.30440000534057615, + "IoU.blind": 0.3770000076293945, + "IoU.coffee table": 0.6343000030517578, + "IoU.toilet": 0.8338999938964844, + "IoU.flower": 0.3752000045776367, + "IoU.book": 0.4279999923706055, + "IoU.hill": 0.1422000026702881, + "IoU.bench": 0.47450000762939454, + "IoU.countertop": 0.5577000045776367, + "IoU.stove": 0.7454000091552735, + "IoU.palm": 0.42830001831054687, + "IoU.kitchen island": 0.47439998626708985, + "IoU.computer": 0.7602999877929687, + "IoU.swivel chair": 0.48220001220703124, + "IoU.boat": 0.5868000030517578, + "IoU.bar": 0.5638000106811524, + "IoU.arcade machine": 0.35520000457763673, + "IoU.hovel": 0.22969999313354492, + "IoU.bus": 0.9033000183105468, + "IoU.towel": 0.6494999694824218, + "IoU.light": 0.32860000610351564, + "IoU.truck": 0.23649999618530274, + "IoU.tower": 0.16969999313354492, + "IoU.chandelier": 0.6120000076293945, + "IoU.awning": 0.21440000534057618, + "IoU.streetlight": 0.17, + "IoU.booth": 0.34720001220703123, + "IoU.television receiver": 0.6811000061035156, + "IoU.airplane": 0.5836999893188477, + "IoU.dirt track": 0.06300000190734863, + "IoU.apparel": 0.32979999542236327, + "IoU.pole": 0.108100004196167, + "IoU.land": 0.06670000076293946, + "IoU.bannister": 0.09979999542236329, + "IoU.escalator": 0.5320999908447266, + "IoU.ottoman": 0.44799999237060545, + "IoU.bottle": 0.20540000915527343, + "IoU.buffet": 0.38229999542236326, + "IoU.poster": 0.1625, + "IoU.stage": 0.12619999885559083, + "IoU.van": 0.31379999160766603, + "IoU.ship": 0.10680000305175781, + "IoU.fountain": 0.21719999313354493, + "IoU.conveyer belt": 0.754000015258789, + "IoU.canopy": 0.19350000381469726, + "IoU.washer": 0.701500015258789, + "IoU.plaything": 0.28719999313354494, + "IoU.swimming pool": 0.784000015258789, + "IoU.stool": 0.39169998168945314, + "IoU.barrel": 0.5847000122070313, + "IoU.basket": 0.35389999389648436, + "IoU.waterfall": 0.4520999908447266, + "IoU.tent": 0.9480999755859375, + "IoU.bag": 0.15279999732971192, + "IoU.minibike": 0.6545999908447265, + "IoU.cradle": 0.8248999786376953, + "IoU.oven": 0.281200008392334, + "IoU.ball": 0.5416999816894531, + "IoU.food": 0.4375, + "IoU.step": 0.08739999771118163, + "IoU.tank": 0.5509000015258789, + "IoU.trade name": 0.19770000457763673, + "IoU.microwave": 0.7633000183105468, + "IoU.pot": 0.4961000061035156, + "IoU.animal": 0.5820000076293945, + "IoU.bicycle": 0.5286000061035157, + "IoU.lake": 0.38860000610351564, + "IoU.dishwasher": 0.6138000106811523, + "IoU.screen": 0.5486000061035157, + "IoU.blanket": 0.12149999618530273, + "IoU.sculpture": 0.6525, + "IoU.hood": 0.4906000137329102, + "IoU.sconce": 0.2514999961853027, + "IoU.vase": 0.3238999938964844, + "IoU.traffic light": 0.2384000015258789, + "IoU.tray": 0.05309999942779541, + "IoU.ashcan": 0.3484000015258789, + "IoU.fan": 0.4727999877929687, + "IoU.pier": 0.2765999984741211, + "IoU.crt screen": 0.04420000076293945, + "IoU.plate": 0.4991999816894531, + "IoU.monitor": 0.5693000030517578, + "IoU.bulletin board": 0.2602000045776367, + "IoU.shower": 0.023299999237060547, + "IoU.radiator": 0.5872000122070312, + "IoU.glass": 0.10270000457763671, + "IoU.clock": 0.2713999938964844, + "IoU.flag": 0.39849998474121096, + "Acc.wall": 0.8980999755859375, 
+ "Acc.building": 0.9462999725341796, + "Acc.sky": 0.9838999938964844, + "Acc.floor": 0.9254000091552734, + "Acc.tree": 0.8231999969482422, + "Acc.ceiling": 0.9361000061035156, + "Acc.road": 0.9109999847412109, + "Acc.bed ": 0.9580000305175781, + "Acc.windowpane": 0.7991999816894532, + "Acc.grass": 0.8137000274658203, + "Acc.cabinet": 0.7695999908447265, + "Acc.sidewalk": 0.785, + "Acc.person": 0.8893000030517578, + "Acc.earth": 0.5420999908447266, + "Acc.door": 0.627400016784668, + "Acc.table": 0.7902999877929687, + "Acc.mountain": 0.7225, + "Acc.plant": 0.6188000106811523, + "Acc.curtain": 0.8466999816894532, + "Acc.chair": 0.6562000274658203, + "Acc.car": 0.904000015258789, + "Acc.water": 0.7822000122070313, + "Acc.painting": 0.8269000244140625, + "Acc.sofa": 0.8844000244140625, + "Acc.shelf": 0.6265999984741211, + "Acc.house": 0.5790000152587891, + "Acc.sea": 0.8826000213623046, + "Acc.mirror": 0.7587000274658203, + "Acc.rug": 0.7226000213623047, + "Acc.field": 0.577400016784668, + "Acc.armchair": 0.48119998931884767, + "Acc.seat": 0.8462999725341797, + "Acc.fence": 0.5647999954223633, + "Acc.desk": 0.6197999954223633, + "Acc.rock": 0.6873000335693359, + "Acc.wardrobe": 0.6794000244140626, + "Acc.lamp": 0.6195999908447266, + "Acc.bathtub": 0.8675, + "Acc.railing": 0.5397999954223632, + "Acc.cushion": 0.7004000091552735, + "Acc.base": 0.39779998779296877, + "Acc.box": 0.32029998779296875, + "Acc.column": 0.5468999862670898, + "Acc.signboard": 0.44729999542236326, + "Acc.chest of drawers": 0.5272000122070313, + "Acc.counter": 0.5115999984741211, + "Acc.sand": 0.6433999633789063, + "Acc.sink": 0.7594999694824218, + "Acc.skyscraper": 0.5454999923706054, + "Acc.fireplace": 0.8104000091552734, + "Acc.refrigerator": 0.790199966430664, + "Acc.grandstand": 0.7319999694824219, + "Acc.path": 0.2928000068664551, + "Acc.stairs": 0.2143000030517578, + "Acc.runway": 0.8516999816894532, + "Acc.case": 0.7287999725341797, + "Acc.pool table": 0.9633000183105469, + "Acc.pillow": 0.5659000015258789, + "Acc.screen door": 0.7662000274658203, + "Acc.stairway": 0.5027000045776367, + "Acc.river": 0.28079999923706056, + "Acc.bridge": 0.6733000183105469, + "Acc.bookcase": 0.4988999938964844, + "Acc.blind": 0.3990999984741211, + "Acc.coffee table": 0.7708999633789062, + "Acc.toilet": 0.8715000152587891, + "Acc.flower": 0.532400016784668, + "Acc.book": 0.6027999877929687, + "Acc.hill": 0.21149999618530274, + "Acc.bench": 0.5188000106811523, + "Acc.countertop": 0.7044000244140625, + "Acc.stove": 0.8093000030517579, + "Acc.palm": 0.5259999847412109, + "Acc.kitchen island": 0.6837000274658203, + "Acc.computer": 0.8594000244140625, + "Acc.swivel chair": 0.5740999984741211, + "Acc.boat": 0.6322999954223633, + "Acc.bar": 0.6404000091552734, + "Acc.arcade machine": 0.3704999923706055, + "Acc.hovel": 0.24229999542236327, + "Acc.bus": 0.9320999908447266, + "Acc.towel": 0.7862999725341797, + "Acc.light": 0.36580001831054687, + "Acc.truck": 0.2902000045776367, + "Acc.tower": 0.21649999618530275, + "Acc.chandelier": 0.7758000183105469, + "Acc.awning": 0.23530000686645508, + "Acc.streetlight": 0.21540000915527344, + "Acc.booth": 0.3725, + "Acc.television receiver": 0.7308999633789063, + "Acc.airplane": 0.6258000183105469, + "Acc.dirt track": 0.2059000015258789, + "Acc.apparel": 0.41639999389648436, + "Acc.pole": 0.12789999961853027, + "Acc.land": 0.09199999809265137, + "Acc.bannister": 0.12800000190734864, + "Acc.escalator": 0.6766999816894531, + "Acc.ottoman": 0.5836000061035156, + "Acc.bottle": 0.24799999237060547, + 
"Acc.buffet": 0.45549999237060546, + "Acc.poster": 0.226299991607666, + "Acc.stage": 0.16760000228881836, + "Acc.van": 0.36130001068115236, + "Acc.ship": 0.12609999656677245, + "Acc.fountain": 0.21979999542236328, + "Acc.conveyer belt": 0.9198000335693359, + "Acc.canopy": 0.2, + "Acc.washer": 0.7343000030517578, + "Acc.plaything": 0.38979999542236327, + "Acc.swimming pool": 0.8854000091552734, + "Acc.stool": 0.4695999908447266, + "Acc.barrel": 0.6411000061035156, + "Acc.basket": 0.45439998626708983, + "Acc.waterfall": 0.5836000061035156, + "Acc.tent": 0.9570999908447265, + "Acc.bag": 0.17860000610351562, + "Acc.minibike": 0.6891999816894532, + "Acc.cradle": 0.9475, + "Acc.oven": 0.36790000915527343, + "Acc.ball": 0.6520999908447266, + "Acc.food": 0.4916999816894531, + "Acc.step": 0.10800000190734864, + "Acc.tank": 0.6068999862670899, + "Acc.trade name": 0.20940000534057618, + "Acc.microwave": 0.8436000061035156, + "Acc.pot": 0.549099998474121, + "Acc.animal": 0.5988000106811523, + "Acc.bicycle": 0.6133000183105469, + "Acc.lake": 0.4325, + "Acc.dishwasher": 0.6997000122070313, + "Acc.screen": 0.7501000213623047, + "Acc.blanket": 0.14810000419616698, + "Acc.sculpture": 0.6840000152587891, + "Acc.hood": 0.586500015258789, + "Acc.sconce": 0.2872999954223633, + "Acc.vase": 0.39360000610351564, + "Acc.traffic light": 0.3036000061035156, + "Acc.tray": 0.06420000076293945, + "Acc.ashcan": 0.47819999694824217, + "Acc.fan": 0.5277999877929688, + "Acc.pier": 0.3977000045776367, + "Acc.crt screen": 0.06289999961853027, + "Acc.plate": 0.6224000167846679, + "Acc.monitor": 0.6520999908447266, + "Acc.bulletin board": 0.3290000152587891, + "Acc.shower": 0.04639999866485596, + "Acc.radiator": 0.6466999816894531, + "Acc.glass": 0.10800000190734864, + "Acc.clock": 0.2904000091552734, + "Acc.flag": 0.429900016784668 + } + }, + "90": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8175, + "mIoU": 0.457, + "mAcc": 0.5740999999999999, + "IoU.wall": 0.7566999816894531, + "IoU.building": 0.8188999938964844, + "IoU.sky": 0.9376999664306641, + "IoU.floor": 0.795199966430664, + "IoU.tree": 0.7344999694824219, + "IoU.ceiling": 0.8223999786376953, + "IoU.road": 0.819800033569336, + "IoU.bed ": 0.8705999755859375, + "IoU.windowpane": 0.6047999954223633, + "IoU.grass": 0.6397999954223633, + "IoU.cabinet": 0.5856999969482422, + "IoU.sidewalk": 0.6313999938964844, + "IoU.person": 0.7691000366210937, + "IoU.earth": 0.33630001068115234, + "IoU.door": 0.455099983215332, + "IoU.table": 0.5631999969482422, + "IoU.mountain": 0.5636999893188477, + "IoU.plant": 0.4993000030517578, + "IoU.curtain": 0.7213999938964843, + "IoU.chair": 0.5166999816894531, + "IoU.car": 0.8190000152587891, + "IoU.water": 0.5347999954223632, + "IoU.painting": 0.6779000091552735, + "IoU.sofa": 0.6104999923706055, + "IoU.shelf": 0.4068000030517578, + "IoU.house": 0.5027999877929688, + "IoU.sea": 0.6336999893188476, + "IoU.mirror": 0.6561000061035156, + "IoU.rug": 0.6569999694824219, + "IoU.field": 0.2879999923706055, + "IoU.armchair": 0.41450000762939454, + "IoU.seat": 0.6177000045776367, + "IoU.fence": 0.36279998779296874, + "IoU.desk": 0.44549999237060545, + "IoU.rock": 0.39919998168945314, + "IoU.wardrobe": 0.4909999847412109, + "IoU.lamp": 0.5379000091552735, + "IoU.bathtub": 0.7318000030517579, + "IoU.railing": 0.2979999923706055, + "IoU.cushion": 0.5336999893188477, + "IoU.base": 0.25549999237060544, + "IoU.box": 0.2168000030517578, + "IoU.column": 
0.4533000183105469, + "IoU.signboard": 0.3338999938964844, + "IoU.chest of drawers": 0.32979999542236327, + "IoU.counter": 0.24360000610351562, + "IoU.sand": 0.40689998626708984, + "IoU.sink": 0.6686000061035157, + "IoU.skyscraper": 0.5190999984741211, + "IoU.fireplace": 0.7187000274658203, + "IoU.refrigerator": 0.7498999786376953, + "IoU.grandstand": 0.4090999984741211, + "IoU.path": 0.22840000152587892, + "IoU.stairs": 0.26540000915527345, + "IoU.runway": 0.6816999816894531, + "IoU.case": 0.5415999984741211, + "IoU.pool table": 0.9015000152587891, + "IoU.pillow": 0.5786999893188477, + "IoU.screen door": 0.6644999694824218, + "IoU.stairway": 0.30989999771118165, + "IoU.river": 0.19809999465942382, + "IoU.bridge": 0.6854000091552734, + "IoU.bookcase": 0.3334000015258789, + "IoU.blind": 0.43200000762939456, + "IoU.coffee table": 0.5845000076293946, + "IoU.toilet": 0.8072000122070313, + "IoU.flower": 0.30440000534057615, + "IoU.book": 0.42650001525878906, + "IoU.hill": 0.09470000267028808, + "IoU.bench": 0.4125, + "IoU.countertop": 0.5695999908447266, + "IoU.stove": 0.6816999816894531, + "IoU.palm": 0.4588999938964844, + "IoU.kitchen island": 0.3584000015258789, + "IoU.computer": 0.7162999725341797, + "IoU.swivel chair": 0.4634000015258789, + "IoU.boat": 0.6966999816894531, + "IoU.bar": 0.5293999862670898, + "IoU.arcade machine": 0.3438999938964844, + "IoU.hovel": 0.4911999893188477, + "IoU.bus": 0.8319999694824218, + "IoU.towel": 0.5602999877929687, + "IoU.light": 0.31420000076293947, + "IoU.truck": 0.2989999961853027, + "IoU.tower": 0.2861000061035156, + "IoU.chandelier": 0.5915999984741211, + "IoU.awning": 0.36189998626708986, + "IoU.streetlight": 0.14050000190734863, + "IoU.booth": 0.3741999816894531, + "IoU.television receiver": 0.6106999969482422, + "IoU.airplane": 0.5790000152587891, + "IoU.dirt track": 0.1165999984741211, + "IoU.apparel": 0.3243999862670898, + "IoU.pole": 0.1590999984741211, + "IoU.land": 0.027300000190734863, + "IoU.bannister": 0.105600004196167, + "IoU.escalator": 0.3268999862670898, + "IoU.ottoman": 0.474900016784668, + "IoU.bottle": 0.3060000038146973, + "IoU.buffet": 0.3675, + "IoU.poster": 0.23260000228881836, + "IoU.stage": 0.18329999923706056, + "IoU.van": 0.4134999847412109, + "IoU.ship": 0.7287999725341797, + "IoU.fountain": 0.18610000610351562, + "IoU.conveyer belt": 0.6277000045776367, + "IoU.canopy": 0.20719999313354492, + "IoU.washer": 0.7141999816894531, + "IoU.plaything": 0.2509000015258789, + "IoU.swimming pool": 0.6211999893188477, + "IoU.stool": 0.24139999389648437, + "IoU.barrel": 0.5640999984741211, + "IoU.basket": 0.19959999084472657, + "IoU.waterfall": 0.587400016784668, + "IoU.tent": 0.8751000213623047, + "IoU.bag": 0.10189999580383301, + "IoU.minibike": 0.5515999984741211, + "IoU.cradle": 0.7798999786376953, + "IoU.oven": 0.19190000534057616, + "IoU.ball": 0.3647999954223633, + "IoU.food": 0.5504999923706054, + "IoU.step": 0.13680000305175782, + "IoU.tank": 0.4972999954223633, + "IoU.trade name": 0.23309999465942383, + "IoU.microwave": 0.3584999847412109, + "IoU.pot": 0.3484999847412109, + "IoU.animal": 0.5863000106811523, + "IoU.bicycle": 0.4743000030517578, + "IoU.lake": 0.5499000167846679, + "IoU.dishwasher": 0.514000015258789, + "IoU.screen": 0.5420000076293945, + "IoU.blanket": 0.12140000343322754, + "IoU.sculpture": 0.48709999084472655, + "IoU.hood": 0.4779000091552734, + "IoU.sconce": 0.34119998931884765, + "IoU.vase": 0.2606999969482422, + "IoU.traffic light": 0.24379999160766602, + "IoU.tray": 0.024200000762939454, + "IoU.ashcan": 
0.28170000076293944, + "IoU.fan": 0.4865999984741211, + "IoU.pier": 0.2955999946594238, + "IoU.crt screen": 0.0, + "IoU.plate": 0.3793999862670898, + "IoU.monitor": 0.02319999933242798, + "IoU.bulletin board": 0.3340999984741211, + "IoU.shower": 0.0040999999642372135, + "IoU.radiator": 0.44310001373291014, + "IoU.glass": 0.05630000114440918, + "IoU.clock": 0.22329999923706054, + "IoU.flag": 0.37130001068115237, + "Acc.wall": 0.8776999664306641, + "Acc.building": 0.9219999694824219, + "Acc.sky": 0.9769999694824218, + "Acc.floor": 0.8977999877929688, + "Acc.tree": 0.8645999908447266, + "Acc.ceiling": 0.9112999725341797, + "Acc.road": 0.8966999816894531, + "Acc.bed ": 0.950999984741211, + "Acc.windowpane": 0.7516000366210938, + "Acc.grass": 0.7920999908447266, + "Acc.cabinet": 0.6919000244140625, + "Acc.sidewalk": 0.7733000183105468, + "Acc.person": 0.912300033569336, + "Acc.earth": 0.4633000183105469, + "Acc.door": 0.6145000076293945, + "Acc.table": 0.7166000366210937, + "Acc.mountain": 0.7090000152587891, + "Acc.plant": 0.6227000045776367, + "Acc.curtain": 0.8301999664306641, + "Acc.chair": 0.6494999694824218, + "Acc.car": 0.9116999816894531, + "Acc.water": 0.6770999908447266, + "Acc.painting": 0.8468000030517578, + "Acc.sofa": 0.7693000030517578, + "Acc.shelf": 0.6156000137329102, + "Acc.house": 0.6227999877929687, + "Acc.sea": 0.8719999694824219, + "Acc.mirror": 0.7425, + "Acc.rug": 0.7298999786376953, + "Acc.field": 0.513499984741211, + "Acc.armchair": 0.6263000106811524, + "Acc.seat": 0.8002999877929687, + "Acc.fence": 0.4966999816894531, + "Acc.desk": 0.6559999847412109, + "Acc.rock": 0.6459999847412109, + "Acc.wardrobe": 0.6222999954223633, + "Acc.lamp": 0.6627999877929688, + "Acc.bathtub": 0.7980999755859375, + "Acc.railing": 0.44209999084472656, + "Acc.cushion": 0.6787999725341797, + "Acc.base": 0.41830001831054686, + "Acc.box": 0.2997999954223633, + "Acc.column": 0.5745000076293946, + "Acc.signboard": 0.43529998779296875, + "Acc.chest of drawers": 0.5693000030517578, + "Acc.counter": 0.3415999984741211, + "Acc.sand": 0.5533000183105469, + "Acc.sink": 0.7530999755859376, + "Acc.skyscraper": 0.571500015258789, + "Acc.fireplace": 0.892300033569336, + "Acc.refrigerator": 0.8579000091552734, + "Acc.grandstand": 0.7161000061035157, + "Acc.path": 0.3175, + "Acc.stairs": 0.3472999954223633, + "Acc.runway": 0.8886000061035156, + "Acc.case": 0.7026000213623047, + "Acc.pool table": 0.9597000122070313, + "Acc.pillow": 0.6733000183105469, + "Acc.screen door": 0.7704000091552734, + "Acc.stairway": 0.43439998626708987, + "Acc.river": 0.36700000762939455, + "Acc.bridge": 0.8076000213623047, + "Acc.bookcase": 0.5533000183105469, + "Acc.blind": 0.49279998779296874, + "Acc.coffee table": 0.7726000213623047, + "Acc.toilet": 0.8806999969482422, + "Acc.flower": 0.5352000045776367, + "Acc.book": 0.5906999969482422, + "Acc.hill": 0.17540000915527343, + "Acc.bench": 0.5006999969482422, + "Acc.countertop": 0.7090000152587891, + "Acc.stove": 0.7770999908447266, + "Acc.palm": 0.6395000076293945, + "Acc.kitchen island": 0.6833000183105469, + "Acc.computer": 0.8697000122070313, + "Acc.swivel chair": 0.6111999893188477, + "Acc.boat": 0.8454000091552735, + "Acc.bar": 0.7206999969482422, + "Acc.arcade machine": 0.3813999938964844, + "Acc.hovel": 0.5631999969482422, + "Acc.bus": 0.9087999725341797, + "Acc.towel": 0.7158999633789063, + "Acc.light": 0.34509998321533203, + "Acc.truck": 0.39799999237060546, + "Acc.tower": 0.4015000152587891, + "Acc.chandelier": 0.7441999816894531, + "Acc.awning": 0.4234999847412109, + 
"Acc.streetlight": 0.15960000038146974, + "Acc.booth": 0.4615000152587891, + "Acc.television receiver": 0.7308000183105469, + "Acc.airplane": 0.6494999694824218, + "Acc.dirt track": 0.18719999313354493, + "Acc.apparel": 0.4622999954223633, + "Acc.pole": 0.19649999618530273, + "Acc.land": 0.042800002098083496, + "Acc.bannister": 0.14010000228881836, + "Acc.escalator": 0.3925, + "Acc.ottoman": 0.6113999938964844, + "Acc.bottle": 0.48209999084472654, + "Acc.buffet": 0.4134000015258789, + "Acc.poster": 0.34299999237060547, + "Acc.stage": 0.3977000045776367, + "Acc.van": 0.5302999877929687, + "Acc.ship": 0.8023999786376953, + "Acc.fountain": 0.20850000381469727, + "Acc.conveyer belt": 0.7363999938964844, + "Acc.canopy": 0.3089999961853027, + "Acc.washer": 0.7222000122070312, + "Acc.plaything": 0.4395000076293945, + "Acc.swimming pool": 0.8237999725341797, + "Acc.stool": 0.3056999969482422, + "Acc.barrel": 0.6059000015258789, + "Acc.basket": 0.2613999938964844, + "Acc.waterfall": 0.6491999816894531, + "Acc.tent": 0.9925, + "Acc.bag": 0.125, + "Acc.minibike": 0.6718000030517578, + "Acc.cradle": 0.9761000061035157, + "Acc.oven": 0.5186000061035156, + "Acc.ball": 0.4613999938964844, + "Acc.food": 0.7001999664306641, + "Acc.step": 0.16049999237060547, + "Acc.tank": 0.637400016784668, + "Acc.trade name": 0.2780999946594238, + "Acc.microwave": 0.4038999938964844, + "Acc.pot": 0.4125, + "Acc.animal": 0.627400016784668, + "Acc.bicycle": 0.6975, + "Acc.lake": 0.6202999877929688, + "Acc.dishwasher": 0.5863999938964843, + "Acc.screen": 0.9262999725341797, + "Acc.blanket": 0.13289999961853027, + "Acc.sculpture": 0.6415000152587891, + "Acc.hood": 0.5291999816894531, + "Acc.sconce": 0.4079000091552734, + "Acc.vase": 0.33430000305175783, + "Acc.traffic light": 0.407400016784668, + "Acc.tray": 0.030799999237060546, + "Acc.ashcan": 0.40939998626708984, + "Acc.fan": 0.6408000183105469, + "Acc.pier": 0.44790000915527345, + "Acc.crt screen": 0.0, + "Acc.plate": 0.4702999877929688, + "Acc.monitor": 0.025099999904632568, + "Acc.bulletin board": 0.4441999816894531, + "Acc.shower": 0.0125, + "Acc.radiator": 0.5025, + "Acc.glass": 0.05900000095367432, + "Acc.clock": 0.25479999542236326, + "Acc.flag": 0.45549999237060546 + } + }, + "91": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8217, + "mIoU": 0.4675, + "mAcc": 0.5847, + "IoU.wall": 0.7593000030517578, + "IoU.building": 0.8261000061035156, + "IoU.sky": 0.9386000061035156, + "IoU.floor": 0.8013999938964844, + "IoU.tree": 0.7430999755859375, + "IoU.ceiling": 0.8293000030517578, + "IoU.road": 0.8245999908447266, + "IoU.bed ": 0.8705000305175781, + "IoU.windowpane": 0.5956999969482422, + "IoU.grass": 0.6602999877929687, + "IoU.cabinet": 0.5890000152587891, + "IoU.sidewalk": 0.6487999725341796, + "IoU.person": 0.7772000122070313, + "IoU.earth": 0.33310001373291015, + "IoU.door": 0.4752999877929687, + "IoU.table": 0.5734000015258789, + "IoU.mountain": 0.5645999908447266, + "IoU.plant": 0.5022999954223633, + "IoU.curtain": 0.7219999694824218, + "IoU.chair": 0.5265000152587891, + "IoU.car": 0.819800033569336, + "IoU.water": 0.5941999816894531, + "IoU.painting": 0.6905000305175781, + "IoU.sofa": 0.6224000167846679, + "IoU.shelf": 0.4231999969482422, + "IoU.house": 0.512400016784668, + "IoU.sea": 0.6486000061035156, + "IoU.mirror": 0.6254000091552734, + "IoU.rug": 0.6543000030517578, + "IoU.field": 0.306299991607666, + "IoU.armchair": 0.3716999816894531, + "IoU.seat": 
0.6138999938964844, + "IoU.fence": 0.41959999084472654, + "IoU.desk": 0.46290000915527346, + "IoU.rock": 0.422400016784668, + "IoU.wardrobe": 0.5288999938964843, + "IoU.lamp": 0.5431000137329102, + "IoU.bathtub": 0.7415000152587891, + "IoU.railing": 0.30239999771118165, + "IoU.cushion": 0.5418999862670898, + "IoU.base": 0.2734000015258789, + "IoU.box": 0.24479999542236328, + "IoU.column": 0.4608000183105469, + "IoU.signboard": 0.3306999969482422, + "IoU.chest of drawers": 0.3315000152587891, + "IoU.counter": 0.2795999908447266, + "IoU.sand": 0.3875, + "IoU.sink": 0.6723999786376953, + "IoU.skyscraper": 0.5418000030517578, + "IoU.fireplace": 0.6843000030517579, + "IoU.refrigerator": 0.7644999694824218, + "IoU.grandstand": 0.4695999908447266, + "IoU.path": 0.2538999938964844, + "IoU.stairs": 0.27260000228881837, + "IoU.runway": 0.6016999816894532, + "IoU.case": 0.5118999862670899, + "IoU.pool table": 0.9070999908447266, + "IoU.pillow": 0.5581000137329102, + "IoU.screen door": 0.6133000183105469, + "IoU.stairway": 0.29520000457763673, + "IoU.river": 0.16079999923706054, + "IoU.bridge": 0.701500015258789, + "IoU.bookcase": 0.34380001068115235, + "IoU.blind": 0.43139999389648437, + "IoU.coffee table": 0.5836999893188477, + "IoU.toilet": 0.8127999877929688, + "IoU.flower": 0.32310001373291014, + "IoU.book": 0.44349998474121094, + "IoU.hill": 0.10520000457763672, + "IoU.bench": 0.42029998779296873, + "IoU.countertop": 0.5297000122070312, + "IoU.stove": 0.707699966430664, + "IoU.palm": 0.475, + "IoU.kitchen island": 0.35619998931884767, + "IoU.computer": 0.7136000061035156, + "IoU.swivel chair": 0.5359000015258789, + "IoU.boat": 0.6969999694824218, + "IoU.bar": 0.5481000137329102, + "IoU.arcade machine": 0.534000015258789, + "IoU.hovel": 0.5470000076293945, + "IoU.bus": 0.8047000122070312, + "IoU.towel": 0.5438000106811524, + "IoU.light": 0.3104999923706055, + "IoU.truck": 0.21780000686645506, + "IoU.tower": 0.266200008392334, + "IoU.chandelier": 0.6029999923706054, + "IoU.awning": 0.40200000762939453, + "IoU.streetlight": 0.15560000419616699, + "IoU.booth": 0.2886000061035156, + "IoU.television receiver": 0.6316999816894531, + "IoU.airplane": 0.5879000091552734, + "IoU.dirt track": 0.09149999618530273, + "IoU.apparel": 0.3758000183105469, + "IoU.pole": 0.16709999084472657, + "IoU.land": 0.022300000190734862, + "IoU.bannister": 0.10939999580383301, + "IoU.escalator": 0.33799999237060546, + "IoU.ottoman": 0.46349998474121096, + "IoU.bottle": 0.330099983215332, + "IoU.buffet": 0.4070999908447266, + "IoU.poster": 0.31260000228881835, + "IoU.stage": 0.16489999771118163, + "IoU.van": 0.39930000305175783, + "IoU.ship": 0.6227999877929687, + "IoU.fountain": 0.2015999984741211, + "IoU.conveyer belt": 0.7087999725341797, + "IoU.canopy": 0.21200000762939453, + "IoU.washer": 0.6840000152587891, + "IoU.plaything": 0.24549999237060546, + "IoU.swimming pool": 0.5913000106811523, + "IoU.stool": 0.29170000076293945, + "IoU.barrel": 0.5468999862670898, + "IoU.basket": 0.26229999542236326, + "IoU.waterfall": 0.6483000183105468, + "IoU.tent": 0.9133999633789063, + "IoU.bag": 0.09109999656677246, + "IoU.minibike": 0.6347999954223633, + "IoU.cradle": 0.8144000244140625, + "IoU.oven": 0.19149999618530272, + "IoU.ball": 0.37009998321533205, + "IoU.food": 0.5549000167846679, + "IoU.step": 0.11920000076293945, + "IoU.tank": 0.4797999954223633, + "IoU.trade name": 0.22420000076293944, + "IoU.microwave": 0.3427999877929688, + "IoU.pot": 0.35009998321533203, + "IoU.animal": 0.6177999877929687, + "IoU.bicycle": 
0.5229999923706055, + "IoU.lake": 0.579900016784668, + "IoU.dishwasher": 0.6008000183105469, + "IoU.screen": 0.5886000061035156, + "IoU.blanket": 0.10920000076293945, + "IoU.sculpture": 0.4804000091552734, + "IoU.hood": 0.5088000106811523, + "IoU.sconce": 0.34029998779296877, + "IoU.vase": 0.20899999618530274, + "IoU.traffic light": 0.2602000045776367, + "IoU.tray": 0.028900001049041748, + "IoU.ashcan": 0.3475, + "IoU.fan": 0.5111000061035156, + "IoU.pier": 0.44779998779296876, + "IoU.crt screen": 0.0, + "IoU.plate": 0.4556999969482422, + "IoU.monitor": 0.04570000171661377, + "IoU.bulletin board": 0.37599998474121094, + "IoU.shower": 0.005899999737739563, + "IoU.radiator": 0.4752999877929687, + "IoU.glass": 0.059499998092651364, + "IoU.clock": 0.21719999313354493, + "IoU.flag": 0.4063999938964844, + "Acc.wall": 0.8777999877929688, + "Acc.building": 0.9219999694824219, + "Acc.sky": 0.9769999694824218, + "Acc.floor": 0.9, + "Acc.tree": 0.8637000274658203, + "Acc.ceiling": 0.9233999633789063, + "Acc.road": 0.9005999755859375, + "Acc.bed ": 0.9538999938964844, + "Acc.windowpane": 0.75, + "Acc.grass": 0.8055999755859375, + "Acc.cabinet": 0.7012000274658203, + "Acc.sidewalk": 0.7876999664306641, + "Acc.person": 0.9108999633789062, + "Acc.earth": 0.47759998321533204, + "Acc.door": 0.6308000183105469, + "Acc.table": 0.7251000213623047, + "Acc.mountain": 0.7233000183105469, + "Acc.plant": 0.6329000091552734, + "Acc.curtain": 0.8280999755859375, + "Acc.chair": 0.6598000335693359, + "Acc.car": 0.9069000244140625, + "Acc.water": 0.7390000152587891, + "Acc.painting": 0.8548999786376953, + "Acc.sofa": 0.7827999877929688, + "Acc.shelf": 0.6136000061035156, + "Acc.house": 0.6395999908447265, + "Acc.sea": 0.8263999938964843, + "Acc.mirror": 0.7043000030517578, + "Acc.rug": 0.735999984741211, + "Acc.field": 0.5370999908447266, + "Acc.armchair": 0.5868000030517578, + "Acc.seat": 0.8068000030517578, + "Acc.fence": 0.5706999969482421, + "Acc.desk": 0.6723999786376953, + "Acc.rock": 0.6316999816894531, + "Acc.wardrobe": 0.6505000305175781, + "Acc.lamp": 0.6656999969482422, + "Acc.bathtub": 0.8066999816894531, + "Acc.railing": 0.4570999908447266, + "Acc.cushion": 0.6808999633789062, + "Acc.base": 0.4433000183105469, + "Acc.box": 0.34330001831054685, + "Acc.column": 0.584900016784668, + "Acc.signboard": 0.4290999984741211, + "Acc.chest of drawers": 0.5645000076293946, + "Acc.counter": 0.3856999969482422, + "Acc.sand": 0.5211000061035156, + "Acc.sink": 0.749000015258789, + "Acc.skyscraper": 0.6733999633789063, + "Acc.fireplace": 0.8868000030517578, + "Acc.refrigerator": 0.8631999969482422, + "Acc.grandstand": 0.7448999786376953, + "Acc.path": 0.32740001678466796, + "Acc.stairs": 0.35639999389648436, + "Acc.runway": 0.7979000091552735, + "Acc.case": 0.6973000335693359, + "Acc.pool table": 0.9683999633789062, + "Acc.pillow": 0.6579000091552735, + "Acc.screen door": 0.7697000122070312, + "Acc.stairway": 0.40900001525878904, + "Acc.river": 0.3152000045776367, + "Acc.bridge": 0.8326000213623047, + "Acc.bookcase": 0.5595999908447266, + "Acc.blind": 0.487400016784668, + "Acc.coffee table": 0.7766999816894531, + "Acc.toilet": 0.8886000061035156, + "Acc.flower": 0.522400016784668, + "Acc.book": 0.6086000061035156, + "Acc.hill": 0.18209999084472656, + "Acc.bench": 0.4915999984741211, + "Acc.countertop": 0.6787999725341797, + "Acc.stove": 0.7956999969482422, + "Acc.palm": 0.6694999694824219, + "Acc.kitchen island": 0.6508999633789062, + "Acc.computer": 0.8536000061035156, + "Acc.swivel chair": 0.6912000274658203, + 
"Acc.boat": 0.8320999908447265, + "Acc.bar": 0.7558000183105469, + "Acc.arcade machine": 0.5752999877929688, + "Acc.hovel": 0.6165000152587891, + "Acc.bus": 0.9079000091552735, + "Acc.towel": 0.6994999694824219, + "Acc.light": 0.3352000045776367, + "Acc.truck": 0.28889999389648435, + "Acc.tower": 0.3833000183105469, + "Acc.chandelier": 0.7552999877929687, + "Acc.awning": 0.46580001831054685, + "Acc.streetlight": 0.17959999084472655, + "Acc.booth": 0.40939998626708984, + "Acc.television receiver": 0.7597000122070312, + "Acc.airplane": 0.6597000122070312, + "Acc.dirt track": 0.1315999984741211, + "Acc.apparel": 0.5177000045776368, + "Acc.pole": 0.21290000915527343, + "Acc.land": 0.030199999809265136, + "Acc.bannister": 0.15399999618530275, + "Acc.escalator": 0.39619998931884765, + "Acc.ottoman": 0.5733000183105469, + "Acc.bottle": 0.5715999984741211, + "Acc.buffet": 0.4583000183105469, + "Acc.poster": 0.40040000915527346, + "Acc.stage": 0.36880001068115237, + "Acc.van": 0.49779998779296875, + "Acc.ship": 0.6922000122070312, + "Acc.fountain": 0.20889999389648437, + "Acc.conveyer belt": 0.8801999664306641, + "Acc.canopy": 0.29920000076293946, + "Acc.washer": 0.6873999786376953, + "Acc.plaything": 0.40950000762939454, + "Acc.swimming pool": 0.7658000183105469, + "Acc.stool": 0.3763999938964844, + "Acc.barrel": 0.6241999816894531, + "Acc.basket": 0.32099998474121094, + "Acc.waterfall": 0.735, + "Acc.tent": 0.9916000366210938, + "Acc.bag": 0.11449999809265136, + "Acc.minibike": 0.773499984741211, + "Acc.cradle": 0.9729000091552734, + "Acc.oven": 0.523400001525879, + "Acc.ball": 0.4370000076293945, + "Acc.food": 0.6811000061035156, + "Acc.step": 0.13739999771118164, + "Acc.tank": 0.6197000122070313, + "Acc.trade name": 0.26110000610351564, + "Acc.microwave": 0.3818999862670898, + "Acc.pot": 0.4127000045776367, + "Acc.animal": 0.6615000152587891, + "Acc.bicycle": 0.6993000030517578, + "Acc.lake": 0.6955999755859374, + "Acc.dishwasher": 0.6708999633789062, + "Acc.screen": 0.900199966430664, + "Acc.blanket": 0.11890000343322754, + "Acc.sculpture": 0.6495999908447265, + "Acc.hood": 0.5708000183105468, + "Acc.sconce": 0.41069999694824216, + "Acc.vase": 0.27360000610351565, + "Acc.traffic light": 0.429900016784668, + "Acc.tray": 0.03720000028610229, + "Acc.ashcan": 0.48020000457763673, + "Acc.fan": 0.674000015258789, + "Acc.pier": 0.782300033569336, + "Acc.crt screen": 0.0, + "Acc.plate": 0.5629999923706055, + "Acc.monitor": 0.04809999942779541, + "Acc.bulletin board": 0.4829000091552734, + "Acc.shower": 0.014299999475479126, + "Acc.radiator": 0.5252000045776367, + "Acc.glass": 0.06219999790191651, + "Acc.clock": 0.23809999465942383, + "Acc.flag": 0.4779999923706055 + } + }, + "92": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8234, + "mIoU": 0.4693, + "mAcc": 0.5866, + "IoU.wall": 0.7609999847412109, + "IoU.building": 0.8290000152587891, + "IoU.sky": 0.9370999908447266, + "IoU.floor": 0.8030999755859375, + "IoU.tree": 0.742699966430664, + "IoU.ceiling": 0.8275, + "IoU.road": 0.8233000183105469, + "IoU.bed ": 0.8679000091552734, + "IoU.windowpane": 0.6018999862670898, + "IoU.grass": 0.6520999908447266, + "IoU.cabinet": 0.5986999893188476, + "IoU.sidewalk": 0.6361999893188477, + "IoU.person": 0.7876999664306641, + "IoU.earth": 0.34650001525878904, + "IoU.door": 0.47080001831054685, + "IoU.table": 0.5936000061035156, + "IoU.mountain": 0.5806000137329101, + "IoU.plant": 0.515, + "IoU.curtain": 
0.7369000244140625, + "IoU.chair": 0.5293000030517578, + "IoU.car": 0.8222000122070312, + "IoU.water": 0.5588000106811524, + "IoU.painting": 0.7113999938964843, + "IoU.sofa": 0.6469999694824219, + "IoU.shelf": 0.43209999084472656, + "IoU.house": 0.5188000106811523, + "IoU.sea": 0.6268999862670899, + "IoU.mirror": 0.6477999877929688, + "IoU.rug": 0.6525, + "IoU.field": 0.29690000534057615, + "IoU.armchair": 0.38540000915527345, + "IoU.seat": 0.6279999923706054, + "IoU.fence": 0.36130001068115236, + "IoU.desk": 0.4779999923706055, + "IoU.rock": 0.46180000305175783, + "IoU.wardrobe": 0.5222000122070313, + "IoU.lamp": 0.5722999954223633, + "IoU.bathtub": 0.7491999816894531, + "IoU.railing": 0.35880001068115236, + "IoU.cushion": 0.5588999938964844, + "IoU.base": 0.31670000076293947, + "IoU.box": 0.2520000076293945, + "IoU.column": 0.4784999847412109, + "IoU.signboard": 0.34099998474121096, + "IoU.chest of drawers": 0.33529998779296877, + "IoU.counter": 0.3143000030517578, + "IoU.sand": 0.42, + "IoU.sink": 0.6620999908447266, + "IoU.skyscraper": 0.6470999908447266, + "IoU.fireplace": 0.6948999786376953, + "IoU.refrigerator": 0.7605000305175781, + "IoU.grandstand": 0.44220001220703126, + "IoU.path": 0.211200008392334, + "IoU.stairs": 0.27790000915527346, + "IoU.runway": 0.6694000244140625, + "IoU.case": 0.5518999862670898, + "IoU.pool table": 0.9138999938964844, + "IoU.pillow": 0.5584000015258789, + "IoU.screen door": 0.445099983215332, + "IoU.stairway": 0.3320000076293945, + "IoU.river": 0.13300000190734862, + "IoU.bridge": 0.6297000122070312, + "IoU.bookcase": 0.35389999389648436, + "IoU.blind": 0.4402000045776367, + "IoU.coffee table": 0.5924000167846679, + "IoU.toilet": 0.8144999694824219, + "IoU.flower": 0.3511000061035156, + "IoU.book": 0.44970001220703126, + "IoU.hill": 0.12170000076293945, + "IoU.bench": 0.4690999984741211, + "IoU.countertop": 0.474900016784668, + "IoU.stove": 0.6898999786376954, + "IoU.palm": 0.48619998931884767, + "IoU.kitchen island": 0.3634000015258789, + "IoU.computer": 0.7275, + "IoU.swivel chair": 0.5120999908447266, + "IoU.boat": 0.4861000061035156, + "IoU.bar": 0.5620999908447266, + "IoU.arcade machine": 0.41009998321533203, + "IoU.hovel": 0.5540000152587891, + "IoU.bus": 0.7694000244140625, + "IoU.towel": 0.5786000061035156, + "IoU.light": 0.32849998474121095, + "IoU.truck": 0.2719000053405762, + "IoU.tower": 0.30370000839233396, + "IoU.chandelier": 0.6191999816894531, + "IoU.awning": 0.4222999954223633, + "IoU.streetlight": 0.15869999885559083, + "IoU.booth": 0.32380001068115233, + "IoU.television receiver": 0.6609999847412109, + "IoU.airplane": 0.571500015258789, + "IoU.dirt track": 0.08909999847412109, + "IoU.apparel": 0.3491999816894531, + "IoU.pole": 0.15369999885559082, + "IoU.land": 0.07710000038146973, + "IoU.bannister": 0.10520000457763672, + "IoU.escalator": 0.278799991607666, + "IoU.ottoman": 0.4291999816894531, + "IoU.bottle": 0.2075, + "IoU.buffet": 0.38470001220703126, + "IoU.poster": 0.29690000534057615, + "IoU.stage": 0.18850000381469725, + "IoU.van": 0.43560001373291013, + "IoU.ship": 0.32990001678466796, + "IoU.fountain": 0.20030000686645508, + "IoU.conveyer belt": 0.71, + "IoU.canopy": 0.2588999938964844, + "IoU.washer": 0.7172000122070312, + "IoU.plaything": 0.25059999465942384, + "IoU.swimming pool": 0.6748000335693359, + "IoU.stool": 0.30700000762939456, + "IoU.barrel": 0.5358000183105469, + "IoU.basket": 0.24049999237060546, + "IoU.waterfall": 0.634900016784668, + "IoU.tent": 0.9262000274658203, + "IoU.bag": 0.12850000381469726, + 
"IoU.minibike": 0.6381999969482421, + "IoU.cradle": 0.7976999664306641, + "IoU.oven": 0.18299999237060546, + "IoU.ball": 0.45060001373291014, + "IoU.food": 0.5122000122070313, + "IoU.step": 0.07110000133514405, + "IoU.tank": 0.5411000061035156, + "IoU.trade name": 0.22059999465942381, + "IoU.microwave": 0.34869998931884766, + "IoU.pot": 0.40310001373291016, + "IoU.animal": 0.6191999816894531, + "IoU.bicycle": 0.5311000061035156, + "IoU.lake": 0.615, + "IoU.dishwasher": 0.5886000061035156, + "IoU.screen": 0.5766999816894531, + "IoU.blanket": 0.1472000026702881, + "IoU.sculpture": 0.5043999862670898, + "IoU.hood": 0.5375, + "IoU.sconce": 0.3597999954223633, + "IoU.vase": 0.2813999938964844, + "IoU.traffic light": 0.23889999389648436, + "IoU.tray": 0.03700000047683716, + "IoU.ashcan": 0.36790000915527343, + "IoU.fan": 0.5075, + "IoU.pier": 0.2804000091552734, + "IoU.crt screen": 0.020199999809265137, + "IoU.plate": 0.47959999084472654, + "IoU.monitor": 0.0734000015258789, + "IoU.bulletin board": 0.4540999984741211, + "IoU.shower": 0.0018000000715255738, + "IoU.radiator": 0.5204999923706055, + "IoU.glass": 0.07360000133514405, + "IoU.clock": 0.20920000076293946, + "IoU.flag": 0.41319999694824217, + "Acc.wall": 0.8770999908447266, + "Acc.building": 0.9244000244140625, + "Acc.sky": 0.9766000366210937, + "Acc.floor": 0.9041000366210937, + "Acc.tree": 0.8644999694824219, + "Acc.ceiling": 0.9188999938964844, + "Acc.road": 0.9022000122070313, + "Acc.bed ": 0.9513999938964843, + "Acc.windowpane": 0.7480999755859375, + "Acc.grass": 0.8058999633789062, + "Acc.cabinet": 0.7102999877929688, + "Acc.sidewalk": 0.7826999664306641, + "Acc.person": 0.9130999755859375, + "Acc.earth": 0.47810001373291017, + "Acc.door": 0.6391999816894531, + "Acc.table": 0.7362000274658204, + "Acc.mountain": 0.7180999755859375, + "Acc.plant": 0.6286999893188476, + "Acc.curtain": 0.8605999755859375, + "Acc.chair": 0.6530000305175782, + "Acc.car": 0.9026000213623047, + "Acc.water": 0.7173000335693359, + "Acc.painting": 0.8506999969482422, + "Acc.sofa": 0.8073999786376953, + "Acc.shelf": 0.6459999847412109, + "Acc.house": 0.6675, + "Acc.sea": 0.8488999938964844, + "Acc.mirror": 0.7252999877929688, + "Acc.rug": 0.7119000244140625, + "Acc.field": 0.5586999893188477, + "Acc.armchair": 0.5933000183105469, + "Acc.seat": 0.8112999725341797, + "Acc.fence": 0.4829999923706055, + "Acc.desk": 0.6833000183105469, + "Acc.rock": 0.6883000183105469, + "Acc.wardrobe": 0.6488999938964843, + "Acc.lamp": 0.7072000122070312, + "Acc.bathtub": 0.8312000274658203, + "Acc.railing": 0.5068999862670899, + "Acc.cushion": 0.6877999877929688, + "Acc.base": 0.46880001068115235, + "Acc.box": 0.3493000030517578, + "Acc.column": 0.6084000015258789, + "Acc.signboard": 0.4395000076293945, + "Acc.chest of drawers": 0.600999984741211, + "Acc.counter": 0.4493999862670898, + "Acc.sand": 0.5604000091552734, + "Acc.sink": 0.7413999938964844, + "Acc.skyscraper": 0.7423999786376954, + "Acc.fireplace": 0.9026999664306641, + "Acc.refrigerator": 0.8529000091552734, + "Acc.grandstand": 0.7619000244140625, + "Acc.path": 0.28299999237060547, + "Acc.stairs": 0.36209999084472655, + "Acc.runway": 0.8105999755859375, + "Acc.case": 0.7426000213623047, + "Acc.pool table": 0.9693000030517578, + "Acc.pillow": 0.6554000091552734, + "Acc.screen door": 0.5304000091552734, + "Acc.stairway": 0.44740001678466795, + "Acc.river": 0.24329999923706055, + "Acc.bridge": 0.7722000122070313, + "Acc.bookcase": 0.5808000183105468, + "Acc.blind": 0.5281000137329102, + "Acc.coffee table": 
0.8069000244140625, + "Acc.toilet": 0.8887000274658203, + "Acc.flower": 0.5399000167846679, + "Acc.book": 0.6172999954223632, + "Acc.hill": 0.2336000061035156, + "Acc.bench": 0.5481000137329102, + "Acc.countertop": 0.6147000122070313, + "Acc.stove": 0.7881999969482422, + "Acc.palm": 0.6904000091552734, + "Acc.kitchen island": 0.6412000274658203, + "Acc.computer": 0.8613999938964844, + "Acc.swivel chair": 0.6729000091552735, + "Acc.boat": 0.6056999969482422, + "Acc.bar": 0.7334999847412109, + "Acc.arcade machine": 0.43540000915527344, + "Acc.hovel": 0.6505000305175781, + "Acc.bus": 0.9316999816894531, + "Acc.towel": 0.7316000366210937, + "Acc.light": 0.35950000762939455, + "Acc.truck": 0.35939998626708985, + "Acc.tower": 0.45970001220703127, + "Acc.chandelier": 0.777300033569336, + "Acc.awning": 0.5115999984741211, + "Acc.streetlight": 0.19520000457763673, + "Acc.booth": 0.43150001525878906, + "Acc.television receiver": 0.785199966430664, + "Acc.airplane": 0.6433000183105468, + "Acc.dirt track": 0.19010000228881835, + "Acc.apparel": 0.47209999084472654, + "Acc.pole": 0.19209999084472656, + "Acc.land": 0.10850000381469727, + "Acc.bannister": 0.14449999809265138, + "Acc.escalator": 0.32529998779296876, + "Acc.ottoman": 0.5645999908447266, + "Acc.bottle": 0.2827000045776367, + "Acc.buffet": 0.4293000030517578, + "Acc.poster": 0.3759000015258789, + "Acc.stage": 0.34150001525878904, + "Acc.van": 0.5304999923706055, + "Acc.ship": 0.47189998626708984, + "Acc.fountain": 0.20670000076293946, + "Acc.conveyer belt": 0.9248000335693359, + "Acc.canopy": 0.36630001068115237, + "Acc.washer": 0.7222000122070312, + "Acc.plaything": 0.41119998931884766, + "Acc.swimming pool": 0.7752999877929687, + "Acc.stool": 0.3890999984741211, + "Acc.barrel": 0.6134999847412109, + "Acc.basket": 0.3113999938964844, + "Acc.waterfall": 0.699800033569336, + "Acc.tent": 0.9906999969482422, + "Acc.bag": 0.16549999237060548, + "Acc.minibike": 0.775, + "Acc.cradle": 0.9730999755859375, + "Acc.oven": 0.5002000045776367, + "Acc.ball": 0.5568999862670898, + "Acc.food": 0.602599983215332, + "Acc.step": 0.09359999656677247, + "Acc.tank": 0.6412999725341797, + "Acc.trade name": 0.2545000076293945, + "Acc.microwave": 0.3890999984741211, + "Acc.pot": 0.4759000015258789, + "Acc.animal": 0.6802999877929687, + "Acc.bicycle": 0.7116999816894531, + "Acc.lake": 0.6661000061035156, + "Acc.dishwasher": 0.6741000366210937, + "Acc.screen": 0.9055000305175781, + "Acc.blanket": 0.15850000381469725, + "Acc.sculpture": 0.6598000335693359, + "Acc.hood": 0.5950999832153321, + "Acc.sconce": 0.42900001525878906, + "Acc.vase": 0.38790000915527345, + "Acc.traffic light": 0.39790000915527346, + "Acc.tray": 0.050399999618530276, + "Acc.ashcan": 0.49520000457763674, + "Acc.fan": 0.673499984741211, + "Acc.pier": 0.44279998779296875, + "Acc.crt screen": 0.04929999828338623, + "Acc.plate": 0.6306000137329102, + "Acc.monitor": 0.08119999885559082, + "Acc.bulletin board": 0.6431999969482421, + "Acc.shower": 0.01600000023841858, + "Acc.radiator": 0.5911999893188477, + "Acc.glass": 0.07679999828338623, + "Acc.clock": 0.2315999984741211, + "Acc.flag": 0.4790999984741211 + } + }, + "93": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8273, + "mIoU": 0.48109999999999997, + "mAcc": 0.6004, + "IoU.wall": 0.7648999786376953, + "IoU.building": 0.8319999694824218, + "IoU.sky": 0.9383000183105469, + "IoU.floor": 0.8162000274658203, + "IoU.tree": 0.740999984741211, + "IoU.ceiling": 
0.8281999969482422, + "IoU.road": 0.8312999725341796, + "IoU.bed ": 0.8776999664306641, + "IoU.windowpane": 0.6143999862670898, + "IoU.grass": 0.6519000244140625, + "IoU.cabinet": 0.6154999923706055, + "IoU.sidewalk": 0.6506999969482422, + "IoU.person": 0.7969000244140625, + "IoU.earth": 0.38709999084472657, + "IoU.door": 0.4825, + "IoU.table": 0.577599983215332, + "IoU.mountain": 0.577400016784668, + "IoU.plant": 0.5097999954223633, + "IoU.curtain": 0.7358999633789063, + "IoU.chair": 0.5420000076293945, + "IoU.car": 0.8376000213623047, + "IoU.water": 0.5263000106811524, + "IoU.painting": 0.6930000305175781, + "IoU.sofa": 0.6591000366210937, + "IoU.shelf": 0.4433000183105469, + "IoU.house": 0.48470001220703124, + "IoU.sea": 0.6197000122070313, + "IoU.mirror": 0.635, + "IoU.rug": 0.6591999816894532, + "IoU.field": 0.29700000762939455, + "IoU.armchair": 0.4093000030517578, + "IoU.seat": 0.6552999877929687, + "IoU.fence": 0.4065999984741211, + "IoU.desk": 0.5238000106811523, + "IoU.rock": 0.4418999862670898, + "IoU.wardrobe": 0.5315999984741211, + "IoU.lamp": 0.5654999923706054, + "IoU.bathtub": 0.7972000122070313, + "IoU.railing": 0.3606999969482422, + "IoU.cushion": 0.5516999816894531, + "IoU.base": 0.3245000076293945, + "IoU.box": 0.26540000915527345, + "IoU.column": 0.4766999816894531, + "IoU.signboard": 0.35369998931884766, + "IoU.chest of drawers": 0.33799999237060546, + "IoU.counter": 0.33349998474121095, + "IoU.sand": 0.523400001525879, + "IoU.sink": 0.667300033569336, + "IoU.skyscraper": 0.6583999633789063, + "IoU.fireplace": 0.6794000244140626, + "IoU.refrigerator": 0.7541000366210937, + "IoU.grandstand": 0.4766999816894531, + "IoU.path": 0.21670000076293947, + "IoU.stairs": 0.2695000076293945, + "IoU.runway": 0.582400016784668, + "IoU.case": 0.5691999816894531, + "IoU.pool table": 0.9248999786376954, + "IoU.pillow": 0.557599983215332, + "IoU.screen door": 0.5047999954223633, + "IoU.stairway": 0.36779998779296874, + "IoU.river": 0.13489999771118164, + "IoU.bridge": 0.6938999938964844, + "IoU.bookcase": 0.34119998931884765, + "IoU.blind": 0.4, + "IoU.coffee table": 0.566500015258789, + "IoU.toilet": 0.7658000183105469, + "IoU.flower": 0.3340000152587891, + "IoU.book": 0.46029998779296877, + "IoU.hill": 0.11970000267028809, + "IoU.bench": 0.47470001220703123, + "IoU.countertop": 0.4640000152587891, + "IoU.stove": 0.7, + "IoU.palm": 0.4856000137329102, + "IoU.kitchen island": 0.35209999084472654, + "IoU.computer": 0.7575, + "IoU.swivel chair": 0.5547999954223632, + "IoU.boat": 0.7113999938964843, + "IoU.bar": 0.457599983215332, + "IoU.arcade machine": 0.5579999923706055, + "IoU.hovel": 0.5700999832153321, + "IoU.bus": 0.8833000183105468, + "IoU.towel": 0.5974000167846679, + "IoU.light": 0.30920000076293946, + "IoU.truck": 0.3384000015258789, + "IoU.tower": 0.31709999084472656, + "IoU.chandelier": 0.6156999969482422, + "IoU.awning": 0.4045999908447266, + "IoU.streetlight": 0.1921999931335449, + "IoU.booth": 0.3215000152587891, + "IoU.television receiver": 0.6493000030517578, + "IoU.airplane": 0.6473999786376953, + "IoU.dirt track": 0.11170000076293946, + "IoU.apparel": 0.36779998779296874, + "IoU.pole": 0.17239999771118164, + "IoU.land": 0.030799999237060546, + "IoU.bannister": 0.11789999961853027, + "IoU.escalator": 0.4070999908447266, + "IoU.ottoman": 0.45220001220703127, + "IoU.bottle": 0.27969999313354493, + "IoU.buffet": 0.4375, + "IoU.poster": 0.29079999923706057, + "IoU.stage": 0.15020000457763671, + "IoU.van": 0.42, + "IoU.ship": 0.5520000076293945, + "IoU.fountain": 
0.20450000762939452, + "IoU.conveyer belt": 0.7662999725341797, + "IoU.canopy": 0.2513999938964844, + "IoU.washer": 0.7191000366210938, + "IoU.plaything": 0.3136000061035156, + "IoU.swimming pool": 0.765199966430664, + "IoU.stool": 0.35279998779296873, + "IoU.barrel": 0.429900016784668, + "IoU.basket": 0.235, + "IoU.waterfall": 0.7123999786376953, + "IoU.tent": 0.9530999755859375, + "IoU.bag": 0.12579999923706053, + "IoU.minibike": 0.5622000122070312, + "IoU.cradle": 0.7875, + "IoU.oven": 0.20670000076293946, + "IoU.ball": 0.5041999816894531, + "IoU.food": 0.4847999954223633, + "IoU.step": 0.07110000133514405, + "IoU.tank": 0.5525, + "IoU.trade name": 0.268799991607666, + "IoU.microwave": 0.43459999084472656, + "IoU.pot": 0.4070999908447266, + "IoU.animal": 0.6152999877929688, + "IoU.bicycle": 0.4940999984741211, + "IoU.lake": 0.5743000030517578, + "IoU.dishwasher": 0.514900016784668, + "IoU.screen": 0.5900999832153321, + "IoU.blanket": 0.16190000534057616, + "IoU.sculpture": 0.5497000122070312, + "IoU.hood": 0.4940999984741211, + "IoU.sconce": 0.3265000152587891, + "IoU.vase": 0.28959999084472654, + "IoU.traffic light": 0.2468000030517578, + "IoU.tray": 0.02369999885559082, + "IoU.ashcan": 0.3708000183105469, + "IoU.fan": 0.535099983215332, + "IoU.pier": 0.30870000839233397, + "IoU.crt screen": 0.026600000858306886, + "IoU.plate": 0.46169998168945314, + "IoU.monitor": 0.06369999885559081, + "IoU.bulletin board": 0.4615000152587891, + "IoU.shower": 0.0010000000149011613, + "IoU.radiator": 0.537400016784668, + "IoU.glass": 0.09, + "IoU.clock": 0.24959999084472656, + "IoU.flag": 0.38709999084472657, + "Acc.wall": 0.88, + "Acc.building": 0.9286000061035157, + "Acc.sky": 0.9762000274658204, + "Acc.floor": 0.9083000183105469, + "Acc.tree": 0.8677999877929687, + "Acc.ceiling": 0.9156999969482422, + "Acc.road": 0.8976999664306641, + "Acc.bed ": 0.9508999633789063, + "Acc.windowpane": 0.7569000244140625, + "Acc.grass": 0.7901000213623047, + "Acc.cabinet": 0.7158000183105468, + "Acc.sidewalk": 0.7941999816894532, + "Acc.person": 0.9180999755859375, + "Acc.earth": 0.5297999954223633, + "Acc.door": 0.6631999969482422, + "Acc.table": 0.7284999847412109, + "Acc.mountain": 0.6718000030517578, + "Acc.plant": 0.625, + "Acc.curtain": 0.8501000213623047, + "Acc.chair": 0.6644000244140625, + "Acc.car": 0.923499984741211, + "Acc.water": 0.6741999816894532, + "Acc.painting": 0.8637999725341797, + "Acc.sofa": 0.8070999908447266, + "Acc.shelf": 0.6729000091552735, + "Acc.house": 0.638499984741211, + "Acc.sea": 0.9004000091552734, + "Acc.mirror": 0.7326000213623047, + "Acc.rug": 0.73, + "Acc.field": 0.5577000045776367, + "Acc.armchair": 0.6334000015258789, + "Acc.seat": 0.8401000213623047, + "Acc.fence": 0.5384000015258789, + "Acc.desk": 0.7269999694824218, + "Acc.rock": 0.6894000244140625, + "Acc.wardrobe": 0.6306000137329102, + "Acc.lamp": 0.7011000061035156, + "Acc.bathtub": 0.8612000274658204, + "Acc.railing": 0.5120999908447266, + "Acc.cushion": 0.6937000274658203, + "Acc.base": 0.4690999984741211, + "Acc.box": 0.357400016784668, + "Acc.column": 0.6043999862670898, + "Acc.signboard": 0.47299999237060547, + "Acc.chest of drawers": 0.6225, + "Acc.counter": 0.43970001220703125, + "Acc.sand": 0.7052999877929688, + "Acc.sink": 0.752300033569336, + "Acc.skyscraper": 0.7438999938964844, + "Acc.fireplace": 0.9276999664306641, + "Acc.refrigerator": 0.8527999877929687, + "Acc.grandstand": 0.7444000244140625, + "Acc.path": 0.28219999313354494, + "Acc.stairs": 0.3615999984741211, + "Acc.runway": 0.752300033569336, + 
"Acc.case": 0.7308999633789063, + "Acc.pool table": 0.9712000274658203, + "Acc.pillow": 0.6675, + "Acc.screen door": 0.5783000183105469, + "Acc.stairway": 0.533499984741211, + "Acc.river": 0.21540000915527344, + "Acc.bridge": 0.8366000366210937, + "Acc.bookcase": 0.5559999847412109, + "Acc.blind": 0.46830001831054685, + "Acc.coffee table": 0.8201999664306641, + "Acc.toilet": 0.8898999786376953, + "Acc.flower": 0.5508000183105469, + "Acc.book": 0.6426000213623047, + "Acc.hill": 0.24059999465942383, + "Acc.bench": 0.542400016784668, + "Acc.countertop": 0.5886000061035156, + "Acc.stove": 0.8019999694824219, + "Acc.palm": 0.6704000091552734, + "Acc.kitchen island": 0.5968999862670898, + "Acc.computer": 0.899000015258789, + "Acc.swivel chair": 0.734800033569336, + "Acc.boat": 0.8561000061035157, + "Acc.bar": 0.620999984741211, + "Acc.arcade machine": 0.5954999923706055, + "Acc.hovel": 0.6233000183105468, + "Acc.bus": 0.9361000061035156, + "Acc.towel": 0.7495999908447266, + "Acc.light": 0.33430000305175783, + "Acc.truck": 0.4711000061035156, + "Acc.tower": 0.41700000762939454, + "Acc.chandelier": 0.7944000244140625, + "Acc.awning": 0.5093000030517578, + "Acc.streetlight": 0.22510000228881835, + "Acc.booth": 0.4125, + "Acc.television receiver": 0.7948000335693359, + "Acc.airplane": 0.7227999877929687, + "Acc.dirt track": 0.1840999984741211, + "Acc.apparel": 0.5358000183105469, + "Acc.pole": 0.2246999931335449, + "Acc.land": 0.045, + "Acc.bannister": 0.15640000343322755, + "Acc.escalator": 0.4684000015258789, + "Acc.ottoman": 0.6011999893188477, + "Acc.bottle": 0.3856999969482422, + "Acc.buffet": 0.5302999877929687, + "Acc.poster": 0.35069999694824217, + "Acc.stage": 0.24149999618530274, + "Acc.van": 0.5161999893188477, + "Acc.ship": 0.6177000045776367, + "Acc.fountain": 0.21420000076293946, + "Acc.conveyer belt": 0.8963999938964844, + "Acc.canopy": 0.3906999969482422, + "Acc.washer": 0.7255999755859375, + "Acc.plaything": 0.5695999908447266, + "Acc.swimming pool": 0.8504000091552735, + "Acc.stool": 0.47080001831054685, + "Acc.barrel": 0.629000015258789, + "Acc.basket": 0.3233000183105469, + "Acc.waterfall": 0.7783000183105468, + "Acc.tent": 0.9851000213623047, + "Acc.bag": 0.1493000030517578, + "Acc.minibike": 0.6823999786376953, + "Acc.cradle": 0.977699966430664, + "Acc.oven": 0.5377000045776367, + "Acc.ball": 0.555, + "Acc.food": 0.5877999877929687, + "Acc.step": 0.09220000267028809, + "Acc.tank": 0.6419999694824219, + "Acc.trade name": 0.32049999237060545, + "Acc.microwave": 0.48880001068115236, + "Acc.pot": 0.4570000076293945, + "Acc.animal": 0.6580999755859375, + "Acc.bicycle": 0.7205999755859375, + "Acc.lake": 0.6277999877929688, + "Acc.dishwasher": 0.6713999938964844, + "Acc.screen": 0.9168000030517578, + "Acc.blanket": 0.17760000228881836, + "Acc.sculpture": 0.7909999847412109, + "Acc.hood": 0.5609999847412109, + "Acc.sconce": 0.4079000091552734, + "Acc.vase": 0.4013999938964844, + "Acc.traffic light": 0.39240001678466796, + "Acc.tray": 0.03160000085830689, + "Acc.ashcan": 0.5056999969482422, + "Acc.fan": 0.6677999877929688, + "Acc.pier": 0.467599983215332, + "Acc.crt screen": 0.06730000019073486, + "Acc.plate": 0.6418000030517578, + "Acc.monitor": 0.07679999828338623, + "Acc.bulletin board": 0.6165000152587891, + "Acc.shower": 0.008500000238418579, + "Acc.radiator": 0.5908000183105468, + "Acc.glass": 0.09550000190734863, + "Acc.clock": 0.29329999923706057, + "Acc.flag": 0.44779998779296876 + } + }, + "94": { + "config": 
"configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8314, + "mIoU": 0.4901, + "mAcc": 0.6046, + "IoU.wall": 0.7691000366210937, + "IoU.building": 0.83, + "IoU.sky": 0.9376999664306641, + "IoU.floor": 0.8205000305175781, + "IoU.tree": 0.7472000122070312, + "IoU.ceiling": 0.8344999694824219, + "IoU.road": 0.832699966430664, + "IoU.bed ": 0.8879000091552735, + "IoU.windowpane": 0.6224000167846679, + "IoU.grass": 0.6827999877929688, + "IoU.cabinet": 0.6195000076293945, + "IoU.sidewalk": 0.6587000274658203, + "IoU.person": 0.8005000305175781, + "IoU.earth": 0.4009000015258789, + "IoU.door": 0.49770000457763675, + "IoU.table": 0.5940000152587891, + "IoU.mountain": 0.5872000122070312, + "IoU.plant": 0.5166999816894531, + "IoU.curtain": 0.7313999938964844, + "IoU.chair": 0.5415000152587891, + "IoU.car": 0.8387999725341797, + "IoU.water": 0.5358000183105469, + "IoU.painting": 0.6898999786376954, + "IoU.sofa": 0.6913999938964843, + "IoU.shelf": 0.44400001525878907, + "IoU.house": 0.5041999816894531, + "IoU.sea": 0.6091999816894531, + "IoU.mirror": 0.6681999969482422, + "IoU.rug": 0.6618000030517578, + "IoU.field": 0.3243000030517578, + "IoU.armchair": 0.45930000305175783, + "IoU.seat": 0.6625, + "IoU.fence": 0.3981999969482422, + "IoU.desk": 0.4752999877929687, + "IoU.rock": 0.4761000061035156, + "IoU.wardrobe": 0.5829999923706055, + "IoU.lamp": 0.5566999816894531, + "IoU.bathtub": 0.7727999877929688, + "IoU.railing": 0.3763999938964844, + "IoU.cushion": 0.5697999954223633, + "IoU.base": 0.3036000061035156, + "IoU.box": 0.23170000076293945, + "IoU.column": 0.49209999084472655, + "IoU.signboard": 0.3529999923706055, + "IoU.chest of drawers": 0.3763999938964844, + "IoU.counter": 0.41900001525878905, + "IoU.sand": 0.5636999893188477, + "IoU.sink": 0.6723999786376953, + "IoU.skyscraper": 0.563499984741211, + "IoU.fireplace": 0.701500015258789, + "IoU.refrigerator": 0.7794999694824218, + "IoU.grandstand": 0.4859000015258789, + "IoU.path": 0.21379999160766602, + "IoU.stairs": 0.2944000053405762, + "IoU.runway": 0.5950999832153321, + "IoU.case": 0.6576000213623047, + "IoU.pool table": 0.9269999694824219, + "IoU.pillow": 0.5790000152587891, + "IoU.screen door": 0.5265999984741211, + "IoU.stairway": 0.3484000015258789, + "IoU.river": 0.17709999084472655, + "IoU.bridge": 0.6572000122070313, + "IoU.bookcase": 0.3402000045776367, + "IoU.blind": 0.42, + "IoU.coffee table": 0.6136999893188476, + "IoU.toilet": 0.7759999847412109, + "IoU.flower": 0.3795000076293945, + "IoU.book": 0.45220001220703127, + "IoU.hill": 0.13399999618530273, + "IoU.bench": 0.48889999389648436, + "IoU.countertop": 0.5170000076293946, + "IoU.stove": 0.6880999755859375, + "IoU.palm": 0.472400016784668, + "IoU.kitchen island": 0.41630001068115235, + "IoU.computer": 0.7340000152587891, + "IoU.swivel chair": 0.4640999984741211, + "IoU.boat": 0.6937999725341797, + "IoU.bar": 0.6222999954223633, + "IoU.arcade machine": 0.3502999877929687, + "IoU.hovel": 0.5693000030517578, + "IoU.bus": 0.9029000091552735, + "IoU.towel": 0.617599983215332, + "IoU.light": 0.3515999984741211, + "IoU.truck": 0.3225, + "IoU.tower": 0.3370999908447266, + "IoU.chandelier": 0.6268999862670899, + "IoU.awning": 0.2713999938964844, + "IoU.streetlight": 0.15720000267028808, + "IoU.booth": 0.3636000061035156, + "IoU.television receiver": 0.6875, + "IoU.airplane": 0.6231000137329101, + "IoU.dirt track": 0.022200000286102296, + "IoU.apparel": 0.3325, + "IoU.pole": 0.1715999984741211, + "IoU.land": 
0.03839999914169311, + "IoU.bannister": 0.11170000076293946, + "IoU.escalator": 0.58, + "IoU.ottoman": 0.44919998168945313, + "IoU.bottle": 0.23329999923706055, + "IoU.buffet": 0.36209999084472655, + "IoU.poster": 0.2859000015258789, + "IoU.stage": 0.14350000381469727, + "IoU.van": 0.3808000183105469, + "IoU.ship": 0.7445999908447266, + "IoU.fountain": 0.1775, + "IoU.conveyer belt": 0.7655000305175781, + "IoU.canopy": 0.18790000915527344, + "IoU.washer": 0.7261000061035157, + "IoU.plaything": 0.32529998779296876, + "IoU.swimming pool": 0.6809999847412109, + "IoU.stool": 0.34569999694824216, + "IoU.barrel": 0.5693000030517578, + "IoU.basket": 0.25209999084472656, + "IoU.waterfall": 0.5656000137329101, + "IoU.tent": 0.9447000122070313, + "IoU.bag": 0.12270000457763672, + "IoU.minibike": 0.6808999633789062, + "IoU.cradle": 0.8140000152587891, + "IoU.oven": 0.23030000686645508, + "IoU.ball": 0.504900016784668, + "IoU.food": 0.5770000076293945, + "IoU.step": 0.09699999809265136, + "IoU.tank": 0.5620999908447266, + "IoU.trade name": 0.2780999946594238, + "IoU.microwave": 0.4386999893188477, + "IoU.pot": 0.41950000762939454, + "IoU.animal": 0.6491999816894531, + "IoU.bicycle": 0.5499000167846679, + "IoU.lake": 0.5804999923706055, + "IoU.dishwasher": 0.5297000122070312, + "IoU.screen": 0.5675, + "IoU.blanket": 0.13039999961853027, + "IoU.sculpture": 0.5700999832153321, + "IoU.hood": 0.5604000091552734, + "IoU.sconce": 0.3145000076293945, + "IoU.vase": 0.30809999465942384, + "IoU.traffic light": 0.22700000762939454, + "IoU.tray": 0.013600000143051148, + "IoU.ashcan": 0.3375, + "IoU.fan": 0.5229999923706055, + "IoU.pier": 0.316200008392334, + "IoU.crt screen": 0.046900000572204587, + "IoU.plate": 0.494900016784668, + "IoU.monitor": 0.28350000381469725, + "IoU.bulletin board": 0.4834000015258789, + "IoU.shower": 0.0009000000357627869, + "IoU.radiator": 0.6025, + "IoU.glass": 0.08779999732971192, + "IoU.clock": 0.273799991607666, + "IoU.flag": 0.4184999847412109, + "Acc.wall": 0.8784999847412109, + "Acc.building": 0.927300033569336, + "Acc.sky": 0.975999984741211, + "Acc.floor": 0.9125, + "Acc.tree": 0.8638999938964844, + "Acc.ceiling": 0.9202999877929687, + "Acc.road": 0.893499984741211, + "Acc.bed ": 0.9554000091552735, + "Acc.windowpane": 0.7752999877929687, + "Acc.grass": 0.7941999816894532, + "Acc.cabinet": 0.7238999938964844, + "Acc.sidewalk": 0.8020999908447266, + "Acc.person": 0.9165000152587891, + "Acc.earth": 0.5904999923706055, + "Acc.door": 0.6801000213623047, + "Acc.table": 0.7472000122070312, + "Acc.mountain": 0.704800033569336, + "Acc.plant": 0.6465000152587891, + "Acc.curtain": 0.8531999969482422, + "Acc.chair": 0.6583000183105469, + "Acc.car": 0.925, + "Acc.water": 0.7113999938964843, + "Acc.painting": 0.8663999938964844, + "Acc.sofa": 0.832699966430664, + "Acc.shelf": 0.6513999938964844, + "Acc.house": 0.6956999969482421, + "Acc.sea": 0.8094999694824219, + "Acc.mirror": 0.7551000213623047, + "Acc.rug": 0.7394000244140625, + "Acc.field": 0.5393000030517578, + "Acc.armchair": 0.6873999786376953, + "Acc.seat": 0.8302999877929688, + "Acc.fence": 0.5422999954223633, + "Acc.desk": 0.6630000305175782, + "Acc.rock": 0.7369999694824219, + "Acc.wardrobe": 0.6941000366210938, + "Acc.lamp": 0.6879000091552734, + "Acc.bathtub": 0.8277999877929687, + "Acc.railing": 0.5404999923706054, + "Acc.cushion": 0.707699966430664, + "Acc.base": 0.46619998931884765, + "Acc.box": 0.31239999771118165, + "Acc.column": 0.5993000030517578, + "Acc.signboard": 0.46240001678466797, + "Acc.chest of drawers": 
0.6313999938964844, + "Acc.counter": 0.5072000122070313, + "Acc.sand": 0.7323999786376953, + "Acc.sink": 0.7419999694824219, + "Acc.skyscraper": 0.6523999786376953, + "Acc.fireplace": 0.8975, + "Acc.refrigerator": 0.8412000274658203, + "Acc.grandstand": 0.7183000183105469, + "Acc.path": 0.29639999389648436, + "Acc.stairs": 0.3945999908447266, + "Acc.runway": 0.7598999786376953, + "Acc.case": 0.8263999938964843, + "Acc.pool table": 0.9655000305175782, + "Acc.pillow": 0.6759999847412109, + "Acc.screen door": 0.6034999847412109, + "Acc.stairway": 0.452599983215332, + "Acc.river": 0.2981999969482422, + "Acc.bridge": 0.7927999877929688, + "Acc.bookcase": 0.5484000015258789, + "Acc.blind": 0.47119998931884766, + "Acc.coffee table": 0.7898000335693359, + "Acc.toilet": 0.8886000061035156, + "Acc.flower": 0.5747000122070313, + "Acc.book": 0.6570999908447266, + "Acc.hill": 0.23469999313354492, + "Acc.bench": 0.5777000045776367, + "Acc.countertop": 0.6794999694824219, + "Acc.stove": 0.7948000335693359, + "Acc.palm": 0.683499984741211, + "Acc.kitchen island": 0.6812999725341797, + "Acc.computer": 0.8690000152587891, + "Acc.swivel chair": 0.6780999755859375, + "Acc.boat": 0.849800033569336, + "Acc.bar": 0.7976999664306641, + "Acc.arcade machine": 0.37759998321533206, + "Acc.hovel": 0.6615000152587891, + "Acc.bus": 0.9444000244140625, + "Acc.towel": 0.7441999816894531, + "Acc.light": 0.39110000610351564, + "Acc.truck": 0.4615999984741211, + "Acc.tower": 0.4625, + "Acc.chandelier": 0.7906999969482422, + "Acc.awning": 0.31549999237060544, + "Acc.streetlight": 0.18309999465942384, + "Acc.booth": 0.41259998321533203, + "Acc.television receiver": 0.825999984741211, + "Acc.airplane": 0.6941000366210938, + "Acc.dirt track": 0.09270000457763672, + "Acc.apparel": 0.46430000305175784, + "Acc.pole": 0.223799991607666, + "Acc.land": 0.06059999942779541, + "Acc.bannister": 0.15239999771118165, + "Acc.escalator": 0.7755999755859375, + "Acc.ottoman": 0.6118000030517579, + "Acc.bottle": 0.3114999961853027, + "Acc.buffet": 0.42330001831054687, + "Acc.poster": 0.37909999847412107, + "Acc.stage": 0.21540000915527344, + "Acc.van": 0.49259998321533205, + "Acc.ship": 0.8230000305175781, + "Acc.fountain": 0.21610000610351562, + "Acc.conveyer belt": 0.9154000091552734, + "Acc.canopy": 0.2875, + "Acc.washer": 0.7338999938964844, + "Acc.plaything": 0.47560001373291017, + "Acc.swimming pool": 0.8063999938964844, + "Acc.stool": 0.40560001373291016, + "Acc.barrel": 0.6436000061035156, + "Acc.basket": 0.3420999908447266, + "Acc.waterfall": 0.6525, + "Acc.tent": 0.9845999908447266, + "Acc.bag": 0.1411999988555908, + "Acc.minibike": 0.7793000030517578, + "Acc.cradle": 0.9701999664306641, + "Acc.oven": 0.6061999893188477, + "Acc.ball": 0.5456000137329101, + "Acc.food": 0.6916000366210937, + "Acc.step": 0.13520000457763673, + "Acc.tank": 0.6444000244140625, + "Acc.trade name": 0.3171999931335449, + "Acc.microwave": 0.4872999954223633, + "Acc.pot": 0.4931999969482422, + "Acc.animal": 0.695, + "Acc.bicycle": 0.7411000061035157, + "Acc.lake": 0.6127999877929687, + "Acc.dishwasher": 0.6530999755859375, + "Acc.screen": 0.7380000305175781, + "Acc.blanket": 0.14239999771118164, + "Acc.sculpture": 0.7095999908447266, + "Acc.hood": 0.6122999954223632, + "Acc.sconce": 0.40759998321533203, + "Acc.vase": 0.40310001373291016, + "Acc.traffic light": 0.33619998931884765, + "Acc.tray": 0.01759999990463257, + "Acc.ashcan": 0.47200000762939454, + "Acc.fan": 0.6688999938964844, + "Acc.pier": 0.45799999237060546, + "Acc.crt screen": 0.09520000457763672, + 
"Acc.plate": 0.6556999969482422, + "Acc.monitor": 0.4059000015258789, + "Acc.bulletin board": 0.6368000030517578, + "Acc.shower": 0.00800000011920929, + "Acc.radiator": 0.6981999969482422, + "Acc.glass": 0.09050000190734864, + "Acc.clock": 0.3277000045776367, + "Acc.flag": 0.46560001373291016 + } + }, + "95": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8334999999999999, + "mIoU": 0.48619999999999997, + "mAcc": 0.601, + "IoU.wall": 0.7755000305175781, + "IoU.building": 0.8286000061035156, + "IoU.sky": 0.9395999908447266, + "IoU.floor": 0.819800033569336, + "IoU.tree": 0.7561000061035156, + "IoU.ceiling": 0.8369000244140625, + "IoU.road": 0.832699966430664, + "IoU.bed ": 0.8937999725341796, + "IoU.windowpane": 0.6222000122070312, + "IoU.grass": 0.6934999847412109, + "IoU.cabinet": 0.6191999816894531, + "IoU.sidewalk": 0.6548999786376953, + "IoU.person": 0.8044000244140626, + "IoU.earth": 0.38349998474121094, + "IoU.door": 0.5095000076293945, + "IoU.table": 0.5968999862670898, + "IoU.mountain": 0.5834000015258789, + "IoU.plant": 0.5441999816894532, + "IoU.curtain": 0.7354000091552735, + "IoU.chair": 0.5615999984741211, + "IoU.car": 0.8302999877929688, + "IoU.water": 0.575, + "IoU.painting": 0.6994000244140625, + "IoU.sofa": 0.6825, + "IoU.shelf": 0.4370000076293945, + "IoU.house": 0.4881999969482422, + "IoU.sea": 0.6877999877929688, + "IoU.mirror": 0.6716999816894531, + "IoU.rug": 0.6334000015258789, + "IoU.field": 0.37290000915527344, + "IoU.armchair": 0.44349998474121094, + "IoU.seat": 0.6519000244140625, + "IoU.fence": 0.46549999237060546, + "IoU.desk": 0.49639999389648437, + "IoU.rock": 0.48400001525878905, + "IoU.wardrobe": 0.5533000183105469, + "IoU.lamp": 0.5583000183105469, + "IoU.bathtub": 0.8379000091552734, + "IoU.railing": 0.37790000915527344, + "IoU.cushion": 0.5820999908447265, + "IoU.base": 0.3334000015258789, + "IoU.box": 0.2231999969482422, + "IoU.column": 0.47619998931884766, + "IoU.signboard": 0.3625, + "IoU.chest of drawers": 0.3475, + "IoU.counter": 0.40180000305175784, + "IoU.sand": 0.46689998626708984, + "IoU.sink": 0.6766999816894531, + "IoU.skyscraper": 0.5116999816894531, + "IoU.fireplace": 0.7061000061035156, + "IoU.refrigerator": 0.7691999816894531, + "IoU.grandstand": 0.5195999908447265, + "IoU.path": 0.18450000762939453, + "IoU.stairs": 0.2795999908447266, + "IoU.runway": 0.6202999877929688, + "IoU.case": 0.6352000045776367, + "IoU.pool table": 0.9305000305175781, + "IoU.pillow": 0.5877000045776367, + "IoU.screen door": 0.6004999923706055, + "IoU.stairway": 0.3561000061035156, + "IoU.river": 0.16420000076293945, + "IoU.bridge": 0.6052999877929688, + "IoU.bookcase": 0.33169998168945314, + "IoU.blind": 0.41150001525878904, + "IoU.coffee table": 0.6038000106811523, + "IoU.toilet": 0.7654000091552734, + "IoU.flower": 0.3990999984741211, + "IoU.book": 0.4502000045776367, + "IoU.hill": 0.15510000228881837, + "IoU.bench": 0.4518999862670898, + "IoU.countertop": 0.5740000152587891, + "IoU.stove": 0.7113999938964843, + "IoU.palm": 0.48459999084472655, + "IoU.kitchen island": 0.46849998474121096, + "IoU.computer": 0.7401000213623047, + "IoU.swivel chair": 0.5377000045776367, + "IoU.boat": 0.6516000366210938, + "IoU.bar": 0.5690000152587891, + "IoU.arcade machine": 0.7594000244140625, + "IoU.hovel": 0.5952999877929688, + "IoU.bus": 0.8869999694824219, + "IoU.towel": 0.6241999816894531, + "IoU.light": 0.3511000061035156, + "IoU.truck": 0.2043000030517578, + "IoU.tower": 
0.2638999938964844, + "IoU.chandelier": 0.6311000061035156, + "IoU.awning": 0.33619998931884765, + "IoU.streetlight": 0.16049999237060547, + "IoU.booth": 0.44029998779296875, + "IoU.television receiver": 0.7105999755859375, + "IoU.airplane": 0.6304000091552734, + "IoU.dirt track": 0.007699999809265137, + "IoU.apparel": 0.34669998168945315, + "IoU.pole": 0.19170000076293944, + "IoU.land": 0.020299999713897704, + "IoU.bannister": 0.08960000038146973, + "IoU.escalator": 0.5690000152587891, + "IoU.ottoman": 0.48060001373291017, + "IoU.bottle": 0.35959999084472655, + "IoU.buffet": 0.3920000076293945, + "IoU.poster": 0.2922999954223633, + "IoU.stage": 0.12529999732971192, + "IoU.van": 0.35529998779296873, + "IoU.ship": 0.10800000190734864, + "IoU.fountain": 0.19139999389648438, + "IoU.conveyer belt": 0.7987000274658204, + "IoU.canopy": 0.1915999984741211, + "IoU.washer": 0.6818000030517578, + "IoU.plaything": 0.3856999969482422, + "IoU.swimming pool": 0.6726999664306641, + "IoU.stool": 0.3093000030517578, + "IoU.barrel": 0.26549999237060545, + "IoU.basket": 0.32169998168945313, + "IoU.waterfall": 0.49380001068115237, + "IoU.tent": 0.9413999938964843, + "IoU.bag": 0.12069999694824218, + "IoU.minibike": 0.6213999938964844, + "IoU.cradle": 0.8333000183105469, + "IoU.oven": 0.3690999984741211, + "IoU.ball": 0.32119998931884763, + "IoU.food": 0.5654999923706054, + "IoU.step": 0.0909000015258789, + "IoU.tank": 0.5790000152587891, + "IoU.trade name": 0.27649999618530274, + "IoU.microwave": 0.7013999938964843, + "IoU.pot": 0.4341999816894531, + "IoU.animal": 0.619900016784668, + "IoU.bicycle": 0.51, + "IoU.lake": 0.10279999732971191, + "IoU.dishwasher": 0.5413999938964844, + "IoU.screen": 0.5220999908447266, + "IoU.blanket": 0.14739999771118165, + "IoU.sculpture": 0.5858000183105468, + "IoU.hood": 0.5668999862670898, + "IoU.sconce": 0.33610000610351565, + "IoU.vase": 0.31409999847412107, + "IoU.traffic light": 0.24979999542236328, + "IoU.tray": 0.017100000381469728, + "IoU.ashcan": 0.36279998779296874, + "IoU.fan": 0.5006999969482422, + "IoU.pier": 0.3475, + "IoU.crt screen": 0.038499999046325686, + "IoU.plate": 0.47939998626708985, + "IoU.monitor": 0.25190000534057616, + "IoU.bulletin board": 0.3788000106811523, + "IoU.shower": 0.010099999904632569, + "IoU.radiator": 0.5395000076293945, + "IoU.glass": 0.10029999732971191, + "IoU.clock": 0.2670999908447266, + "IoU.flag": 0.4481999969482422, + "Acc.wall": 0.8806999969482422, + "Acc.building": 0.924000015258789, + "Acc.sky": 0.9762999725341797, + "Acc.floor": 0.9120999908447266, + "Acc.tree": 0.8669000244140626, + "Acc.ceiling": 0.9262999725341797, + "Acc.road": 0.8980999755859375, + "Acc.bed ": 0.9544000244140625, + "Acc.windowpane": 0.7705000305175781, + "Acc.grass": 0.8158000183105468, + "Acc.cabinet": 0.7255000305175782, + "Acc.sidewalk": 0.7952999877929687, + "Acc.person": 0.9179000091552735, + "Acc.earth": 0.5766999816894531, + "Acc.door": 0.7030000305175781, + "Acc.table": 0.7591000366210937, + "Acc.mountain": 0.7093000030517578, + "Acc.plant": 0.6679000091552735, + "Acc.curtain": 0.8537000274658203, + "Acc.chair": 0.6890000152587891, + "Acc.car": 0.9143000030517578, + "Acc.water": 0.7648000335693359, + "Acc.painting": 0.8604000091552735, + "Acc.sofa": 0.8169999694824219, + "Acc.shelf": 0.6070999908447265, + "Acc.house": 0.7373999786376954, + "Acc.sea": 0.8698999786376953, + "Acc.mirror": 0.7623999786376953, + "Acc.rug": 0.7161000061035157, + "Acc.field": 0.5718000030517578, + "Acc.armchair": 0.678499984741211, + "Acc.seat": 0.8048999786376954, + 
"Acc.fence": 0.6397000122070312, + "Acc.desk": 0.6886000061035156, + "Acc.rock": 0.7063999938964843, + "Acc.wardrobe": 0.6730999755859375, + "Acc.lamp": 0.6933000183105469, + "Acc.bathtub": 0.8886000061035156, + "Acc.railing": 0.5163999938964844, + "Acc.cushion": 0.7318000030517579, + "Acc.base": 0.5504999923706054, + "Acc.box": 0.313700008392334, + "Acc.column": 0.6068000030517579, + "Acc.signboard": 0.46810001373291016, + "Acc.chest of drawers": 0.5652999877929688, + "Acc.counter": 0.500999984741211, + "Acc.sand": 0.6372000122070313, + "Acc.sink": 0.7498000335693359, + "Acc.skyscraper": 0.5963999938964843, + "Acc.fireplace": 0.8988999938964843, + "Acc.refrigerator": 0.8679000091552734, + "Acc.grandstand": 0.745, + "Acc.path": 0.24610000610351562, + "Acc.stairs": 0.3731999969482422, + "Acc.runway": 0.7968000030517578, + "Acc.case": 0.8069999694824219, + "Acc.pool table": 0.9679000091552734, + "Acc.pillow": 0.6863999938964844, + "Acc.screen door": 0.6986000061035156, + "Acc.stairway": 0.44720001220703126, + "Acc.river": 0.2673999977111816, + "Acc.bridge": 0.7276000213623047, + "Acc.bookcase": 0.5690999984741211, + "Acc.blind": 0.45479999542236327, + "Acc.coffee table": 0.7862000274658203, + "Acc.toilet": 0.8987999725341796, + "Acc.flower": 0.5918000030517578, + "Acc.book": 0.6275999832153321, + "Acc.hill": 0.24760000228881837, + "Acc.bench": 0.5295000076293945, + "Acc.countertop": 0.7261000061035157, + "Acc.stove": 0.8158000183105468, + "Acc.palm": 0.6881999969482422, + "Acc.kitchen island": 0.735, + "Acc.computer": 0.8766999816894532, + "Acc.swivel chair": 0.6975, + "Acc.boat": 0.8398999786376953, + "Acc.bar": 0.7140000152587891, + "Acc.arcade machine": 0.8129000091552734, + "Acc.hovel": 0.6620999908447266, + "Acc.bus": 0.935, + "Acc.towel": 0.7694000244140625, + "Acc.light": 0.37900001525878907, + "Acc.truck": 0.2806999969482422, + "Acc.tower": 0.3675, + "Acc.chandelier": 0.788499984741211, + "Acc.awning": 0.4063999938964844, + "Acc.streetlight": 0.18979999542236328, + "Acc.booth": 0.4884000015258789, + "Acc.television receiver": 0.8351000213623047, + "Acc.airplane": 0.7076000213623047, + "Acc.dirt track": 0.03460000038146973, + "Acc.apparel": 0.4754999923706055, + "Acc.pole": 0.25190000534057616, + "Acc.land": 0.032899999618530275, + "Acc.bannister": 0.1397000026702881, + "Acc.escalator": 0.811500015258789, + "Acc.ottoman": 0.6597000122070312, + "Acc.bottle": 0.6236000061035156, + "Acc.buffet": 0.4620999908447266, + "Acc.poster": 0.36520000457763674, + "Acc.stage": 0.22290000915527344, + "Acc.van": 0.45919998168945314, + "Acc.ship": 0.1168000030517578, + "Acc.fountain": 0.21770000457763672, + "Acc.conveyer belt": 0.9191999816894532, + "Acc.canopy": 0.2553000068664551, + "Acc.washer": 0.7451000213623047, + "Acc.plaything": 0.5984000015258789, + "Acc.swimming pool": 0.8251999664306641, + "Acc.stool": 0.3845000076293945, + "Acc.barrel": 0.6416999816894531, + "Acc.basket": 0.39040000915527345, + "Acc.waterfall": 0.5702999877929688, + "Acc.tent": 0.9844000244140625, + "Acc.bag": 0.13829999923706054, + "Acc.minibike": 0.6886000061035156, + "Acc.cradle": 0.9690000152587891, + "Acc.oven": 0.6265000152587891, + "Acc.ball": 0.3333000183105469, + "Acc.food": 0.6461000061035156, + "Acc.step": 0.12229999542236328, + "Acc.tank": 0.6570999908447266, + "Acc.trade name": 0.31290000915527344, + "Acc.microwave": 0.7629000091552735, + "Acc.pot": 0.51, + "Acc.animal": 0.6598999786376953, + "Acc.bicycle": 0.7166000366210937, + "Acc.lake": 0.1084000015258789, + "Acc.dishwasher": 0.620099983215332, + 
"Acc.screen": 0.7254000091552735, + "Acc.blanket": 0.16079999923706054, + "Acc.sculpture": 0.7370999908447265, + "Acc.hood": 0.6443000030517578, + "Acc.sconce": 0.425, + "Acc.vase": 0.41209999084472654, + "Acc.traffic light": 0.35400001525878905, + "Acc.tray": 0.019199999570846556, + "Acc.ashcan": 0.4677000045776367, + "Acc.fan": 0.6068999862670899, + "Acc.pier": 0.4620000076293945, + "Acc.crt screen": 0.09680000305175782, + "Acc.plate": 0.655, + "Acc.monitor": 0.33180000305175783, + "Acc.bulletin board": 0.534099998474121, + "Acc.shower": 0.05010000228881836, + "Acc.radiator": 0.5984999847412109, + "Acc.glass": 0.10439999580383301, + "Acc.clock": 0.30510000228881834, + "Acc.flag": 0.5006999969482422 + } + }, + "96": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8339, + "mIoU": 0.49229999999999996, + "mAcc": 0.6026, + "IoU.wall": 0.7766999816894531, + "IoU.building": 0.8283999633789062, + "IoU.sky": 0.9380000305175781, + "IoU.floor": 0.8168000030517578, + "IoU.tree": 0.7483999633789062, + "IoU.ceiling": 0.8369999694824218, + "IoU.road": 0.8306999969482421, + "IoU.bed ": 0.8987999725341796, + "IoU.windowpane": 0.619900016784668, + "IoU.grass": 0.6856999969482422, + "IoU.cabinet": 0.6365000152587891, + "IoU.sidewalk": 0.6545999908447265, + "IoU.person": 0.8077999877929688, + "IoU.earth": 0.38529998779296876, + "IoU.door": 0.5193999862670898, + "IoU.table": 0.6, + "IoU.mountain": 0.587400016784668, + "IoU.plant": 0.5172000122070313, + "IoU.curtain": 0.7372000122070312, + "IoU.chair": 0.5695999908447266, + "IoU.car": 0.8423999786376953, + "IoU.water": 0.5640000152587891, + "IoU.painting": 0.7087000274658203, + "IoU.sofa": 0.6737000274658204, + "IoU.shelf": 0.43, + "IoU.house": 0.4777000045776367, + "IoU.sea": 0.6656999969482422, + "IoU.mirror": 0.6869000244140625, + "IoU.rug": 0.6341999816894531, + "IoU.field": 0.39240001678466796, + "IoU.armchair": 0.4281999969482422, + "IoU.seat": 0.6719000244140625, + "IoU.fence": 0.45540000915527346, + "IoU.desk": 0.48689998626708986, + "IoU.rock": 0.495, + "IoU.wardrobe": 0.5643000030517578, + "IoU.lamp": 0.5568000030517578, + "IoU.bathtub": 0.8431999969482422, + "IoU.railing": 0.3858000183105469, + "IoU.cushion": 0.5984000015258789, + "IoU.base": 0.2968000030517578, + "IoU.box": 0.225, + "IoU.column": 0.49040000915527343, + "IoU.signboard": 0.35650001525878905, + "IoU.chest of drawers": 0.34880001068115235, + "IoU.counter": 0.3959000015258789, + "IoU.sand": 0.44779998779296876, + "IoU.sink": 0.6993000030517578, + "IoU.skyscraper": 0.5175999832153321, + "IoU.fireplace": 0.7211000061035157, + "IoU.refrigerator": 0.782300033569336, + "IoU.grandstand": 0.5, + "IoU.path": 0.19510000228881835, + "IoU.stairs": 0.2510000038146973, + "IoU.runway": 0.6052999877929688, + "IoU.case": 0.6038999938964844, + "IoU.pool table": 0.9329000091552735, + "IoU.pillow": 0.5720999908447265, + "IoU.screen door": 0.5647000122070313, + "IoU.stairway": 0.3759000015258789, + "IoU.river": 0.15329999923706056, + "IoU.bridge": 0.6658000183105469, + "IoU.bookcase": 0.34720001220703123, + "IoU.blind": 0.41, + "IoU.coffee table": 0.6190000152587891, + "IoU.toilet": 0.8358000183105468, + "IoU.flower": 0.43139999389648437, + "IoU.book": 0.4318000030517578, + "IoU.hill": 0.13600000381469726, + "IoU.bench": 0.4356999969482422, + "IoU.countertop": 0.5888000106811524, + "IoU.stove": 0.7633999633789063, + "IoU.palm": 0.4841999816894531, + "IoU.kitchen island": 0.43200000762939456, + "IoU.computer": 
0.7044000244140625, + "IoU.swivel chair": 0.5438999938964844, + "IoU.boat": 0.6587000274658203, + "IoU.bar": 0.5331000137329102, + "IoU.arcade machine": 0.48720001220703124, + "IoU.hovel": 0.29520000457763673, + "IoU.bus": 0.9058999633789062, + "IoU.towel": 0.6002000045776367, + "IoU.light": 0.3057999992370605, + "IoU.truck": 0.22389999389648438, + "IoU.tower": 0.22670000076293945, + "IoU.chandelier": 0.6379999923706055, + "IoU.awning": 0.2513999938964844, + "IoU.streetlight": 0.16799999237060548, + "IoU.booth": 0.3529000091552734, + "IoU.television receiver": 0.7238999938964844, + "IoU.airplane": 0.615, + "IoU.dirt track": 0.04820000171661377, + "IoU.apparel": 0.3243999862670898, + "IoU.pole": 0.16520000457763673, + "IoU.land": 0.09850000381469727, + "IoU.bannister": 0.04630000114440918, + "IoU.escalator": 0.5511000061035156, + "IoU.ottoman": 0.4847999954223633, + "IoU.bottle": 0.36790000915527343, + "IoU.buffet": 0.46169998168945314, + "IoU.poster": 0.35889999389648436, + "IoU.stage": 0.13119999885559083, + "IoU.van": 0.34900001525878904, + "IoU.ship": 0.7637999725341796, + "IoU.fountain": 0.22489999771118163, + "IoU.conveyer belt": 0.7719000244140625, + "IoU.canopy": 0.2510000038146973, + "IoU.washer": 0.7019999694824218, + "IoU.plaything": 0.41319999694824217, + "IoU.swimming pool": 0.7629000091552735, + "IoU.stool": 0.37810001373291013, + "IoU.barrel": 0.278700008392334, + "IoU.basket": 0.26979999542236327, + "IoU.waterfall": 0.49459999084472656, + "IoU.tent": 0.9531999969482422, + "IoU.bag": 0.1425, + "IoU.minibike": 0.7168000030517578, + "IoU.cradle": 0.7545999908447265, + "IoU.oven": 0.4122999954223633, + "IoU.ball": 0.4386000061035156, + "IoU.food": 0.5141999816894531, + "IoU.step": 0.0825, + "IoU.tank": 0.552599983215332, + "IoU.trade name": 0.2592000007629395, + "IoU.microwave": 0.8001000213623047, + "IoU.pot": 0.4981999969482422, + "IoU.animal": 0.6168999862670899, + "IoU.bicycle": 0.5590000152587891, + "IoU.lake": 0.6552999877929687, + "IoU.dishwasher": 0.609900016784668, + "IoU.screen": 0.5258000183105469, + "IoU.blanket": 0.15520000457763672, + "IoU.sculpture": 0.6445999908447265, + "IoU.hood": 0.4997999954223633, + "IoU.sconce": 0.26899999618530274, + "IoU.vase": 0.30790000915527344, + "IoU.traffic light": 0.23579999923706055, + "IoU.tray": 0.024700000286102294, + "IoU.ashcan": 0.31489999771118166, + "IoU.fan": 0.43590000152587893, + "IoU.pier": 0.29329999923706057, + "IoU.crt screen": 0.02119999885559082, + "IoU.plate": 0.5093000030517578, + "IoU.monitor": 0.17239999771118164, + "IoU.bulletin board": 0.39240001678466796, + "IoU.shower": 0.007400000095367431, + "IoU.radiator": 0.5531000137329102, + "IoU.glass": 0.10319999694824218, + "IoU.clock": 0.2815999984741211, + "IoU.flag": 0.4254999923706055, + "Acc.wall": 0.8883999633789063, + "Acc.building": 0.9238999938964844, + "Acc.sky": 0.9773999786376953, + "Acc.floor": 0.9066999816894531, + "Acc.tree": 0.8687999725341797, + "Acc.ceiling": 0.9283999633789063, + "Acc.road": 0.897300033569336, + "Acc.bed ": 0.9595999908447266, + "Acc.windowpane": 0.7762999725341797, + "Acc.grass": 0.7991000366210937, + "Acc.cabinet": 0.7481999969482422, + "Acc.sidewalk": 0.794000015258789, + "Acc.person": 0.9169000244140625, + "Acc.earth": 0.5670000076293945, + "Acc.door": 0.6956999969482421, + "Acc.table": 0.7631999969482421, + "Acc.mountain": 0.7086000061035156, + "Acc.plant": 0.6583000183105469, + "Acc.curtain": 0.8504000091552735, + "Acc.chair": 0.6944999694824219, + "Acc.car": 0.9291999816894532, + "Acc.water": 0.7231999969482422, + 
"Acc.painting": 0.8376000213623047, + "Acc.sofa": 0.8269000244140625, + "Acc.shelf": 0.6091999816894531, + "Acc.house": 0.7076000213623047, + "Acc.sea": 0.8731999969482422, + "Acc.mirror": 0.7683999633789063, + "Acc.rug": 0.7218000030517578, + "Acc.field": 0.6118999862670899, + "Acc.armchair": 0.6190000152587891, + "Acc.seat": 0.8452999877929688, + "Acc.fence": 0.6138999938964844, + "Acc.desk": 0.6669000244140625, + "Acc.rock": 0.7179000091552734, + "Acc.wardrobe": 0.68, + "Acc.lamp": 0.6675, + "Acc.bathtub": 0.8834999847412109, + "Acc.railing": 0.5122999954223633, + "Acc.cushion": 0.7486000061035156, + "Acc.base": 0.5008000183105469, + "Acc.box": 0.28899999618530275, + "Acc.column": 0.5981999969482422, + "Acc.signboard": 0.4677999877929688, + "Acc.chest of drawers": 0.5627000045776367, + "Acc.counter": 0.5236999893188476, + "Acc.sand": 0.5847999954223633, + "Acc.sink": 0.7706999969482422, + "Acc.skyscraper": 0.6168999862670899, + "Acc.fireplace": 0.9086000061035157, + "Acc.refrigerator": 0.8554000091552735, + "Acc.grandstand": 0.7443000030517578, + "Acc.path": 0.26610000610351564, + "Acc.stairs": 0.335099983215332, + "Acc.runway": 0.7744000244140625, + "Acc.case": 0.7494000244140625, + "Acc.pool table": 0.9630999755859375, + "Acc.pillow": 0.653499984741211, + "Acc.screen door": 0.639000015258789, + "Acc.stairway": 0.5052999877929687, + "Acc.river": 0.2954999923706055, + "Acc.bridge": 0.7805000305175781, + "Acc.bookcase": 0.590900001525879, + "Acc.blind": 0.45279998779296876, + "Acc.coffee table": 0.7698000335693359, + "Acc.toilet": 0.8955999755859375, + "Acc.flower": 0.6172999954223632, + "Acc.book": 0.5918000030517578, + "Acc.hill": 0.23370000839233399, + "Acc.bench": 0.5279000091552735, + "Acc.countertop": 0.7443000030517578, + "Acc.stove": 0.8326000213623047, + "Acc.palm": 0.6708000183105469, + "Acc.kitchen island": 0.5918000030517578, + "Acc.computer": 0.8387999725341797, + "Acc.swivel chair": 0.6816999816894531, + "Acc.boat": 0.8351999664306641, + "Acc.bar": 0.6504000091552734, + "Acc.arcade machine": 0.518499984741211, + "Acc.hovel": 0.332599983215332, + "Acc.bus": 0.9512000274658203, + "Acc.towel": 0.747699966430664, + "Acc.light": 0.32240001678466795, + "Acc.truck": 0.3103000068664551, + "Acc.tower": 0.36970001220703125, + "Acc.chandelier": 0.7768000030517578, + "Acc.awning": 0.29399999618530276, + "Acc.streetlight": 0.19610000610351563, + "Acc.booth": 0.44669998168945313, + "Acc.television receiver": 0.8158999633789062, + "Acc.airplane": 0.685199966430664, + "Acc.dirt track": 0.2272999954223633, + "Acc.apparel": 0.43990001678466795, + "Acc.pole": 0.20809999465942383, + "Acc.land": 0.1256999969482422, + "Acc.bannister": 0.06039999961853027, + "Acc.escalator": 0.7105999755859375, + "Acc.ottoman": 0.6448000335693359, + "Acc.bottle": 0.5808000183105468, + "Acc.buffet": 0.5236000061035156, + "Acc.poster": 0.5441999816894532, + "Acc.stage": 0.21100000381469727, + "Acc.van": 0.42380001068115236, + "Acc.ship": 0.7972000122070313, + "Acc.fountain": 0.22889999389648438, + "Acc.conveyer belt": 0.9163999938964844, + "Acc.canopy": 0.2975, + "Acc.washer": 0.7366999816894532, + "Acc.plaything": 0.6390999984741211, + "Acc.swimming pool": 0.8737000274658203, + "Acc.stool": 0.4456999969482422, + "Acc.barrel": 0.6405999755859375, + "Acc.basket": 0.3425, + "Acc.waterfall": 0.5797999954223633, + "Acc.tent": 0.9787999725341797, + "Acc.bag": 0.16860000610351564, + "Acc.minibike": 0.8031999969482422, + "Acc.cradle": 0.9612999725341796, + "Acc.oven": 0.49139999389648437, + "Acc.ball": 
0.48880001068115236, + "Acc.food": 0.5975, + "Acc.step": 0.10930000305175781, + "Acc.tank": 0.6481999969482422, + "Acc.trade name": 0.28739999771118163, + "Acc.microwave": 0.8865000152587891, + "Acc.pot": 0.5861999893188476, + "Acc.animal": 0.6538999938964843, + "Acc.bicycle": 0.6902999877929688, + "Acc.lake": 0.6844000244140624, + "Acc.dishwasher": 0.7120999908447265, + "Acc.screen": 0.735999984741211, + "Acc.blanket": 0.18069999694824218, + "Acc.sculpture": 0.7981999969482422, + "Acc.hood": 0.6354000091552734, + "Acc.sconce": 0.3384000015258789, + "Acc.vase": 0.42439998626708986, + "Acc.traffic light": 0.3472999954223633, + "Acc.tray": 0.027799999713897704, + "Acc.ashcan": 0.45630001068115233, + "Acc.fan": 0.4859999847412109, + "Acc.pier": 0.425, + "Acc.crt screen": 0.05179999828338623, + "Acc.plate": 0.6788999938964844, + "Acc.monitor": 0.23979999542236327, + "Acc.bulletin board": 0.523499984741211, + "Acc.shower": 0.049000000953674315, + "Acc.radiator": 0.6175, + "Acc.glass": 0.10659999847412109, + "Acc.clock": 0.3046999931335449, + "Acc.flag": 0.4527000045776367 + } + }, + "97": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8345, + "mIoU": 0.48950000000000005, + "mAcc": 0.5901, + "IoU.wall": 0.774000015258789, + "IoU.building": 0.831500015258789, + "IoU.sky": 0.9369999694824219, + "IoU.floor": 0.8154000091552734, + "IoU.tree": 0.7441999816894531, + "IoU.ceiling": 0.835, + "IoU.road": 0.8306999969482421, + "IoU.bed ": 0.8963999938964844, + "IoU.windowpane": 0.6227000045776367, + "IoU.grass": 0.6633000183105469, + "IoU.cabinet": 0.6327999877929688, + "IoU.sidewalk": 0.6579000091552735, + "IoU.person": 0.8083000183105469, + "IoU.earth": 0.3856999969482422, + "IoU.door": 0.5204999923706055, + "IoU.table": 0.605999984741211, + "IoU.mountain": 0.5927000045776367, + "IoU.plant": 0.530099983215332, + "IoU.curtain": 0.7455999755859375, + "IoU.chair": 0.562599983215332, + "IoU.car": 0.8433000183105469, + "IoU.water": 0.589900016784668, + "IoU.painting": 0.7277999877929687, + "IoU.sofa": 0.685, + "IoU.shelf": 0.4365999984741211, + "IoU.house": 0.47450000762939454, + "IoU.sea": 0.6787000274658204, + "IoU.mirror": 0.7044999694824219, + "IoU.rug": 0.6354000091552734, + "IoU.field": 0.36080001831054687, + "IoU.armchair": 0.42439998626708986, + "IoU.seat": 0.6672000122070313, + "IoU.fence": 0.4293000030517578, + "IoU.desk": 0.5077999877929688, + "IoU.rock": 0.4872999954223633, + "IoU.wardrobe": 0.5590999984741211, + "IoU.lamp": 0.5650999832153321, + "IoU.bathtub": 0.8466000366210937, + "IoU.railing": 0.39439998626708983, + "IoU.cushion": 0.6068000030517579, + "IoU.base": 0.31610000610351563, + "IoU.box": 0.2359000015258789, + "IoU.column": 0.49380001068115237, + "IoU.signboard": 0.37040000915527344, + "IoU.chest of drawers": 0.3484000015258789, + "IoU.counter": 0.397599983215332, + "IoU.sand": 0.4986000061035156, + "IoU.sink": 0.7055000305175781, + "IoU.skyscraper": 0.5247000122070312, + "IoU.fireplace": 0.7266999816894532, + "IoU.refrigerator": 0.7670999908447266, + "IoU.grandstand": 0.5166999816894531, + "IoU.path": 0.20899999618530274, + "IoU.stairs": 0.22209999084472656, + "IoU.runway": 0.6259000015258789, + "IoU.case": 0.5643999862670899, + "IoU.pool table": 0.9294999694824219, + "IoU.pillow": 0.5620999908447266, + "IoU.screen door": 0.6930000305175781, + "IoU.stairway": 0.31540000915527344, + "IoU.river": 0.17479999542236327, + "IoU.bridge": 0.5293999862670898, + "IoU.bookcase": 0.335, + 
"IoU.blind": 0.40630001068115235, + "IoU.coffee table": 0.6320999908447266, + "IoU.toilet": 0.8419999694824218, + "IoU.flower": 0.38990001678466796, + "IoU.book": 0.45680000305175783, + "IoU.hill": 0.14199999809265137, + "IoU.bench": 0.4077999877929688, + "IoU.countertop": 0.5877000045776367, + "IoU.stove": 0.7373000335693359, + "IoU.palm": 0.48369998931884767, + "IoU.kitchen island": 0.4670999908447266, + "IoU.computer": 0.6980000305175781, + "IoU.swivel chair": 0.5075, + "IoU.boat": 0.7240000152587891, + "IoU.bar": 0.5650999832153321, + "IoU.arcade machine": 0.4613999938964844, + "IoU.hovel": 0.4715999984741211, + "IoU.bus": 0.9043000030517578, + "IoU.towel": 0.6379999923706055, + "IoU.light": 0.32630001068115233, + "IoU.truck": 0.18639999389648437, + "IoU.tower": 0.18469999313354493, + "IoU.chandelier": 0.6415000152587891, + "IoU.awning": 0.24159999847412109, + "IoU.streetlight": 0.15880000114440918, + "IoU.booth": 0.36020000457763673, + "IoU.television receiver": 0.7190000152587891, + "IoU.airplane": 0.575900001525879, + "IoU.dirt track": 0.09880000114440918, + "IoU.apparel": 0.35639999389648436, + "IoU.pole": 0.14399999618530274, + "IoU.land": 0.059600000381469724, + "IoU.bannister": 0.08180000305175782, + "IoU.escalator": 0.6027000045776367, + "IoU.ottoman": 0.47310001373291016, + "IoU.bottle": 0.3009000015258789, + "IoU.buffet": 0.34419998168945315, + "IoU.poster": 0.2894000053405762, + "IoU.stage": 0.149399995803833, + "IoU.van": 0.40029998779296877, + "IoU.ship": 0.12630000114440917, + "IoU.fountain": 0.16850000381469726, + "IoU.conveyer belt": 0.7337999725341797, + "IoU.canopy": 0.1775, + "IoU.washer": 0.7287000274658203, + "IoU.plaything": 0.29, + "IoU.swimming pool": 0.7470999908447266, + "IoU.stool": 0.4072999954223633, + "IoU.barrel": 0.5841999816894531, + "IoU.basket": 0.33630001068115234, + "IoU.waterfall": 0.4484000015258789, + "IoU.tent": 0.9544000244140625, + "IoU.bag": 0.16440000534057617, + "IoU.minibike": 0.7044999694824219, + "IoU.cradle": 0.8187000274658203, + "IoU.oven": 0.20780000686645508, + "IoU.ball": 0.5747999954223633, + "IoU.food": 0.5152000045776367, + "IoU.step": 0.08739999771118163, + "IoU.tank": 0.5738000106811524, + "IoU.trade name": 0.2577000045776367, + "IoU.microwave": 0.4997999954223633, + "IoU.pot": 0.48270000457763673, + "IoU.animal": 0.5841999816894531, + "IoU.bicycle": 0.5690999984741211, + "IoU.lake": 0.43259998321533205, + "IoU.dishwasher": 0.6448999786376953, + "IoU.screen": 0.5888000106811524, + "IoU.blanket": 0.14989999771118165, + "IoU.sculpture": 0.6741999816894532, + "IoU.hood": 0.5058000183105469, + "IoU.sconce": 0.2851000022888184, + "IoU.vase": 0.3325, + "IoU.traffic light": 0.2427000045776367, + "IoU.tray": 0.033399999141693115, + "IoU.ashcan": 0.3547999954223633, + "IoU.fan": 0.4356999969482422, + "IoU.pier": 0.2940999984741211, + "IoU.crt screen": 0.06460000038146972, + "IoU.plate": 0.5393000030517578, + "IoU.monitor": 0.5213000106811524, + "IoU.bulletin board": 0.43349998474121093, + "IoU.shower": 0.019500000476837157, + "IoU.radiator": 0.5511999893188476, + "IoU.glass": 0.05489999771118164, + "IoU.clock": 0.26549999237060545, + "IoU.flag": 0.367400016784668, + "Acc.wall": 0.8993000030517578, + "Acc.building": 0.9312999725341797, + "Acc.sky": 0.9777999877929687, + "Acc.floor": 0.9143000030517578, + "Acc.tree": 0.8687000274658203, + "Acc.ceiling": 0.9309999847412109, + "Acc.road": 0.9056999969482422, + "Acc.bed ": 0.9566000366210937, + "Acc.windowpane": 0.7737000274658203, + "Acc.grass": 0.7856999969482422, + "Acc.cabinet": 
0.7379000091552734, + "Acc.sidewalk": 0.7870999908447266, + "Acc.person": 0.9138999938964844, + "Acc.earth": 0.5540999984741211, + "Acc.door": 0.6729000091552735, + "Acc.table": 0.763499984741211, + "Acc.mountain": 0.7241999816894531, + "Acc.plant": 0.6568000030517578, + "Acc.curtain": 0.8522000122070312, + "Acc.chair": 0.6702999877929687, + "Acc.car": 0.9241000366210937, + "Acc.water": 0.7606999969482422, + "Acc.painting": 0.8469000244140625, + "Acc.sofa": 0.8806999969482422, + "Acc.shelf": 0.6325999832153321, + "Acc.house": 0.6279000091552734, + "Acc.sea": 0.8631999969482422, + "Acc.mirror": 0.7731999969482422, + "Acc.rug": 0.7258999633789063, + "Acc.field": 0.6118999862670899, + "Acc.armchair": 0.5879999923706055, + "Acc.seat": 0.8344999694824219, + "Acc.fence": 0.5663999938964843, + "Acc.desk": 0.6941000366210938, + "Acc.rock": 0.6681999969482422, + "Acc.wardrobe": 0.6537000274658203, + "Acc.lamp": 0.6598000335693359, + "Acc.bathtub": 0.8855999755859375, + "Acc.railing": 0.5291999816894531, + "Acc.cushion": 0.7295999908447266, + "Acc.base": 0.4988999938964844, + "Acc.box": 0.30920000076293946, + "Acc.column": 0.5934000015258789, + "Acc.signboard": 0.46180000305175783, + "Acc.chest of drawers": 0.542599983215332, + "Acc.counter": 0.5093999862670898, + "Acc.sand": 0.66, + "Acc.sink": 0.7701000213623047, + "Acc.skyscraper": 0.6070999908447265, + "Acc.fireplace": 0.8655999755859375, + "Acc.refrigerator": 0.8273000335693359, + "Acc.grandstand": 0.7638999938964843, + "Acc.path": 0.291200008392334, + "Acc.stairs": 0.29399999618530276, + "Acc.runway": 0.8052999877929687, + "Acc.case": 0.7533999633789062, + "Acc.pool table": 0.9644999694824219, + "Acc.pillow": 0.6354999923706055, + "Acc.screen door": 0.768499984741211, + "Acc.stairway": 0.4565999984741211, + "Acc.river": 0.3118000030517578, + "Acc.bridge": 0.5997999954223633, + "Acc.bookcase": 0.5609000015258789, + "Acc.blind": 0.4384000015258789, + "Acc.coffee table": 0.7791999816894531, + "Acc.toilet": 0.8880999755859375, + "Acc.flower": 0.5313999938964844, + "Acc.book": 0.6018000030517578, + "Acc.hill": 0.22620000839233398, + "Acc.bench": 0.4740999984741211, + "Acc.countertop": 0.7208000183105469, + "Acc.stove": 0.8180999755859375, + "Acc.palm": 0.645199966430664, + "Acc.kitchen island": 0.6256999969482422, + "Acc.computer": 0.7916999816894531, + "Acc.swivel chair": 0.6341999816894531, + "Acc.boat": 0.8162000274658203, + "Acc.bar": 0.6840000152587891, + "Acc.arcade machine": 0.4825, + "Acc.hovel": 0.5079999923706054, + "Acc.bus": 0.9319999694824219, + "Acc.towel": 0.7405000305175782, + "Acc.light": 0.35009998321533203, + "Acc.truck": 0.24459999084472656, + "Acc.tower": 0.30709999084472656, + "Acc.chandelier": 0.7891000366210937, + "Acc.awning": 0.2680999946594238, + "Acc.streetlight": 0.19280000686645507, + "Acc.booth": 0.3768000030517578, + "Acc.television receiver": 0.7927999877929688, + "Acc.airplane": 0.6256000137329102, + "Acc.dirt track": 0.4193000030517578, + "Acc.apparel": 0.48650001525878905, + "Acc.pole": 0.17290000915527343, + "Acc.land": 0.07369999885559082, + "Acc.bannister": 0.09810000419616699, + "Acc.escalator": 0.7709999847412109, + "Acc.ottoman": 0.6106000137329102, + "Acc.bottle": 0.41009998321533203, + "Acc.buffet": 0.40799999237060547, + "Acc.poster": 0.5238000106811523, + "Acc.stage": 0.20459999084472658, + "Acc.van": 0.48319999694824217, + "Acc.ship": 0.13600000381469726, + "Acc.fountain": 0.17049999237060548, + "Acc.conveyer belt": 0.9223999786376953, + "Acc.canopy": 0.19020000457763672, + "Acc.washer": 
0.7494000244140625, + "Acc.plaything": 0.36790000915527343, + "Acc.swimming pool": 0.8962999725341797, + "Acc.stool": 0.4884999847412109, + "Acc.barrel": 0.64, + "Acc.basket": 0.415, + "Acc.waterfall": 0.555, + "Acc.tent": 0.9741999816894531, + "Acc.bag": 0.19180000305175782, + "Acc.minibike": 0.7833000183105469, + "Acc.cradle": 0.9575, + "Acc.oven": 0.485, + "Acc.ball": 0.650199966430664, + "Acc.food": 0.5802999877929688, + "Acc.step": 0.11340000152587891, + "Acc.tank": 0.6416999816894531, + "Acc.trade name": 0.281299991607666, + "Acc.microwave": 0.5497999954223632, + "Acc.pot": 0.5475, + "Acc.animal": 0.6068999862670899, + "Acc.bicycle": 0.6815000152587891, + "Acc.lake": 0.5386000061035157, + "Acc.dishwasher": 0.6873999786376953, + "Acc.screen": 0.7093000030517578, + "Acc.blanket": 0.16780000686645508, + "Acc.sculpture": 0.7625, + "Acc.hood": 0.6379999923706055, + "Acc.sconce": 0.34819999694824216, + "Acc.vase": 0.40450000762939453, + "Acc.traffic light": 0.3290000152587891, + "Acc.tray": 0.03859999895095825, + "Acc.ashcan": 0.49720001220703125, + "Acc.fan": 0.49290000915527343, + "Acc.pier": 0.412400016784668, + "Acc.crt screen": 0.09390000343322753, + "Acc.plate": 0.6805999755859375, + "Acc.monitor": 0.7112999725341796, + "Acc.bulletin board": 0.5136999893188476, + "Acc.shower": 0.04320000171661377, + "Acc.radiator": 0.5966999816894532, + "Acc.glass": 0.05590000152587891, + "Acc.clock": 0.27719999313354493, + "Acc.flag": 0.40939998626708984 + } + }, + "98": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8319, + "mIoU": 0.4792, + "mAcc": 0.5697, + "IoU.wall": 0.7719000244140625, + "IoU.building": 0.8256999969482421, + "IoU.sky": 0.9287000274658204, + "IoU.floor": 0.8080999755859375, + "IoU.tree": 0.7256999969482422, + "IoU.ceiling": 0.8334999847412109, + "IoU.road": 0.8266999816894531, + "IoU.bed ": 0.8922000122070313, + "IoU.windowpane": 0.617599983215332, + "IoU.grass": 0.6662000274658203, + "IoU.cabinet": 0.6404000091552734, + "IoU.sidewalk": 0.6512000274658203, + "IoU.person": 0.8076000213623047, + "IoU.earth": 0.3831999969482422, + "IoU.door": 0.507400016784668, + "IoU.table": 0.6106999969482422, + "IoU.mountain": 0.5854999923706055, + "IoU.plant": 0.5245000076293945, + "IoU.curtain": 0.742699966430664, + "IoU.chair": 0.555099983215332, + "IoU.car": 0.8369999694824218, + "IoU.water": 0.5879999923706055, + "IoU.painting": 0.7138999938964844, + "IoU.sofa": 0.6687000274658204, + "IoU.shelf": 0.43959999084472656, + "IoU.house": 0.4659000015258789, + "IoU.sea": 0.6776000213623047, + "IoU.mirror": 0.6762999725341797, + "IoU.rug": 0.6206000137329102, + "IoU.field": 0.37209999084472656, + "IoU.armchair": 0.3820000076293945, + "IoU.seat": 0.6802999877929687, + "IoU.fence": 0.42619998931884767, + "IoU.desk": 0.4836000061035156, + "IoU.rock": 0.5004000091552734, + "IoU.wardrobe": 0.5679000091552734, + "IoU.lamp": 0.5413999938964844, + "IoU.bathtub": 0.8311000061035156, + "IoU.railing": 0.39279998779296876, + "IoU.cushion": 0.590999984741211, + "IoU.base": 0.29739999771118164, + "IoU.box": 0.24440000534057618, + "IoU.column": 0.46130001068115234, + "IoU.signboard": 0.34400001525878904, + "IoU.chest of drawers": 0.36200000762939455, + "IoU.counter": 0.37849998474121094, + "IoU.sand": 0.4772999954223633, + "IoU.sink": 0.7001000213623046, + "IoU.skyscraper": 0.4988000106811523, + "IoU.fireplace": 0.7305000305175782, + "IoU.refrigerator": 0.7558999633789063, + "IoU.grandstand": 0.5145000076293945, + 
"IoU.path": 0.2259000015258789, + "IoU.stairs": 0.16350000381469726, + "IoU.runway": 0.6297000122070312, + "IoU.case": 0.5322999954223633, + "IoU.pool table": 0.9262999725341797, + "IoU.pillow": 0.5075999832153321, + "IoU.screen door": 0.7033000183105469, + "IoU.stairway": 0.29809999465942383, + "IoU.river": 0.18049999237060546, + "IoU.bridge": 0.5995000076293945, + "IoU.bookcase": 0.3103000068664551, + "IoU.blind": 0.3915999984741211, + "IoU.coffee table": 0.6284999847412109, + "IoU.toilet": 0.8313999938964843, + "IoU.flower": 0.3934000015258789, + "IoU.book": 0.4333000183105469, + "IoU.hill": 0.13289999961853027, + "IoU.bench": 0.4702999877929688, + "IoU.countertop": 0.5645999908447266, + "IoU.stove": 0.7468000030517579, + "IoU.palm": 0.4356999969482422, + "IoU.kitchen island": 0.45619998931884764, + "IoU.computer": 0.754000015258789, + "IoU.swivel chair": 0.5008000183105469, + "IoU.boat": 0.6122000122070312, + "IoU.bar": 0.5758000183105468, + "IoU.arcade machine": 0.37139999389648437, + "IoU.hovel": 0.291200008392334, + "IoU.bus": 0.9075, + "IoU.towel": 0.6491000366210937, + "IoU.light": 0.32930000305175783, + "IoU.truck": 0.17889999389648437, + "IoU.tower": 0.16459999084472657, + "IoU.chandelier": 0.6081999969482422, + "IoU.awning": 0.21760000228881837, + "IoU.streetlight": 0.16510000228881835, + "IoU.booth": 0.3491999816894531, + "IoU.television receiver": 0.7031999969482422, + "IoU.airplane": 0.5745999908447266, + "IoU.dirt track": 0.10369999885559082, + "IoU.apparel": 0.3334000015258789, + "IoU.pole": 0.09829999923706055, + "IoU.land": 0.06260000228881836, + "IoU.bannister": 0.10329999923706054, + "IoU.escalator": 0.5472999954223633, + "IoU.ottoman": 0.4468999862670898, + "IoU.bottle": 0.1972999954223633, + "IoU.buffet": 0.33560001373291015, + "IoU.poster": 0.18520000457763672, + "IoU.stage": 0.13680000305175782, + "IoU.van": 0.34029998779296877, + "IoU.ship": 0.07960000038146972, + "IoU.fountain": 0.22829999923706054, + "IoU.conveyer belt": 0.7398999786376953, + "IoU.canopy": 0.14199999809265137, + "IoU.washer": 0.7227999877929687, + "IoU.plaything": 0.3429000091552734, + "IoU.swimming pool": 0.7633000183105468, + "IoU.stool": 0.39849998474121096, + "IoU.barrel": 0.5893000030517578, + "IoU.basket": 0.35830001831054686, + "IoU.waterfall": 0.4438000106811523, + "IoU.tent": 0.9504000091552735, + "IoU.bag": 0.14760000228881837, + "IoU.minibike": 0.6455000305175781, + "IoU.cradle": 0.8066000366210937, + "IoU.oven": 0.26, + "IoU.ball": 0.5477999877929688, + "IoU.food": 0.44470001220703126, + "IoU.step": 0.08529999732971191, + "IoU.tank": 0.5427999877929688, + "IoU.trade name": 0.18270000457763672, + "IoU.microwave": 0.7194000244140625, + "IoU.pot": 0.47119998931884766, + "IoU.animal": 0.5890999984741211, + "IoU.bicycle": 0.5197999954223633, + "IoU.lake": 0.4286999893188477, + "IoU.dishwasher": 0.5879000091552734, + "IoU.screen": 0.5761999893188476, + "IoU.blanket": 0.12800000190734864, + "IoU.sculpture": 0.6994000244140625, + "IoU.hood": 0.5086000061035156, + "IoU.sconce": 0.25739999771118166, + "IoU.vase": 0.32279998779296876, + "IoU.traffic light": 0.236200008392334, + "IoU.tray": 0.05190000057220459, + "IoU.ashcan": 0.33360000610351564, + "IoU.fan": 0.469900016784668, + "IoU.pier": 0.28889999389648435, + "IoU.crt screen": 0.013899999856948852, + "IoU.plate": 0.5165000152587891, + "IoU.monitor": 0.5302000045776367, + "IoU.bulletin board": 0.2968000030517578, + "IoU.shower": 0.018700000047683716, + "IoU.radiator": 0.600999984741211, + "IoU.glass": 0.10149999618530274, + "IoU.clock": 
0.27649999618530274, + "IoU.flag": 0.38810001373291014, + "Acc.wall": 0.8980999755859375, + "Acc.building": 0.9455999755859374, + "Acc.sky": 0.9843000030517578, + "Acc.floor": 0.9245999908447265, + "Acc.tree": 0.8208999633789062, + "Acc.ceiling": 0.9354000091552734, + "Acc.road": 0.9112999725341797, + "Acc.bed ": 0.9598999786376953, + "Acc.windowpane": 0.7998000335693359, + "Acc.grass": 0.8029000091552735, + "Acc.cabinet": 0.7630000305175781, + "Acc.sidewalk": 0.7908999633789062, + "Acc.person": 0.8866000366210938, + "Acc.earth": 0.5518999862670898, + "Acc.door": 0.6286999893188476, + "Acc.table": 0.7879000091552735, + "Acc.mountain": 0.7201999664306641, + "Acc.plant": 0.6191999816894531, + "Acc.curtain": 0.8501000213623047, + "Acc.chair": 0.6494000244140625, + "Acc.car": 0.9020999908447266, + "Acc.water": 0.7843000030517578, + "Acc.painting": 0.8252999877929688, + "Acc.sofa": 0.8826000213623046, + "Acc.shelf": 0.6313999938964844, + "Acc.house": 0.5950999832153321, + "Acc.sea": 0.8751999664306641, + "Acc.mirror": 0.7425, + "Acc.rug": 0.7073000335693359, + "Acc.field": 0.6261000061035156, + "Acc.armchair": 0.4727999877929687, + "Acc.seat": 0.847699966430664, + "Acc.fence": 0.5629999923706055, + "Acc.desk": 0.6116999816894532, + "Acc.rock": 0.6815000152587891, + "Acc.wardrobe": 0.6836000061035157, + "Acc.lamp": 0.6136999893188476, + "Acc.bathtub": 0.861500015258789, + "Acc.railing": 0.5365999984741211, + "Acc.cushion": 0.6973999786376953, + "Acc.base": 0.39939998626708983, + "Acc.box": 0.3, + "Acc.column": 0.534000015258789, + "Acc.signboard": 0.4490000152587891, + "Acc.chest of drawers": 0.5308000183105469, + "Acc.counter": 0.5015000152587891, + "Acc.sand": 0.6097000122070313, + "Acc.sink": 0.7558999633789063, + "Acc.skyscraper": 0.5595000076293946, + "Acc.fireplace": 0.8280999755859375, + "Acc.refrigerator": 0.7870999908447266, + "Acc.grandstand": 0.7437999725341797, + "Acc.path": 0.2989999961853027, + "Acc.stairs": 0.20860000610351562, + "Acc.runway": 0.8327999877929687, + "Acc.case": 0.7366000366210937, + "Acc.pool table": 0.9616000366210937, + "Acc.pillow": 0.5733000183105469, + "Acc.screen door": 0.7620999908447266, + "Acc.stairway": 0.4872999954223633, + "Acc.river": 0.2772999954223633, + "Acc.bridge": 0.6922000122070312, + "Acc.bookcase": 0.5004999923706055, + "Acc.blind": 0.41369998931884766, + "Acc.coffee table": 0.7638999938964843, + "Acc.toilet": 0.8708000183105469, + "Acc.flower": 0.5577999877929688, + "Acc.book": 0.605999984741211, + "Acc.hill": 0.19420000076293945, + "Acc.bench": 0.5177000045776368, + "Acc.countertop": 0.7186000061035156, + "Acc.stove": 0.8104000091552734, + "Acc.palm": 0.545099983215332, + "Acc.kitchen island": 0.6788999938964844, + "Acc.computer": 0.8505000305175782, + "Acc.swivel chair": 0.6013000106811524, + "Acc.boat": 0.6608999633789062, + "Acc.bar": 0.6737999725341797, + "Acc.arcade machine": 0.38599998474121094, + "Acc.hovel": 0.3096999931335449, + "Acc.bus": 0.9358999633789062, + "Acc.towel": 0.7741000366210937, + "Acc.light": 0.36939998626708986, + "Acc.truck": 0.21760000228881837, + "Acc.tower": 0.21450000762939453, + "Acc.chandelier": 0.7677999877929688, + "Acc.awning": 0.23450000762939452, + "Acc.streetlight": 0.208700008392334, + "Acc.booth": 0.37139999389648437, + "Acc.television receiver": 0.7497000122070312, + "Acc.airplane": 0.6154999923706055, + "Acc.dirt track": 0.26629999160766604, + "Acc.apparel": 0.42650001525878906, + "Acc.pole": 0.11470000267028808, + "Acc.land": 0.0884000015258789, + "Acc.bannister": 0.13020000457763672, + 
"Acc.escalator": 0.6848000335693359, + "Acc.ottoman": 0.5840000152587891, + "Acc.bottle": 0.23850000381469727, + "Acc.buffet": 0.3977000045776367, + "Acc.poster": 0.2559000015258789, + "Acc.stage": 0.18549999237060547, + "Acc.van": 0.39779998779296877, + "Acc.ship": 0.09279999732971192, + "Acc.fountain": 0.23139999389648438, + "Acc.conveyer belt": 0.9220999908447266, + "Acc.canopy": 0.14670000076293946, + "Acc.washer": 0.7481999969482422, + "Acc.plaything": 0.46110000610351565, + "Acc.swimming pool": 0.877300033569336, + "Acc.stool": 0.47139999389648435, + "Acc.barrel": 0.6386000061035156, + "Acc.basket": 0.46419998168945314, + "Acc.waterfall": 0.5743999862670899, + "Acc.tent": 0.9594000244140625, + "Acc.bag": 0.17510000228881836, + "Acc.minibike": 0.6783999633789063, + "Acc.cradle": 0.9495999908447266, + "Acc.oven": 0.3683000183105469, + "Acc.ball": 0.6455000305175781, + "Acc.food": 0.5011999893188477, + "Acc.step": 0.10829999923706055, + "Acc.tank": 0.6056999969482422, + "Acc.trade name": 0.19180000305175782, + "Acc.microwave": 0.8005999755859375, + "Acc.pot": 0.5197000122070312, + "Acc.animal": 0.6047000122070313, + "Acc.bicycle": 0.6122000122070312, + "Acc.lake": 0.4791999816894531, + "Acc.dishwasher": 0.6655999755859375, + "Acc.screen": 0.7658999633789062, + "Acc.blanket": 0.15579999923706056, + "Acc.sculpture": 0.7294999694824219, + "Acc.hood": 0.6063000106811524, + "Acc.sconce": 0.29809999465942383, + "Acc.vase": 0.39299999237060546, + "Acc.traffic light": 0.3011000061035156, + "Acc.tray": 0.06360000133514404, + "Acc.ashcan": 0.4672000122070312, + "Acc.fan": 0.525099983215332, + "Acc.pier": 0.3863999938964844, + "Acc.crt screen": 0.02109999895095825, + "Acc.plate": 0.6327999877929688, + "Acc.monitor": 0.6206999969482422, + "Acc.bulletin board": 0.38229999542236326, + "Acc.shower": 0.044800000190734865, + "Acc.radiator": 0.664000015258789, + "Acc.glass": 0.10680000305175781, + "Acc.clock": 0.29319999694824217, + "Acc.flag": 0.41880001068115236 + } + }, + "99": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8181, + "mIoU": 0.45539999999999997, + "mAcc": 0.5718, + "IoU.wall": 0.7541000366210937, + "IoU.building": 0.8194000244140625, + "IoU.sky": 0.9379000091552734, + "IoU.floor": 0.7980000305175782, + "IoU.tree": 0.7408999633789063, + "IoU.ceiling": 0.8226000213623047, + "IoU.road": 0.8187999725341797, + "IoU.bed ": 0.8591999816894531, + "IoU.windowpane": 0.5940000152587891, + "IoU.grass": 0.6604000091552734, + "IoU.cabinet": 0.5872000122070312, + "IoU.sidewalk": 0.6333000183105468, + "IoU.person": 0.7644999694824218, + "IoU.earth": 0.3516999816894531, + "IoU.door": 0.4493000030517578, + "IoU.table": 0.5452999877929687, + "IoU.mountain": 0.5779999923706055, + "IoU.plant": 0.5022999954223633, + "IoU.curtain": 0.7263999938964844, + "IoU.chair": 0.5161000061035156, + "IoU.car": 0.8123999786376953, + "IoU.water": 0.6177000045776367, + "IoU.painting": 0.670199966430664, + "IoU.sofa": 0.6166999816894532, + "IoU.shelf": 0.40919998168945315, + "IoU.house": 0.48520000457763673, + "IoU.sea": 0.6655000305175781, + "IoU.mirror": 0.6179999923706054, + "IoU.rug": 0.6476999664306641, + "IoU.field": 0.31010000228881834, + "IoU.armchair": 0.3820000076293945, + "IoU.seat": 0.5843999862670899, + "IoU.fence": 0.4013999938964844, + "IoU.desk": 0.43200000762939456, + "IoU.rock": 0.39680000305175783, + "IoU.wardrobe": 0.5434000015258789, + "IoU.lamp": 0.527400016784668, + "IoU.bathtub": 0.7330999755859375, + 
"IoU.railing": 0.3125, + "IoU.cushion": 0.5247000122070312, + "IoU.base": 0.2656999969482422, + "IoU.box": 0.23180000305175782, + "IoU.column": 0.44779998779296876, + "IoU.signboard": 0.318700008392334, + "IoU.chest of drawers": 0.3215999984741211, + "IoU.counter": 0.27040000915527346, + "IoU.sand": 0.40630001068115235, + "IoU.sink": 0.6523999786376953, + "IoU.skyscraper": 0.5738000106811524, + "IoU.fireplace": 0.6791999816894532, + "IoU.refrigerator": 0.7343000030517578, + "IoU.grandstand": 0.40619998931884765, + "IoU.path": 0.23850000381469727, + "IoU.stairs": 0.2745999908447266, + "IoU.runway": 0.6302000045776367, + "IoU.case": 0.5145000076293945, + "IoU.pool table": 0.9044000244140625, + "IoU.pillow": 0.5458000183105469, + "IoU.screen door": 0.5416999816894531, + "IoU.stairway": 0.31540000915527344, + "IoU.river": 0.2153000068664551, + "IoU.bridge": 0.6698000335693359, + "IoU.bookcase": 0.36009998321533204, + "IoU.blind": 0.4259000015258789, + "IoU.coffee table": 0.552400016784668, + "IoU.toilet": 0.7930999755859375, + "IoU.flower": 0.3390999984741211, + "IoU.book": 0.42459999084472655, + "IoU.hill": 0.10649999618530273, + "IoU.bench": 0.39669998168945314, + "IoU.countertop": 0.5068999862670899, + "IoU.stove": 0.7054000091552735, + "IoU.palm": 0.46130001068115234, + "IoU.kitchen island": 0.33439998626708983, + "IoU.computer": 0.6020999908447265, + "IoU.swivel chair": 0.528400001525879, + "IoU.boat": 0.662300033569336, + "IoU.bar": 0.5613000106811523, + "IoU.arcade machine": 0.5650999832153321, + "IoU.hovel": 0.45810001373291015, + "IoU.bus": 0.8047000122070312, + "IoU.towel": 0.529000015258789, + "IoU.light": 0.2929999923706055, + "IoU.truck": 0.20329999923706055, + "IoU.tower": 0.2645000076293945, + "IoU.chandelier": 0.5995999908447266, + "IoU.awning": 0.39180000305175783, + "IoU.streetlight": 0.13420000076293945, + "IoU.booth": 0.3171999931335449, + "IoU.television receiver": 0.577599983215332, + "IoU.airplane": 0.5727000045776367, + "IoU.dirt track": 0.08829999923706054, + "IoU.apparel": 0.3127000045776367, + "IoU.pole": 0.15130000114440917, + "IoU.land": 0.01190000057220459, + "IoU.bannister": 0.11590000152587891, + "IoU.escalator": 0.30700000762939456, + "IoU.ottoman": 0.45779998779296877, + "IoU.bottle": 0.2876000022888184, + "IoU.buffet": 0.34830001831054686, + "IoU.poster": 0.23559999465942383, + "IoU.stage": 0.14619999885559082, + "IoU.van": 0.35380001068115235, + "IoU.ship": 0.5720000076293945, + "IoU.fountain": 0.2059000015258789, + "IoU.conveyer belt": 0.68, + "IoU.canopy": 0.21719999313354493, + "IoU.washer": 0.6862000274658203, + "IoU.plaything": 0.22459999084472657, + "IoU.swimming pool": 0.6068000030517579, + "IoU.stool": 0.2543000030517578, + "IoU.barrel": 0.48770000457763674, + "IoU.basket": 0.22799999237060548, + "IoU.waterfall": 0.6166999816894532, + "IoU.tent": 0.9147000122070312, + "IoU.bag": 0.09920000076293946, + "IoU.minibike": 0.6366999816894531, + "IoU.cradle": 0.777300033569336, + "IoU.oven": 0.20440000534057617, + "IoU.ball": 0.32229999542236326, + "IoU.food": 0.5468999862670898, + "IoU.step": 0.10329999923706054, + "IoU.tank": 0.49220001220703125, + "IoU.trade name": 0.22719999313354491, + "IoU.microwave": 0.3388999938964844, + "IoU.pot": 0.33419998168945314, + "IoU.animal": 0.5770000076293945, + "IoU.bicycle": 0.5059000015258789, + "IoU.lake": 0.6365000152587891, + "IoU.dishwasher": 0.6031000137329101, + "IoU.screen": 0.6031000137329101, + "IoU.blanket": 0.09039999961853028, + "IoU.sculpture": 0.44959999084472657, + "IoU.hood": 0.4793000030517578, + 
"IoU.sconce": 0.3231999969482422, + "IoU.vase": 0.22579999923706054, + "IoU.traffic light": 0.26290000915527345, + "IoU.tray": 0.012799999713897704, + "IoU.ashcan": 0.34380001068115235, + "IoU.fan": 0.5045000076293945, + "IoU.pier": 0.4493000030517578, + "IoU.crt screen": 0.0, + "IoU.plate": 0.4552000045776367, + "IoU.monitor": 0.023299999237060547, + "IoU.bulletin board": 0.3085000038146973, + "IoU.shower": 0.0020000000298023225, + "IoU.radiator": 0.425, + "IoU.glass": 0.050999999046325684, + "IoU.clock": 0.19959999084472657, + "IoU.flag": 0.39860000610351565, + "Acc.wall": 0.8751999664306641, + "Acc.building": 0.9188999938964844, + "Acc.sky": 0.9768000030517578, + "Acc.floor": 0.9016000366210938, + "Acc.tree": 0.8636000061035156, + "Acc.ceiling": 0.918499984741211, + "Acc.road": 0.8988999938964843, + "Acc.bed ": 0.9475, + "Acc.windowpane": 0.7547000122070312, + "Acc.grass": 0.8055999755859375, + "Acc.cabinet": 0.7055999755859375, + "Acc.sidewalk": 0.7844999694824218, + "Acc.person": 0.9105999755859375, + "Acc.earth": 0.49959999084472656, + "Acc.door": 0.5872000122070312, + "Acc.table": 0.6905999755859376, + "Acc.mountain": 0.7275, + "Acc.plant": 0.6347000122070312, + "Acc.curtain": 0.8326000213623047, + "Acc.chair": 0.6476000213623047, + "Acc.car": 0.9030999755859375, + "Acc.water": 0.7655999755859375, + "Acc.painting": 0.8455000305175782, + "Acc.sofa": 0.7866999816894531, + "Acc.shelf": 0.5943999862670899, + "Acc.house": 0.5977000045776367, + "Acc.sea": 0.8390000152587891, + "Acc.mirror": 0.6905999755859376, + "Acc.rug": 0.715999984741211, + "Acc.field": 0.538499984741211, + "Acc.armchair": 0.5881000137329102, + "Acc.seat": 0.7983999633789063, + "Acc.fence": 0.5527000045776367, + "Acc.desk": 0.635, + "Acc.rock": 0.615099983215332, + "Acc.wardrobe": 0.6613999938964844, + "Acc.lamp": 0.6519999694824219, + "Acc.bathtub": 0.8044999694824219, + "Acc.railing": 0.460099983215332, + "Acc.cushion": 0.6655000305175781, + "Acc.base": 0.4595000076293945, + "Acc.box": 0.3302000045776367, + "Acc.column": 0.5604999923706054, + "Acc.signboard": 0.4072000122070312, + "Acc.chest of drawers": 0.5468000030517578, + "Acc.counter": 0.3915999984741211, + "Acc.sand": 0.547599983215332, + "Acc.sink": 0.7362999725341797, + "Acc.skyscraper": 0.7130000305175781, + "Acc.fireplace": 0.8919999694824219, + "Acc.refrigerator": 0.855, + "Acc.grandstand": 0.6891000366210938, + "Acc.path": 0.30270000457763674, + "Acc.stairs": 0.3477999877929687, + "Acc.runway": 0.8287999725341797, + "Acc.case": 0.6938999938964844, + "Acc.pool table": 0.9580999755859375, + "Acc.pillow": 0.6541000366210937, + "Acc.screen door": 0.6823000335693359, + "Acc.stairway": 0.4166999816894531, + "Acc.river": 0.4102000045776367, + "Acc.bridge": 0.8279000091552734, + "Acc.bookcase": 0.5495000076293945, + "Acc.blind": 0.4702999877929688, + "Acc.coffee table": 0.7455999755859375, + "Acc.toilet": 0.8811000061035156, + "Acc.flower": 0.5197999954223633, + "Acc.book": 0.5868000030517578, + "Acc.hill": 0.1711000061035156, + "Acc.bench": 0.4633000183105469, + "Acc.countertop": 0.6427999877929688, + "Acc.stove": 0.7777999877929688, + "Acc.palm": 0.6531999969482422, + "Acc.kitchen island": 0.6352000045776367, + "Acc.computer": 0.732699966430664, + "Acc.swivel chair": 0.6829000091552735, + "Acc.boat": 0.8430000305175781, + "Acc.bar": 0.764000015258789, + "Acc.arcade machine": 0.6181000137329101, + "Acc.hovel": 0.5206999969482422, + "Acc.bus": 0.9219999694824219, + "Acc.towel": 0.6916999816894531, + "Acc.light": 0.31309999465942384, + "Acc.truck": 
0.2645000076293945, + "Acc.tower": 0.37759998321533206, + "Acc.chandelier": 0.7562999725341797, + "Acc.awning": 0.4506999969482422, + "Acc.streetlight": 0.15600000381469725, + "Acc.booth": 0.432400016784668, + "Acc.television receiver": 0.7223000335693359, + "Acc.airplane": 0.6730999755859375, + "Acc.dirt track": 0.09189999580383301, + "Acc.apparel": 0.4493000030517578, + "Acc.pole": 0.19350000381469726, + "Acc.land": 0.01659999966621399, + "Acc.bannister": 0.1615999984741211, + "Acc.escalator": 0.37150001525878906, + "Acc.ottoman": 0.5727999877929687, + "Acc.bottle": 0.4509000015258789, + "Acc.buffet": 0.4040999984741211, + "Acc.poster": 0.30590000152587893, + "Acc.stage": 0.30670000076293946, + "Acc.van": 0.4413999938964844, + "Acc.ship": 0.7369000244140625, + "Acc.fountain": 0.21110000610351562, + "Acc.conveyer belt": 0.8597000122070313, + "Acc.canopy": 0.3231999969482422, + "Acc.washer": 0.7041000366210938, + "Acc.plaything": 0.37349998474121093, + "Acc.swimming pool": 0.7716000366210938, + "Acc.stool": 0.32779998779296876, + "Acc.barrel": 0.6159000015258789, + "Acc.basket": 0.28319999694824216, + "Acc.waterfall": 0.6983999633789062, + "Acc.tent": 0.9911000061035157, + "Acc.bag": 0.12369999885559083, + "Acc.minibike": 0.7765000152587891, + "Acc.cradle": 0.9720999908447265, + "Acc.oven": 0.5102999877929687, + "Acc.ball": 0.35330001831054686, + "Acc.food": 0.691500015258789, + "Acc.step": 0.11949999809265137, + "Acc.tank": 0.5890999984741211, + "Acc.trade name": 0.2695000076293945, + "Acc.microwave": 0.37509998321533206, + "Acc.pot": 0.39689998626708983, + "Acc.animal": 0.6165000152587891, + "Acc.bicycle": 0.6880999755859375, + "Acc.lake": 0.658499984741211, + "Acc.dishwasher": 0.6668000030517578, + "Acc.screen": 0.8997000122070312, + "Acc.blanket": 0.09880000114440918, + "Acc.sculpture": 0.6444999694824218, + "Acc.hood": 0.542599983215332, + "Acc.sconce": 0.37720001220703125, + "Acc.vase": 0.29739999771118164, + "Acc.traffic light": 0.41689998626708985, + "Acc.tray": 0.01659999966621399, + "Acc.ashcan": 0.4911999893188477, + "Acc.fan": 0.6618000030517578, + "Acc.pier": 0.7848000335693359, + "Acc.crt screen": 0.0, + "Acc.plate": 0.563499984741211, + "Acc.monitor": 0.025199999809265138, + "Acc.bulletin board": 0.4077999877929688, + "Acc.shower": 0.012300000190734864, + "Acc.radiator": 0.48509998321533204, + "Acc.glass": 0.05300000190734863, + "Acc.clock": 0.21209999084472655, + "Acc.flag": 0.45479999542236327 + } + }, + "100": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8198000000000001, + "mIoU": 0.46, + "mAcc": 0.5755, + "IoU.wall": 0.7558000183105469, + "IoU.building": 0.8204000091552734, + "IoU.sky": 0.9361000061035156, + "IoU.floor": 0.7994000244140625, + "IoU.tree": 0.7408000183105469, + "IoU.ceiling": 0.8237000274658203, + "IoU.road": 0.8209999847412109, + "IoU.bed ": 0.8645999908447266, + "IoU.windowpane": 0.6036000061035156, + "IoU.grass": 0.6537000274658203, + "IoU.cabinet": 0.5929000091552734, + "IoU.sidewalk": 0.630099983215332, + "IoU.person": 0.7798000335693359, + "IoU.earth": 0.3634999847412109, + "IoU.door": 0.45919998168945314, + "IoU.table": 0.5797999954223633, + "IoU.mountain": 0.5704999923706054, + "IoU.plant": 0.5054000091552734, + "IoU.curtain": 0.7333000183105469, + "IoU.chair": 0.5163999938964844, + "IoU.car": 0.8205999755859374, + "IoU.water": 0.5468000030517578, + "IoU.painting": 0.7008999633789063, + "IoU.sofa": 0.6336999893188476, + "IoU.shelf": 
0.4204999923706055, + "IoU.house": 0.47119998931884766, + "IoU.sea": 0.6070000076293945, + "IoU.mirror": 0.6329000091552734, + "IoU.rug": 0.627400016784668, + "IoU.field": 0.2918000030517578, + "IoU.armchair": 0.39099998474121095, + "IoU.seat": 0.6281000137329101, + "IoU.fence": 0.35520000457763673, + "IoU.desk": 0.4809999847412109, + "IoU.rock": 0.44529998779296875, + "IoU.wardrobe": 0.5227000045776368, + "IoU.lamp": 0.5529999923706055, + "IoU.bathtub": 0.755, + "IoU.railing": 0.35700000762939454, + "IoU.cushion": 0.5504999923706054, + "IoU.base": 0.29770000457763673, + "IoU.box": 0.2595000076293945, + "IoU.column": 0.4727000045776367, + "IoU.signboard": 0.3306999969482422, + "IoU.chest of drawers": 0.33380001068115234, + "IoU.counter": 0.3039999961853027, + "IoU.sand": 0.4229999923706055, + "IoU.sink": 0.6512000274658203, + "IoU.skyscraper": 0.6693000030517579, + "IoU.fireplace": 0.6825, + "IoU.refrigerator": 0.7502999877929688, + "IoU.grandstand": 0.47130001068115235, + "IoU.path": 0.2297999954223633, + "IoU.stairs": 0.26549999237060545, + "IoU.runway": 0.655199966430664, + "IoU.case": 0.5602999877929687, + "IoU.pool table": 0.9070999908447266, + "IoU.pillow": 0.5427000045776367, + "IoU.screen door": 0.41619998931884766, + "IoU.stairway": 0.3561000061035156, + "IoU.river": 0.15359999656677245, + "IoU.bridge": 0.6247999954223633, + "IoU.bookcase": 0.3693000030517578, + "IoU.blind": 0.4433000183105469, + "IoU.coffee table": 0.5647999954223633, + "IoU.toilet": 0.8098999786376954, + "IoU.flower": 0.32880001068115233, + "IoU.book": 0.44540000915527345, + "IoU.hill": 0.12460000038146973, + "IoU.bench": 0.4306999969482422, + "IoU.countertop": 0.47880001068115235, + "IoU.stove": 0.6906999969482421, + "IoU.palm": 0.47889999389648436, + "IoU.kitchen island": 0.34650001525878904, + "IoU.computer": 0.7118000030517578, + "IoU.swivel chair": 0.5108000183105469, + "IoU.boat": 0.5736999893188477, + "IoU.bar": 0.5354000091552734, + "IoU.arcade machine": 0.40099998474121096, + "IoU.hovel": 0.4070999908447266, + "IoU.bus": 0.8444999694824219, + "IoU.towel": 0.558499984741211, + "IoU.light": 0.32110000610351563, + "IoU.truck": 0.27579999923706056, + "IoU.tower": 0.301299991607666, + "IoU.chandelier": 0.6166999816894532, + "IoU.awning": 0.3970999908447266, + "IoU.streetlight": 0.14659999847412108, + "IoU.booth": 0.281200008392334, + "IoU.television receiver": 0.6306999969482422, + "IoU.airplane": 0.5697999954223633, + "IoU.dirt track": 0.12300000190734864, + "IoU.apparel": 0.31479999542236325, + "IoU.pole": 0.14079999923706055, + "IoU.land": 0.031700000762939454, + "IoU.bannister": 0.10689999580383301, + "IoU.escalator": 0.2844000053405762, + "IoU.ottoman": 0.4275, + "IoU.bottle": 0.18190000534057618, + "IoU.buffet": 0.35389999389648436, + "IoU.poster": 0.2901000022888184, + "IoU.stage": 0.19540000915527345, + "IoU.van": 0.43740001678466794, + "IoU.ship": 0.2669000053405762, + "IoU.fountain": 0.1906999969482422, + "IoU.conveyer belt": 0.7213999938964843, + "IoU.canopy": 0.26299999237060545, + "IoU.washer": 0.7194999694824219, + "IoU.plaything": 0.2665999984741211, + "IoU.swimming pool": 0.607599983215332, + "IoU.stool": 0.24760000228881837, + "IoU.barrel": 0.4975, + "IoU.basket": 0.2134000015258789, + "IoU.waterfall": 0.6311000061035156, + "IoU.tent": 0.9336000061035157, + "IoU.bag": 0.10989999771118164, + "IoU.minibike": 0.6318000030517578, + "IoU.cradle": 0.7886000061035157, + "IoU.oven": 0.1584000015258789, + "IoU.ball": 0.40819999694824216, + "IoU.food": 0.4988999938964844, + "IoU.step": 
0.09489999771118164, + "IoU.tank": 0.5456999969482422, + "IoU.trade name": 0.2240999984741211, + "IoU.microwave": 0.34810001373291016, + "IoU.pot": 0.3631999969482422, + "IoU.animal": 0.6018000030517578, + "IoU.bicycle": 0.5075999832153321, + "IoU.lake": 0.6505000305175781, + "IoU.dishwasher": 0.5684000015258789, + "IoU.screen": 0.6233000183105468, + "IoU.blanket": 0.08989999771118164, + "IoU.sculpture": 0.44290000915527344, + "IoU.hood": 0.5240999984741211, + "IoU.sconce": 0.33939998626708984, + "IoU.vase": 0.26209999084472657, + "IoU.traffic light": 0.24079999923706055, + "IoU.tray": 0.030399999618530273, + "IoU.ashcan": 0.38919998168945313, + "IoU.fan": 0.48830001831054687, + "IoU.pier": 0.26579999923706055, + "IoU.crt screen": 0.028499999046325684, + "IoU.plate": 0.4772999954223633, + "IoU.monitor": 0.06690000057220459, + "IoU.bulletin board": 0.402599983215332, + "IoU.shower": 9.999999776482581e-05, + "IoU.radiator": 0.49540000915527344, + "IoU.glass": 0.0665999984741211, + "IoU.clock": 0.175, + "IoU.flag": 0.34549999237060547, + "Acc.wall": 0.8751000213623047, + "Acc.building": 0.9194999694824219, + "Acc.sky": 0.9766000366210937, + "Acc.floor": 0.9045999908447265, + "Acc.tree": 0.8644000244140625, + "Acc.ceiling": 0.9165000152587891, + "Acc.road": 0.902300033569336, + "Acc.bed ": 0.9511000061035156, + "Acc.windowpane": 0.7480999755859375, + "Acc.grass": 0.797300033569336, + "Acc.cabinet": 0.7093000030517578, + "Acc.sidewalk": 0.7795999908447265, + "Acc.person": 0.9125, + "Acc.earth": 0.5088000106811523, + "Acc.door": 0.6102000045776367, + "Acc.table": 0.7283999633789062, + "Acc.mountain": 0.7144999694824219, + "Acc.plant": 0.6234999847412109, + "Acc.curtain": 0.8522000122070312, + "Acc.chair": 0.6479000091552735, + "Acc.car": 0.9031999969482422, + "Acc.water": 0.6956999969482421, + "Acc.painting": 0.8441000366210938, + "Acc.sofa": 0.7986000061035157, + "Acc.shelf": 0.6265000152587891, + "Acc.house": 0.6320000076293946, + "Acc.sea": 0.8211000061035156, + "Acc.mirror": 0.7094000244140625, + "Acc.rug": 0.6876000213623047, + "Acc.field": 0.5349000167846679, + "Acc.armchair": 0.5906999969482422, + "Acc.seat": 0.8018000030517578, + "Acc.fence": 0.4793000030517578, + "Acc.desk": 0.6709999847412109, + "Acc.rock": 0.6676000213623047, + "Acc.wardrobe": 0.6506999969482422, + "Acc.lamp": 0.6876999664306641, + "Acc.bathtub": 0.8281999969482422, + "Acc.railing": 0.49939998626708987, + "Acc.cushion": 0.6752999877929687, + "Acc.base": 0.46689998626708984, + "Acc.box": 0.36259998321533204, + "Acc.column": 0.6013000106811524, + "Acc.signboard": 0.42459999084472655, + "Acc.chest of drawers": 0.5879000091552734, + "Acc.counter": 0.4452000045776367, + "Acc.sand": 0.5786999893188477, + "Acc.sink": 0.7408000183105469, + "Acc.skyscraper": 0.7745999908447265, + "Acc.fireplace": 0.8962000274658203, + "Acc.refrigerator": 0.8480000305175781, + "Acc.grandstand": 0.7597000122070312, + "Acc.path": 0.30170000076293946, + "Acc.stairs": 0.3459000015258789, + "Acc.runway": 0.7912999725341797, + "Acc.case": 0.7591999816894531, + "Acc.pool table": 0.9588999938964844, + "Acc.pillow": 0.6519000244140625, + "Acc.screen door": 0.49029998779296874, + "Acc.stairway": 0.4652000045776367, + "Acc.river": 0.32360000610351564, + "Acc.bridge": 0.795199966430664, + "Acc.bookcase": 0.5816999816894531, + "Acc.blind": 0.5191999816894531, + "Acc.coffee table": 0.7733000183105468, + "Acc.toilet": 0.8848000335693359, + "Acc.flower": 0.5225999832153321, + "Acc.book": 0.6041999816894531, + "Acc.hill": 0.22680000305175782, + "Acc.bench": 
0.507400016784668, + "Acc.countertop": 0.6381999969482421, + "Acc.stove": 0.778499984741211, + "Acc.palm": 0.6984999847412109, + "Acc.kitchen island": 0.6120000076293945, + "Acc.computer": 0.8441000366210938, + "Acc.swivel chair": 0.6708000183105469, + "Acc.boat": 0.7030999755859375, + "Acc.bar": 0.6955999755859374, + "Acc.arcade machine": 0.42700000762939455, + "Acc.hovel": 0.545099983215332, + "Acc.bus": 0.9341000366210938, + "Acc.towel": 0.7288999938964844, + "Acc.light": 0.3515999984741211, + "Acc.truck": 0.3586000061035156, + "Acc.tower": 0.4020999908447266, + "Acc.chandelier": 0.777300033569336, + "Acc.awning": 0.47720001220703123, + "Acc.streetlight": 0.178700008392334, + "Acc.booth": 0.4070999908447266, + "Acc.television receiver": 0.754000015258789, + "Acc.airplane": 0.6512999725341797, + "Acc.dirt track": 0.18299999237060546, + "Acc.apparel": 0.4416999816894531, + "Acc.pole": 0.17659999847412108, + "Acc.land": 0.04639999866485596, + "Acc.bannister": 0.146899995803833, + "Acc.escalator": 0.33, + "Acc.ottoman": 0.5511999893188476, + "Acc.bottle": 0.24319999694824218, + "Acc.buffet": 0.4008000183105469, + "Acc.poster": 0.352400016784668, + "Acc.stage": 0.29959999084472655, + "Acc.van": 0.5468000030517578, + "Acc.ship": 0.3718000030517578, + "Acc.fountain": 0.20889999389648437, + "Acc.conveyer belt": 0.909000015258789, + "Acc.canopy": 0.38150001525878907, + "Acc.washer": 0.7255000305175782, + "Acc.plaything": 0.4625, + "Acc.swimming pool": 0.7715000152587891, + "Acc.stool": 0.3097999954223633, + "Acc.barrel": 0.6124000167846679, + "Acc.basket": 0.2784000015258789, + "Acc.waterfall": 0.6891000366210938, + "Acc.tent": 0.99, + "Acc.bag": 0.14, + "Acc.minibike": 0.773499984741211, + "Acc.cradle": 0.9695999908447266, + "Acc.oven": 0.45220001220703127, + "Acc.ball": 0.4679000091552734, + "Acc.food": 0.6011999893188477, + "Acc.step": 0.11800000190734863, + "Acc.tank": 0.6281000137329101, + "Acc.trade name": 0.25790000915527345, + "Acc.microwave": 0.38799999237060545, + "Acc.pot": 0.43709999084472656, + "Acc.animal": 0.6676000213623047, + "Acc.bicycle": 0.6952999877929688, + "Acc.lake": 0.6837000274658203, + "Acc.dishwasher": 0.6463999938964844, + "Acc.screen": 0.913499984741211, + "Acc.blanket": 0.10210000038146973, + "Acc.sculpture": 0.639000015258789, + "Acc.hood": 0.585999984741211, + "Acc.sconce": 0.4122999954223633, + "Acc.vase": 0.359900016784668, + "Acc.traffic light": 0.40029998779296877, + "Acc.tray": 0.04010000228881836, + "Acc.ashcan": 0.5086999893188476, + "Acc.fan": 0.6561000061035156, + "Acc.pier": 0.4291999816894531, + "Acc.crt screen": 0.07070000171661377, + "Acc.plate": 0.6138999938964844, + "Acc.monitor": 0.07630000114440919, + "Acc.bulletin board": 0.5697000122070313, + "Acc.shower": 0.0012999999523162842, + "Acc.radiator": 0.5672999954223633, + "Acc.glass": 0.06929999828338623, + "Acc.clock": 0.19260000228881835, + "Acc.flag": 0.38459999084472657 + } + }, + "101": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8233, + "mIoU": 0.46880000000000005, + "mAcc": 0.5879, + "IoU.wall": 0.7583999633789062, + "IoU.building": 0.8295999908447266, + "IoU.sky": 0.9376000213623047, + "IoU.floor": 0.810199966430664, + "IoU.tree": 0.7412999725341797, + "IoU.ceiling": 0.8236000061035156, + "IoU.road": 0.8272000122070312, + "IoU.bed ": 0.8783999633789062, + "IoU.windowpane": 0.6081000137329101, + "IoU.grass": 0.6597000122070312, + "IoU.cabinet": 0.6056999969482422, + "IoU.sidewalk": 
0.6416999816894531, + "IoU.person": 0.790199966430664, + "IoU.earth": 0.3920999908447266, + "IoU.door": 0.47299999237060547, + "IoU.table": 0.5695000076293946, + "IoU.mountain": 0.5558000183105469, + "IoU.plant": 0.4988999938964844, + "IoU.curtain": 0.7251000213623047, + "IoU.chair": 0.5306999969482422, + "IoU.car": 0.8305999755859375, + "IoU.water": 0.5125999832153321, + "IoU.painting": 0.6826000213623047, + "IoU.sofa": 0.6419000244140625, + "IoU.shelf": 0.43099998474121093, + "IoU.house": 0.5004999923706055, + "IoU.sea": 0.6188999938964844, + "IoU.mirror": 0.6377000045776368, + "IoU.rug": 0.6479000091552735, + "IoU.field": 0.2922999954223633, + "IoU.armchair": 0.3809000015258789, + "IoU.seat": 0.6311000061035156, + "IoU.fence": 0.3956999969482422, + "IoU.desk": 0.4981999969482422, + "IoU.rock": 0.42689998626708986, + "IoU.wardrobe": 0.5277999877929688, + "IoU.lamp": 0.5558000183105469, + "IoU.bathtub": 0.7780000305175782, + "IoU.railing": 0.3522999954223633, + "IoU.cushion": 0.5413999938964844, + "IoU.base": 0.30059999465942383, + "IoU.box": 0.263700008392334, + "IoU.column": 0.4779999923706055, + "IoU.signboard": 0.3527000045776367, + "IoU.chest of drawers": 0.35130001068115235, + "IoU.counter": 0.3397000122070313, + "IoU.sand": 0.5072999954223633, + "IoU.sink": 0.669000015258789, + "IoU.skyscraper": 0.6388999938964843, + "IoU.fireplace": 0.6911000061035156, + "IoU.refrigerator": 0.7486000061035156, + "IoU.grandstand": 0.47959999084472654, + "IoU.path": 0.2556999969482422, + "IoU.stairs": 0.26190000534057617, + "IoU.runway": 0.5916999816894531, + "IoU.case": 0.5940000152587891, + "IoU.pool table": 0.9220999908447266, + "IoU.pillow": 0.5465000152587891, + "IoU.screen door": 0.5006999969482422, + "IoU.stairway": 0.3813000106811523, + "IoU.river": 0.13930000305175783, + "IoU.bridge": 0.6923000335693359, + "IoU.bookcase": 0.34349998474121096, + "IoU.blind": 0.3683000183105469, + "IoU.coffee table": 0.5525, + "IoU.toilet": 0.7997000122070312, + "IoU.flower": 0.3263999938964844, + "IoU.book": 0.4513999938964844, + "IoU.hill": 0.12319999694824219, + "IoU.bench": 0.4456999969482422, + "IoU.countertop": 0.47939998626708985, + "IoU.stove": 0.7023999786376953, + "IoU.palm": 0.5034999847412109, + "IoU.kitchen island": 0.32849998474121095, + "IoU.computer": 0.6487999725341796, + "IoU.swivel chair": 0.5358000183105469, + "IoU.boat": 0.7148999786376953, + "IoU.bar": 0.3740999984741211, + "IoU.arcade machine": 0.5206000137329102, + "IoU.hovel": 0.5547999954223632, + "IoU.bus": 0.8837000274658203, + "IoU.towel": 0.5777999877929687, + "IoU.light": 0.30709999084472656, + "IoU.truck": 0.3075, + "IoU.tower": 0.32990001678466796, + "IoU.chandelier": 0.6163999938964844, + "IoU.awning": 0.37979999542236326, + "IoU.streetlight": 0.16030000686645507, + "IoU.booth": 0.2943000030517578, + "IoU.television receiver": 0.6333000183105468, + "IoU.airplane": 0.6195000076293945, + "IoU.dirt track": 0.12979999542236328, + "IoU.apparel": 0.33419998168945314, + "IoU.pole": 0.15869999885559083, + "IoU.land": 0.025799999237060545, + "IoU.bannister": 0.09810000419616699, + "IoU.escalator": 0.36290000915527343, + "IoU.ottoman": 0.43150001525878906, + "IoU.bottle": 0.295, + "IoU.buffet": 0.3893000030517578, + "IoU.poster": 0.175, + "IoU.stage": 0.1621999931335449, + "IoU.van": 0.41810001373291017, + "IoU.ship": 0.36540000915527343, + "IoU.fountain": 0.18709999084472656, + "IoU.conveyer belt": 0.7170999908447265, + "IoU.canopy": 0.2577000045776367, + "IoU.washer": 0.7212999725341797, + "IoU.plaything": 0.2734000015258789, + 
"IoU.swimming pool": 0.7666000366210938, + "IoU.stool": 0.3215000152587891, + "IoU.barrel": 0.3192000007629395, + "IoU.basket": 0.22809999465942382, + "IoU.waterfall": 0.7316000366210937, + "IoU.tent": 0.9583000183105469, + "IoU.bag": 0.1425, + "IoU.minibike": 0.5750999832153321, + "IoU.cradle": 0.7319999694824219, + "IoU.oven": 0.20850000381469727, + "IoU.ball": 0.43090000152587893, + "IoU.food": 0.46439998626708984, + "IoU.step": 0.0719000005722046, + "IoU.tank": 0.5463000106811523, + "IoU.trade name": 0.2538999938964844, + "IoU.microwave": 0.3970999908447266, + "IoU.pot": 0.3868000030517578, + "IoU.animal": 0.5834000015258789, + "IoU.bicycle": 0.48060001373291017, + "IoU.lake": 0.5761000061035156, + "IoU.dishwasher": 0.5095000076293945, + "IoU.screen": 0.585, + "IoU.blanket": 0.11739999771118165, + "IoU.sculpture": 0.4438999938964844, + "IoU.hood": 0.5261999893188477, + "IoU.sconce": 0.31549999237060544, + "IoU.vase": 0.29350000381469726, + "IoU.traffic light": 0.2409000015258789, + "IoU.tray": 0.0225, + "IoU.ashcan": 0.3915000152587891, + "IoU.fan": 0.5113999938964844, + "IoU.pier": 0.3540999984741211, + "IoU.crt screen": 0.04519999980926514, + "IoU.plate": 0.46650001525878904, + "IoU.monitor": 0.056700000762939455, + "IoU.bulletin board": 0.41330001831054686, + "IoU.shower": 0.0010000000149011613, + "IoU.radiator": 0.5459999847412109, + "IoU.glass": 0.08090000152587891, + "IoU.clock": 0.23719999313354492, + "IoU.flag": 0.34939998626708985, + "Acc.wall": 0.8766999816894532, + "Acc.building": 0.928499984741211, + "Acc.sky": 0.9763999938964844, + "Acc.floor": 0.9086000061035157, + "Acc.tree": 0.8670999908447266, + "Acc.ceiling": 0.9143000030517578, + "Acc.road": 0.8941000366210937, + "Acc.bed ": 0.9529000091552734, + "Acc.windowpane": 0.7522000122070313, + "Acc.grass": 0.7954000091552734, + "Acc.cabinet": 0.7151000213623047, + "Acc.sidewalk": 0.7866999816894531, + "Acc.person": 0.9151000213623047, + "Acc.earth": 0.5415000152587891, + "Acc.door": 0.6383000183105468, + "Acc.table": 0.7222000122070312, + "Acc.mountain": 0.6483000183105468, + "Acc.plant": 0.6179999923706054, + "Acc.curtain": 0.8445999908447266, + "Acc.chair": 0.6579000091552735, + "Acc.car": 0.9177999877929688, + "Acc.water": 0.6547000122070312, + "Acc.painting": 0.8587999725341797, + "Acc.sofa": 0.7959999847412109, + "Acc.shelf": 0.6544999694824218, + "Acc.house": 0.638499984741211, + "Acc.sea": 0.9013999938964844, + "Acc.mirror": 0.7319000244140625, + "Acc.rug": 0.710999984741211, + "Acc.field": 0.5363000106811523, + "Acc.armchair": 0.6004999923706055, + "Acc.seat": 0.8288999938964844, + "Acc.fence": 0.5263999938964844, + "Acc.desk": 0.6941000366210938, + "Acc.rock": 0.6805000305175781, + "Acc.wardrobe": 0.645199966430664, + "Acc.lamp": 0.6894999694824219, + "Acc.bathtub": 0.8373000335693359, + "Acc.railing": 0.5022000122070313, + "Acc.cushion": 0.6844999694824219, + "Acc.base": 0.4520999908447266, + "Acc.box": 0.3559999847412109, + "Acc.column": 0.599900016784668, + "Acc.signboard": 0.4633000183105469, + "Acc.chest of drawers": 0.5977000045776367, + "Acc.counter": 0.4736000061035156, + "Acc.sand": 0.6861000061035156, + "Acc.sink": 0.7576999664306641, + "Acc.skyscraper": 0.762300033569336, + "Acc.fireplace": 0.9175, + "Acc.refrigerator": 0.8494999694824219, + "Acc.grandstand": 0.7087999725341797, + "Acc.path": 0.33119998931884764, + "Acc.stairs": 0.34970001220703123, + "Acc.runway": 0.7636000061035156, + "Acc.case": 0.7573999786376953, + "Acc.pool table": 0.9666000366210937, + "Acc.pillow": 0.6648999786376953, + 
"Acc.screen door": 0.5741999816894531, + "Acc.stairway": 0.542400016784668, + "Acc.river": 0.25559999465942385, + "Acc.bridge": 0.8287999725341797, + "Acc.bookcase": 0.5727999877929687, + "Acc.blind": 0.4386999893188477, + "Acc.coffee table": 0.7837000274658203, + "Acc.toilet": 0.8886000061035156, + "Acc.flower": 0.5525, + "Acc.book": 0.6279000091552734, + "Acc.hill": 0.2338999938964844, + "Acc.bench": 0.5193999862670898, + "Acc.countertop": 0.638400001525879, + "Acc.stove": 0.7865000152587891, + "Acc.palm": 0.704800033569336, + "Acc.kitchen island": 0.5711000061035156, + "Acc.computer": 0.7662000274658203, + "Acc.swivel chair": 0.6959999847412109, + "Acc.boat": 0.856500015258789, + "Acc.bar": 0.482400016784668, + "Acc.arcade machine": 0.5584000015258789, + "Acc.hovel": 0.619900016784668, + "Acc.bus": 0.9363999938964844, + "Acc.towel": 0.7443000030517578, + "Acc.light": 0.3359000015258789, + "Acc.truck": 0.4209999847412109, + "Acc.tower": 0.40560001373291016, + "Acc.chandelier": 0.8052999877929687, + "Acc.awning": 0.5090999984741211, + "Acc.streetlight": 0.19139999389648438, + "Acc.booth": 0.4109000015258789, + "Acc.television receiver": 0.7716000366210938, + "Acc.airplane": 0.7023999786376953, + "Acc.dirt track": 0.18360000610351562, + "Acc.apparel": 0.48889999389648436, + "Acc.pole": 0.20700000762939452, + "Acc.land": 0.037100000381469725, + "Acc.bannister": 0.14199999809265137, + "Acc.escalator": 0.4134999847412109, + "Acc.ottoman": 0.5579000091552735, + "Acc.bottle": 0.42220001220703124, + "Acc.buffet": 0.48069999694824217, + "Acc.poster": 0.20670000076293946, + "Acc.stage": 0.2490999984741211, + "Acc.van": 0.5268000030517578, + "Acc.ship": 0.4122999954223633, + "Acc.fountain": 0.21420000076293946, + "Acc.conveyer belt": 0.8916000366210938, + "Acc.canopy": 0.4027000045776367, + "Acc.washer": 0.7268000030517578, + "Acc.plaything": 0.5202999877929687, + "Acc.swimming pool": 0.8513999938964844, + "Acc.stool": 0.4241999816894531, + "Acc.barrel": 0.6202999877929688, + "Acc.basket": 0.30389999389648437, + "Acc.waterfall": 0.8008000183105469, + "Acc.tent": 0.9831999969482422, + "Acc.bag": 0.1675, + "Acc.minibike": 0.6898999786376954, + "Acc.cradle": 0.9751999664306641, + "Acc.oven": 0.561500015258789, + "Acc.ball": 0.46900001525878904, + "Acc.food": 0.5588000106811524, + "Acc.step": 0.08949999809265137, + "Acc.tank": 0.6304000091552734, + "Acc.trade name": 0.3004999923706055, + "Acc.microwave": 0.45220001220703127, + "Acc.pot": 0.4384000015258789, + "Acc.animal": 0.6286000061035156, + "Acc.bicycle": 0.7069999694824218, + "Acc.lake": 0.629000015258789, + "Acc.dishwasher": 0.6243999862670898, + "Acc.screen": 0.9319999694824219, + "Acc.blanket": 0.1325, + "Acc.sculpture": 0.667300033569336, + "Acc.hood": 0.5640000152587891, + "Acc.sconce": 0.4136000061035156, + "Acc.vase": 0.3990000152587891, + "Acc.traffic light": 0.3768000030517578, + "Acc.tray": 0.029600000381469725, + "Acc.ashcan": 0.5170000076293946, + "Acc.fan": 0.6426000213623047, + "Acc.pier": 0.49450000762939456, + "Acc.crt screen": 0.11220000267028808, + "Acc.plate": 0.6451000213623047, + "Acc.monitor": 0.09140000343322754, + "Acc.bulletin board": 0.5650999832153321, + "Acc.shower": 0.009300000071525573, + "Acc.radiator": 0.6038000106811523, + "Acc.glass": 0.08539999961853027, + "Acc.clock": 0.2722999954223633, + "Acc.flag": 0.4072999954223633 + } + }, + "102": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8294, + "mIoU": 0.4804, + "mAcc": 
0.5956, + "IoU.wall": 0.7656999969482422, + "IoU.building": 0.8316999816894531, + "IoU.sky": 0.9379000091552734, + "IoU.floor": 0.814800033569336, + "IoU.tree": 0.7455999755859375, + "IoU.ceiling": 0.8319000244140625, + "IoU.road": 0.8344999694824219, + "IoU.bed ": 0.8802999877929687, + "IoU.windowpane": 0.6265000152587891, + "IoU.grass": 0.677300033569336, + "IoU.cabinet": 0.6156999969482422, + "IoU.sidewalk": 0.6562999725341797, + "IoU.person": 0.7970999908447266, + "IoU.earth": 0.3843000030517578, + "IoU.door": 0.49139999389648437, + "IoU.table": 0.5829999923706055, + "IoU.mountain": 0.5829999923706055, + "IoU.plant": 0.5227000045776368, + "IoU.curtain": 0.7412999725341797, + "IoU.chair": 0.5333000183105469, + "IoU.car": 0.8325, + "IoU.water": 0.5565999984741211, + "IoU.painting": 0.6891999816894532, + "IoU.sofa": 0.6798999786376954, + "IoU.shelf": 0.43139999389648437, + "IoU.house": 0.5258000183105469, + "IoU.sea": 0.6563999938964844, + "IoU.mirror": 0.6663999938964844, + "IoU.rug": 0.6662000274658203, + "IoU.field": 0.31760000228881835, + "IoU.armchair": 0.4254999923706055, + "IoU.seat": 0.6465000152587891, + "IoU.fence": 0.38979999542236327, + "IoU.desk": 0.45049999237060545, + "IoU.rock": 0.4725, + "IoU.wardrobe": 0.5529000091552735, + "IoU.lamp": 0.5415999984741211, + "IoU.bathtub": 0.7758999633789062, + "IoU.railing": 0.3763000106811523, + "IoU.cushion": 0.5722000122070312, + "IoU.base": 0.2979999923706055, + "IoU.box": 0.23200000762939454, + "IoU.column": 0.48459999084472655, + "IoU.signboard": 0.3518000030517578, + "IoU.chest of drawers": 0.3666999816894531, + "IoU.counter": 0.39220001220703127, + "IoU.sand": 0.557400016784668, + "IoU.sink": 0.6765000152587891, + "IoU.skyscraper": 0.6013000106811524, + "IoU.fireplace": 0.7068000030517578, + "IoU.refrigerator": 0.7791000366210937, + "IoU.grandstand": 0.48319999694824217, + "IoU.path": 0.22370000839233398, + "IoU.stairs": 0.2872999954223633, + "IoU.runway": 0.6043999862670898, + "IoU.case": 0.6620999908447266, + "IoU.pool table": 0.9248000335693359, + "IoU.pillow": 0.5838000106811524, + "IoU.screen door": 0.4727000045776367, + "IoU.stairway": 0.33880001068115234, + "IoU.river": 0.1875, + "IoU.bridge": 0.6780999755859375, + "IoU.bookcase": 0.33279998779296877, + "IoU.blind": 0.41459999084472654, + "IoU.coffee table": 0.5997999954223633, + "IoU.toilet": 0.8161000061035156, + "IoU.flower": 0.36080001831054687, + "IoU.book": 0.4456999969482422, + "IoU.hill": 0.13420000076293945, + "IoU.bench": 0.4840999984741211, + "IoU.countertop": 0.5116999816894531, + "IoU.stove": 0.6908999633789062, + "IoU.palm": 0.475, + "IoU.kitchen island": 0.39, + "IoU.computer": 0.6497000122070312, + "IoU.swivel chair": 0.474900016784668, + "IoU.boat": 0.7194999694824219, + "IoU.bar": 0.5868999862670898, + "IoU.arcade machine": 0.432400016784668, + "IoU.hovel": 0.5038000106811523, + "IoU.bus": 0.8979000091552735, + "IoU.towel": 0.6131999969482422, + "IoU.light": 0.3427000045776367, + "IoU.truck": 0.2763999938964844, + "IoU.tower": 0.29950000762939455, + "IoU.chandelier": 0.6093000030517578, + "IoU.awning": 0.2706999969482422, + "IoU.streetlight": 0.1425, + "IoU.booth": 0.3491999816894531, + "IoU.television receiver": 0.6523999786376953, + "IoU.airplane": 0.6013999938964844, + "IoU.dirt track": 0.012400000095367432, + "IoU.apparel": 0.2979000091552734, + "IoU.pole": 0.16860000610351564, + "IoU.land": 0.023499999046325683, + "IoU.bannister": 0.10119999885559082, + "IoU.escalator": 0.5611000061035156, + "IoU.ottoman": 0.43, + "IoU.bottle": 0.2670000076293945, + 
"IoU.buffet": 0.3543000030517578, + "IoU.poster": 0.21989999771118163, + "IoU.stage": 0.14569999694824218, + "IoU.van": 0.38880001068115233, + "IoU.ship": 0.6666999816894531, + "IoU.fountain": 0.18860000610351563, + "IoU.conveyer belt": 0.7515000152587891, + "IoU.canopy": 0.23860000610351562, + "IoU.washer": 0.7219000244140625, + "IoU.plaything": 0.22850000381469726, + "IoU.swimming pool": 0.6608000183105469, + "IoU.stool": 0.3175, + "IoU.barrel": 0.43959999084472656, + "IoU.basket": 0.23579999923706055, + "IoU.waterfall": 0.538499984741211, + "IoU.tent": 0.8384999847412109, + "IoU.bag": 0.12409999847412109, + "IoU.minibike": 0.6622000122070313, + "IoU.cradle": 0.8093000030517579, + "IoU.oven": 0.24229999542236327, + "IoU.ball": 0.4215999984741211, + "IoU.food": 0.5209000015258789, + "IoU.step": 0.09760000228881836, + "IoU.tank": 0.5613000106811523, + "IoU.trade name": 0.2538999938964844, + "IoU.microwave": 0.43689998626708987, + "IoU.pot": 0.4322999954223633, + "IoU.animal": 0.6356999969482422, + "IoU.bicycle": 0.5288000106811523, + "IoU.lake": 0.5811999893188476, + "IoU.dishwasher": 0.5204000091552734, + "IoU.screen": 0.5529999923706055, + "IoU.blanket": 0.11579999923706055, + "IoU.sculpture": 0.5256000137329102, + "IoU.hood": 0.579900016784668, + "IoU.sconce": 0.3121999931335449, + "IoU.vase": 0.29920000076293946, + "IoU.traffic light": 0.2368000030517578, + "IoU.tray": 0.011100000143051148, + "IoU.ashcan": 0.38970001220703127, + "IoU.fan": 0.4865999984741211, + "IoU.pier": 0.293799991607666, + "IoU.crt screen": 0.06730000019073486, + "IoU.plate": 0.48470001220703124, + "IoU.monitor": 0.31610000610351563, + "IoU.bulletin board": 0.43590000152587893, + "IoU.shower": 0.0009000000357627869, + "IoU.radiator": 0.5559999847412109, + "IoU.glass": 0.084399995803833, + "IoU.clock": 0.26110000610351564, + "IoU.flag": 0.37599998474121094, + "Acc.wall": 0.8758000183105469, + "Acc.building": 0.9297000122070312, + "Acc.sky": 0.9762000274658204, + "Acc.floor": 0.9116999816894531, + "Acc.tree": 0.8656999969482422, + "Acc.ceiling": 0.9206999969482422, + "Acc.road": 0.8936000061035156, + "Acc.bed ": 0.9540000152587891, + "Acc.windowpane": 0.7748999786376953, + "Acc.grass": 0.785199966430664, + "Acc.cabinet": 0.7247000122070313, + "Acc.sidewalk": 0.8019000244140625, + "Acc.person": 0.9148000335693359, + "Acc.earth": 0.5768000030517578, + "Acc.door": 0.6666999816894531, + "Acc.table": 0.7394000244140625, + "Acc.mountain": 0.7006999969482421, + "Acc.plant": 0.6476999664306641, + "Acc.curtain": 0.8586000061035156, + "Acc.chair": 0.6558000183105469, + "Acc.car": 0.9205999755859375, + "Acc.water": 0.7295999908447266, + "Acc.painting": 0.8666000366210938, + "Acc.sofa": 0.8366000366210937, + "Acc.shelf": 0.6341999816894531, + "Acc.house": 0.6894999694824219, + "Acc.sea": 0.849800033569336, + "Acc.mirror": 0.7522000122070313, + "Acc.rug": 0.7384999847412109, + "Acc.field": 0.5404999923706054, + "Acc.armchair": 0.6431999969482421, + "Acc.seat": 0.8318000030517578, + "Acc.fence": 0.5388999938964844, + "Acc.desk": 0.6265999984741211, + "Acc.rock": 0.7480000305175781, + "Acc.wardrobe": 0.6666999816894531, + "Acc.lamp": 0.673499984741211, + "Acc.bathtub": 0.8294000244140625, + "Acc.railing": 0.534000015258789, + "Acc.cushion": 0.6994000244140625, + "Acc.base": 0.4779999923706055, + "Acc.box": 0.3145000076293945, + "Acc.column": 0.5995999908447266, + "Acc.signboard": 0.45790000915527346, + "Acc.chest of drawers": 0.6306000137329102, + "Acc.counter": 0.48209999084472654, + "Acc.sand": 0.7155000305175782, + "Acc.sink": 
0.7448000335693359, + "Acc.skyscraper": 0.6936000061035156, + "Acc.fireplace": 0.8918000030517578, + "Acc.refrigerator": 0.8358000183105468, + "Acc.grandstand": 0.6840000152587891, + "Acc.path": 0.2984000015258789, + "Acc.stairs": 0.38799999237060545, + "Acc.runway": 0.772300033569336, + "Acc.case": 0.8579000091552734, + "Acc.pool table": 0.9619999694824218, + "Acc.pillow": 0.6913999938964843, + "Acc.screen door": 0.563499984741211, + "Acc.stairway": 0.44150001525878907, + "Acc.river": 0.3185000038146973, + "Acc.bridge": 0.7822000122070313, + "Acc.bookcase": 0.5527000045776367, + "Acc.blind": 0.4727000045776367, + "Acc.coffee table": 0.7705999755859375, + "Acc.toilet": 0.8906999969482422, + "Acc.flower": 0.5197999954223633, + "Acc.book": 0.6491000366210937, + "Acc.hill": 0.24059999465942383, + "Acc.bench": 0.5656999969482421, + "Acc.countertop": 0.6802999877929687, + "Acc.stove": 0.78, + "Acc.palm": 0.7029000091552734, + "Acc.kitchen island": 0.6381999969482421, + "Acc.computer": 0.7673999786376953, + "Acc.swivel chair": 0.6580999755859375, + "Acc.boat": 0.8483000183105469, + "Acc.bar": 0.7244999694824219, + "Acc.arcade machine": 0.46830001831054685, + "Acc.hovel": 0.5895000076293946, + "Acc.bus": 0.9444000244140625, + "Acc.towel": 0.7433999633789062, + "Acc.light": 0.38159999847412107, + "Acc.truck": 0.3925, + "Acc.tower": 0.4031999969482422, + "Acc.chandelier": 0.7927999877929688, + "Acc.awning": 0.3146999931335449, + "Acc.streetlight": 0.1675, + "Acc.booth": 0.4166999816894531, + "Acc.television receiver": 0.8105999755859375, + "Acc.airplane": 0.6737999725341797, + "Acc.dirt track": 0.05409999847412109, + "Acc.apparel": 0.44220001220703126, + "Acc.pole": 0.22290000915527344, + "Acc.land": 0.03380000114440918, + "Acc.bannister": 0.13890000343322753, + "Acc.escalator": 0.7726000213623047, + "Acc.ottoman": 0.5783000183105469, + "Acc.bottle": 0.3752000045776367, + "Acc.buffet": 0.40400001525878904, + "Acc.poster": 0.281200008392334, + "Acc.stage": 0.2052000045776367, + "Acc.van": 0.5115000152587891, + "Acc.ship": 0.7205999755859375, + "Acc.fountain": 0.22200000762939454, + "Acc.conveyer belt": 0.9125, + "Acc.canopy": 0.36259998321533204, + "Acc.washer": 0.7372000122070312, + "Acc.plaything": 0.35869998931884767, + "Acc.swimming pool": 0.8054000091552734, + "Acc.stool": 0.369900016784668, + "Acc.barrel": 0.6366999816894531, + "Acc.basket": 0.3075, + "Acc.waterfall": 0.6006000137329102, + "Acc.tent": 0.9833000183105469, + "Acc.bag": 0.14210000038146972, + "Acc.minibike": 0.7755000305175781, + "Acc.cradle": 0.9693000030517578, + "Acc.oven": 0.6211000061035157, + "Acc.ball": 0.44619998931884763, + "Acc.food": 0.6215000152587891, + "Acc.step": 0.12159999847412109, + "Acc.tank": 0.6405999755859375, + "Acc.trade name": 0.293799991607666, + "Acc.microwave": 0.492400016784668, + "Acc.pot": 0.5088999938964843, + "Acc.animal": 0.6816000366210937, + "Acc.bicycle": 0.7355999755859375, + "Acc.lake": 0.6158000183105469, + "Acc.dishwasher": 0.6245000076293945, + "Acc.screen": 0.774000015258789, + "Acc.blanket": 0.12859999656677246, + "Acc.sculpture": 0.6184999847412109, + "Acc.hood": 0.6136999893188476, + "Acc.sconce": 0.40430000305175784, + "Acc.vase": 0.395099983215332, + "Acc.traffic light": 0.35200000762939454, + "Acc.tray": 0.013799999952316283, + "Acc.ashcan": 0.5395000076293945, + "Acc.fan": 0.64, + "Acc.pier": 0.44549999237060545, + "Acc.crt screen": 0.11130000114440917, + "Acc.plate": 0.6569000244140625, + "Acc.monitor": 0.5713999938964843, + "Acc.bulletin board": 0.5733000183105469, + 
"Acc.shower": 0.008299999833106995, + "Acc.radiator": 0.6597000122070312, + "Acc.glass": 0.0875, + "Acc.clock": 0.31040000915527344, + "Acc.flag": 0.41720001220703123 + } + }, + "103": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8316, + "mIoU": 0.47850000000000004, + "mAcc": 0.5926, + "IoU.wall": 0.7720999908447266, + "IoU.building": 0.8268000030517578, + "IoU.sky": 0.9388999938964844, + "IoU.floor": 0.8141999816894532, + "IoU.tree": 0.7558999633789063, + "IoU.ceiling": 0.8341000366210938, + "IoU.road": 0.8272000122070312, + "IoU.bed ": 0.8923999786376953, + "IoU.windowpane": 0.6238000106811523, + "IoU.grass": 0.7045999908447266, + "IoU.cabinet": 0.6202999877929688, + "IoU.sidewalk": 0.6448999786376953, + "IoU.person": 0.8015000152587891, + "IoU.earth": 0.37869998931884763, + "IoU.door": 0.5116999816894531, + "IoU.table": 0.5811000061035156, + "IoU.mountain": 0.5811999893188476, + "IoU.plant": 0.5338999938964843, + "IoU.curtain": 0.7437000274658203, + "IoU.chair": 0.5488999938964844, + "IoU.car": 0.8301999664306641, + "IoU.water": 0.5934000015258789, + "IoU.painting": 0.6934999847412109, + "IoU.sofa": 0.6676000213623047, + "IoU.shelf": 0.4272999954223633, + "IoU.house": 0.4702000045776367, + "IoU.sea": 0.7219999694824218, + "IoU.mirror": 0.6769999694824219, + "IoU.rug": 0.6311000061035156, + "IoU.field": 0.37860000610351563, + "IoU.armchair": 0.42520000457763674, + "IoU.seat": 0.6586000061035157, + "IoU.fence": 0.46279998779296877, + "IoU.desk": 0.48259998321533204, + "IoU.rock": 0.4647999954223633, + "IoU.wardrobe": 0.5481999969482422, + "IoU.lamp": 0.5495999908447265, + "IoU.bathtub": 0.8423999786376953, + "IoU.railing": 0.38159999847412107, + "IoU.cushion": 0.5720000076293945, + "IoU.base": 0.31809999465942385, + "IoU.box": 0.23090000152587892, + "IoU.column": 0.4779000091552734, + "IoU.signboard": 0.3640999984741211, + "IoU.chest of drawers": 0.34830001831054686, + "IoU.counter": 0.40290000915527346, + "IoU.sand": 0.43709999084472656, + "IoU.sink": 0.6813999938964844, + "IoU.skyscraper": 0.5368000030517578, + "IoU.fireplace": 0.7031999969482422, + "IoU.refrigerator": 0.7525, + "IoU.grandstand": 0.524000015258789, + "IoU.path": 0.18069999694824218, + "IoU.stairs": 0.3118000030517578, + "IoU.runway": 0.6170000076293946, + "IoU.case": 0.6097999954223633, + "IoU.pool table": 0.9276000213623047, + "IoU.pillow": 0.5791999816894531, + "IoU.screen door": 0.6618000030517578, + "IoU.stairway": 0.36009998321533204, + "IoU.river": 0.18219999313354493, + "IoU.bridge": 0.576500015258789, + "IoU.bookcase": 0.32, + "IoU.blind": 0.39939998626708983, + "IoU.coffee table": 0.5829000091552734, + "IoU.toilet": 0.7576000213623046, + "IoU.flower": 0.3834000015258789, + "IoU.book": 0.44729999542236326, + "IoU.hill": 0.1518000030517578, + "IoU.bench": 0.4229000091552734, + "IoU.countertop": 0.561500015258789, + "IoU.stove": 0.7151999664306641, + "IoU.palm": 0.4986000061035156, + "IoU.kitchen island": 0.4125, + "IoU.computer": 0.6676999664306641, + "IoU.swivel chair": 0.524900016784668, + "IoU.boat": 0.6170000076293946, + "IoU.bar": 0.5443999862670899, + "IoU.arcade machine": 0.7248999786376953, + "IoU.hovel": 0.518499984741211, + "IoU.bus": 0.8862999725341797, + "IoU.towel": 0.6395999908447265, + "IoU.light": 0.34060001373291016, + "IoU.truck": 0.17200000762939452, + "IoU.tower": 0.2780999946594238, + "IoU.chandelier": 0.6279000091552734, + "IoU.awning": 0.30940000534057616, + "IoU.streetlight": 
0.15210000038146973, + "IoU.booth": 0.3940000152587891, + "IoU.television receiver": 0.6487999725341796, + "IoU.airplane": 0.6245999908447266, + "IoU.dirt track": 0.004199999868869781, + "IoU.apparel": 0.3504999923706055, + "IoU.pole": 0.19450000762939454, + "IoU.land": 0.008100000023841859, + "IoU.bannister": 0.0856999969482422, + "IoU.escalator": 0.5345000076293945, + "IoU.ottoman": 0.5006000137329102, + "IoU.bottle": 0.34049999237060546, + "IoU.buffet": 0.3234000015258789, + "IoU.poster": 0.18010000228881837, + "IoU.stage": 0.18209999084472656, + "IoU.van": 0.36029998779296873, + "IoU.ship": 0.09800000190734863, + "IoU.fountain": 0.19680000305175782, + "IoU.conveyer belt": 0.7730000305175782, + "IoU.canopy": 0.2543000030517578, + "IoU.washer": 0.6995999908447266, + "IoU.plaything": 0.3383000183105469, + "IoU.swimming pool": 0.6730999755859375, + "IoU.stool": 0.27420000076293943, + "IoU.barrel": 0.35569999694824217, + "IoU.basket": 0.2640999984741211, + "IoU.waterfall": 0.48400001525878905, + "IoU.tent": 0.9440000152587891, + "IoU.bag": 0.12619999885559083, + "IoU.minibike": 0.5922999954223633, + "IoU.cradle": 0.8109999847412109, + "IoU.oven": 0.4286999893188477, + "IoU.ball": 0.22940000534057617, + "IoU.food": 0.548499984741211, + "IoU.step": 0.08890000343322754, + "IoU.tank": 0.5777999877929687, + "IoU.trade name": 0.2748999977111816, + "IoU.microwave": 0.7047000122070313, + "IoU.pot": 0.4291999816894531, + "IoU.animal": 0.6163000106811524, + "IoU.bicycle": 0.4909000015258789, + "IoU.lake": 0.1322999954223633, + "IoU.dishwasher": 0.5311999893188477, + "IoU.screen": 0.5309000015258789, + "IoU.blanket": 0.11619999885559082, + "IoU.sculpture": 0.5077000045776368, + "IoU.hood": 0.5933000183105469, + "IoU.sconce": 0.3345999908447266, + "IoU.vase": 0.30950000762939456, + "IoU.traffic light": 0.2631999969482422, + "IoU.tray": 0.012899999618530273, + "IoU.ashcan": 0.3668000030517578, + "IoU.fan": 0.487400016784668, + "IoU.pier": 0.33599998474121096, + "IoU.crt screen": 0.03650000095367432, + "IoU.plate": 0.48950000762939455, + "IoU.monitor": 0.24819999694824219, + "IoU.bulletin board": 0.3409000015258789, + "IoU.shower": 0.018799999952316283, + "IoU.radiator": 0.5140999984741211, + "IoU.glass": 0.10449999809265137, + "IoU.clock": 0.2681999969482422, + "IoU.flag": 0.3643000030517578, + "Acc.wall": 0.8777999877929688, + "Acc.building": 0.9256999969482422, + "Acc.sky": 0.9752999877929688, + "Acc.floor": 0.91, + "Acc.tree": 0.8697000122070313, + "Acc.ceiling": 0.9263999938964844, + "Acc.road": 0.895199966430664, + "Acc.bed ": 0.9573000335693359, + "Acc.windowpane": 0.7716999816894531, + "Acc.grass": 0.8301999664306641, + "Acc.cabinet": 0.7294999694824219, + "Acc.sidewalk": 0.7933999633789063, + "Acc.person": 0.9155000305175781, + "Acc.earth": 0.5720999908447265, + "Acc.door": 0.6898999786376954, + "Acc.table": 0.7423000335693359, + "Acc.mountain": 0.7041000366210938, + "Acc.plant": 0.6606999969482422, + "Acc.curtain": 0.8580000305175781, + "Acc.chair": 0.6856999969482422, + "Acc.car": 0.9165000152587891, + "Acc.water": 0.7883999633789063, + "Acc.painting": 0.8551000213623047, + "Acc.sofa": 0.8056999969482422, + "Acc.shelf": 0.6002000045776367, + "Acc.house": 0.6872000122070312, + "Acc.sea": 0.8887000274658203, + "Acc.mirror": 0.7630000305175781, + "Acc.rug": 0.7090000152587891, + "Acc.field": 0.5402000045776367, + "Acc.armchair": 0.6613999938964844, + "Acc.seat": 0.825, + "Acc.fence": 0.6359000015258789, + "Acc.desk": 0.6758999633789062, + "Acc.rock": 0.7044999694824219, + "Acc.wardrobe": 
0.6841000366210938, + "Acc.lamp": 0.6816000366210937, + "Acc.bathtub": 0.8916000366210938, + "Acc.railing": 0.5222000122070313, + "Acc.cushion": 0.7106999969482422, + "Acc.base": 0.5418000030517578, + "Acc.box": 0.32209999084472657, + "Acc.column": 0.6106999969482422, + "Acc.signboard": 0.46680000305175784, + "Acc.chest of drawers": 0.5777999877929687, + "Acc.counter": 0.495, + "Acc.sand": 0.6088000106811523, + "Acc.sink": 0.7468000030517579, + "Acc.skyscraper": 0.6243999862670898, + "Acc.fireplace": 0.8988999938964843, + "Acc.refrigerator": 0.8508000183105469, + "Acc.grandstand": 0.7323000335693359, + "Acc.path": 0.23569999694824217, + "Acc.stairs": 0.4002000045776367, + "Acc.runway": 0.7916999816894531, + "Acc.case": 0.7876000213623047, + "Acc.pool table": 0.9640000152587891, + "Acc.pillow": 0.6986000061035156, + "Acc.screen door": 0.7766999816894531, + "Acc.stairway": 0.42779998779296874, + "Acc.river": 0.2890999984741211, + "Acc.bridge": 0.6730999755859375, + "Acc.bookcase": 0.552599983215332, + "Acc.blind": 0.4493000030517578, + "Acc.coffee table": 0.7616999816894531, + "Acc.toilet": 0.8983999633789063, + "Acc.flower": 0.5568999862670898, + "Acc.book": 0.630099983215332, + "Acc.hill": 0.24860000610351562, + "Acc.bench": 0.48889999389648436, + "Acc.countertop": 0.7226000213623047, + "Acc.stove": 0.8137000274658203, + "Acc.palm": 0.7291000366210938, + "Acc.kitchen island": 0.6713999938964844, + "Acc.computer": 0.7908999633789062, + "Acc.swivel chair": 0.6823999786376953, + "Acc.boat": 0.8094999694824219, + "Acc.bar": 0.6818000030517578, + "Acc.arcade machine": 0.7973999786376953, + "Acc.hovel": 0.5872000122070312, + "Acc.bus": 0.9366000366210937, + "Acc.towel": 0.7829000091552735, + "Acc.light": 0.3691999816894531, + "Acc.truck": 0.2418000030517578, + "Acc.tower": 0.36770000457763674, + "Acc.chandelier": 0.7956999969482422, + "Acc.awning": 0.38279998779296875, + "Acc.streetlight": 0.17979999542236327, + "Acc.booth": 0.4779000091552734, + "Acc.television receiver": 0.8137000274658203, + "Acc.airplane": 0.7043000030517578, + "Acc.dirt track": 0.017799999713897705, + "Acc.apparel": 0.4784000015258789, + "Acc.pole": 0.25629999160766603, + "Acc.land": 0.014099999666213989, + "Acc.bannister": 0.13270000457763673, + "Acc.escalator": 0.7794999694824218, + "Acc.ottoman": 0.6533000183105468, + "Acc.bottle": 0.5920000076293945, + "Acc.buffet": 0.3915999984741211, + "Acc.poster": 0.22690000534057617, + "Acc.stage": 0.3043000030517578, + "Acc.van": 0.47639999389648435, + "Acc.ship": 0.10739999771118164, + "Acc.fountain": 0.21959999084472656, + "Acc.conveyer belt": 0.9152999877929687, + "Acc.canopy": 0.34240001678466797, + "Acc.washer": 0.7330999755859375, + "Acc.plaything": 0.5402999877929687, + "Acc.swimming pool": 0.8148999786376954, + "Acc.stool": 0.3434000015258789, + "Acc.barrel": 0.6327000045776368, + "Acc.basket": 0.3208000183105469, + "Acc.waterfall": 0.5538999938964844, + "Acc.tent": 0.9830000305175781, + "Acc.bag": 0.143100004196167, + "Acc.minibike": 0.6519000244140625, + "Acc.cradle": 0.9687000274658203, + "Acc.oven": 0.6644000244140625, + "Acc.ball": 0.23340000152587892, + "Acc.food": 0.6304999923706055, + "Acc.step": 0.11479999542236329, + "Acc.tank": 0.6527999877929688, + "Acc.trade name": 0.30959999084472656, + "Acc.microwave": 0.7676999664306641, + "Acc.pot": 0.5095999908447265, + "Acc.animal": 0.6566000366210938, + "Acc.bicycle": 0.7244999694824219, + "Acc.lake": 0.13800000190734862, + "Acc.dishwasher": 0.6179999923706054, + "Acc.screen": 0.7401999664306641, + "Acc.blanket": 
0.1275, + "Acc.sculpture": 0.6608000183105469, + "Acc.hood": 0.6566000366210938, + "Acc.sconce": 0.4241999816894531, + "Acc.vase": 0.4102000045776367, + "Acc.traffic light": 0.3656000137329102, + "Acc.tray": 0.01440000057220459, + "Acc.ashcan": 0.49630001068115237, + "Acc.fan": 0.5906000137329102, + "Acc.pier": 0.44790000915527345, + "Acc.crt screen": 0.07519999980926513, + "Acc.plate": 0.6555000305175781, + "Acc.monitor": 0.40950000762939454, + "Acc.bulletin board": 0.46700000762939453, + "Acc.shower": 0.05, + "Acc.radiator": 0.5665999984741211, + "Acc.glass": 0.10859999656677247, + "Acc.clock": 0.31879999160766603, + "Acc.flag": 0.4022999954223633 + } + }, + "104": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8318000000000001, + "mIoU": 0.4849, + "mAcc": 0.5935, + "IoU.wall": 0.7738999938964843, + "IoU.building": 0.8286000061035156, + "IoU.sky": 0.9373000335693359, + "IoU.floor": 0.8130999755859375, + "IoU.tree": 0.7458000183105469, + "IoU.ceiling": 0.8344000244140625, + "IoU.road": 0.8243000030517578, + "IoU.bed ": 0.8969000244140625, + "IoU.windowpane": 0.6204000091552735, + "IoU.grass": 0.6891000366210938, + "IoU.cabinet": 0.6322999954223633, + "IoU.sidewalk": 0.6441999816894531, + "IoU.person": 0.8047000122070312, + "IoU.earth": 0.3759000015258789, + "IoU.door": 0.5138000106811523, + "IoU.table": 0.601500015258789, + "IoU.mountain": 0.5791999816894531, + "IoU.plant": 0.5129999923706055, + "IoU.curtain": 0.7470999908447266, + "IoU.chair": 0.5529999923706055, + "IoU.car": 0.8377999877929687, + "IoU.water": 0.5611000061035156, + "IoU.painting": 0.7036000061035156, + "IoU.sofa": 0.6586000061035157, + "IoU.shelf": 0.4193000030517578, + "IoU.house": 0.46740001678466797, + "IoU.sea": 0.6691000366210937, + "IoU.mirror": 0.6918000030517578, + "IoU.rug": 0.6347000122070312, + "IoU.field": 0.35380001068115235, + "IoU.armchair": 0.4229999923706055, + "IoU.seat": 0.6526999664306641, + "IoU.fence": 0.4386999893188477, + "IoU.desk": 0.4815999984741211, + "IoU.rock": 0.4695999908447266, + "IoU.wardrobe": 0.5629999923706055, + "IoU.lamp": 0.5468000030517578, + "IoU.bathtub": 0.8312000274658203, + "IoU.railing": 0.39, + "IoU.cushion": 0.5845999908447266, + "IoU.base": 0.2997999954223633, + "IoU.box": 0.22370000839233398, + "IoU.column": 0.48259998321533204, + "IoU.signboard": 0.35939998626708985, + "IoU.chest of drawers": 0.34799999237060547, + "IoU.counter": 0.415, + "IoU.sand": 0.452599983215332, + "IoU.sink": 0.6979000091552734, + "IoU.skyscraper": 0.5436999893188477, + "IoU.fireplace": 0.7194000244140625, + "IoU.refrigerator": 0.7795999908447265, + "IoU.grandstand": 0.5170999908447266, + "IoU.path": 0.17629999160766602, + "IoU.stairs": 0.2823999977111816, + "IoU.runway": 0.5840999984741211, + "IoU.case": 0.6165999984741211, + "IoU.pool table": 0.9287000274658204, + "IoU.pillow": 0.5727000045776367, + "IoU.screen door": 0.6058000183105469, + "IoU.stairway": 0.3704999923706055, + "IoU.river": 0.18780000686645507, + "IoU.bridge": 0.5211999893188477, + "IoU.bookcase": 0.3484000015258789, + "IoU.blind": 0.4159000015258789, + "IoU.coffee table": 0.6027000045776367, + "IoU.toilet": 0.8298999786376953, + "IoU.flower": 0.41389999389648435, + "IoU.book": 0.422400016784668, + "IoU.hill": 0.13949999809265137, + "IoU.bench": 0.417400016784668, + "IoU.countertop": 0.5829999923706055, + "IoU.stove": 0.7677999877929688, + "IoU.palm": 0.4868000030517578, + "IoU.kitchen island": 0.37729999542236325, + "IoU.computer": 
0.6397000122070312, + "IoU.swivel chair": 0.524900016784668, + "IoU.boat": 0.6658999633789062, + "IoU.bar": 0.5220999908447266, + "IoU.arcade machine": 0.48779998779296874, + "IoU.hovel": 0.30959999084472656, + "IoU.bus": 0.9030000305175782, + "IoU.towel": 0.6041999816894531, + "IoU.light": 0.29600000381469727, + "IoU.truck": 0.16959999084472657, + "IoU.tower": 0.2659000015258789, + "IoU.chandelier": 0.6284000015258789, + "IoU.awning": 0.23700000762939452, + "IoU.streetlight": 0.15949999809265136, + "IoU.booth": 0.33599998474121096, + "IoU.television receiver": 0.6923999786376953, + "IoU.airplane": 0.5986999893188476, + "IoU.dirt track": 0.03950000047683716, + "IoU.apparel": 0.3327000045776367, + "IoU.pole": 0.1697999954223633, + "IoU.land": 0.055500001907348634, + "IoU.bannister": 0.029100000858306885, + "IoU.escalator": 0.5470000076293945, + "IoU.ottoman": 0.4906000137329102, + "IoU.bottle": 0.34099998474121096, + "IoU.buffet": 0.41759998321533204, + "IoU.poster": 0.28649999618530275, + "IoU.stage": 0.18260000228881837, + "IoU.van": 0.35689998626708985, + "IoU.ship": 0.7383999633789062, + "IoU.fountain": 0.22200000762939454, + "IoU.conveyer belt": 0.7529000091552734, + "IoU.canopy": 0.28600000381469726, + "IoU.washer": 0.7048999786376953, + "IoU.plaything": 0.3085000038146973, + "IoU.swimming pool": 0.7611000061035156, + "IoU.stool": 0.34180000305175784, + "IoU.barrel": 0.30010000228881833, + "IoU.basket": 0.2606999969482422, + "IoU.waterfall": 0.4936000061035156, + "IoU.tent": 0.9537000274658203, + "IoU.bag": 0.12949999809265136, + "IoU.minibike": 0.7073000335693359, + "IoU.cradle": 0.82, + "IoU.oven": 0.45930000305175783, + "IoU.ball": 0.32049999237060545, + "IoU.food": 0.4979999923706055, + "IoU.step": 0.07489999771118164, + "IoU.tank": 0.574900016784668, + "IoU.trade name": 0.2368000030517578, + "IoU.microwave": 0.7906999969482422, + "IoU.pot": 0.4856000137329102, + "IoU.animal": 0.6006000137329102, + "IoU.bicycle": 0.5468999862670898, + "IoU.lake": 0.6443000030517578, + "IoU.dishwasher": 0.577400016784668, + "IoU.screen": 0.5561000061035156, + "IoU.blanket": 0.1381999969482422, + "IoU.sculpture": 0.5920000076293945, + "IoU.hood": 0.5059000015258789, + "IoU.sconce": 0.27430000305175783, + "IoU.vase": 0.3068000030517578, + "IoU.traffic light": 0.23790000915527343, + "IoU.tray": 0.01690000057220459, + "IoU.ashcan": 0.342599983215332, + "IoU.fan": 0.43340000152587893, + "IoU.pier": 0.2844000053405762, + "IoU.crt screen": 0.012599999904632569, + "IoU.plate": 0.5127999877929688, + "IoU.monitor": 0.17020000457763673, + "IoU.bulletin board": 0.34509998321533203, + "IoU.shower": 0.008199999928474427, + "IoU.radiator": 0.5429999923706055, + "IoU.glass": 0.10060000419616699, + "IoU.clock": 0.26600000381469724, + "IoU.flag": 0.41130001068115235, + "Acc.wall": 0.8879000091552735, + "Acc.building": 0.9279000091552735, + "Acc.sky": 0.9763999938964844, + "Acc.floor": 0.9070999908447266, + "Acc.tree": 0.8712000274658203, + "Acc.ceiling": 0.9273999786376953, + "Acc.road": 0.8947000122070312, + "Acc.bed ": 0.96, + "Acc.windowpane": 0.7738999938964843, + "Acc.grass": 0.8140000152587891, + "Acc.cabinet": 0.7483999633789062, + "Acc.sidewalk": 0.7904000091552734, + "Acc.person": 0.9141000366210937, + "Acc.earth": 0.549099998474121, + "Acc.door": 0.6783999633789063, + "Acc.table": 0.7627999877929688, + "Acc.mountain": 0.7062000274658203, + "Acc.plant": 0.6587000274658203, + "Acc.curtain": 0.8522000122070312, + "Acc.chair": 0.6863999938964844, + "Acc.car": 0.927300033569336, + "Acc.water": 
0.7195999908447266, + "Acc.painting": 0.8308999633789063, + "Acc.sofa": 0.8234999847412109, + "Acc.shelf": 0.592599983215332, + "Acc.house": 0.6605000305175781, + "Acc.sea": 0.8855999755859375, + "Acc.mirror": 0.7680999755859375, + "Acc.rug": 0.7208999633789063, + "Acc.field": 0.5215000152587891, + "Acc.armchair": 0.6116999816894532, + "Acc.seat": 0.8380999755859375, + "Acc.fence": 0.5850999832153321, + "Acc.desk": 0.655, + "Acc.rock": 0.7030000305175781, + "Acc.wardrobe": 0.678499984741211, + "Acc.lamp": 0.6595999908447265, + "Acc.bathtub": 0.8694000244140625, + "Acc.railing": 0.5183000183105468, + "Acc.cushion": 0.7319999694824219, + "Acc.base": 0.522400016784668, + "Acc.box": 0.2884000015258789, + "Acc.column": 0.5922000122070312, + "Acc.signboard": 0.47, + "Acc.chest of drawers": 0.5683000183105469, + "Acc.counter": 0.5420000076293945, + "Acc.sand": 0.5813000106811523, + "Acc.sink": 0.7694999694824218, + "Acc.skyscraper": 0.634000015258789, + "Acc.fireplace": 0.9062999725341797, + "Acc.refrigerator": 0.8454000091552735, + "Acc.grandstand": 0.7502999877929688, + "Acc.path": 0.23559999465942383, + "Acc.stairs": 0.3618000030517578, + "Acc.runway": 0.7483000183105468, + "Acc.case": 0.7691999816894531, + "Acc.pool table": 0.9580000305175781, + "Acc.pillow": 0.6652999877929687, + "Acc.screen door": 0.6718000030517578, + "Acc.stairway": 0.46939998626708984, + "Acc.river": 0.3554000091552734, + "Acc.bridge": 0.6222999954223633, + "Acc.bookcase": 0.6004999923706055, + "Acc.blind": 0.46419998168945314, + "Acc.coffee table": 0.7443000030517578, + "Acc.toilet": 0.8961000061035156, + "Acc.flower": 0.5697999954223633, + "Acc.book": 0.5838999938964844, + "Acc.hill": 0.23299999237060548, + "Acc.bench": 0.5036999893188476, + "Acc.countertop": 0.7358999633789063, + "Acc.stove": 0.8266999816894531, + "Acc.palm": 0.6887000274658203, + "Acc.kitchen island": 0.5197000122070312, + "Acc.computer": 0.7794000244140625, + "Acc.swivel chair": 0.6531999969482422, + "Acc.boat": 0.836500015258789, + "Acc.bar": 0.629900016784668, + "Acc.arcade machine": 0.5236999893188476, + "Acc.hovel": 0.3484000015258789, + "Acc.bus": 0.9512999725341796, + "Acc.towel": 0.7501999664306641, + "Acc.light": 0.31309999465942384, + "Acc.truck": 0.23459999084472657, + "Acc.tower": 0.3759000015258789, + "Acc.chandelier": 0.7797000122070312, + "Acc.awning": 0.27360000610351565, + "Acc.streetlight": 0.18620000839233397, + "Acc.booth": 0.4431999969482422, + "Acc.television receiver": 0.7855999755859375, + "Acc.airplane": 0.6756999969482422, + "Acc.dirt track": 0.181200008392334, + "Acc.apparel": 0.45119998931884764, + "Acc.pole": 0.21889999389648437, + "Acc.land": 0.0696999979019165, + "Acc.bannister": 0.03940000057220459, + "Acc.escalator": 0.7387000274658203, + "Acc.ottoman": 0.6381999969482421, + "Acc.bottle": 0.552400016784668, + "Acc.buffet": 0.47580001831054686, + "Acc.poster": 0.4470000076293945, + "Acc.stage": 0.2809000015258789, + "Acc.van": 0.44229999542236326, + "Acc.ship": 0.7591999816894531, + "Acc.fountain": 0.2297999954223633, + "Acc.conveyer belt": 0.9112999725341797, + "Acc.canopy": 0.33810001373291015, + "Acc.washer": 0.7166999816894531, + "Acc.plaything": 0.5563000106811523, + "Acc.swimming pool": 0.8651999664306641, + "Acc.stool": 0.41330001831054686, + "Acc.barrel": 0.6375999832153321, + "Acc.basket": 0.33380001068115234, + "Acc.waterfall": 0.5654000091552734, + "Acc.tent": 0.9766999816894532, + "Acc.bag": 0.15140000343322754, + "Acc.minibike": 0.7905999755859375, + "Acc.cradle": 0.961500015258789, + "Acc.oven": 
0.5436999893188477, + "Acc.ball": 0.3509999847412109, + "Acc.food": 0.584900016784668, + "Acc.step": 0.09930000305175782, + "Acc.tank": 0.6480999755859375, + "Acc.trade name": 0.2620000076293945, + "Acc.microwave": 0.88, + "Acc.pot": 0.5745999908447266, + "Acc.animal": 0.634900016784668, + "Acc.bicycle": 0.6923999786376953, + "Acc.lake": 0.6837999725341797, + "Acc.dishwasher": 0.7075, + "Acc.screen": 0.7745999908447265, + "Acc.blanket": 0.15920000076293944, + "Acc.sculpture": 0.7769000244140625, + "Acc.hood": 0.6363999938964844, + "Acc.sconce": 0.3484000015258789, + "Acc.vase": 0.4231999969482422, + "Acc.traffic light": 0.3479000091552734, + "Acc.tray": 0.018899999856948853, + "Acc.ashcan": 0.48069999694824217, + "Acc.fan": 0.49150001525878906, + "Acc.pier": 0.4222999954223633, + "Acc.crt screen": 0.028900001049041748, + "Acc.plate": 0.6769999694824219, + "Acc.monitor": 0.2805999946594238, + "Acc.bulletin board": 0.45360000610351564, + "Acc.shower": 0.04719999790191651, + "Acc.radiator": 0.5984000015258789, + "Acc.glass": 0.10390000343322754, + "Acc.clock": 0.28450000762939454, + "Acc.flag": 0.44880001068115233 + } + }, + "105": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8329000000000001, + "mIoU": 0.4824, + "mAcc": 0.5821000000000001, + "IoU.wall": 0.7716999816894531, + "IoU.building": 0.8287000274658203, + "IoU.sky": 0.9362999725341797, + "IoU.floor": 0.8111000061035156, + "IoU.tree": 0.7426000213623047, + "IoU.ceiling": 0.831500015258789, + "IoU.road": 0.8308000183105468, + "IoU.bed ": 0.8948000335693359, + "IoU.windowpane": 0.6218000030517579, + "IoU.grass": 0.6862999725341797, + "IoU.cabinet": 0.6347000122070312, + "IoU.sidewalk": 0.6505999755859375, + "IoU.person": 0.8062000274658203, + "IoU.earth": 0.37799999237060544, + "IoU.door": 0.5209999847412109, + "IoU.table": 0.6029999923706054, + "IoU.mountain": 0.5856999969482422, + "IoU.plant": 0.5265000152587891, + "IoU.curtain": 0.7494999694824219, + "IoU.chair": 0.5491999816894532, + "IoU.car": 0.8405000305175782, + "IoU.water": 0.6040000152587891, + "IoU.painting": 0.7201999664306641, + "IoU.sofa": 0.6844000244140624, + "IoU.shelf": 0.43220001220703125, + "IoU.house": 0.42689998626708986, + "IoU.sea": 0.7016000366210937, + "IoU.mirror": 0.6956999969482421, + "IoU.rug": 0.6329999923706054, + "IoU.field": 0.36580001831054687, + "IoU.armchair": 0.42130001068115236, + "IoU.seat": 0.655199966430664, + "IoU.fence": 0.4013999938964844, + "IoU.desk": 0.4840999984741211, + "IoU.rock": 0.44490001678466795, + "IoU.wardrobe": 0.5800999832153321, + "IoU.lamp": 0.5561999893188476, + "IoU.bathtub": 0.8537000274658203, + "IoU.railing": 0.39599998474121095, + "IoU.cushion": 0.5931000137329101, + "IoU.base": 0.30459999084472655, + "IoU.box": 0.24190000534057618, + "IoU.column": 0.4911999893188477, + "IoU.signboard": 0.36849998474121093, + "IoU.chest of drawers": 0.33950000762939453, + "IoU.counter": 0.38470001220703126, + "IoU.sand": 0.44220001220703126, + "IoU.sink": 0.701500015258789, + "IoU.skyscraper": 0.5399000167846679, + "IoU.fireplace": 0.7158999633789063, + "IoU.refrigerator": 0.782300033569336, + "IoU.grandstand": 0.5120000076293946, + "IoU.path": 0.2234000015258789, + "IoU.stairs": 0.2434000015258789, + "IoU.runway": 0.6179999923706054, + "IoU.case": 0.5702999877929688, + "IoU.pool table": 0.9248999786376954, + "IoU.pillow": 0.562599983215332, + "IoU.screen door": 0.6806999969482422, + "IoU.stairway": 0.32459999084472657, + "IoU.river": 
0.19969999313354492, + "IoU.bridge": 0.4843000030517578, + "IoU.bookcase": 0.3363999938964844, + "IoU.blind": 0.41069999694824216, + "IoU.coffee table": 0.6234999847412109, + "IoU.toilet": 0.8433999633789062, + "IoU.flower": 0.37090000152587893, + "IoU.book": 0.4518999862670898, + "IoU.hill": 0.1459000015258789, + "IoU.bench": 0.3806999969482422, + "IoU.countertop": 0.5818999862670898, + "IoU.stove": 0.7522000122070313, + "IoU.palm": 0.5022999954223633, + "IoU.kitchen island": 0.40099998474121096, + "IoU.computer": 0.648499984741211, + "IoU.swivel chair": 0.5156000137329102, + "IoU.boat": 0.7526000213623046, + "IoU.bar": 0.4929999923706055, + "IoU.arcade machine": 0.5495000076293945, + "IoU.hovel": 0.38490001678466795, + "IoU.bus": 0.9048000335693359, + "IoU.towel": 0.6420999908447266, + "IoU.light": 0.31260000228881835, + "IoU.truck": 0.18170000076293946, + "IoU.tower": 0.22360000610351563, + "IoU.chandelier": 0.6322999954223633, + "IoU.awning": 0.23079999923706054, + "IoU.streetlight": 0.15039999961853026, + "IoU.booth": 0.3275, + "IoU.television receiver": 0.6581999969482422, + "IoU.airplane": 0.571500015258789, + "IoU.dirt track": 0.09630000114440918, + "IoU.apparel": 0.359900016784668, + "IoU.pole": 0.14880000114440917, + "IoU.land": 0.03589999914169312, + "IoU.bannister": 0.0671999979019165, + "IoU.escalator": 0.5495000076293945, + "IoU.ottoman": 0.48639999389648436, + "IoU.bottle": 0.2968000030517578, + "IoU.buffet": 0.3086000061035156, + "IoU.poster": 0.21510000228881837, + "IoU.stage": 0.18260000228881837, + "IoU.van": 0.3838000106811523, + "IoU.ship": 0.10680000305175781, + "IoU.fountain": 0.21469999313354493, + "IoU.conveyer belt": 0.7031999969482422, + "IoU.canopy": 0.22149999618530272, + "IoU.washer": 0.7080000305175781, + "IoU.plaything": 0.28, + "IoU.swimming pool": 0.7466999816894532, + "IoU.stool": 0.38549999237060545, + "IoU.barrel": 0.5893000030517578, + "IoU.basket": 0.30950000762939456, + "IoU.waterfall": 0.43590000152587893, + "IoU.tent": 0.9548999786376953, + "IoU.bag": 0.16170000076293944, + "IoU.minibike": 0.6994000244140625, + "IoU.cradle": 0.8148999786376954, + "IoU.oven": 0.24649999618530274, + "IoU.ball": 0.5504999923706054, + "IoU.food": 0.4859999847412109, + "IoU.step": 0.08390000343322754, + "IoU.tank": 0.5668999862670898, + "IoU.trade name": 0.24159999847412109, + "IoU.microwave": 0.46529998779296877, + "IoU.pot": 0.467599983215332, + "IoU.animal": 0.5786000061035156, + "IoU.bicycle": 0.5481000137329102, + "IoU.lake": 0.43020000457763674, + "IoU.dishwasher": 0.6304999923706055, + "IoU.screen": 0.5818999862670898, + "IoU.blanket": 0.12710000038146974, + "IoU.sculpture": 0.6480999755859375, + "IoU.hood": 0.525, + "IoU.sconce": 0.28959999084472654, + "IoU.vase": 0.32869998931884764, + "IoU.traffic light": 0.22809999465942382, + "IoU.tray": 0.02559999942779541, + "IoU.ashcan": 0.41650001525878905, + "IoU.fan": 0.43520000457763675, + "IoU.pier": 0.28579999923706056, + "IoU.crt screen": 0.06840000152587891, + "IoU.plate": 0.5325999832153321, + "IoU.monitor": 0.460099983215332, + "IoU.bulletin board": 0.3820000076293945, + "IoU.shower": 0.005899999737739563, + "IoU.radiator": 0.5275, + "IoU.glass": 0.04550000190734863, + "IoU.clock": 0.2443000030517578, + "IoU.flag": 0.3529999923706055, + "Acc.wall": 0.8988999938964843, + "Acc.building": 0.9315000152587891, + "Acc.sky": 0.9769000244140625, + "Acc.floor": 0.9136000061035157, + "Acc.tree": 0.8723000335693359, + "Acc.ceiling": 0.93, + "Acc.road": 0.9076999664306641, + "Acc.bed ": 0.9570999908447265, + 
"Acc.windowpane": 0.7708000183105469, + "Acc.grass": 0.8154000091552734, + "Acc.cabinet": 0.7433999633789062, + "Acc.sidewalk": 0.7848000335693359, + "Acc.person": 0.9123999786376953, + "Acc.earth": 0.5463000106811523, + "Acc.door": 0.6558999633789062, + "Acc.table": 0.7604000091552734, + "Acc.mountain": 0.7218000030517578, + "Acc.plant": 0.6561000061035156, + "Acc.curtain": 0.8519999694824218, + "Acc.chair": 0.6626000213623047, + "Acc.car": 0.924000015258789, + "Acc.water": 0.7797000122070312, + "Acc.painting": 0.8383999633789062, + "Acc.sofa": 0.882300033569336, + "Acc.shelf": 0.6220999908447266, + "Acc.house": 0.5679000091552734, + "Acc.sea": 0.8641999816894531, + "Acc.mirror": 0.7598000335693359, + "Acc.rug": 0.7269000244140625, + "Acc.field": 0.5545000076293946, + "Acc.armchair": 0.5879000091552734, + "Acc.seat": 0.8286000061035156, + "Acc.fence": 0.5320000076293945, + "Acc.desk": 0.6612999725341797, + "Acc.rock": 0.6222999954223633, + "Acc.wardrobe": 0.6681999969482422, + "Acc.lamp": 0.6480999755859375, + "Acc.bathtub": 0.8891999816894531, + "Acc.railing": 0.5333000183105469, + "Acc.cushion": 0.7136000061035156, + "Acc.base": 0.4881000137329102, + "Acc.box": 0.3159000015258789, + "Acc.column": 0.5891999816894531, + "Acc.signboard": 0.4540000152587891, + "Acc.chest of drawers": 0.5352999877929687, + "Acc.counter": 0.5022000122070313, + "Acc.sand": 0.6052999877929688, + "Acc.sink": 0.7644000244140625, + "Acc.skyscraper": 0.6172999954223632, + "Acc.fireplace": 0.8652999877929688, + "Acc.refrigerator": 0.8333000183105469, + "Acc.grandstand": 0.7237999725341797, + "Acc.path": 0.29649999618530276, + "Acc.stairs": 0.3156999969482422, + "Acc.runway": 0.7916000366210938, + "Acc.case": 0.7598000335693359, + "Acc.pool table": 0.9591000366210938, + "Acc.pillow": 0.6452999877929687, + "Acc.screen door": 0.7926000213623047, + "Acc.stairway": 0.44810001373291014, + "Acc.river": 0.3634000015258789, + "Acc.bridge": 0.5566999816894531, + "Acc.bookcase": 0.5745000076293946, + "Acc.blind": 0.45369998931884764, + "Acc.coffee table": 0.7641999816894531, + "Acc.toilet": 0.8876999664306641, + "Acc.flower": 0.4933000183105469, + "Acc.book": 0.6, + "Acc.hill": 0.22829999923706054, + "Acc.bench": 0.4416999816894531, + "Acc.countertop": 0.7129000091552734, + "Acc.stove": 0.8216999816894531, + "Acc.palm": 0.6813999938964844, + "Acc.kitchen island": 0.5272000122070313, + "Acc.computer": 0.7408000183105469, + "Acc.swivel chair": 0.6411000061035156, + "Acc.boat": 0.8147000122070313, + "Acc.bar": 0.5961000061035157, + "Acc.arcade machine": 0.5841999816894531, + "Acc.hovel": 0.4134999847412109, + "Acc.bus": 0.9352999877929687, + "Acc.towel": 0.7523999786376954, + "Acc.light": 0.3340000152587891, + "Acc.truck": 0.23649999618530274, + "Acc.tower": 0.34599998474121096, + "Acc.chandelier": 0.7894999694824218, + "Acc.awning": 0.26190000534057617, + "Acc.streetlight": 0.18059999465942383, + "Acc.booth": 0.37720001220703125, + "Acc.television receiver": 0.7355000305175782, + "Acc.airplane": 0.627400016784668, + "Acc.dirt track": 0.3064999961853027, + "Acc.apparel": 0.4906000137329102, + "Acc.pole": 0.18149999618530274, + "Acc.land": 0.04539999961853027, + "Acc.bannister": 0.07920000076293945, + "Acc.escalator": 0.7666000366210938, + "Acc.ottoman": 0.6215999984741211, + "Acc.bottle": 0.4070999908447266, + "Acc.buffet": 0.36450000762939455, + "Acc.poster": 0.39560001373291015, + "Acc.stage": 0.24309999465942383, + "Acc.van": 0.4686000061035156, + "Acc.ship": 0.11350000381469727, + "Acc.fountain": 0.2168000030517578, + 
"Acc.conveyer belt": 0.9087000274658203, + "Acc.canopy": 0.23670000076293946, + "Acc.washer": 0.7273999786376953, + "Acc.plaything": 0.3686000061035156, + "Acc.swimming pool": 0.8865000152587891, + "Acc.stool": 0.4597999954223633, + "Acc.barrel": 0.6363999938964844, + "Acc.basket": 0.3827000045776367, + "Acc.waterfall": 0.5288999938964843, + "Acc.tent": 0.9708999633789063, + "Acc.bag": 0.1890999984741211, + "Acc.minibike": 0.7711000061035156, + "Acc.cradle": 0.9576000213623047, + "Acc.oven": 0.5845000076293946, + "Acc.ball": 0.6202000045776367, + "Acc.food": 0.5572999954223633, + "Acc.step": 0.10800000190734864, + "Acc.tank": 0.6393999862670898, + "Acc.trade name": 0.2631999969482422, + "Acc.microwave": 0.5118000030517578, + "Acc.pot": 0.5345000076293945, + "Acc.animal": 0.601599998474121, + "Acc.bicycle": 0.6823000335693359, + "Acc.lake": 0.5352000045776367, + "Acc.dishwasher": 0.7012999725341796, + "Acc.screen": 0.711500015258789, + "Acc.blanket": 0.1406999969482422, + "Acc.sculpture": 0.7273000335693359, + "Acc.hood": 0.6572000122070313, + "Acc.sconce": 0.35880001068115236, + "Acc.vase": 0.4097999954223633, + "Acc.traffic light": 0.3111000061035156, + "Acc.tray": 0.029700000286102295, + "Acc.ashcan": 0.5465999984741211, + "Acc.fan": 0.4943000030517578, + "Acc.pier": 0.40419998168945315, + "Acc.crt screen": 0.09960000038146972, + "Acc.plate": 0.6727999877929688, + "Acc.monitor": 0.717699966430664, + "Acc.bulletin board": 0.45110000610351564, + "Acc.shower": 0.009900000095367432, + "Acc.radiator": 0.5747000122070313, + "Acc.glass": 0.04639999866485596, + "Acc.clock": 0.25459999084472656, + "Acc.flag": 0.380099983215332 + } + }, + "106": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8299, + "mIoU": 0.473, + "mAcc": 0.5615, + "IoU.wall": 0.7687000274658203, + "IoU.building": 0.8216999816894531, + "IoU.sky": 0.9277999877929688, + "IoU.floor": 0.8045999908447266, + "IoU.tree": 0.7248999786376953, + "IoU.ceiling": 0.8294999694824219, + "IoU.road": 0.8262000274658203, + "IoU.bed ": 0.8891000366210937, + "IoU.windowpane": 0.6191999816894531, + "IoU.grass": 0.6816999816894531, + "IoU.cabinet": 0.6390999984741211, + "IoU.sidewalk": 0.6437999725341796, + "IoU.person": 0.8037000274658204, + "IoU.earth": 0.3818000030517578, + "IoU.door": 0.5029000091552734, + "IoU.table": 0.6052000045776367, + "IoU.mountain": 0.5768999862670898, + "IoU.plant": 0.5252000045776367, + "IoU.curtain": 0.7469000244140624, + "IoU.chair": 0.5409999847412109, + "IoU.car": 0.834800033569336, + "IoU.water": 0.5947000122070313, + "IoU.painting": 0.7086000061035156, + "IoU.sofa": 0.6698999786376953, + "IoU.shelf": 0.4304999923706055, + "IoU.house": 0.40869998931884766, + "IoU.sea": 0.6901999664306641, + "IoU.mirror": 0.6683999633789063, + "IoU.rug": 0.6118000030517579, + "IoU.field": 0.33610000610351565, + "IoU.armchair": 0.38229999542236326, + "IoU.seat": 0.6641000366210937, + "IoU.fence": 0.40349998474121096, + "IoU.desk": 0.47650001525878904, + "IoU.rock": 0.4588999938964844, + "IoU.wardrobe": 0.5734999847412109, + "IoU.lamp": 0.5343999862670898, + "IoU.bathtub": 0.8301999664306641, + "IoU.railing": 0.39869998931884765, + "IoU.cushion": 0.5795000076293946, + "IoU.base": 0.30040000915527343, + "IoU.box": 0.23780000686645508, + "IoU.column": 0.4534000015258789, + "IoU.signboard": 0.33610000610351565, + "IoU.chest of drawers": 0.3440999984741211, + "IoU.counter": 0.3833000183105469, + "IoU.sand": 0.4718000030517578, + "IoU.sink": 
0.6983000183105469, + "IoU.skyscraper": 0.5065000152587891, + "IoU.fireplace": 0.7341000366210938, + "IoU.refrigerator": 0.7608000183105469, + "IoU.grandstand": 0.5152000045776367, + "IoU.path": 0.236200008392334, + "IoU.stairs": 0.16299999237060547, + "IoU.runway": 0.6315999984741211, + "IoU.case": 0.5459999847412109, + "IoU.pool table": 0.9243000030517579, + "IoU.pillow": 0.4983000183105469, + "IoU.screen door": 0.7201000213623047, + "IoU.stairway": 0.28719999313354494, + "IoU.river": 0.18299999237060546, + "IoU.bridge": 0.5666999816894531, + "IoU.bookcase": 0.3121999931335449, + "IoU.blind": 0.40110000610351565, + "IoU.coffee table": 0.6188000106811523, + "IoU.toilet": 0.8297000122070313, + "IoU.flower": 0.3902000045776367, + "IoU.book": 0.42540000915527343, + "IoU.hill": 0.14329999923706055, + "IoU.bench": 0.4477000045776367, + "IoU.countertop": 0.5659999847412109, + "IoU.stove": 0.7508999633789063, + "IoU.palm": 0.44029998779296875, + "IoU.kitchen island": 0.4386999893188477, + "IoU.computer": 0.7473000335693359, + "IoU.swivel chair": 0.492400016784668, + "IoU.boat": 0.6129000091552734, + "IoU.bar": 0.5197999954223633, + "IoU.arcade machine": 0.37209999084472656, + "IoU.hovel": 0.22870000839233398, + "IoU.bus": 0.9075, + "IoU.towel": 0.6727999877929688, + "IoU.light": 0.315, + "IoU.truck": 0.176200008392334, + "IoU.tower": 0.165, + "IoU.chandelier": 0.6104999923706055, + "IoU.awning": 0.20950000762939452, + "IoU.streetlight": 0.1559000015258789, + "IoU.booth": 0.33610000610351565, + "IoU.television receiver": 0.6476999664306641, + "IoU.airplane": 0.5611000061035156, + "IoU.dirt track": 0.13180000305175782, + "IoU.apparel": 0.34509998321533203, + "IoU.pole": 0.09859999656677246, + "IoU.land": 0.03819999933242798, + "IoU.bannister": 0.09649999618530274, + "IoU.escalator": 0.549099998474121, + "IoU.ottoman": 0.46099998474121096, + "IoU.bottle": 0.21270000457763671, + "IoU.buffet": 0.3256999969482422, + "IoU.poster": 0.11760000228881835, + "IoU.stage": 0.1402999973297119, + "IoU.van": 0.3484000015258789, + "IoU.ship": 0.07760000228881836, + "IoU.fountain": 0.23020000457763673, + "IoU.conveyer belt": 0.7283000183105469, + "IoU.canopy": 0.1715999984741211, + "IoU.washer": 0.7084999847412109, + "IoU.plaything": 0.26600000381469724, + "IoU.swimming pool": 0.7502999877929688, + "IoU.stool": 0.3777000045776367, + "IoU.barrel": 0.5927999877929687, + "IoU.basket": 0.34380001068115235, + "IoU.waterfall": 0.44209999084472656, + "IoU.tent": 0.9490000152587891, + "IoU.bag": 0.15390000343322754, + "IoU.minibike": 0.6309999847412109, + "IoU.cradle": 0.8011000061035156, + "IoU.oven": 0.3, + "IoU.ball": 0.5436999893188477, + "IoU.food": 0.42139999389648436, + "IoU.step": 0.12539999961853027, + "IoU.tank": 0.5431000137329102, + "IoU.trade name": 0.16329999923706054, + "IoU.microwave": 0.6945999908447266, + "IoU.pot": 0.4506999969482422, + "IoU.animal": 0.5952999877929688, + "IoU.bicycle": 0.5022000122070313, + "IoU.lake": 0.37740001678466795, + "IoU.dishwasher": 0.6090000152587891, + "IoU.screen": 0.5963000106811523, + "IoU.blanket": 0.11260000228881836, + "IoU.sculpture": 0.6520999908447266, + "IoU.hood": 0.5204000091552734, + "IoU.sconce": 0.27280000686645506, + "IoU.vase": 0.33229999542236327, + "IoU.traffic light": 0.21899999618530275, + "IoU.tray": 0.034200000762939456, + "IoU.ashcan": 0.35080001831054686, + "IoU.fan": 0.4709000015258789, + "IoU.pier": 0.2925, + "IoU.crt screen": 0.021800000667572022, + "IoU.plate": 0.5070999908447266, + "IoU.monitor": 0.48189998626708985, + "IoU.bulletin board": 
0.2538999938964844, + "IoU.shower": 0.010499999523162842, + "IoU.radiator": 0.560099983215332, + "IoU.glass": 0.09869999885559082, + "IoU.clock": 0.2493000030517578, + "IoU.flag": 0.3865000152587891, + "Acc.wall": 0.8976999664306641, + "Acc.building": 0.9445999908447266, + "Acc.sky": 0.9834999847412109, + "Acc.floor": 0.925199966430664, + "Acc.tree": 0.8240000152587891, + "Acc.ceiling": 0.9330000305175781, + "Acc.road": 0.9109999847412109, + "Acc.bed ": 0.9591999816894531, + "Acc.windowpane": 0.7958999633789062, + "Acc.grass": 0.8351999664306641, + "Acc.cabinet": 0.7647000122070312, + "Acc.sidewalk": 0.7870999908447266, + "Acc.person": 0.8836000061035156, + "Acc.earth": 0.5536000061035157, + "Acc.door": 0.6109000015258789, + "Acc.table": 0.7876999664306641, + "Acc.mountain": 0.725999984741211, + "Acc.plant": 0.625, + "Acc.curtain": 0.8543000030517578, + "Acc.chair": 0.6393999862670898, + "Acc.car": 0.9015000152587891, + "Acc.water": 0.800199966430664, + "Acc.painting": 0.8177999877929687, + "Acc.sofa": 0.8812999725341797, + "Acc.shelf": 0.6163000106811524, + "Acc.house": 0.5281999969482422, + "Acc.sea": 0.8770999908447266, + "Acc.mirror": 0.7298999786376953, + "Acc.rug": 0.7001999664306641, + "Acc.field": 0.5008000183105469, + "Acc.armchair": 0.47709999084472654, + "Acc.seat": 0.8402999877929688, + "Acc.fence": 0.5359000015258789, + "Acc.desk": 0.6011999893188477, + "Acc.rock": 0.6147999954223633, + "Acc.wardrobe": 0.6830999755859375, + "Acc.lamp": 0.6054000091552735, + "Acc.bathtub": 0.8590000152587891, + "Acc.railing": 0.5399000167846679, + "Acc.cushion": 0.6825, + "Acc.base": 0.40189998626708984, + "Acc.box": 0.29520000457763673, + "Acc.column": 0.5256999969482422, + "Acc.signboard": 0.43470001220703125, + "Acc.chest of drawers": 0.5238999938964843, + "Acc.counter": 0.5120000076293946, + "Acc.sand": 0.5997000122070313, + "Acc.sink": 0.7525, + "Acc.skyscraper": 0.5622000122070312, + "Acc.fireplace": 0.8231999969482422, + "Acc.refrigerator": 0.7919999694824219, + "Acc.grandstand": 0.7140000152587891, + "Acc.path": 0.3040999984741211, + "Acc.stairs": 0.20190000534057617, + "Acc.runway": 0.8254000091552735, + "Acc.case": 0.7576999664306641, + "Acc.pool table": 0.9583999633789062, + "Acc.pillow": 0.5668999862670898, + "Acc.screen door": 0.7630000305175781, + "Acc.stairway": 0.46650001525878904, + "Acc.river": 0.28079999923706056, + "Acc.bridge": 0.6530000305175782, + "Acc.bookcase": 0.5083000183105468, + "Acc.blind": 0.43200000762939456, + "Acc.coffee table": 0.7491000366210937, + "Acc.toilet": 0.8712999725341797, + "Acc.flower": 0.523499984741211, + "Acc.book": 0.6, + "Acc.hill": 0.1972999954223633, + "Acc.bench": 0.48470001220703124, + "Acc.countertop": 0.7191000366210938, + "Acc.stove": 0.8091000366210938, + "Acc.palm": 0.5583000183105469, + "Acc.kitchen island": 0.6166999816894532, + "Acc.computer": 0.844800033569336, + "Acc.swivel chair": 0.5970000076293945, + "Acc.boat": 0.6575, + "Acc.bar": 0.610099983215332, + "Acc.arcade machine": 0.38869998931884764, + "Acc.hovel": 0.24309999465942383, + "Acc.bus": 0.9383999633789063, + "Acc.towel": 0.7972000122070313, + "Acc.light": 0.3497999954223633, + "Acc.truck": 0.2140999984741211, + "Acc.tower": 0.21799999237060547, + "Acc.chandelier": 0.7820999908447266, + "Acc.awning": 0.23079999923706054, + "Acc.streetlight": 0.1956999969482422, + "Acc.booth": 0.3736000061035156, + "Acc.television receiver": 0.7063999938964843, + "Acc.airplane": 0.6181000137329101, + "Acc.dirt track": 0.22260000228881835, + "Acc.apparel": 0.44299999237060544, + 
"Acc.pole": 0.11760000228881835, + "Acc.land": 0.05699999809265137, + "Acc.bannister": 0.11829999923706054, + "Acc.escalator": 0.7233000183105469, + "Acc.ottoman": 0.5888999938964844, + "Acc.bottle": 0.2609000015258789, + "Acc.buffet": 0.3845000076293945, + "Acc.poster": 0.16079999923706054, + "Acc.stage": 0.17979999542236327, + "Acc.van": 0.40970001220703123, + "Acc.ship": 0.0909000015258789, + "Acc.fountain": 0.2325, + "Acc.conveyer belt": 0.9144000244140625, + "Acc.canopy": 0.17819999694824218, + "Acc.washer": 0.7290000152587891, + "Acc.plaything": 0.37349998474121093, + "Acc.swimming pool": 0.8630000305175781, + "Acc.stool": 0.44779998779296876, + "Acc.barrel": 0.6316999816894531, + "Acc.basket": 0.4341999816894531, + "Acc.waterfall": 0.5581999969482422, + "Acc.tent": 0.9572000122070312, + "Acc.bag": 0.18219999313354493, + "Acc.minibike": 0.6626000213623047, + "Acc.cradle": 0.9495999908447266, + "Acc.oven": 0.4384000015258789, + "Acc.ball": 0.6390999984741211, + "Acc.food": 0.47959999084472654, + "Acc.step": 0.15369999885559082, + "Acc.tank": 0.5986999893188476, + "Acc.trade name": 0.17020000457763673, + "Acc.microwave": 0.7668000030517578, + "Acc.pot": 0.49900001525878906, + "Acc.animal": 0.6145000076293945, + "Acc.bicycle": 0.6079999923706054, + "Acc.lake": 0.41869998931884767, + "Acc.dishwasher": 0.6981999969482422, + "Acc.screen": 0.81, + "Acc.blanket": 0.136899995803833, + "Acc.sculpture": 0.6820999908447266, + "Acc.hood": 0.615, + "Acc.sconce": 0.3163999938964844, + "Acc.vase": 0.4115999984741211, + "Acc.traffic light": 0.2805999946594238, + "Acc.tray": 0.04059999942779541, + "Acc.ashcan": 0.4741999816894531, + "Acc.fan": 0.5329999923706055, + "Acc.pier": 0.3865999984741211, + "Acc.crt screen": 0.03279999971389771, + "Acc.plate": 0.6320999908447266, + "Acc.monitor": 0.5838999938964844, + "Acc.bulletin board": 0.3128000068664551, + "Acc.shower": 0.03119999885559082, + "Acc.radiator": 0.6229999923706054, + "Acc.glass": 0.10399999618530273, + "Acc.clock": 0.2643000030517578, + "Acc.flag": 0.4109999847412109 + } + }, + "107": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8161, + "mIoU": 0.45280000000000004, + "mAcc": 0.5687, + "IoU.wall": 0.7506999969482422, + "IoU.building": 0.8173000335693359, + "IoU.sky": 0.9362000274658203, + "IoU.floor": 0.7938999938964844, + "IoU.tree": 0.7356999969482422, + "IoU.ceiling": 0.8227999877929687, + "IoU.road": 0.8237999725341797, + "IoU.bed ": 0.8630999755859375, + "IoU.windowpane": 0.590999984741211, + "IoU.grass": 0.6669000244140625, + "IoU.cabinet": 0.5886000061035156, + "IoU.sidewalk": 0.6266999816894532, + "IoU.person": 0.7712000274658203, + "IoU.earth": 0.35150001525878904, + "IoU.door": 0.445, + "IoU.table": 0.5495000076293945, + "IoU.mountain": 0.580999984741211, + "IoU.plant": 0.5011999893188477, + "IoU.curtain": 0.7106999969482422, + "IoU.chair": 0.5079999923706054, + "IoU.car": 0.8268000030517578, + "IoU.water": 0.5529000091552735, + "IoU.painting": 0.6938999938964844, + "IoU.sofa": 0.63, + "IoU.shelf": 0.39919998168945314, + "IoU.house": 0.5145000076293945, + "IoU.sea": 0.6213000106811524, + "IoU.mirror": 0.6436000061035156, + "IoU.rug": 0.6077999877929687, + "IoU.field": 0.30590000152587893, + "IoU.armchair": 0.38110000610351563, + "IoU.seat": 0.6111999893188477, + "IoU.fence": 0.29420000076293945, + "IoU.desk": 0.4565999984741211, + "IoU.rock": 0.41400001525878904, + "IoU.wardrobe": 0.4816999816894531, + "IoU.lamp": 0.5356000137329101, + 
"IoU.bathtub": 0.7538999938964843, + "IoU.railing": 0.3395999908447266, + "IoU.cushion": 0.5222999954223633, + "IoU.base": 0.27329999923706055, + "IoU.box": 0.251200008392334, + "IoU.column": 0.4416999816894531, + "IoU.signboard": 0.3295999908447266, + "IoU.chest of drawers": 0.31020000457763675, + "IoU.counter": 0.30059999465942383, + "IoU.sand": 0.42150001525878905, + "IoU.sink": 0.647300033569336, + "IoU.skyscraper": 0.540099983215332, + "IoU.fireplace": 0.6815000152587891, + "IoU.refrigerator": 0.7591999816894531, + "IoU.grandstand": 0.49139999389648437, + "IoU.path": 0.22840000152587892, + "IoU.stairs": 0.2518000030517578, + "IoU.runway": 0.7026000213623047, + "IoU.case": 0.5588000106811524, + "IoU.pool table": 0.9112999725341797, + "IoU.pillow": 0.5416999816894531, + "IoU.screen door": 0.4102999877929687, + "IoU.stairway": 0.31879999160766603, + "IoU.river": 0.16450000762939454, + "IoU.bridge": 0.6354999923706055, + "IoU.bookcase": 0.35830001831054686, + "IoU.blind": 0.4284000015258789, + "IoU.coffee table": 0.5911999893188477, + "IoU.toilet": 0.7926999664306641, + "IoU.flower": 0.3007999992370605, + "IoU.book": 0.4218000030517578, + "IoU.hill": 0.11270000457763672, + "IoU.bench": 0.45029998779296876, + "IoU.countertop": 0.479900016784668, + "IoU.stove": 0.6683000183105469, + "IoU.palm": 0.47049999237060547, + "IoU.kitchen island": 0.3427999877929688, + "IoU.computer": 0.7118000030517578, + "IoU.swivel chair": 0.45619998931884764, + "IoU.boat": 0.7118000030517578, + "IoU.bar": 0.46810001373291016, + "IoU.arcade machine": 0.34310001373291016, + "IoU.hovel": 0.316200008392334, + "IoU.bus": 0.8473000335693359, + "IoU.towel": 0.5409000015258789, + "IoU.light": 0.3057999992370605, + "IoU.truck": 0.30139999389648436, + "IoU.tower": 0.3065999984741211, + "IoU.chandelier": 0.6208000183105469, + "IoU.awning": 0.3372000122070313, + "IoU.streetlight": 0.12859999656677246, + "IoU.booth": 0.3103000068664551, + "IoU.television receiver": 0.6308000183105469, + "IoU.airplane": 0.5704000091552734, + "IoU.dirt track": 0.05679999828338623, + "IoU.apparel": 0.31090000152587893, + "IoU.pole": 0.13329999923706054, + "IoU.land": 0.04880000114440918, + "IoU.bannister": 0.114399995803833, + "IoU.escalator": 0.23370000839233399, + "IoU.ottoman": 0.3809000015258789, + "IoU.bottle": 0.2669000053405762, + "IoU.buffet": 0.36970001220703125, + "IoU.poster": 0.316200008392334, + "IoU.stage": 0.16920000076293945, + "IoU.van": 0.445099983215332, + "IoU.ship": 0.6608999633789062, + "IoU.fountain": 0.18719999313354493, + "IoU.conveyer belt": 0.6861000061035156, + "IoU.canopy": 0.21540000915527344, + "IoU.washer": 0.7237999725341797, + "IoU.plaything": 0.2427000045776367, + "IoU.swimming pool": 0.6283000183105468, + "IoU.stool": 0.231299991607666, + "IoU.barrel": 0.4645999908447266, + "IoU.basket": 0.19450000762939454, + "IoU.waterfall": 0.6179999923706054, + "IoU.tent": 0.9094999694824218, + "IoU.bag": 0.09949999809265136, + "IoU.minibike": 0.596500015258789, + "IoU.cradle": 0.7575, + "IoU.oven": 0.15880000114440918, + "IoU.ball": 0.41939998626708985, + "IoU.food": 0.4702000045776367, + "IoU.step": 0.059600000381469724, + "IoU.tank": 0.544000015258789, + "IoU.trade name": 0.22709999084472657, + "IoU.microwave": 0.3502000045776367, + "IoU.pot": 0.33630001068115234, + "IoU.animal": 0.5893000030517578, + "IoU.bicycle": 0.5145999908447265, + "IoU.lake": 0.6263000106811524, + "IoU.dishwasher": 0.5263999938964844, + "IoU.screen": 0.6022000122070312, + "IoU.blanket": 0.07829999923706055, + "IoU.sculpture": 
0.42400001525878905, + "IoU.hood": 0.4981999969482422, + "IoU.sconce": 0.35220001220703123, + "IoU.vase": 0.26209999084472657, + "IoU.traffic light": 0.24520000457763672, + "IoU.tray": 0.03769999980926514, + "IoU.ashcan": 0.30190000534057615, + "IoU.fan": 0.482400016784668, + "IoU.pier": 0.27280000686645506, + "IoU.crt screen": 0.023299999237060547, + "IoU.plate": 0.44040000915527344, + "IoU.monitor": 0.03630000114440918, + "IoU.bulletin board": 0.3195000076293945, + "IoU.shower": 0.0010999999940395355, + "IoU.radiator": 0.5247000122070312, + "IoU.glass": 0.065, + "IoU.clock": 0.17709999084472655, + "IoU.flag": 0.3788999938964844, + "Acc.wall": 0.8726000213623046, + "Acc.building": 0.9186000061035157, + "Acc.sky": 0.9769999694824218, + "Acc.floor": 0.9006999969482422, + "Acc.tree": 0.864800033569336, + "Acc.ceiling": 0.9111000061035156, + "Acc.road": 0.898499984741211, + "Acc.bed ": 0.9480999755859375, + "Acc.windowpane": 0.7427999877929687, + "Acc.grass": 0.8080000305175781, + "Acc.cabinet": 0.7081999969482422, + "Acc.sidewalk": 0.7719999694824219, + "Acc.person": 0.9068000030517578, + "Acc.earth": 0.4990999984741211, + "Acc.door": 0.6234000015258789, + "Acc.table": 0.705999984741211, + "Acc.mountain": 0.7056999969482421, + "Acc.plant": 0.6163999938964844, + "Acc.curtain": 0.8302999877929688, + "Acc.chair": 0.6358000183105469, + "Acc.car": 0.9162000274658203, + "Acc.water": 0.7088999938964844, + "Acc.painting": 0.8348999786376953, + "Acc.sofa": 0.8156999969482421, + "Acc.shelf": 0.6008000183105469, + "Acc.house": 0.6712000274658203, + "Acc.sea": 0.8405000305175782, + "Acc.mirror": 0.7233000183105469, + "Acc.rug": 0.6719999694824219, + "Acc.field": 0.5202000045776367, + "Acc.armchair": 0.5566999816894531, + "Acc.seat": 0.8130000305175781, + "Acc.fence": 0.3875, + "Acc.desk": 0.6461000061035156, + "Acc.rock": 0.6526000213623047, + "Acc.wardrobe": 0.6379000091552735, + "Acc.lamp": 0.6651000213623047, + "Acc.bathtub": 0.8258000183105468, + "Acc.railing": 0.5036000061035156, + "Acc.cushion": 0.6475, + "Acc.base": 0.43220001220703125, + "Acc.box": 0.3525, + "Acc.column": 0.5620000076293945, + "Acc.signboard": 0.42880001068115237, + "Acc.chest of drawers": 0.5515000152587891, + "Acc.counter": 0.4279999923706055, + "Acc.sand": 0.6120000076293945, + "Acc.sink": 0.7343000030517578, + "Acc.skyscraper": 0.615999984741211, + "Acc.fireplace": 0.905, + "Acc.refrigerator": 0.8548999786376953, + "Acc.grandstand": 0.7673999786376953, + "Acc.path": 0.30510000228881834, + "Acc.stairs": 0.3195000076293945, + "Acc.runway": 0.8283000183105469, + "Acc.case": 0.7480000305175781, + "Acc.pool table": 0.9608000183105468, + "Acc.pillow": 0.6537000274658203, + "Acc.screen door": 0.4884999847412109, + "Acc.stairway": 0.4366999816894531, + "Acc.river": 0.31020000457763675, + "Acc.bridge": 0.7681999969482421, + "Acc.bookcase": 0.5811000061035156, + "Acc.blind": 0.494900016784668, + "Acc.coffee table": 0.7626999664306641, + "Acc.toilet": 0.8751000213623047, + "Acc.flower": 0.49540000915527344, + "Acc.book": 0.5793000030517578, + "Acc.hill": 0.2231999969482422, + "Acc.bench": 0.543499984741211, + "Acc.countertop": 0.6252000045776367, + "Acc.stove": 0.7652999877929687, + "Acc.palm": 0.6811000061035156, + "Acc.kitchen island": 0.6537999725341797, + "Acc.computer": 0.8468000030517578, + "Acc.swivel chair": 0.6181999969482422, + "Acc.boat": 0.8444999694824219, + "Acc.bar": 0.6090999984741211, + "Acc.arcade machine": 0.36840000152587893, + "Acc.hovel": 0.5006000137329102, + "Acc.bus": 0.9327999877929688, + "Acc.towel": 
0.6781999969482422, + "Acc.light": 0.33490001678466796, + "Acc.truck": 0.42029998779296873, + "Acc.tower": 0.4218000030517578, + "Acc.chandelier": 0.7530000305175781, + "Acc.awning": 0.40330001831054685, + "Acc.streetlight": 0.15170000076293946, + "Acc.booth": 0.4361000061035156, + "Acc.television receiver": 0.7294000244140625, + "Acc.airplane": 0.6454000091552734, + "Acc.dirt track": 0.19149999618530272, + "Acc.apparel": 0.43189998626708986, + "Acc.pole": 0.1718000030517578, + "Acc.land": 0.06760000228881836, + "Acc.bannister": 0.15109999656677245, + "Acc.escalator": 0.27860000610351565, + "Acc.ottoman": 0.47869998931884766, + "Acc.bottle": 0.38310001373291014, + "Acc.buffet": 0.4093000030517578, + "Acc.poster": 0.37540000915527344, + "Acc.stage": 0.3189999961853027, + "Acc.van": 0.545, + "Acc.ship": 0.7820999908447266, + "Acc.fountain": 0.21260000228881837, + "Acc.conveyer belt": 0.8776000213623046, + "Acc.canopy": 0.32740001678466796, + "Acc.washer": 0.7334999847412109, + "Acc.plaything": 0.41779998779296873, + "Acc.swimming pool": 0.7883999633789063, + "Acc.stool": 0.276200008392334, + "Acc.barrel": 0.5820000076293945, + "Acc.basket": 0.26600000381469724, + "Acc.waterfall": 0.6758000183105469, + "Acc.tent": 0.9894999694824219, + "Acc.bag": 0.12159999847412109, + "Acc.minibike": 0.7269000244140625, + "Acc.cradle": 0.9722000122070312, + "Acc.oven": 0.450099983215332, + "Acc.ball": 0.5143000030517578, + "Acc.food": 0.5797999954223633, + "Acc.step": 0.07659999847412109, + "Acc.tank": 0.6365000152587891, + "Acc.trade name": 0.26059999465942385, + "Acc.microwave": 0.397599983215332, + "Acc.pot": 0.3925, + "Acc.animal": 0.6315000152587891, + "Acc.bicycle": 0.7162000274658203, + "Acc.lake": 0.6315999984741211, + "Acc.dishwasher": 0.6061999893188477, + "Acc.screen": 0.9166000366210938, + "Acc.blanket": 0.0925, + "Acc.sculpture": 0.6340999984741211, + "Acc.hood": 0.5720000076293945, + "Acc.sconce": 0.4179000091552734, + "Acc.vase": 0.33540000915527346, + "Acc.traffic light": 0.3788000106811523, + "Acc.tray": 0.050999999046325684, + "Acc.ashcan": 0.4143000030517578, + "Acc.fan": 0.6318999862670899, + "Acc.pier": 0.44540000915527345, + "Acc.crt screen": 0.06590000152587891, + "Acc.plate": 0.5643000030517578, + "Acc.monitor": 0.03869999885559082, + "Acc.bulletin board": 0.4709000015258789, + "Acc.shower": 0.010199999809265137, + "Acc.radiator": 0.6184000015258789, + "Acc.glass": 0.06769999980926514, + "Acc.clock": 0.20200000762939452, + "Acc.flag": 0.41400001525878904 + } + }, + "108": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8210999999999999, + "mIoU": 0.4623, + "mAcc": 0.5819, + "IoU.wall": 0.7583000183105468, + "IoU.building": 0.8240000152587891, + "IoU.sky": 0.9377999877929688, + "IoU.floor": 0.8065000152587891, + "IoU.tree": 0.7370999908447265, + "IoU.ceiling": 0.8208000183105468, + "IoU.road": 0.8291999816894531, + "IoU.bed ": 0.8695999908447266, + "IoU.windowpane": 0.5958000183105469, + "IoU.grass": 0.6798000335693359, + "IoU.cabinet": 0.5920000076293945, + "IoU.sidewalk": 0.64, + "IoU.person": 0.7805000305175781, + "IoU.earth": 0.36630001068115237, + "IoU.door": 0.4538999938964844, + "IoU.table": 0.5556000137329101, + "IoU.mountain": 0.606500015258789, + "IoU.plant": 0.4968000030517578, + "IoU.curtain": 0.7204000091552735, + "IoU.chair": 0.5088999938964843, + "IoU.car": 0.8351999664306641, + "IoU.water": 0.5152000045776367, + "IoU.painting": 0.6715000152587891, + "IoU.sofa": 
0.6683000183105469, + "IoU.shelf": 0.4034000015258789, + "IoU.house": 0.5156999969482422, + "IoU.sea": 0.6122000122070312, + "IoU.mirror": 0.6566000366210938, + "IoU.rug": 0.6311999893188477, + "IoU.field": 0.2934000015258789, + "IoU.armchair": 0.4075, + "IoU.seat": 0.629000015258789, + "IoU.fence": 0.3656000137329102, + "IoU.desk": 0.5156000137329102, + "IoU.rock": 0.4281000137329102, + "IoU.wardrobe": 0.5120000076293946, + "IoU.lamp": 0.5506000137329101, + "IoU.bathtub": 0.7766999816894531, + "IoU.railing": 0.34, + "IoU.cushion": 0.5233000183105468, + "IoU.base": 0.30059999465942383, + "IoU.box": 0.24469999313354493, + "IoU.column": 0.4759000015258789, + "IoU.signboard": 0.3547999954223633, + "IoU.chest of drawers": 0.3065999984741211, + "IoU.counter": 0.31860000610351563, + "IoU.sand": 0.43139999389648437, + "IoU.sink": 0.652300033569336, + "IoU.skyscraper": 0.6433999633789063, + "IoU.fireplace": 0.6877999877929688, + "IoU.refrigerator": 0.7616000366210938, + "IoU.grandstand": 0.48939998626708986, + "IoU.path": 0.2518000030517578, + "IoU.stairs": 0.2484000015258789, + "IoU.runway": 0.6288000106811523, + "IoU.case": 0.5886000061035156, + "IoU.pool table": 0.9187000274658204, + "IoU.pillow": 0.5672000122070312, + "IoU.screen door": 0.4725, + "IoU.stairway": 0.3127000045776367, + "IoU.river": 0.22649999618530273, + "IoU.bridge": 0.6419999694824219, + "IoU.bookcase": 0.3566999816894531, + "IoU.blind": 0.38349998474121094, + "IoU.coffee table": 0.5599000167846679, + "IoU.toilet": 0.7538999938964843, + "IoU.flower": 0.30700000762939456, + "IoU.book": 0.45290000915527345, + "IoU.hill": 0.1397000026702881, + "IoU.bench": 0.44970001220703126, + "IoU.countertop": 0.45970001220703127, + "IoU.stove": 0.6745999908447265, + "IoU.palm": 0.47650001525878904, + "IoU.kitchen island": 0.35560001373291017, + "IoU.computer": 0.6375999832153321, + "IoU.swivel chair": 0.5015999984741211, + "IoU.boat": 0.6870999908447266, + "IoU.bar": 0.42889999389648437, + "IoU.arcade machine": 0.38110000610351563, + "IoU.hovel": 0.4570999908447266, + "IoU.bus": 0.8791000366210937, + "IoU.towel": 0.6006000137329102, + "IoU.light": 0.2926000022888184, + "IoU.truck": 0.30760000228881834, + "IoU.tower": 0.320099983215332, + "IoU.chandelier": 0.6437999725341796, + "IoU.awning": 0.3920000076293945, + "IoU.streetlight": 0.14760000228881837, + "IoU.booth": 0.33610000610351565, + "IoU.television receiver": 0.642300033569336, + "IoU.airplane": 0.572599983215332, + "IoU.dirt track": 0.13189999580383302, + "IoU.apparel": 0.32419998168945313, + "IoU.pole": 0.16049999237060547, + "IoU.land": 0.024700000286102294, + "IoU.bannister": 0.10260000228881835, + "IoU.escalator": 0.38299999237060545, + "IoU.ottoman": 0.42459999084472655, + "IoU.bottle": 0.35319999694824217, + "IoU.buffet": 0.4656999969482422, + "IoU.poster": 0.13760000228881836, + "IoU.stage": 0.11279999732971191, + "IoU.van": 0.44009998321533206, + "IoU.ship": 0.41060001373291016, + "IoU.fountain": 0.18979999542236328, + "IoU.conveyer belt": 0.6969000244140625, + "IoU.canopy": 0.2581999969482422, + "IoU.washer": 0.7272000122070312, + "IoU.plaything": 0.2618000030517578, + "IoU.swimming pool": 0.769000015258789, + "IoU.stool": 0.3056999969482422, + "IoU.barrel": 0.37209999084472656, + "IoU.basket": 0.17389999389648436, + "IoU.waterfall": 0.7598999786376953, + "IoU.tent": 0.9494999694824219, + "IoU.bag": 0.08970000267028809, + "IoU.minibike": 0.5281999969482422, + "IoU.cradle": 0.7427999877929687, + "IoU.oven": 0.18270000457763672, + "IoU.ball": 0.43189998626708986, + "IoU.food": 
0.4229000091552734, + "IoU.step": 0.059800000190734864, + "IoU.tank": 0.5586000061035157, + "IoU.trade name": 0.26729999542236327, + "IoU.microwave": 0.4068000030517578, + "IoU.pot": 0.36520000457763674, + "IoU.animal": 0.5656999969482421, + "IoU.bicycle": 0.4518999862670898, + "IoU.lake": 0.5816999816894531, + "IoU.dishwasher": 0.4779000091552734, + "IoU.screen": 0.5918999862670898, + "IoU.blanket": 0.08100000381469727, + "IoU.sculpture": 0.4311000061035156, + "IoU.hood": 0.5036000061035156, + "IoU.sconce": 0.32540000915527345, + "IoU.vase": 0.29459999084472654, + "IoU.traffic light": 0.23190000534057617, + "IoU.tray": 0.028199999332427977, + "IoU.ashcan": 0.32099998474121094, + "IoU.fan": 0.5072000122070313, + "IoU.pier": 0.3085000038146973, + "IoU.crt screen": 0.03609999895095825, + "IoU.plate": 0.45130001068115233, + "IoU.monitor": 0.025499999523162842, + "IoU.bulletin board": 0.3561000061035156, + "IoU.shower": 0.002199999988079071, + "IoU.radiator": 0.5765999984741211, + "IoU.glass": 0.07119999885559082, + "IoU.clock": 0.2225, + "IoU.flag": 0.35150001525878904, + "Acc.wall": 0.8748999786376953, + "Acc.building": 0.927300033569336, + "Acc.sky": 0.9756999969482422, + "Acc.floor": 0.9029000091552735, + "Acc.tree": 0.8716999816894532, + "Acc.ceiling": 0.9101000213623047, + "Acc.road": 0.8944000244140625, + "Acc.bed ": 0.9468000030517578, + "Acc.windowpane": 0.7326000213623047, + "Acc.grass": 0.8204000091552734, + "Acc.cabinet": 0.7002999877929688, + "Acc.sidewalk": 0.7866999816894531, + "Acc.person": 0.9108999633789062, + "Acc.earth": 0.5106000137329102, + "Acc.door": 0.6322999954223633, + "Acc.table": 0.7186000061035156, + "Acc.mountain": 0.7070999908447265, + "Acc.plant": 0.610099983215332, + "Acc.curtain": 0.8383000183105469, + "Acc.chair": 0.6386999893188476, + "Acc.car": 0.9266000366210938, + "Acc.water": 0.6569000244140625, + "Acc.painting": 0.8508999633789063, + "Acc.sofa": 0.8133999633789063, + "Acc.shelf": 0.6234000015258789, + "Acc.house": 0.6580999755859375, + "Acc.sea": 0.8987999725341796, + "Acc.mirror": 0.7576000213623046, + "Acc.rug": 0.7044000244140625, + "Acc.field": 0.5034000015258789, + "Acc.armchair": 0.6341999816894531, + "Acc.seat": 0.825999984741211, + "Acc.fence": 0.49200000762939455, + "Acc.desk": 0.7088999938964844, + "Acc.rock": 0.6970999908447265, + "Acc.wardrobe": 0.6468000030517578, + "Acc.lamp": 0.6887000274658203, + "Acc.bathtub": 0.8443000030517578, + "Acc.railing": 0.4861000061035156, + "Acc.cushion": 0.658499984741211, + "Acc.base": 0.45240001678466796, + "Acc.box": 0.33990001678466797, + "Acc.column": 0.5945999908447266, + "Acc.signboard": 0.4690999984741211, + "Acc.chest of drawers": 0.5413000106811523, + "Acc.counter": 0.4291999816894531, + "Acc.sand": 0.6177000045776367, + "Acc.sink": 0.7341999816894531, + "Acc.skyscraper": 0.7280999755859375, + "Acc.fireplace": 0.9223999786376953, + "Acc.refrigerator": 0.8651000213623047, + "Acc.grandstand": 0.6847000122070312, + "Acc.path": 0.3240000152587891, + "Acc.stairs": 0.31739999771118166, + "Acc.runway": 0.798499984741211, + "Acc.case": 0.7615000152587891, + "Acc.pool table": 0.9633000183105469, + "Acc.pillow": 0.6902999877929688, + "Acc.screen door": 0.5081000137329101, + "Acc.stairway": 0.4513999938964844, + "Acc.river": 0.39880001068115234, + "Acc.bridge": 0.8305999755859375, + "Acc.bookcase": 0.5727000045776367, + "Acc.blind": 0.46630001068115234, + "Acc.coffee table": 0.7830000305175782, + "Acc.toilet": 0.8815000152587891, + "Acc.flower": 0.5231000137329102, + "Acc.book": 0.6325999832153321, + 
"Acc.hill": 0.24610000610351562, + "Acc.bench": 0.5286000061035157, + "Acc.countertop": 0.6054999923706055, + "Acc.stove": 0.7702999877929687, + "Acc.palm": 0.6729000091552735, + "Acc.kitchen island": 0.5936999893188477, + "Acc.computer": 0.7476000213623046, + "Acc.swivel chair": 0.7033999633789062, + "Acc.boat": 0.8519000244140625, + "Acc.bar": 0.56, + "Acc.arcade machine": 0.4102000045776367, + "Acc.hovel": 0.5668000030517578, + "Acc.bus": 0.9348000335693359, + "Acc.towel": 0.7283999633789062, + "Acc.light": 0.3218999862670898, + "Acc.truck": 0.4386999893188477, + "Acc.tower": 0.4109000015258789, + "Acc.chandelier": 0.7947000122070312, + "Acc.awning": 0.4881000137329102, + "Acc.streetlight": 0.17290000915527343, + "Acc.booth": 0.44049999237060544, + "Acc.television receiver": 0.7613999938964844, + "Acc.airplane": 0.6408000183105469, + "Acc.dirt track": 0.19370000839233398, + "Acc.apparel": 0.4633000183105469, + "Acc.pole": 0.20559999465942383, + "Acc.land": 0.036600000858306884, + "Acc.bannister": 0.1372000026702881, + "Acc.escalator": 0.4704000091552734, + "Acc.ottoman": 0.5297000122070312, + "Acc.bottle": 0.5511000061035156, + "Acc.buffet": 0.5747999954223633, + "Acc.poster": 0.18260000228881837, + "Acc.stage": 0.166299991607666, + "Acc.van": 0.5479999923706055, + "Acc.ship": 0.46119998931884765, + "Acc.fountain": 0.21399999618530274, + "Acc.conveyer belt": 0.8805000305175781, + "Acc.canopy": 0.40819999694824216, + "Acc.washer": 0.7326000213623047, + "Acc.plaything": 0.5070999908447266, + "Acc.swimming pool": 0.8602999877929688, + "Acc.stool": 0.4047999954223633, + "Acc.barrel": 0.6152999877929688, + "Acc.basket": 0.24629999160766602, + "Acc.waterfall": 0.821500015258789, + "Acc.tent": 0.9833999633789062, + "Acc.bag": 0.105, + "Acc.minibike": 0.6293000030517578, + "Acc.cradle": 0.9766000366210937, + "Acc.oven": 0.49520000457763674, + "Acc.ball": 0.4754999923706055, + "Acc.food": 0.5147999954223633, + "Acc.step": 0.07400000095367432, + "Acc.tank": 0.6381999969482421, + "Acc.trade name": 0.31909999847412107, + "Acc.microwave": 0.46630001068115234, + "Acc.pot": 0.4072999954223633, + "Acc.animal": 0.6222000122070312, + "Acc.bicycle": 0.691500015258789, + "Acc.lake": 0.6283000183105468, + "Acc.dishwasher": 0.6095999908447266, + "Acc.screen": 0.9491999816894531, + "Acc.blanket": 0.09399999618530273, + "Acc.sculpture": 0.662300033569336, + "Acc.hood": 0.5522999954223633, + "Acc.sconce": 0.4097999954223633, + "Acc.vase": 0.39180000305175783, + "Acc.traffic light": 0.3636000061035156, + "Acc.tray": 0.03670000076293945, + "Acc.ashcan": 0.445, + "Acc.fan": 0.635, + "Acc.pier": 0.46869998931884765, + "Acc.crt screen": 0.11130000114440917, + "Acc.plate": 0.6388000106811523, + "Acc.monitor": 0.038900001049041746, + "Acc.bulletin board": 0.5361000061035156, + "Acc.shower": 0.021400001049041748, + "Acc.radiator": 0.657300033569336, + "Acc.glass": 0.0746999979019165, + "Acc.clock": 0.2521999931335449, + "Acc.flag": 0.43389999389648437 + } + }, + "109": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8266, + "mIoU": 0.47659999999999997, + "mAcc": 0.5928, + "IoU.wall": 0.7655999755859375, + "IoU.building": 0.8251999664306641, + "IoU.sky": 0.9381999969482422, + "IoU.floor": 0.8122000122070312, + "IoU.tree": 0.7431999969482422, + "IoU.ceiling": 0.8277999877929687, + "IoU.road": 0.8341000366210938, + "IoU.bed ": 0.8723000335693359, + "IoU.windowpane": 0.6156999969482422, + "IoU.grass": 0.6948999786376953, + 
"IoU.cabinet": 0.5968999862670898, + "IoU.sidewalk": 0.6519999694824219, + "IoU.person": 0.7916000366210938, + "IoU.earth": 0.38, + "IoU.door": 0.4768000030517578, + "IoU.table": 0.5711999893188476, + "IoU.mountain": 0.5986000061035156, + "IoU.plant": 0.5122999954223633, + "IoU.curtain": 0.7191999816894531, + "IoU.chair": 0.5206999969482422, + "IoU.car": 0.8341999816894531, + "IoU.water": 0.523499984741211, + "IoU.painting": 0.6923000335693359, + "IoU.sofa": 0.7019999694824218, + "IoU.shelf": 0.41900001525878905, + "IoU.house": 0.5422999954223633, + "IoU.sea": 0.625, + "IoU.mirror": 0.6901999664306641, + "IoU.rug": 0.6533000183105468, + "IoU.field": 0.2918000030517578, + "IoU.armchair": 0.45619998931884764, + "IoU.seat": 0.6472000122070313, + "IoU.fence": 0.3984000015258789, + "IoU.desk": 0.44729999542236326, + "IoU.rock": 0.4677999877929688, + "IoU.wardrobe": 0.5368999862670898, + "IoU.lamp": 0.5313999938964844, + "IoU.bathtub": 0.7687000274658203, + "IoU.railing": 0.35869998931884767, + "IoU.cushion": 0.5402000045776367, + "IoU.base": 0.3064999961853027, + "IoU.box": 0.2275, + "IoU.column": 0.4722999954223633, + "IoU.signboard": 0.3454000091552734, + "IoU.chest of drawers": 0.3283000183105469, + "IoU.counter": 0.37139999389648437, + "IoU.sand": 0.5297999954223633, + "IoU.sink": 0.6644000244140625, + "IoU.skyscraper": 0.5936000061035156, + "IoU.fireplace": 0.7094999694824219, + "IoU.refrigerator": 0.7893000030517578, + "IoU.grandstand": 0.571500015258789, + "IoU.path": 0.21329999923706056, + "IoU.stairs": 0.27290000915527346, + "IoU.runway": 0.6272999954223633, + "IoU.case": 0.6211999893188477, + "IoU.pool table": 0.9269999694824219, + "IoU.pillow": 0.5704999923706054, + "IoU.screen door": 0.5070000076293946, + "IoU.stairway": 0.3265000152587891, + "IoU.river": 0.175, + "IoU.bridge": 0.6181000137329101, + "IoU.bookcase": 0.34709999084472654, + "IoU.blind": 0.400099983215332, + "IoU.coffee table": 0.5979000091552734, + "IoU.toilet": 0.7968000030517578, + "IoU.flower": 0.32240001678466795, + "IoU.book": 0.4456999969482422, + "IoU.hill": 0.11069999694824219, + "IoU.bench": 0.45279998779296876, + "IoU.countertop": 0.49709999084472656, + "IoU.stove": 0.6661000061035156, + "IoU.palm": 0.4672999954223633, + "IoU.kitchen island": 0.3793999862670898, + "IoU.computer": 0.6265000152587891, + "IoU.swivel chair": 0.45349998474121095, + "IoU.boat": 0.7245999908447266, + "IoU.bar": 0.5856999969482422, + "IoU.arcade machine": 0.49209999084472655, + "IoU.hovel": 0.4595000076293945, + "IoU.bus": 0.8976999664306641, + "IoU.towel": 0.6059000015258789, + "IoU.light": 0.33130001068115233, + "IoU.truck": 0.3193000030517578, + "IoU.tower": 0.3490999984741211, + "IoU.chandelier": 0.5970000076293945, + "IoU.awning": 0.2592000007629395, + "IoU.streetlight": 0.1347999954223633, + "IoU.booth": 0.4463999938964844, + "IoU.television receiver": 0.6959999847412109, + "IoU.airplane": 0.5736000061035156, + "IoU.dirt track": 0.04539999961853027, + "IoU.apparel": 0.30620000839233397, + "IoU.pole": 0.15449999809265136, + "IoU.land": 0.02869999885559082, + "IoU.bannister": 0.10369999885559082, + "IoU.escalator": 0.5090999984741211, + "IoU.ottoman": 0.41, + "IoU.bottle": 0.26399999618530273, + "IoU.buffet": 0.3422000122070312, + "IoU.poster": 0.18940000534057616, + "IoU.stage": 0.11470000267028808, + "IoU.van": 0.40810001373291016, + "IoU.ship": 0.7451000213623047, + "IoU.fountain": 0.17379999160766602, + "IoU.conveyer belt": 0.6872000122070312, + "IoU.canopy": 0.18290000915527344, + "IoU.washer": 0.7277999877929687, + 
"IoU.plaything": 0.25299999237060544, + "IoU.swimming pool": 0.685199966430664, + "IoU.stool": 0.3136000061035156, + "IoU.barrel": 0.47900001525878905, + "IoU.basket": 0.19889999389648438, + "IoU.waterfall": 0.6177000045776367, + "IoU.tent": 0.9519000244140625, + "IoU.bag": 0.10960000038146972, + "IoU.minibike": 0.658499984741211, + "IoU.cradle": 0.7970999908447266, + "IoU.oven": 0.1975, + "IoU.ball": 0.5047999954223633, + "IoU.food": 0.5015000152587891, + "IoU.step": 0.0525, + "IoU.tank": 0.5756999969482421, + "IoU.trade name": 0.2720000076293945, + "IoU.microwave": 0.4166999816894531, + "IoU.pot": 0.40650001525878904, + "IoU.animal": 0.5970000076293945, + "IoU.bicycle": 0.5288000106811523, + "IoU.lake": 0.5402999877929687, + "IoU.dishwasher": 0.5331000137329102, + "IoU.screen": 0.5036000061035156, + "IoU.blanket": 0.11380000114440918, + "IoU.sculpture": 0.47400001525878904, + "IoU.hood": 0.5254999923706055, + "IoU.sconce": 0.3057999992370605, + "IoU.vase": 0.28709999084472654, + "IoU.traffic light": 0.2368000030517578, + "IoU.tray": 0.023299999237060547, + "IoU.ashcan": 0.3654999923706055, + "IoU.fan": 0.5025999832153321, + "IoU.pier": 0.2893000030517578, + "IoU.crt screen": 0.05860000133514404, + "IoU.plate": 0.48130001068115236, + "IoU.monitor": 0.30590000152587893, + "IoU.bulletin board": 0.44790000915527345, + "IoU.shower": 0.0005000000074505806, + "IoU.radiator": 0.5806000137329101, + "IoU.glass": 0.07789999961853028, + "IoU.clock": 0.25079999923706053, + "IoU.flag": 0.39869998931884765, + "Acc.wall": 0.8769000244140625, + "Acc.building": 0.9294000244140626, + "Acc.sky": 0.9762999725341797, + "Acc.floor": 0.9070999908447266, + "Acc.tree": 0.8673000335693359, + "Acc.ceiling": 0.9161000061035156, + "Acc.road": 0.899000015258789, + "Acc.bed ": 0.9487999725341797, + "Acc.windowpane": 0.7641000366210937, + "Acc.grass": 0.8097000122070312, + "Acc.cabinet": 0.7080000305175781, + "Acc.sidewalk": 0.7905000305175781, + "Acc.person": 0.9119999694824219, + "Acc.earth": 0.5620000076293945, + "Acc.door": 0.6631999969482422, + "Acc.table": 0.7295999908447266, + "Acc.mountain": 0.7001999664306641, + "Acc.plant": 0.6341999816894531, + "Acc.curtain": 0.8434999847412109, + "Acc.chair": 0.6441999816894531, + "Acc.car": 0.9276000213623047, + "Acc.water": 0.6809999847412109, + "Acc.painting": 0.8601000213623047, + "Acc.sofa": 0.8480000305175781, + "Acc.shelf": 0.622599983215332, + "Acc.house": 0.6904000091552734, + "Acc.sea": 0.8670999908447266, + "Acc.mirror": 0.7780000305175782, + "Acc.rug": 0.715, + "Acc.field": 0.4665999984741211, + "Acc.armchair": 0.6683000183105469, + "Acc.seat": 0.8198999786376953, + "Acc.fence": 0.5543000030517579, + "Acc.desk": 0.6470999908447266, + "Acc.rock": 0.7137999725341797, + "Acc.wardrobe": 0.6604000091552734, + "Acc.lamp": 0.6655999755859375, + "Acc.bathtub": 0.8398999786376953, + "Acc.railing": 0.5177000045776368, + "Acc.cushion": 0.6698999786376953, + "Acc.base": 0.47819999694824217, + "Acc.box": 0.30770000457763674, + "Acc.column": 0.580999984741211, + "Acc.signboard": 0.4588999938964844, + "Acc.chest of drawers": 0.5738000106811524, + "Acc.counter": 0.47900001525878905, + "Acc.sand": 0.7066000366210937, + "Acc.sink": 0.7318000030517579, + "Acc.skyscraper": 0.678499984741211, + "Acc.fireplace": 0.8901000213623047, + "Acc.refrigerator": 0.8619999694824219, + "Acc.grandstand": 0.6976000213623047, + "Acc.path": 0.29670000076293945, + "Acc.stairs": 0.36279998779296874, + "Acc.runway": 0.8006999969482422, + "Acc.case": 0.7908000183105469, + "Acc.pool table": 
0.9640000152587891, + "Acc.pillow": 0.6875, + "Acc.screen door": 0.5463000106811523, + "Acc.stairway": 0.45930000305175783, + "Acc.river": 0.3061000061035156, + "Acc.bridge": 0.7361000061035157, + "Acc.bookcase": 0.5363999938964844, + "Acc.blind": 0.47150001525878904, + "Acc.coffee table": 0.7781999969482422, + "Acc.toilet": 0.8856999969482422, + "Acc.flower": 0.4990999984741211, + "Acc.book": 0.6372000122070313, + "Acc.hill": 0.22790000915527345, + "Acc.bench": 0.555, + "Acc.countertop": 0.6523999786376953, + "Acc.stove": 0.7559999847412109, + "Acc.palm": 0.6987000274658203, + "Acc.kitchen island": 0.6694000244140625, + "Acc.computer": 0.7463999938964844, + "Acc.swivel chair": 0.6840000152587891, + "Acc.boat": 0.8490000152587891, + "Acc.bar": 0.7194000244140625, + "Acc.arcade machine": 0.5336999893188477, + "Acc.hovel": 0.5752999877929688, + "Acc.bus": 0.9483999633789062, + "Acc.towel": 0.7370999908447265, + "Acc.light": 0.367400016784668, + "Acc.truck": 0.44869998931884764, + "Acc.tower": 0.48720001220703124, + "Acc.chandelier": 0.7718000030517578, + "Acc.awning": 0.30770000457763674, + "Acc.streetlight": 0.15930000305175782, + "Acc.booth": 0.4925, + "Acc.television receiver": 0.8413999938964843, + "Acc.airplane": 0.6433999633789063, + "Acc.dirt track": 0.1884000015258789, + "Acc.apparel": 0.4281000137329102, + "Acc.pole": 0.20049999237060548, + "Acc.land": 0.03940000057220459, + "Acc.bannister": 0.13390000343322753, + "Acc.escalator": 0.7627999877929688, + "Acc.ottoman": 0.5618000030517578, + "Acc.bottle": 0.36380001068115236, + "Acc.buffet": 0.3856999969482422, + "Acc.poster": 0.23280000686645508, + "Acc.stage": 0.1828000068664551, + "Acc.van": 0.5161000061035156, + "Acc.ship": 0.8490000152587891, + "Acc.fountain": 0.20329999923706055, + "Acc.conveyer belt": 0.8738999938964844, + "Acc.canopy": 0.28280000686645507, + "Acc.washer": 0.7319999694824219, + "Acc.plaything": 0.3852000045776367, + "Acc.swimming pool": 0.8312000274658203, + "Acc.stool": 0.3661999893188477, + "Acc.barrel": 0.6423999786376953, + "Acc.basket": 0.25879999160766604, + "Acc.waterfall": 0.6763999938964844, + "Acc.tent": 0.9833000183105469, + "Acc.bag": 0.1290999984741211, + "Acc.minibike": 0.7726000213623047, + "Acc.cradle": 0.9672000122070312, + "Acc.oven": 0.5445999908447265, + "Acc.ball": 0.5495000076293945, + "Acc.food": 0.6116999816894532, + "Acc.step": 0.06869999885559082, + "Acc.tank": 0.6509999847412109, + "Acc.trade name": 0.3085000038146973, + "Acc.microwave": 0.48119998931884767, + "Acc.pot": 0.4681999969482422, + "Acc.animal": 0.6576000213623047, + "Acc.bicycle": 0.7212999725341797, + "Acc.lake": 0.5790999984741211, + "Acc.dishwasher": 0.6487000274658203, + "Acc.screen": 0.7430000305175781, + "Acc.blanket": 0.12899999618530272, + "Acc.sculpture": 0.6177000045776367, + "Acc.hood": 0.586500015258789, + "Acc.sconce": 0.3965999984741211, + "Acc.vase": 0.37340000152587893, + "Acc.traffic light": 0.3477000045776367, + "Acc.tray": 0.03119999885559082, + "Acc.ashcan": 0.49139999389648437, + "Acc.fan": 0.6470999908447266, + "Acc.pier": 0.48639999389648436, + "Acc.crt screen": 0.1115999984741211, + "Acc.plate": 0.6455999755859375, + "Acc.monitor": 0.561500015258789, + "Acc.bulletin board": 0.614900016784668, + "Acc.shower": 0.004799999892711639, + "Acc.radiator": 0.6963999938964843, + "Acc.glass": 0.08050000190734863, + "Acc.clock": 0.29469999313354495, + "Acc.flag": 0.44459999084472657 + } + }, + "110": { + "config": 
"configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8287, + "mIoU": 0.47369999999999995, + "mAcc": 0.5863, + "IoU.wall": 0.7688999938964843, + "IoU.building": 0.8277999877929687, + "IoU.sky": 0.9388999938964844, + "IoU.floor": 0.8098000335693359, + "IoU.tree": 0.7541999816894531, + "IoU.ceiling": 0.83, + "IoU.road": 0.8262000274658203, + "IoU.bed ": 0.8766000366210938, + "IoU.windowpane": 0.6195999908447266, + "IoU.grass": 0.7131999969482422, + "IoU.cabinet": 0.6111999893188477, + "IoU.sidewalk": 0.6315000152587891, + "IoU.person": 0.795, + "IoU.earth": 0.382599983215332, + "IoU.door": 0.49130001068115237, + "IoU.table": 0.5715999984741211, + "IoU.mountain": 0.579900016784668, + "IoU.plant": 0.5366999816894531, + "IoU.curtain": 0.7125, + "IoU.chair": 0.5429000091552735, + "IoU.car": 0.8379000091552734, + "IoU.water": 0.5806000137329101, + "IoU.painting": 0.6826000213623047, + "IoU.sofa": 0.6754000091552734, + "IoU.shelf": 0.412400016784668, + "IoU.house": 0.515099983215332, + "IoU.sea": 0.7162000274658203, + "IoU.mirror": 0.6706999969482422, + "IoU.rug": 0.6256999969482422, + "IoU.field": 0.3490999984741211, + "IoU.armchair": 0.4379999923706055, + "IoU.seat": 0.6601000213623047, + "IoU.fence": 0.4647000122070313, + "IoU.desk": 0.4766999816894531, + "IoU.rock": 0.44540000915527345, + "IoU.wardrobe": 0.5561000061035156, + "IoU.lamp": 0.537400016784668, + "IoU.bathtub": 0.8122000122070312, + "IoU.railing": 0.3529999923706055, + "IoU.cushion": 0.5616999816894531, + "IoU.base": 0.31610000610351563, + "IoU.box": 0.228799991607666, + "IoU.column": 0.45549999237060546, + "IoU.signboard": 0.35330001831054686, + "IoU.chest of drawers": 0.311299991607666, + "IoU.counter": 0.3681000137329102, + "IoU.sand": 0.3863999938964844, + "IoU.sink": 0.6619999694824219, + "IoU.skyscraper": 0.5143000030517578, + "IoU.fireplace": 0.7095999908447266, + "IoU.refrigerator": 0.7765000152587891, + "IoU.grandstand": 0.582599983215332, + "IoU.path": 0.22290000915527344, + "IoU.stairs": 0.2804000091552734, + "IoU.runway": 0.615099983215332, + "IoU.case": 0.5968000030517578, + "IoU.pool table": 0.9258000183105469, + "IoU.pillow": 0.5597999954223633, + "IoU.screen door": 0.6022999954223632, + "IoU.stairway": 0.3372000122070313, + "IoU.river": 0.20399999618530273, + "IoU.bridge": 0.6372999954223633, + "IoU.bookcase": 0.33470001220703127, + "IoU.blind": 0.3965000152587891, + "IoU.coffee table": 0.5995000076293945, + "IoU.toilet": 0.7537999725341797, + "IoU.flower": 0.3256999969482422, + "IoU.book": 0.4368000030517578, + "IoU.hill": 0.1402999973297119, + "IoU.bench": 0.42369998931884767, + "IoU.countertop": 0.5638999938964844, + "IoU.stove": 0.696500015258789, + "IoU.palm": 0.49080001831054687, + "IoU.kitchen island": 0.42009998321533204, + "IoU.computer": 0.6412999725341797, + "IoU.swivel chair": 0.49990001678466794, + "IoU.boat": 0.6683999633789063, + "IoU.bar": 0.5059999847412109, + "IoU.arcade machine": 0.5513000106811523, + "IoU.hovel": 0.5297999954223633, + "IoU.bus": 0.8930000305175781, + "IoU.towel": 0.6361999893188477, + "IoU.light": 0.3259000015258789, + "IoU.truck": 0.21100000381469727, + "IoU.tower": 0.24829999923706056, + "IoU.chandelier": 0.6108000183105469, + "IoU.awning": 0.3079999923706055, + "IoU.streetlight": 0.1425, + "IoU.booth": 0.43779998779296875, + "IoU.television receiver": 0.6987999725341797, + "IoU.airplane": 0.5925, + "IoU.dirt track": 0.03369999885559082, + "IoU.apparel": 0.3145000076293945, + "IoU.pole": 
0.16399999618530273, + "IoU.land": 0.008600000143051147, + "IoU.bannister": 0.07650000095367432, + "IoU.escalator": 0.5018000030517578, + "IoU.ottoman": 0.45419998168945314, + "IoU.bottle": 0.3375, + "IoU.buffet": 0.34180000305175784, + "IoU.poster": 0.16600000381469726, + "IoU.stage": 0.16360000610351563, + "IoU.van": 0.3959000015258789, + "IoU.ship": 0.09670000076293946, + "IoU.fountain": 0.20600000381469727, + "IoU.conveyer belt": 0.7212999725341797, + "IoU.canopy": 0.16969999313354492, + "IoU.washer": 0.7106999969482422, + "IoU.plaything": 0.325, + "IoU.swimming pool": 0.7333999633789062, + "IoU.stool": 0.25579999923706054, + "IoU.barrel": 0.5336000061035157, + "IoU.basket": 0.22940000534057617, + "IoU.waterfall": 0.5320000076293945, + "IoU.tent": 0.9261000061035156, + "IoU.bag": 0.11630000114440918, + "IoU.minibike": 0.5777999877929687, + "IoU.cradle": 0.7869999694824219, + "IoU.oven": 0.4418999862670898, + "IoU.ball": 0.29760000228881833, + "IoU.food": 0.5565000152587891, + "IoU.step": 0.04110000133514404, + "IoU.tank": 0.5802000045776368, + "IoU.trade name": 0.30190000534057615, + "IoU.microwave": 0.7726999664306641, + "IoU.pot": 0.39610000610351564, + "IoU.animal": 0.5895999908447266, + "IoU.bicycle": 0.47689998626708985, + "IoU.lake": 0.010800000429153443, + "IoU.dishwasher": 0.5408000183105469, + "IoU.screen": 0.49720001220703125, + "IoU.blanket": 0.13920000076293945, + "IoU.sculpture": 0.502400016784668, + "IoU.hood": 0.5750999832153321, + "IoU.sconce": 0.3145000076293945, + "IoU.vase": 0.2996999931335449, + "IoU.traffic light": 0.2617000007629395, + "IoU.tray": 0.009800000190734863, + "IoU.ashcan": 0.3716999816894531, + "IoU.fan": 0.4775, + "IoU.pier": 0.33919998168945314, + "IoU.crt screen": 0.043299999237060544, + "IoU.plate": 0.48520000457763673, + "IoU.monitor": 0.24879999160766603, + "IoU.bulletin board": 0.3659999847412109, + "IoU.shower": 0.010499999523162842, + "IoU.radiator": 0.5363999938964844, + "IoU.glass": 0.10289999961853027, + "IoU.clock": 0.28389999389648435, + "IoU.flag": 0.3783000183105469, + "Acc.wall": 0.8769999694824219, + "Acc.building": 0.9265000152587891, + "Acc.sky": 0.9744999694824219, + "Acc.floor": 0.9068000030517578, + "Acc.tree": 0.8718000030517579, + "Acc.ceiling": 0.9219000244140625, + "Acc.road": 0.8919000244140625, + "Acc.bed ": 0.9475, + "Acc.windowpane": 0.7630000305175781, + "Acc.grass": 0.836500015258789, + "Acc.cabinet": 0.7151999664306641, + "Acc.sidewalk": 0.7841999816894532, + "Acc.person": 0.9116999816894531, + "Acc.earth": 0.5790999984741211, + "Acc.door": 0.6877999877929688, + "Acc.table": 0.7426000213623047, + "Acc.mountain": 0.6962000274658203, + "Acc.plant": 0.6558999633789062, + "Acc.curtain": 0.8476000213623047, + "Acc.chair": 0.6783999633789063, + "Acc.car": 0.9258000183105469, + "Acc.water": 0.7787000274658203, + "Acc.painting": 0.8494000244140625, + "Acc.sofa": 0.8129000091552734, + "Acc.shelf": 0.5879000091552734, + "Acc.house": 0.7236000061035156, + "Acc.sea": 0.8945999908447265, + "Acc.mirror": 0.759000015258789, + "Acc.rug": 0.6933000183105469, + "Acc.field": 0.4961999893188477, + "Acc.armchair": 0.67, + "Acc.seat": 0.8261000061035156, + "Acc.fence": 0.6358000183105469, + "Acc.desk": 0.6798000335693359, + "Acc.rock": 0.6637000274658204, + "Acc.wardrobe": 0.6930999755859375, + "Acc.lamp": 0.6752999877929687, + "Acc.bathtub": 0.8794000244140625, + "Acc.railing": 0.500999984741211, + "Acc.cushion": 0.7076000213623047, + "Acc.base": 0.5225999832153321, + "Acc.box": 0.31739999771118166, + "Acc.column": 0.5770000076293945, + 
"Acc.signboard": 0.47150001525878904, + "Acc.chest of drawers": 0.5429999923706055, + "Acc.counter": 0.4829999923706055, + "Acc.sand": 0.5563999938964844, + "Acc.sink": 0.7384999847412109, + "Acc.skyscraper": 0.6102000045776367, + "Acc.fireplace": 0.8986000061035156, + "Acc.refrigerator": 0.8737999725341797, + "Acc.grandstand": 0.7091999816894531, + "Acc.path": 0.2986000061035156, + "Acc.stairs": 0.3656000137329102, + "Acc.runway": 0.7816000366210938, + "Acc.case": 0.7418000030517579, + "Acc.pool table": 0.9645999908447266, + "Acc.pillow": 0.6905999755859376, + "Acc.screen door": 0.6669000244140625, + "Acc.stairway": 0.4197999954223633, + "Acc.river": 0.3106999969482422, + "Acc.bridge": 0.7694000244140625, + "Acc.bookcase": 0.5738000106811524, + "Acc.blind": 0.45630001068115233, + "Acc.coffee table": 0.772300033569336, + "Acc.toilet": 0.8930999755859375, + "Acc.flower": 0.5036999893188476, + "Acc.book": 0.6043000030517578, + "Acc.hill": 0.2618000030517578, + "Acc.bench": 0.5113000106811524, + "Acc.countertop": 0.7062999725341796, + "Acc.stove": 0.8004000091552734, + "Acc.palm": 0.7130999755859375, + "Acc.kitchen island": 0.6706999969482422, + "Acc.computer": 0.7669999694824219, + "Acc.swivel chair": 0.6604000091552734, + "Acc.boat": 0.8355999755859375, + "Acc.bar": 0.6338000106811523, + "Acc.arcade machine": 0.6197000122070313, + "Acc.hovel": 0.6122999954223632, + "Acc.bus": 0.9469000244140625, + "Acc.towel": 0.7559999847412109, + "Acc.light": 0.35069999694824217, + "Acc.truck": 0.3, + "Acc.tower": 0.37200000762939456, + "Acc.chandelier": 0.7669999694824219, + "Acc.awning": 0.3825, + "Acc.streetlight": 0.16840000152587892, + "Acc.booth": 0.5106000137329102, + "Acc.television receiver": 0.8126999664306641, + "Acc.airplane": 0.6706999969482422, + "Acc.dirt track": 0.13680000305175782, + "Acc.apparel": 0.43139999389648437, + "Acc.pole": 0.21379999160766602, + "Acc.land": 0.014900000095367431, + "Acc.bannister": 0.114399995803833, + "Acc.escalator": 0.7625, + "Acc.ottoman": 0.6304999923706055, + "Acc.bottle": 0.5904999923706055, + "Acc.buffet": 0.40490001678466797, + "Acc.poster": 0.21129999160766602, + "Acc.stage": 0.25690000534057617, + "Acc.van": 0.49759998321533205, + "Acc.ship": 0.10239999771118163, + "Acc.fountain": 0.2115999984741211, + "Acc.conveyer belt": 0.900199966430664, + "Acc.canopy": 0.24899999618530275, + "Acc.washer": 0.7197000122070313, + "Acc.plaything": 0.5304000091552734, + "Acc.swimming pool": 0.8320999908447265, + "Acc.stool": 0.30540000915527343, + "Acc.barrel": 0.6395000076293945, + "Acc.basket": 0.2928000068664551, + "Acc.waterfall": 0.5688000106811524, + "Acc.tent": 0.9855999755859375, + "Acc.bag": 0.13850000381469726, + "Acc.minibike": 0.6393999862670898, + "Acc.cradle": 0.9675, + "Acc.oven": 0.5756000137329101, + "Acc.ball": 0.30520000457763674, + "Acc.food": 0.6480000305175782, + "Acc.step": 0.05510000228881836, + "Acc.tank": 0.6538999938964843, + "Acc.trade name": 0.3370999908447266, + "Acc.microwave": 0.8687999725341797, + "Acc.pot": 0.46, + "Acc.animal": 0.6422000122070313, + "Acc.bicycle": 0.7173999786376953, + "Acc.lake": 0.011200000047683716, + "Acc.dishwasher": 0.6268000030517578, + "Acc.screen": 0.7358000183105469, + "Acc.blanket": 0.15630000114440917, + "Acc.sculpture": 0.6626999664306641, + "Acc.hood": 0.6480999755859375, + "Acc.sconce": 0.3952000045776367, + "Acc.vase": 0.40060001373291015, + "Acc.traffic light": 0.36450000762939455, + "Acc.tray": 0.011399999856948853, + "Acc.ashcan": 0.5045000076293945, + "Acc.fan": 0.575999984741211, + "Acc.pier": 
0.4615000152587891, + "Acc.crt screen": 0.09420000076293945, + "Acc.plate": 0.649000015258789, + "Acc.monitor": 0.41700000762939454, + "Acc.bulletin board": 0.5095000076293945, + "Acc.shower": 0.05, + "Acc.radiator": 0.5927000045776367, + "Acc.glass": 0.10779999732971192, + "Acc.clock": 0.3215999984741211, + "Acc.flag": 0.4225 + } + }, + "111": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8301000000000001, + "mIoU": 0.4824, + "mAcc": 0.5898, + "IoU.wall": 0.7695999908447265, + "IoU.building": 0.8297000122070313, + "IoU.sky": 0.9377999877929688, + "IoU.floor": 0.8077999877929688, + "IoU.tree": 0.7472000122070312, + "IoU.ceiling": 0.8305999755859375, + "IoU.road": 0.8243000030517578, + "IoU.bed ": 0.8876000213623046, + "IoU.windowpane": 0.6127999877929687, + "IoU.grass": 0.6981999969482422, + "IoU.cabinet": 0.6308000183105469, + "IoU.sidewalk": 0.630099983215332, + "IoU.person": 0.7976999664306641, + "IoU.earth": 0.3793000030517578, + "IoU.door": 0.4940999984741211, + "IoU.table": 0.5838000106811524, + "IoU.mountain": 0.5840000152587891, + "IoU.plant": 0.5175, + "IoU.curtain": 0.7116999816894531, + "IoU.chair": 0.5433000183105469, + "IoU.car": 0.8347000122070313, + "IoU.water": 0.5740999984741211, + "IoU.painting": 0.6948999786376953, + "IoU.sofa": 0.6858000183105468, + "IoU.shelf": 0.4109000015258789, + "IoU.house": 0.5306000137329101, + "IoU.sea": 0.6755000305175781, + "IoU.mirror": 0.6920999908447265, + "IoU.rug": 0.6133000183105469, + "IoU.field": 0.3656999969482422, + "IoU.armchair": 0.4384000015258789, + "IoU.seat": 0.6638999938964844, + "IoU.fence": 0.4336999893188477, + "IoU.desk": 0.4697999954223633, + "IoU.rock": 0.4336000061035156, + "IoU.wardrobe": 0.5656999969482421, + "IoU.lamp": 0.5347000122070312, + "IoU.bathtub": 0.8383000183105469, + "IoU.railing": 0.3783000183105469, + "IoU.cushion": 0.5797999954223633, + "IoU.base": 0.29989999771118164, + "IoU.box": 0.2084000015258789, + "IoU.column": 0.4645999908447266, + "IoU.signboard": 0.35439998626708985, + "IoU.chest of drawers": 0.3484999847412109, + "IoU.counter": 0.3808000183105469, + "IoU.sand": 0.4141999816894531, + "IoU.sink": 0.683499984741211, + "IoU.skyscraper": 0.5175999832153321, + "IoU.fireplace": 0.7101999664306641, + "IoU.refrigerator": 0.8087000274658203, + "IoU.grandstand": 0.5511000061035156, + "IoU.path": 0.21969999313354494, + "IoU.stairs": 0.2811000061035156, + "IoU.runway": 0.591500015258789, + "IoU.case": 0.5927999877929687, + "IoU.pool table": 0.9291000366210938, + "IoU.pillow": 0.5800999832153321, + "IoU.screen door": 0.6284999847412109, + "IoU.stairway": 0.3756999969482422, + "IoU.river": 0.23670000076293946, + "IoU.bridge": 0.6345999908447265, + "IoU.bookcase": 0.3715999984741211, + "IoU.blind": 0.38990001678466796, + "IoU.coffee table": 0.6125, + "IoU.toilet": 0.8113999938964844, + "IoU.flower": 0.337599983215332, + "IoU.book": 0.4122999954223633, + "IoU.hill": 0.1325, + "IoU.bench": 0.43040000915527343, + "IoU.countertop": 0.5599000167846679, + "IoU.stove": 0.7591999816894531, + "IoU.palm": 0.49209999084472655, + "IoU.kitchen island": 0.3791999816894531, + "IoU.computer": 0.6476999664306641, + "IoU.swivel chair": 0.4975, + "IoU.boat": 0.7127999877929687, + "IoU.bar": 0.5063999938964844, + "IoU.arcade machine": 0.44369998931884763, + "IoU.hovel": 0.4086000061035156, + "IoU.bus": 0.8997000122070312, + "IoU.towel": 0.6377000045776368, + "IoU.light": 0.276200008392334, + "IoU.truck": 0.19010000228881835, + 
"IoU.tower": 0.255, + "IoU.chandelier": 0.6161999893188477, + "IoU.awning": 0.24959999084472656, + "IoU.streetlight": 0.15449999809265136, + "IoU.booth": 0.40490001678466797, + "IoU.television receiver": 0.6806999969482422, + "IoU.airplane": 0.5793000030517578, + "IoU.dirt track": 0.04070000171661377, + "IoU.apparel": 0.31659999847412107, + "IoU.pole": 0.15149999618530274, + "IoU.land": 0.04090000152587891, + "IoU.bannister": 0.024700000286102294, + "IoU.escalator": 0.47970001220703123, + "IoU.ottoman": 0.46130001068115234, + "IoU.bottle": 0.35400001525878905, + "IoU.buffet": 0.42580001831054687, + "IoU.poster": 0.26680000305175783, + "IoU.stage": 0.13539999961853028, + "IoU.van": 0.3838000106811523, + "IoU.ship": 0.7629000091552735, + "IoU.fountain": 0.21670000076293947, + "IoU.conveyer belt": 0.7001999664306641, + "IoU.canopy": 0.1718000030517578, + "IoU.washer": 0.6888999938964844, + "IoU.plaything": 0.3333000183105469, + "IoU.swimming pool": 0.7168000030517578, + "IoU.stool": 0.34240001678466797, + "IoU.barrel": 0.48900001525878906, + "IoU.basket": 0.22110000610351563, + "IoU.waterfall": 0.4995000076293945, + "IoU.tent": 0.9431999969482422, + "IoU.bag": 0.12520000457763672, + "IoU.minibike": 0.6988999938964844, + "IoU.cradle": 0.8043000030517579, + "IoU.oven": 0.4116999816894531, + "IoU.ball": 0.34720001220703123, + "IoU.food": 0.4861000061035156, + "IoU.step": 0.03700000047683716, + "IoU.tank": 0.5916999816894531, + "IoU.trade name": 0.2813999938964844, + "IoU.microwave": 0.7955000305175781, + "IoU.pot": 0.43389999389648437, + "IoU.animal": 0.5865999984741211, + "IoU.bicycle": 0.5568000030517578, + "IoU.lake": 0.6075, + "IoU.dishwasher": 0.5688999938964844, + "IoU.screen": 0.5061999893188477, + "IoU.blanket": 0.15609999656677245, + "IoU.sculpture": 0.5661000061035156, + "IoU.hood": 0.48470001220703124, + "IoU.sconce": 0.2465999984741211, + "IoU.vase": 0.3007999992370605, + "IoU.traffic light": 0.23149999618530273, + "IoU.tray": 0.015199999809265136, + "IoU.ashcan": 0.3693000030517578, + "IoU.fan": 0.41619998931884766, + "IoU.pier": 0.2615999984741211, + "IoU.crt screen": 0.03789999961853027, + "IoU.plate": 0.49689998626708987, + "IoU.monitor": 0.20290000915527343, + "IoU.bulletin board": 0.402599983215332, + "IoU.shower": 0.008899999856948853, + "IoU.radiator": 0.537400016784668, + "IoU.glass": 0.09449999809265136, + "IoU.clock": 0.2272999954223633, + "IoU.flag": 0.4031999969482422, + "Acc.wall": 0.8869999694824219, + "Acc.building": 0.9297000122070312, + "Acc.sky": 0.9761000061035157, + "Acc.floor": 0.9022000122070313, + "Acc.tree": 0.8733000183105468, + "Acc.ceiling": 0.9244999694824219, + "Acc.road": 0.889000015258789, + "Acc.bed ": 0.9555000305175781, + "Acc.windowpane": 0.7583000183105468, + "Acc.grass": 0.814800033569336, + "Acc.cabinet": 0.7476000213623046, + "Acc.sidewalk": 0.7906999969482422, + "Acc.person": 0.909000015258789, + "Acc.earth": 0.5540000152587891, + "Acc.door": 0.6748000335693359, + "Acc.table": 0.7462999725341797, + "Acc.mountain": 0.7023999786376953, + "Acc.plant": 0.6581999969482422, + "Acc.curtain": 0.8418000030517578, + "Acc.chair": 0.6786000061035157, + "Acc.car": 0.9283999633789063, + "Acc.water": 0.7366999816894532, + "Acc.painting": 0.8273000335693359, + "Acc.sofa": 0.8343000030517578, + "Acc.shelf": 0.5884000015258789, + "Acc.house": 0.7077999877929687, + "Acc.sea": 0.8894000244140625, + "Acc.mirror": 0.7677999877929688, + "Acc.rug": 0.6966000366210937, + "Acc.field": 0.5297000122070312, + "Acc.armchair": 0.6422000122070313, + "Acc.seat": 
0.8380000305175781, + "Acc.fence": 0.5838999938964844, + "Acc.desk": 0.6630000305175782, + "Acc.rock": 0.6538999938964843, + "Acc.wardrobe": 0.6873000335693359, + "Acc.lamp": 0.6479000091552735, + "Acc.bathtub": 0.885199966430664, + "Acc.railing": 0.5179999923706055, + "Acc.cushion": 0.7291999816894531, + "Acc.base": 0.5206999969482422, + "Acc.box": 0.2652000045776367, + "Acc.column": 0.5658000183105468, + "Acc.signboard": 0.46580001831054685, + "Acc.chest of drawers": 0.524000015258789, + "Acc.counter": 0.5063999938964844, + "Acc.sand": 0.5854999923706055, + "Acc.sink": 0.7609999847412109, + "Acc.skyscraper": 0.6106000137329102, + "Acc.fireplace": 0.9123999786376953, + "Acc.refrigerator": 0.8747000122070312, + "Acc.grandstand": 0.7468000030517579, + "Acc.path": 0.2943000030517578, + "Acc.stairs": 0.3668000030517578, + "Acc.runway": 0.7505999755859375, + "Acc.case": 0.7172000122070312, + "Acc.pool table": 0.9594999694824219, + "Acc.pillow": 0.6683999633789063, + "Acc.screen door": 0.6633000183105469, + "Acc.stairway": 0.4506999969482422, + "Acc.river": 0.38369998931884763, + "Acc.bridge": 0.7493000030517578, + "Acc.bookcase": 0.607599983215332, + "Acc.blind": 0.4443000030517578, + "Acc.coffee table": 0.7506999969482422, + "Acc.toilet": 0.8936000061035156, + "Acc.flower": 0.5122000122070313, + "Acc.book": 0.5565000152587891, + "Acc.hill": 0.24809999465942384, + "Acc.bench": 0.5281999969482422, + "Acc.countertop": 0.705, + "Acc.stove": 0.8193000030517578, + "Acc.palm": 0.6854000091552734, + "Acc.kitchen island": 0.5197999954223633, + "Acc.computer": 0.7786000061035157, + "Acc.swivel chair": 0.6386000061035156, + "Acc.boat": 0.8279000091552734, + "Acc.bar": 0.6036999893188476, + "Acc.arcade machine": 0.4765999984741211, + "Acc.hovel": 0.4761000061035156, + "Acc.bus": 0.9556999969482421, + "Acc.towel": 0.7494999694824219, + "Acc.light": 0.2880999946594238, + "Acc.truck": 0.2620000076293945, + "Acc.tower": 0.3745000076293945, + "Acc.chandelier": 0.7543000030517578, + "Acc.awning": 0.2852000045776367, + "Acc.streetlight": 0.1809000015258789, + "Acc.booth": 0.5070000076293946, + "Acc.television receiver": 0.7905000305175781, + "Acc.airplane": 0.6512999725341797, + "Acc.dirt track": 0.18540000915527344, + "Acc.apparel": 0.43090000152587893, + "Acc.pole": 0.19430000305175782, + "Acc.land": 0.05289999961853027, + "Acc.bannister": 0.032200000286102294, + "Acc.escalator": 0.6693000030517579, + "Acc.ottoman": 0.6224000167846679, + "Acc.bottle": 0.5661999893188476, + "Acc.buffet": 0.48470001220703124, + "Acc.poster": 0.3906999969482422, + "Acc.stage": 0.21979999542236328, + "Acc.van": 0.4652000045776367, + "Acc.ship": 0.7905000305175781, + "Acc.fountain": 0.22030000686645507, + "Acc.conveyer belt": 0.9058000183105469, + "Acc.canopy": 0.22399999618530272, + "Acc.washer": 0.7070999908447265, + "Acc.plaything": 0.5672000122070312, + "Acc.swimming pool": 0.8733999633789062, + "Acc.stool": 0.40290000915527346, + "Acc.barrel": 0.6372000122070313, + "Acc.basket": 0.29670000076293945, + "Acc.waterfall": 0.5520999908447266, + "Acc.tent": 0.9798999786376953, + "Acc.bag": 0.1478999996185303, + "Acc.minibike": 0.7861000061035156, + "Acc.cradle": 0.9526000213623047, + "Acc.oven": 0.477400016784668, + "Acc.ball": 0.37310001373291013, + "Acc.food": 0.5693000030517578, + "Acc.step": 0.05460000038146973, + "Acc.tank": 0.6491000366210937, + "Acc.trade name": 0.3129999923706055, + "Acc.microwave": 0.8908999633789062, + "Acc.pot": 0.49529998779296874, + "Acc.animal": 0.6208000183105469, + "Acc.bicycle": 0.6877999877929688, 
+ "Acc.lake": 0.6370999908447266, + "Acc.dishwasher": 0.6894999694824219, + "Acc.screen": 0.7766999816894531, + "Acc.blanket": 0.17100000381469727, + "Acc.sculpture": 0.7798999786376953, + "Acc.hood": 0.6140000152587891, + "Acc.sconce": 0.3135000038146973, + "Acc.vase": 0.4127000045776367, + "Acc.traffic light": 0.3313999938964844, + "Acc.tray": 0.017300000190734865, + "Acc.ashcan": 0.4986999893188477, + "Acc.fan": 0.4681999969482422, + "Acc.pier": 0.42200000762939455, + "Acc.crt screen": 0.08579999923706055, + "Acc.plate": 0.6587999725341797, + "Acc.monitor": 0.3238999938964844, + "Acc.bulletin board": 0.5506000137329101, + "Acc.shower": 0.05, + "Acc.radiator": 0.6022000122070312, + "Acc.glass": 0.09770000457763672, + "Acc.clock": 0.24729999542236328, + "Acc.flag": 0.450099983215332 + } + }, + "112": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8306, + "mIoU": 0.47700000000000004, + "mAcc": 0.578, + "IoU.wall": 0.7666999816894531, + "IoU.building": 0.8304000091552735, + "IoU.sky": 0.9368000030517578, + "IoU.floor": 0.8068000030517578, + "IoU.tree": 0.7447000122070313, + "IoU.ceiling": 0.8294000244140625, + "IoU.road": 0.8291999816894531, + "IoU.bed ": 0.8891000366210937, + "IoU.windowpane": 0.6145999908447266, + "IoU.grass": 0.699800033569336, + "IoU.cabinet": 0.6206999969482422, + "IoU.sidewalk": 0.6466000366210938, + "IoU.person": 0.7993000030517579, + "IoU.earth": 0.37790000915527344, + "IoU.door": 0.5018000030517578, + "IoU.table": 0.596500015258789, + "IoU.mountain": 0.5913999938964843, + "IoU.plant": 0.5243999862670898, + "IoU.curtain": 0.7201000213623047, + "IoU.chair": 0.5375, + "IoU.car": 0.8351999664306641, + "IoU.water": 0.5504999923706054, + "IoU.painting": 0.7112000274658203, + "IoU.sofa": 0.6991000366210938, + "IoU.shelf": 0.4204999923706055, + "IoU.house": 0.5220999908447266, + "IoU.sea": 0.6620999908447266, + "IoU.mirror": 0.6912000274658203, + "IoU.rug": 0.6152000045776367, + "IoU.field": 0.3763000106811523, + "IoU.armchair": 0.45060001373291014, + "IoU.seat": 0.6619999694824219, + "IoU.fence": 0.390099983215332, + "IoU.desk": 0.505, + "IoU.rock": 0.42389999389648436, + "IoU.wardrobe": 0.5429000091552735, + "IoU.lamp": 0.5472000122070313, + "IoU.bathtub": 0.8587999725341797, + "IoU.railing": 0.3884000015258789, + "IoU.cushion": 0.5938999938964844, + "IoU.base": 0.31190000534057616, + "IoU.box": 0.22799999237060548, + "IoU.column": 0.46669998168945315, + "IoU.signboard": 0.35689998626708985, + "IoU.chest of drawers": 0.3164999961853027, + "IoU.counter": 0.38599998474121094, + "IoU.sand": 0.4125, + "IoU.sink": 0.6823999786376953, + "IoU.skyscraper": 0.528499984741211, + "IoU.fireplace": 0.7301000213623047, + "IoU.refrigerator": 0.8061000061035156, + "IoU.grandstand": 0.5386000061035157, + "IoU.path": 0.2543000030517578, + "IoU.stairs": 0.22100000381469725, + "IoU.runway": 0.6106999969482422, + "IoU.case": 0.5727999877929687, + "IoU.pool table": 0.9244999694824219, + "IoU.pillow": 0.5754999923706055, + "IoU.screen door": 0.7025, + "IoU.stairway": 0.31909999847412107, + "IoU.river": 0.1975, + "IoU.bridge": 0.48220001220703124, + "IoU.bookcase": 0.37139999389648437, + "IoU.blind": 0.4031999969482422, + "IoU.coffee table": 0.6220000076293946, + "IoU.toilet": 0.8318000030517578, + "IoU.flower": 0.31479999542236325, + "IoU.book": 0.44869998931884764, + "IoU.hill": 0.13739999771118164, + "IoU.bench": 0.42509998321533204, + "IoU.countertop": 0.5695000076293946, + "IoU.stove": 0.74, + 
"IoU.palm": 0.5104999923706055, + "IoU.kitchen island": 0.43130001068115237, + "IoU.computer": 0.6381000137329101, + "IoU.swivel chair": 0.47830001831054686, + "IoU.boat": 0.7480999755859375, + "IoU.bar": 0.46919998168945315, + "IoU.arcade machine": 0.5188999938964843, + "IoU.hovel": 0.40310001373291016, + "IoU.bus": 0.9061000061035156, + "IoU.towel": 0.6526999664306641, + "IoU.light": 0.3022999954223633, + "IoU.truck": 0.19950000762939454, + "IoU.tower": 0.21559999465942384, + "IoU.chandelier": 0.6254000091552734, + "IoU.awning": 0.23659999847412108, + "IoU.streetlight": 0.1459000015258789, + "IoU.booth": 0.36220001220703124, + "IoU.television receiver": 0.6666000366210938, + "IoU.airplane": 0.5702999877929688, + "IoU.dirt track": 0.1168000030517578, + "IoU.apparel": 0.35869998931884767, + "IoU.pole": 0.13340000152587891, + "IoU.land": 0.025199999809265138, + "IoU.bannister": 0.026500000953674316, + "IoU.escalator": 0.5381999969482422, + "IoU.ottoman": 0.43540000915527344, + "IoU.bottle": 0.3390000152587891, + "IoU.buffet": 0.3845000076293945, + "IoU.poster": 0.23489999771118164, + "IoU.stage": 0.14859999656677247, + "IoU.van": 0.40130001068115234, + "IoU.ship": 0.1452999973297119, + "IoU.fountain": 0.16989999771118164, + "IoU.conveyer belt": 0.6893000030517578, + "IoU.canopy": 0.1722999954223633, + "IoU.washer": 0.6969999694824218, + "IoU.plaything": 0.26209999084472657, + "IoU.swimming pool": 0.7323000335693359, + "IoU.stool": 0.3622999954223633, + "IoU.barrel": 0.576500015258789, + "IoU.basket": 0.23920000076293946, + "IoU.waterfall": 0.47209999084472654, + "IoU.tent": 0.9490000152587891, + "IoU.bag": 0.14729999542236327, + "IoU.minibike": 0.6893000030517578, + "IoU.cradle": 0.8054000091552734, + "IoU.oven": 0.22680000305175782, + "IoU.ball": 0.5159999847412109, + "IoU.food": 0.4759000015258789, + "IoU.step": 0.0603000020980835, + "IoU.tank": 0.5475, + "IoU.trade name": 0.25959999084472657, + "IoU.microwave": 0.4804000091552734, + "IoU.pot": 0.46630001068115234, + "IoU.animal": 0.5797000122070313, + "IoU.bicycle": 0.5545000076293946, + "IoU.lake": 0.2393000030517578, + "IoU.dishwasher": 0.6245999908447266, + "IoU.screen": 0.5729999923706055, + "IoU.blanket": 0.149399995803833, + "IoU.sculpture": 0.6313000106811524, + "IoU.hood": 0.48830001831054687, + "IoU.sconce": 0.2818000030517578, + "IoU.vase": 0.3245000076293945, + "IoU.traffic light": 0.23809999465942383, + "IoU.tray": 0.019700000286102293, + "IoU.ashcan": 0.3990999984741211, + "IoU.fan": 0.4284000015258789, + "IoU.pier": 0.2570000076293945, + "IoU.crt screen": 0.06599999904632568, + "IoU.plate": 0.5165000152587891, + "IoU.monitor": 0.422400016784668, + "IoU.bulletin board": 0.3890999984741211, + "IoU.shower": 0.02940000057220459, + "IoU.radiator": 0.5549000167846679, + "IoU.glass": 0.05510000228881836, + "IoU.clock": 0.19120000839233398, + "IoU.flag": 0.37139999389648437, + "Acc.wall": 0.8969000244140625, + "Acc.building": 0.9319000244140625, + "Acc.sky": 0.9769000244140625, + "Acc.floor": 0.9087999725341797, + "Acc.tree": 0.8769000244140625, + "Acc.ceiling": 0.927300033569336, + "Acc.road": 0.9044000244140625, + "Acc.bed ": 0.9530000305175781, + "Acc.windowpane": 0.7561000061035156, + "Acc.grass": 0.826500015258789, + "Acc.cabinet": 0.7256999969482422, + "Acc.sidewalk": 0.7872000122070313, + "Acc.person": 0.9104000091552734, + "Acc.earth": 0.5454000091552734, + "Acc.door": 0.655, + "Acc.table": 0.7494999694824219, + "Acc.mountain": 0.7329000091552734, + "Acc.plant": 0.6456999969482422, + "Acc.curtain": 0.8373000335693359, + 
"Acc.chair": 0.6498000335693359, + "Acc.car": 0.9247000122070312, + "Acc.water": 0.7341999816894531, + "Acc.painting": 0.832699966430664, + "Acc.sofa": 0.8843000030517578, + "Acc.shelf": 0.6088000106811523, + "Acc.house": 0.6594000244140625, + "Acc.sea": 0.8619999694824219, + "Acc.mirror": 0.7566000366210938, + "Acc.rug": 0.7091999816894531, + "Acc.field": 0.5459999847412109, + "Acc.armchair": 0.6352999877929687, + "Acc.seat": 0.8219999694824218, + "Acc.fence": 0.5247999954223633, + "Acc.desk": 0.7002999877929688, + "Acc.rock": 0.5781000137329102, + "Acc.wardrobe": 0.6491999816894531, + "Acc.lamp": 0.6445999908447265, + "Acc.bathtub": 0.9094000244140625, + "Acc.railing": 0.5338000106811523, + "Acc.cushion": 0.7113999938964843, + "Acc.base": 0.5016999816894532, + "Acc.box": 0.28969999313354494, + "Acc.column": 0.5554999923706054, + "Acc.signboard": 0.44610000610351563, + "Acc.chest of drawers": 0.517400016784668, + "Acc.counter": 0.49810001373291013, + "Acc.sand": 0.5993999862670898, + "Acc.sink": 0.7533999633789062, + "Acc.skyscraper": 0.5961000061035157, + "Acc.fireplace": 0.8723000335693359, + "Acc.refrigerator": 0.8673999786376954, + "Acc.grandstand": 0.7291000366210938, + "Acc.path": 0.3390000152587891, + "Acc.stairs": 0.28569999694824216, + "Acc.runway": 0.7786000061035157, + "Acc.case": 0.7405999755859375, + "Acc.pool table": 0.9594000244140625, + "Acc.pillow": 0.6622000122070313, + "Acc.screen door": 0.7986000061035157, + "Acc.stairway": 0.4379999923706055, + "Acc.river": 0.3590999984741211, + "Acc.bridge": 0.5506999969482422, + "Acc.bookcase": 0.5790999984741211, + "Acc.blind": 0.4584000015258789, + "Acc.coffee table": 0.7601000213623047, + "Acc.toilet": 0.8869000244140625, + "Acc.flower": 0.4581999969482422, + "Acc.book": 0.5938000106811523, + "Acc.hill": 0.22989999771118164, + "Acc.bench": 0.5022999954223633, + "Acc.countertop": 0.6869999694824219, + "Acc.stove": 0.8133000183105469, + "Acc.palm": 0.6863999938964844, + "Acc.kitchen island": 0.5808000183105468, + "Acc.computer": 0.7345999908447266, + "Acc.swivel chair": 0.6429000091552735, + "Acc.boat": 0.81, + "Acc.bar": 0.575900001525879, + "Acc.arcade machine": 0.5493999862670899, + "Acc.hovel": 0.4534000015258789, + "Acc.bus": 0.9451999664306641, + "Acc.towel": 0.7441999816894531, + "Acc.light": 0.3213999938964844, + "Acc.truck": 0.26049999237060545, + "Acc.tower": 0.3575, + "Acc.chandelier": 0.7744999694824218, + "Acc.awning": 0.26510000228881836, + "Acc.streetlight": 0.17600000381469727, + "Acc.booth": 0.38159999847412107, + "Acc.television receiver": 0.7566999816894531, + "Acc.airplane": 0.6243000030517578, + "Acc.dirt track": 0.45939998626708983, + "Acc.apparel": 0.4931000137329102, + "Acc.pole": 0.16290000915527345, + "Acc.land": 0.03200000047683716, + "Acc.bannister": 0.030799999237060546, + "Acc.escalator": 0.749000015258789, + "Acc.ottoman": 0.5731000137329102, + "Acc.bottle": 0.48259998321533204, + "Acc.buffet": 0.4481999969482422, + "Acc.poster": 0.3890000152587891, + "Acc.stage": 0.2109000015258789, + "Acc.van": 0.48, + "Acc.ship": 0.151899995803833, + "Acc.fountain": 0.17149999618530273, + "Acc.conveyer belt": 0.8998999786376953, + "Acc.canopy": 0.19520000457763673, + "Acc.washer": 0.7104000091552735, + "Acc.plaything": 0.35869998931884767, + "Acc.swimming pool": 0.8918000030517578, + "Acc.stool": 0.429900016784668, + "Acc.barrel": 0.6397999954223633, + "Acc.basket": 0.30200000762939455, + "Acc.waterfall": 0.5297000122070312, + "Acc.tent": 0.9744000244140625, + "Acc.bag": 0.17360000610351561, + "Acc.minibike": 
0.7754000091552734, + "Acc.cradle": 0.9556999969482421, + "Acc.oven": 0.5338999938964843, + "Acc.ball": 0.5606999969482422, + "Acc.food": 0.5481999969482422, + "Acc.step": 0.08819999694824218, + "Acc.tank": 0.6393999862670898, + "Acc.trade name": 0.28719999313354494, + "Acc.microwave": 0.5291999816894531, + "Acc.pot": 0.5209999847412109, + "Acc.animal": 0.6043000030517578, + "Acc.bicycle": 0.6659999847412109, + "Acc.lake": 0.25510000228881835, + "Acc.dishwasher": 0.6873999786376953, + "Acc.screen": 0.7313999938964844, + "Acc.blanket": 0.16280000686645507, + "Acc.sculpture": 0.7262000274658204, + "Acc.hood": 0.6359000015258789, + "Acc.sconce": 0.3413999938964844, + "Acc.vase": 0.3945999908447266, + "Acc.traffic light": 0.3146999931335449, + "Acc.tray": 0.024800000190734865, + "Acc.ashcan": 0.5118000030517578, + "Acc.fan": 0.48389999389648436, + "Acc.pier": 0.40560001373291016, + "Acc.crt screen": 0.09810000419616699, + "Acc.plate": 0.6541000366210937, + "Acc.monitor": 0.6738999938964844, + "Acc.bulletin board": 0.4584000015258789, + "Acc.shower": 0.04619999885559082, + "Acc.radiator": 0.6052000045776367, + "Acc.glass": 0.05610000133514404, + "Acc.clock": 0.20399999618530273, + "Acc.flag": 0.4090999984741211 + } + }, + "113": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8284999999999999, + "mIoU": 0.46619999999999995, + "mAcc": 0.556, + "IoU.wall": 0.764000015258789, + "IoU.building": 0.825, + "IoU.sky": 0.9294999694824219, + "IoU.floor": 0.8013999938964844, + "IoU.tree": 0.730999984741211, + "IoU.ceiling": 0.8266000366210937, + "IoU.road": 0.827699966430664, + "IoU.bed ": 0.8830000305175781, + "IoU.windowpane": 0.6127000045776367, + "IoU.grass": 0.6972000122070312, + "IoU.cabinet": 0.6265999984741211, + "IoU.sidewalk": 0.6409999847412109, + "IoU.person": 0.7970999908447266, + "IoU.earth": 0.3868999862670898, + "IoU.door": 0.4811000061035156, + "IoU.table": 0.5940000152587891, + "IoU.mountain": 0.5870999908447265, + "IoU.plant": 0.5138000106811523, + "IoU.curtain": 0.7188999938964844, + "IoU.chair": 0.5265999984741211, + "IoU.car": 0.8362000274658203, + "IoU.water": 0.5761999893188476, + "IoU.painting": 0.7052999877929688, + "IoU.sofa": 0.6787000274658204, + "IoU.shelf": 0.42, + "IoU.house": 0.5156999969482422, + "IoU.sea": 0.6976000213623047, + "IoU.mirror": 0.6687000274658204, + "IoU.rug": 0.6047000122070313, + "IoU.field": 0.3754999923706055, + "IoU.armchair": 0.407400016784668, + "IoU.seat": 0.657300033569336, + "IoU.fence": 0.3838000106811523, + "IoU.desk": 0.48630001068115236, + "IoU.rock": 0.460099983215332, + "IoU.wardrobe": 0.5479999923706055, + "IoU.lamp": 0.5268999862670899, + "IoU.bathtub": 0.84, + "IoU.railing": 0.38860000610351564, + "IoU.cushion": 0.58, + "IoU.base": 0.26129999160766604, + "IoU.box": 0.23420000076293945, + "IoU.column": 0.4366999816894531, + "IoU.signboard": 0.3306999969482422, + "IoU.chest of drawers": 0.33930000305175784, + "IoU.counter": 0.37200000762939456, + "IoU.sand": 0.46240001678466797, + "IoU.sink": 0.6805999755859375, + "IoU.skyscraper": 0.5265999984741211, + "IoU.fireplace": 0.7305000305175782, + "IoU.refrigerator": 0.8, + "IoU.grandstand": 0.5347000122070312, + "IoU.path": 0.2538999938964844, + "IoU.stairs": 0.14859999656677247, + "IoU.runway": 0.6361999893188477, + "IoU.case": 0.5279000091552735, + "IoU.pool table": 0.9233999633789063, + "IoU.pillow": 0.5147999954223633, + "IoU.screen door": 0.702699966430664, + "IoU.stairway": 0.2772999954223633, + 
"IoU.river": 0.19639999389648438, + "IoU.bridge": 0.5645000076293946, + "IoU.bookcase": 0.3504999923706055, + "IoU.blind": 0.38220001220703126, + "IoU.coffee table": 0.6231999969482422, + "IoU.toilet": 0.813499984741211, + "IoU.flower": 0.32610000610351564, + "IoU.book": 0.4236000061035156, + "IoU.hill": 0.12800000190734864, + "IoU.bench": 0.4534000015258789, + "IoU.countertop": 0.5663000106811523, + "IoU.stove": 0.7401999664306641, + "IoU.palm": 0.4463999938964844, + "IoU.kitchen island": 0.4331000137329102, + "IoU.computer": 0.7501000213623047, + "IoU.swivel chair": 0.4672000122070312, + "IoU.boat": 0.6727999877929688, + "IoU.bar": 0.485, + "IoU.arcade machine": 0.34970001220703123, + "IoU.hovel": 0.3306999969482422, + "IoU.bus": 0.9069000244140625, + "IoU.towel": 0.6462000274658203, + "IoU.light": 0.3031999969482422, + "IoU.truck": 0.20200000762939452, + "IoU.tower": 0.17909999847412109, + "IoU.chandelier": 0.6063000106811524, + "IoU.awning": 0.20780000686645508, + "IoU.streetlight": 0.15119999885559082, + "IoU.booth": 0.35080001831054686, + "IoU.television receiver": 0.6601000213623047, + "IoU.airplane": 0.5720000076293945, + "IoU.dirt track": 0.08430000305175782, + "IoU.apparel": 0.3381999969482422, + "IoU.pole": 0.09649999618530274, + "IoU.land": 0.030399999618530273, + "IoU.bannister": 0.0603000020980835, + "IoU.escalator": 0.494900016784668, + "IoU.ottoman": 0.4118000030517578, + "IoU.bottle": 0.22309999465942382, + "IoU.buffet": 0.3568000030517578, + "IoU.poster": 0.13239999771118163, + "IoU.stage": 0.1402000045776367, + "IoU.van": 0.35509998321533204, + "IoU.ship": 0.07260000228881835, + "IoU.fountain": 0.20989999771118165, + "IoU.conveyer belt": 0.6968000030517578, + "IoU.canopy": 0.13880000114440919, + "IoU.washer": 0.7022000122070312, + "IoU.plaything": 0.21559999465942384, + "IoU.swimming pool": 0.7312000274658204, + "IoU.stool": 0.34709999084472654, + "IoU.barrel": 0.5775, + "IoU.basket": 0.32119998931884763, + "IoU.waterfall": 0.47939998626708985, + "IoU.tent": 0.9451999664306641, + "IoU.bag": 0.14600000381469727, + "IoU.minibike": 0.5988000106811523, + "IoU.cradle": 0.7875, + "IoU.oven": 0.26600000381469724, + "IoU.ball": 0.5043000030517578, + "IoU.food": 0.417400016784668, + "IoU.step": 0.057899999618530276, + "IoU.tank": 0.5309999847412109, + "IoU.trade name": 0.1859000015258789, + "IoU.microwave": 0.6933000183105469, + "IoU.pot": 0.4325, + "IoU.animal": 0.5666999816894531, + "IoU.bicycle": 0.49529998779296874, + "IoU.lake": 0.1381999969482422, + "IoU.dishwasher": 0.5995999908447266, + "IoU.screen": 0.5850999832153321, + "IoU.blanket": 0.10529999732971192, + "IoU.sculpture": 0.6354000091552734, + "IoU.hood": 0.47009998321533203, + "IoU.sconce": 0.27, + "IoU.vase": 0.3243999862670898, + "IoU.traffic light": 0.2128000068664551, + "IoU.tray": 0.03309999942779541, + "IoU.ashcan": 0.39610000610351564, + "IoU.fan": 0.45040000915527345, + "IoU.pier": 0.2690999984741211, + "IoU.crt screen": 0.03059999942779541, + "IoU.plate": 0.47450000762939454, + "IoU.monitor": 0.4216999816894531, + "IoU.bulletin board": 0.3003000068664551, + "IoU.shower": 0.017300000190734865, + "IoU.radiator": 0.5584000015258789, + "IoU.glass": 0.09239999771118164, + "IoU.clock": 0.19899999618530273, + "IoU.flag": 0.3884000015258789, + "Acc.wall": 0.8944000244140625, + "Acc.building": 0.9472000122070312, + "Acc.sky": 0.9830999755859375, + "Acc.floor": 0.9211000061035156, + "Acc.tree": 0.8316999816894531, + "Acc.ceiling": 0.9312999725341797, + "Acc.road": 0.9091000366210937, + "Acc.bed ": 0.9537000274658203, 
+ "Acc.windowpane": 0.7845999908447265, + "Acc.grass": 0.8383999633789062, + "Acc.cabinet": 0.7530999755859376, + "Acc.sidewalk": 0.7909999847412109, + "Acc.person": 0.8798999786376953, + "Acc.earth": 0.5611999893188476, + "Acc.door": 0.610099983215332, + "Acc.table": 0.7762999725341797, + "Acc.mountain": 0.7368000030517579, + "Acc.plant": 0.6070000076293945, + "Acc.curtain": 0.8431999969482422, + "Acc.chair": 0.6204000091552735, + "Acc.car": 0.9044000244140625, + "Acc.water": 0.7880000305175782, + "Acc.painting": 0.8137999725341797, + "Acc.sofa": 0.8898000335693359, + "Acc.shelf": 0.6074000167846679, + "Acc.house": 0.6215999984741211, + "Acc.sea": 0.8868000030517578, + "Acc.mirror": 0.7288999938964844, + "Acc.rug": 0.6919999694824219, + "Acc.field": 0.5520000076293945, + "Acc.armchair": 0.5038000106811523, + "Acc.seat": 0.8372000122070312, + "Acc.fence": 0.5111000061035156, + "Acc.desk": 0.6304000091552734, + "Acc.rock": 0.6043999862670898, + "Acc.wardrobe": 0.6733999633789063, + "Acc.lamp": 0.6009000015258789, + "Acc.bathtub": 0.8834999847412109, + "Acc.railing": 0.535, + "Acc.cushion": 0.6797000122070312, + "Acc.base": 0.357400016784668, + "Acc.box": 0.2925, + "Acc.column": 0.5027999877929688, + "Acc.signboard": 0.4329999923706055, + "Acc.chest of drawers": 0.5111000061035156, + "Acc.counter": 0.5090999984741211, + "Acc.sand": 0.5863000106811523, + "Acc.sink": 0.7437999725341797, + "Acc.skyscraper": 0.5768000030517578, + "Acc.fireplace": 0.832699966430664, + "Acc.refrigerator": 0.8373000335693359, + "Acc.grandstand": 0.7234999847412109, + "Acc.path": 0.3288999938964844, + "Acc.stairs": 0.18469999313354493, + "Acc.runway": 0.8244000244140625, + "Acc.case": 0.7125, + "Acc.pool table": 0.9568000030517578, + "Acc.pillow": 0.5865999984741211, + "Acc.screen door": 0.7618000030517578, + "Acc.stairway": 0.44240001678466795, + "Acc.river": 0.29239999771118164, + "Acc.bridge": 0.6558000183105469, + "Acc.bookcase": 0.5190999984741211, + "Acc.blind": 0.41959999084472654, + "Acc.coffee table": 0.7581999969482421, + "Acc.toilet": 0.8677999877929687, + "Acc.flower": 0.49270000457763674, + "Acc.book": 0.590999984741211, + "Acc.hill": 0.20180000305175783, + "Acc.bench": 0.49840000152587893, + "Acc.countertop": 0.7066999816894531, + "Acc.stove": 0.7958000183105469, + "Acc.palm": 0.5513999938964844, + "Acc.kitchen island": 0.6422000122070313, + "Acc.computer": 0.8443000030517578, + "Acc.swivel chair": 0.5975, + "Acc.boat": 0.7213999938964843, + "Acc.bar": 0.5813000106811523, + "Acc.arcade machine": 0.3659999847412109, + "Acc.hovel": 0.3609000015258789, + "Acc.bus": 0.9437999725341797, + "Acc.towel": 0.7837000274658203, + "Acc.light": 0.3345999908447266, + "Acc.truck": 0.24579999923706056, + "Acc.tower": 0.24139999389648437, + "Acc.chandelier": 0.7629000091552735, + "Acc.awning": 0.22700000762939454, + "Acc.streetlight": 0.18790000915527344, + "Acc.booth": 0.36970001220703125, + "Acc.television receiver": 0.725999984741211, + "Acc.airplane": 0.6145999908447266, + "Acc.dirt track": 0.24670000076293946, + "Acc.apparel": 0.43650001525878906, + "Acc.pole": 0.11449999809265136, + "Acc.land": 0.04239999771118164, + "Acc.bannister": 0.07409999847412109, + "Acc.escalator": 0.6788999938964844, + "Acc.ottoman": 0.5366999816894531, + "Acc.bottle": 0.27799999237060546, + "Acc.buffet": 0.41569999694824217, + "Acc.poster": 0.17670000076293946, + "Acc.stage": 0.18209999084472656, + "Acc.van": 0.4186000061035156, + "Acc.ship": 0.0790999984741211, + "Acc.fountain": 0.21239999771118165, + "Acc.conveyer belt": 
0.907300033569336, + "Acc.canopy": 0.14619999885559082, + "Acc.washer": 0.7213999938964843, + "Acc.plaything": 0.3120000076293945, + "Acc.swimming pool": 0.8730000305175781, + "Acc.stool": 0.4136000061035156, + "Acc.barrel": 0.6331999969482421, + "Acc.basket": 0.4134999847412109, + "Acc.waterfall": 0.5663999938964843, + "Acc.tent": 0.960999984741211, + "Acc.bag": 0.17690000534057618, + "Acc.minibike": 0.6288000106811523, + "Acc.cradle": 0.9487999725341797, + "Acc.oven": 0.3995000076293945, + "Acc.ball": 0.5825, + "Acc.food": 0.47080001831054685, + "Acc.step": 0.07429999828338624, + "Acc.tank": 0.5945999908447266, + "Acc.trade name": 0.198799991607666, + "Acc.microwave": 0.7644999694824218, + "Acc.pot": 0.4686000061035156, + "Acc.animal": 0.5838000106811524, + "Acc.bicycle": 0.5922999954223633, + "Acc.lake": 0.13920000076293945, + "Acc.dishwasher": 0.6883000183105469, + "Acc.screen": 0.7880999755859375, + "Acc.blanket": 0.12510000228881835, + "Acc.sculpture": 0.6854000091552734, + "Acc.hood": 0.5825, + "Acc.sconce": 0.3103000068664551, + "Acc.vase": 0.4027000045776367, + "Acc.traffic light": 0.268700008392334, + "Acc.tray": 0.044800000190734865, + "Acc.ashcan": 0.504900016784668, + "Acc.fan": 0.502400016784668, + "Acc.pier": 0.3777000045776367, + "Acc.crt screen": 0.05239999771118164, + "Acc.plate": 0.5908000183105468, + "Acc.monitor": 0.5036999893188476, + "Acc.bulletin board": 0.369900016784668, + "Acc.shower": 0.05, + "Acc.radiator": 0.6241999816894531, + "Acc.glass": 0.09779999732971191, + "Acc.clock": 0.2140999984741211, + "Acc.flag": 0.4236000061035156 + } + }, + "114": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8153, + "mIoU": 0.4495, + "mAcc": 0.5625, + "IoU.wall": 0.7527999877929688, + "IoU.building": 0.8141000366210938, + "IoU.sky": 0.936500015258789, + "IoU.floor": 0.7991000366210937, + "IoU.tree": 0.7366999816894532, + "IoU.ceiling": 0.8187999725341797, + "IoU.road": 0.8237999725341797, + "IoU.bed ": 0.8612999725341797, + "IoU.windowpane": 0.5990999984741211, + "IoU.grass": 0.6687999725341797, + "IoU.cabinet": 0.5806000137329101, + "IoU.sidewalk": 0.6272999954223633, + "IoU.person": 0.7620999908447266, + "IoU.earth": 0.34720001220703123, + "IoU.door": 0.4111000061035156, + "IoU.table": 0.5281000137329102, + "IoU.mountain": 0.5695000076293946, + "IoU.plant": 0.49290000915527343, + "IoU.curtain": 0.7102999877929688, + "IoU.chair": 0.509000015258789, + "IoU.car": 0.8262999725341796, + "IoU.water": 0.5122999954223633, + "IoU.painting": 0.6737000274658204, + "IoU.sofa": 0.6281999969482421, + "IoU.shelf": 0.4216999816894531, + "IoU.house": 0.487400016784668, + "IoU.sea": 0.6008000183105469, + "IoU.mirror": 0.6254000091552734, + "IoU.rug": 0.6054999923706055, + "IoU.field": 0.27790000915527346, + "IoU.armchair": 0.37970001220703126, + "IoU.seat": 0.6190000152587891, + "IoU.fence": 0.31510000228881835, + "IoU.desk": 0.4759000015258789, + "IoU.rock": 0.44209999084472656, + "IoU.wardrobe": 0.4834999847412109, + "IoU.lamp": 0.5218999862670899, + "IoU.bathtub": 0.8080999755859375, + "IoU.railing": 0.31489999771118166, + "IoU.cushion": 0.5143999862670898, + "IoU.base": 0.271200008392334, + "IoU.box": 0.23600000381469727, + "IoU.column": 0.45549999237060546, + "IoU.signboard": 0.3171999931335449, + "IoU.chest of drawers": 0.3468000030517578, + "IoU.counter": 0.29700000762939455, + "IoU.sand": 0.40330001831054685, + "IoU.sink": 0.6431999969482421, + "IoU.skyscraper": 0.5522000122070313, + 
"IoU.fireplace": 0.7327999877929687, + "IoU.refrigerator": 0.7413999938964844, + "IoU.grandstand": 0.5079999923706054, + "IoU.path": 0.26190000534057617, + "IoU.stairs": 0.2560000038146973, + "IoU.runway": 0.5993999862670898, + "IoU.case": 0.5416999816894531, + "IoU.pool table": 0.9209999847412109, + "IoU.pillow": 0.5243000030517578, + "IoU.screen door": 0.5116999816894531, + "IoU.stairway": 0.27969999313354493, + "IoU.river": 0.28639999389648435, + "IoU.bridge": 0.6188000106811523, + "IoU.bookcase": 0.347400016784668, + "IoU.blind": 0.36189998626708986, + "IoU.coffee table": 0.5731999969482422, + "IoU.toilet": 0.805199966430664, + "IoU.flower": 0.2797999954223633, + "IoU.book": 0.4431999969482422, + "IoU.hill": 0.08699999809265137, + "IoU.bench": 0.4968000030517578, + "IoU.countertop": 0.5247000122070312, + "IoU.stove": 0.6741000366210937, + "IoU.palm": 0.46060001373291015, + "IoU.kitchen island": 0.35529998779296873, + "IoU.computer": 0.6313000106811524, + "IoU.swivel chair": 0.49209999084472655, + "IoU.boat": 0.6983999633789062, + "IoU.bar": 0.4741999816894531, + "IoU.arcade machine": 0.3422999954223633, + "IoU.hovel": 0.35139999389648435, + "IoU.bus": 0.8118000030517578, + "IoU.towel": 0.5554000091552734, + "IoU.light": 0.2478000068664551, + "IoU.truck": 0.23309999465942383, + "IoU.tower": 0.3270000076293945, + "IoU.chandelier": 0.6054000091552735, + "IoU.awning": 0.3218000030517578, + "IoU.streetlight": 0.13189999580383302, + "IoU.booth": 0.39310001373291015, + "IoU.television receiver": 0.597599983215332, + "IoU.airplane": 0.575999984741211, + "IoU.dirt track": 0.1718000030517578, + "IoU.apparel": 0.30450000762939455, + "IoU.pole": 0.13670000076293945, + "IoU.land": 0.0056999999284744265, + "IoU.bannister": 0.06360000133514404, + "IoU.escalator": 0.21809999465942384, + "IoU.ottoman": 0.4290999984741211, + "IoU.bottle": 0.2834000015258789, + "IoU.buffet": 0.4079000091552734, + "IoU.poster": 0.17659999847412108, + "IoU.stage": 0.18190000534057618, + "IoU.van": 0.4077000045776367, + "IoU.ship": 0.6763999938964844, + "IoU.fountain": 0.18290000915527344, + "IoU.conveyer belt": 0.7, + "IoU.canopy": 0.22309999465942382, + "IoU.washer": 0.7231999969482422, + "IoU.plaything": 0.24809999465942384, + "IoU.swimming pool": 0.7258999633789063, + "IoU.stool": 0.2567000007629395, + "IoU.barrel": 0.4184999847412109, + "IoU.basket": 0.18920000076293944, + "IoU.waterfall": 0.7637000274658203, + "IoU.tent": 0.8979000091552735, + "IoU.bag": 0.07389999866485596, + "IoU.minibike": 0.5229999923706055, + "IoU.cradle": 0.7483000183105468, + "IoU.oven": 0.23760000228881836, + "IoU.ball": 0.3756999969482422, + "IoU.food": 0.46810001373291016, + "IoU.step": 0.07210000038146973, + "IoU.tank": 0.5433000183105469, + "IoU.trade name": 0.2503000068664551, + "IoU.microwave": 0.37529998779296875, + "IoU.pot": 0.3536000061035156, + "IoU.animal": 0.5631999969482422, + "IoU.bicycle": 0.4268000030517578, + "IoU.lake": 0.30440000534057615, + "IoU.dishwasher": 0.484900016784668, + "IoU.screen": 0.6043000030517578, + "IoU.blanket": 0.03210000038146973, + "IoU.sculpture": 0.44799999237060545, + "IoU.hood": 0.4190999984741211, + "IoU.sconce": 0.3434000015258789, + "IoU.vase": 0.261200008392334, + "IoU.traffic light": 0.2290999984741211, + "IoU.tray": 0.028399999141693114, + "IoU.ashcan": 0.32360000610351564, + "IoU.fan": 0.4425, + "IoU.pier": 0.3452999877929688, + "IoU.crt screen": 0.029000000953674318, + "IoU.plate": 0.47509998321533203, + "IoU.monitor": 0.032400000095367434, + "IoU.bulletin board": 0.34080001831054685, + 
"IoU.shower": 0.0056999999284744265, + "IoU.radiator": 0.523499984741211, + "IoU.glass": 0.04989999771118164, + "IoU.clock": 0.203700008392334, + "IoU.flag": 0.3065999984741211, + "Acc.wall": 0.8827999877929688, + "Acc.building": 0.9213999938964844, + "Acc.sky": 0.9756999969482422, + "Acc.floor": 0.9012000274658203, + "Acc.tree": 0.870999984741211, + "Acc.ceiling": 0.9079000091552735, + "Acc.road": 0.8961000061035156, + "Acc.bed ": 0.9395999908447266, + "Acc.windowpane": 0.7504000091552734, + "Acc.grass": 0.815, + "Acc.cabinet": 0.6822000122070313, + "Acc.sidewalk": 0.7712000274658203, + "Acc.person": 0.9037999725341797, + "Acc.earth": 0.4922999954223633, + "Acc.door": 0.5679000091552734, + "Acc.table": 0.6894000244140625, + "Acc.mountain": 0.6872000122070312, + "Acc.plant": 0.6204000091552735, + "Acc.curtain": 0.8241000366210938, + "Acc.chair": 0.6329000091552734, + "Acc.car": 0.9230999755859375, + "Acc.water": 0.6508999633789062, + "Acc.painting": 0.8319000244140625, + "Acc.sofa": 0.7983999633789063, + "Acc.shelf": 0.6413999938964844, + "Acc.house": 0.6268000030517578, + "Acc.sea": 0.8908999633789062, + "Acc.mirror": 0.7130000305175781, + "Acc.rug": 0.6893000030517578, + "Acc.field": 0.47700000762939454, + "Acc.armchair": 0.5733000183105469, + "Acc.seat": 0.8137000274658203, + "Acc.fence": 0.4079000091552734, + "Acc.desk": 0.6616999816894531, + "Acc.rock": 0.6491000366210937, + "Acc.wardrobe": 0.5979999923706054, + "Acc.lamp": 0.6522000122070313, + "Acc.bathtub": 0.8765000152587891, + "Acc.railing": 0.4581999969482422, + "Acc.cushion": 0.6680999755859375, + "Acc.base": 0.46900001525878904, + "Acc.box": 0.3338999938964844, + "Acc.column": 0.5579000091552735, + "Acc.signboard": 0.4166999816894531, + "Acc.chest of drawers": 0.6122999954223632, + "Acc.counter": 0.40439998626708984, + "Acc.sand": 0.599900016784668, + "Acc.sink": 0.7394999694824219, + "Acc.skyscraper": 0.6630999755859375, + "Acc.fireplace": 0.9066000366210938, + "Acc.refrigerator": 0.8619999694824219, + "Acc.grandstand": 0.6629000091552735, + "Acc.path": 0.3440999984741211, + "Acc.stairs": 0.3256999969482422, + "Acc.runway": 0.7716000366210938, + "Acc.case": 0.7108000183105468, + "Acc.pool table": 0.9688999938964844, + "Acc.pillow": 0.6437999725341796, + "Acc.screen door": 0.6106999969482422, + "Acc.stairway": 0.41330001831054686, + "Acc.river": 0.5065000152587891, + "Acc.bridge": 0.7811000061035156, + "Acc.bookcase": 0.567400016784668, + "Acc.blind": 0.4343000030517578, + "Acc.coffee table": 0.7730999755859375, + "Acc.toilet": 0.8831999969482421, + "Acc.flower": 0.44779998779296876, + "Acc.book": 0.6108000183105469, + "Acc.hill": 0.13539999961853028, + "Acc.bench": 0.5952999877929688, + "Acc.countertop": 0.6866999816894531, + "Acc.stove": 0.7672000122070313, + "Acc.palm": 0.6218000030517579, + "Acc.kitchen island": 0.5911999893188477, + "Acc.computer": 0.7302999877929688, + "Acc.swivel chair": 0.6816000366210937, + "Acc.boat": 0.8584999847412109, + "Acc.bar": 0.5958000183105469, + "Acc.arcade machine": 0.3665999984741211, + "Acc.hovel": 0.5118999862670899, + "Acc.bus": 0.9113999938964844, + "Acc.towel": 0.6944999694824219, + "Acc.light": 0.27299999237060546, + "Acc.truck": 0.2926000022888184, + "Acc.tower": 0.40819999694824216, + "Acc.chandelier": 0.7668000030517578, + "Acc.awning": 0.40439998626708984, + "Acc.streetlight": 0.15029999732971192, + "Acc.booth": 0.45799999237060546, + "Acc.television receiver": 0.7251000213623047, + "Acc.airplane": 0.6415000152587891, + "Acc.dirt track": 0.18569999694824219, + "Acc.apparel": 
0.4672999954223633, + "Acc.pole": 0.17329999923706055, + "Acc.land": 0.008500000238418579, + "Acc.bannister": 0.08369999885559082, + "Acc.escalator": 0.23629999160766602, + "Acc.ottoman": 0.5468999862670898, + "Acc.bottle": 0.3640999984741211, + "Acc.buffet": 0.45549999237060546, + "Acc.poster": 0.206200008392334, + "Acc.stage": 0.3054999923706055, + "Acc.van": 0.48720001220703124, + "Acc.ship": 0.7588999938964843, + "Acc.fountain": 0.20450000762939452, + "Acc.conveyer belt": 0.8848000335693359, + "Acc.canopy": 0.37040000915527344, + "Acc.washer": 0.7381999969482422, + "Acc.plaything": 0.42509998321533204, + "Acc.swimming pool": 0.8070999908447266, + "Acc.stool": 0.35700000762939454, + "Acc.barrel": 0.6061999893188477, + "Acc.basket": 0.2368000030517578, + "Acc.waterfall": 0.8380999755859375, + "Acc.tent": 0.9869999694824219, + "Acc.bag": 0.090600004196167, + "Acc.minibike": 0.625099983215332, + "Acc.cradle": 0.9741999816894531, + "Acc.oven": 0.46919998168945315, + "Acc.ball": 0.3984000015258789, + "Acc.food": 0.5652999877929688, + "Acc.step": 0.0925, + "Acc.tank": 0.630099983215332, + "Acc.trade name": 0.31170000076293947, + "Acc.microwave": 0.4240999984741211, + "Acc.pot": 0.395, + "Acc.animal": 0.5981000137329101, + "Acc.bicycle": 0.6825, + "Acc.lake": 0.30540000915527343, + "Acc.dishwasher": 0.6701000213623047, + "Acc.screen": 0.935, + "Acc.blanket": 0.035799999237060544, + "Acc.sculpture": 0.6379999923706055, + "Acc.hood": 0.45729999542236327, + "Acc.sconce": 0.42389999389648436, + "Acc.vase": 0.32729999542236327, + "Acc.traffic light": 0.33490001678466796, + "Acc.tray": 0.03519999980926514, + "Acc.ashcan": 0.45110000610351564, + "Acc.fan": 0.5465000152587891, + "Acc.pier": 0.47709999084472654, + "Acc.crt screen": 0.090600004196167, + "Acc.plate": 0.6545999908447265, + "Acc.monitor": 0.049099998474121095, + "Acc.bulletin board": 0.48580001831054687, + "Acc.shower": 0.03799999952316284, + "Acc.radiator": 0.5877000045776367, + "Acc.glass": 0.05150000095367432, + "Acc.clock": 0.22450000762939454, + "Acc.flag": 0.33740001678466797 + } + }, + "115": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8217, + "mIoU": 0.46130000000000004, + "mAcc": 0.5764, + "IoU.wall": 0.7563999938964844, + "IoU.building": 0.8195999908447266, + "IoU.sky": 0.9355999755859375, + "IoU.floor": 0.8031999969482422, + "IoU.tree": 0.7438999938964844, + "IoU.ceiling": 0.8273000335693359, + "IoU.road": 0.8233000183105469, + "IoU.bed ": 0.8627999877929687, + "IoU.windowpane": 0.6106000137329102, + "IoU.grass": 0.709800033569336, + "IoU.cabinet": 0.5831000137329102, + "IoU.sidewalk": 0.6451000213623047, + "IoU.person": 0.7747000122070312, + "IoU.earth": 0.3763999938964844, + "IoU.door": 0.467599983215332, + "IoU.table": 0.5422999954223633, + "IoU.mountain": 0.5866999816894531, + "IoU.plant": 0.5066999816894531, + "IoU.curtain": 0.7090000152587891, + "IoU.chair": 0.5131000137329101, + "IoU.car": 0.8234999847412109, + "IoU.water": 0.5399000167846679, + "IoU.painting": 0.6737000274658204, + "IoU.sofa": 0.6437999725341796, + "IoU.shelf": 0.4234000015258789, + "IoU.house": 0.5704000091552734, + "IoU.sea": 0.6313000106811524, + "IoU.mirror": 0.6395000076293945, + "IoU.rug": 0.634900016784668, + "IoU.field": 0.26209999084472657, + "IoU.armchair": 0.40439998626708984, + "IoU.seat": 0.6234999847412109, + "IoU.fence": 0.34900001525878904, + "IoU.desk": 0.4452000045776367, + "IoU.rock": 0.47400001525878904, + "IoU.wardrobe": 
0.5436000061035157, + "IoU.lamp": 0.513499984741211, + "IoU.bathtub": 0.7787999725341797, + "IoU.railing": 0.34630001068115235, + "IoU.cushion": 0.5202000045776367, + "IoU.base": 0.2959000015258789, + "IoU.box": 0.20479999542236327, + "IoU.column": 0.4704000091552734, + "IoU.signboard": 0.33189998626708983, + "IoU.chest of drawers": 0.3591999816894531, + "IoU.counter": 0.3758000183105469, + "IoU.sand": 0.45630001068115233, + "IoU.sink": 0.6662999725341797, + "IoU.skyscraper": 0.5368999862670898, + "IoU.fireplace": 0.7316000366210937, + "IoU.refrigerator": 0.7731999969482422, + "IoU.grandstand": 0.4918000030517578, + "IoU.path": 0.21200000762939453, + "IoU.stairs": 0.27329999923706055, + "IoU.runway": 0.6159000015258789, + "IoU.case": 0.5738000106811524, + "IoU.pool table": 0.9233000183105469, + "IoU.pillow": 0.5306000137329101, + "IoU.screen door": 0.6297000122070312, + "IoU.stairway": 0.4154000091552734, + "IoU.river": 0.19559999465942382, + "IoU.bridge": 0.6268000030517578, + "IoU.bookcase": 0.3334000015258789, + "IoU.blind": 0.4077000045776367, + "IoU.coffee table": 0.5643999862670899, + "IoU.toilet": 0.7963999938964844, + "IoU.flower": 0.3113999938964844, + "IoU.book": 0.44240001678466795, + "IoU.hill": 0.08770000457763671, + "IoU.bench": 0.4443999862670898, + "IoU.countertop": 0.5165999984741211, + "IoU.stove": 0.6819999694824219, + "IoU.palm": 0.42709999084472655, + "IoU.kitchen island": 0.390099983215332, + "IoU.computer": 0.6022999954223632, + "IoU.swivel chair": 0.4645999908447266, + "IoU.boat": 0.7108000183105468, + "IoU.bar": 0.5779000091552734, + "IoU.arcade machine": 0.3413999938964844, + "IoU.hovel": 0.3359000015258789, + "IoU.bus": 0.8766000366210938, + "IoU.towel": 0.5620000076293945, + "IoU.light": 0.3018000030517578, + "IoU.truck": 0.22940000534057617, + "IoU.tower": 0.32229999542236326, + "IoU.chandelier": 0.5845999908447266, + "IoU.awning": 0.22309999465942382, + "IoU.streetlight": 0.12409999847412109, + "IoU.booth": 0.445, + "IoU.television receiver": 0.6122000122070312, + "IoU.airplane": 0.5579999923706055, + "IoU.dirt track": 0.049200000762939455, + "IoU.apparel": 0.3065999984741211, + "IoU.pole": 0.125, + "IoU.land": 0.0010000000149011613, + "IoU.bannister": 0.05989999771118164, + "IoU.escalator": 0.5388000106811524, + "IoU.ottoman": 0.43979999542236325, + "IoU.bottle": 0.3209000015258789, + "IoU.buffet": 0.37509998321533206, + "IoU.poster": 0.09359999656677247, + "IoU.stage": 0.15539999961853027, + "IoU.van": 0.3895000076293945, + "IoU.ship": 0.6991999816894531, + "IoU.fountain": 0.18729999542236328, + "IoU.conveyer belt": 0.6716000366210938, + "IoU.canopy": 0.2168000030517578, + "IoU.washer": 0.7206999969482422, + "IoU.plaything": 0.24579999923706056, + "IoU.swimming pool": 0.7530000305175781, + "IoU.stool": 0.23959999084472655, + "IoU.barrel": 0.520099983215332, + "IoU.basket": 0.2256999969482422, + "IoU.waterfall": 0.7294999694824219, + "IoU.tent": 0.8381999969482422, + "IoU.bag": 0.10170000076293945, + "IoU.minibike": 0.6462999725341797, + "IoU.cradle": 0.7473000335693359, + "IoU.oven": 0.20299999237060548, + "IoU.ball": 0.35900001525878905, + "IoU.food": 0.505, + "IoU.step": 0.07019999980926514, + "IoU.tank": 0.5695999908447266, + "IoU.trade name": 0.25420000076293947, + "IoU.microwave": 0.41139999389648435, + "IoU.pot": 0.4006999969482422, + "IoU.animal": 0.577599983215332, + "IoU.bicycle": 0.5236000061035156, + "IoU.lake": 0.3890999984741211, + "IoU.dishwasher": 0.4943000030517578, + "IoU.screen": 0.569900016784668, + "IoU.blanket": 0.04860000133514404, + 
"IoU.sculpture": 0.5063999938964844, + "IoU.hood": 0.5011000061035156, + "IoU.sconce": 0.33119998931884764, + "IoU.vase": 0.256200008392334, + "IoU.traffic light": 0.21350000381469728, + "IoU.tray": 0.02869999885559082, + "IoU.ashcan": 0.3547999954223633, + "IoU.fan": 0.4468999862670898, + "IoU.pier": 0.3218000030517578, + "IoU.crt screen": 0.014600000381469726, + "IoU.plate": 0.4606999969482422, + "IoU.monitor": 0.11270000457763672, + "IoU.bulletin board": 0.3795000076293945, + "IoU.shower": 0.0044999998807907105, + "IoU.radiator": 0.537599983215332, + "IoU.glass": 0.06380000114440917, + "IoU.clock": 0.1390999984741211, + "IoU.flag": 0.34310001373291016, + "Acc.wall": 0.8747000122070312, + "Acc.building": 0.9258999633789062, + "Acc.sky": 0.9758999633789063, + "Acc.floor": 0.9026000213623047, + "Acc.tree": 0.8727999877929687, + "Acc.ceiling": 0.9183999633789063, + "Acc.road": 0.897300033569336, + "Acc.bed ": 0.9425, + "Acc.windowpane": 0.7661000061035156, + "Acc.grass": 0.8302999877929688, + "Acc.cabinet": 0.6883000183105469, + "Acc.sidewalk": 0.7830000305175782, + "Acc.person": 0.9081999969482422, + "Acc.earth": 0.5493000030517579, + "Acc.door": 0.6320000076293946, + "Acc.table": 0.6987000274658203, + "Acc.mountain": 0.7241999816894531, + "Acc.plant": 0.6272999954223633, + "Acc.curtain": 0.8306999969482421, + "Acc.chair": 0.6363000106811524, + "Acc.car": 0.9208999633789062, + "Acc.water": 0.6990000152587891, + "Acc.painting": 0.8455000305175782, + "Acc.sofa": 0.8098999786376954, + "Acc.shelf": 0.6505000305175781, + "Acc.house": 0.6759999847412109, + "Acc.sea": 0.8625, + "Acc.mirror": 0.7355000305175782, + "Acc.rug": 0.6973999786376953, + "Acc.field": 0.4218000030517578, + "Acc.armchair": 0.6091999816894531, + "Acc.seat": 0.8097000122070312, + "Acc.fence": 0.47450000762939454, + "Acc.desk": 0.6602999877929687, + "Acc.rock": 0.6762999725341797, + "Acc.wardrobe": 0.6687000274658204, + "Acc.lamp": 0.6454000091552734, + "Acc.bathtub": 0.8852999877929687, + "Acc.railing": 0.4893000030517578, + "Acc.cushion": 0.6766999816894531, + "Acc.base": 0.44549999237060545, + "Acc.box": 0.28959999084472654, + "Acc.column": 0.5793000030517578, + "Acc.signboard": 0.42889999389648437, + "Acc.chest of drawers": 0.6297999954223633, + "Acc.counter": 0.4986999893188477, + "Acc.sand": 0.6143000030517578, + "Acc.sink": 0.7287999725341797, + "Acc.skyscraper": 0.6206999969482422, + "Acc.fireplace": 0.8883000183105468, + "Acc.refrigerator": 0.8437000274658203, + "Acc.grandstand": 0.6983000183105469, + "Acc.path": 0.28920000076293945, + "Acc.stairs": 0.3845000076293945, + "Acc.runway": 0.7906999969482422, + "Acc.case": 0.7090000152587891, + "Acc.pool table": 0.9654000091552735, + "Acc.pillow": 0.6275999832153321, + "Acc.screen door": 0.7184999847412109, + "Acc.stairway": 0.5397000122070312, + "Acc.river": 0.3243000030517578, + "Acc.bridge": 0.7551999664306641, + "Acc.bookcase": 0.5522999954223633, + "Acc.blind": 0.4709000015258789, + "Acc.coffee table": 0.7901000213623047, + "Acc.toilet": 0.8919000244140625, + "Acc.flower": 0.4859000015258789, + "Acc.book": 0.6072000122070312, + "Acc.hill": 0.13840000152587892, + "Acc.bench": 0.5561000061035156, + "Acc.countertop": 0.7016000366210937, + "Acc.stove": 0.7795999908447265, + "Acc.palm": 0.6295000076293945, + "Acc.kitchen island": 0.6708999633789062, + "Acc.computer": 0.7301000213623047, + "Acc.swivel chair": 0.6579000091552735, + "Acc.boat": 0.8429000091552734, + "Acc.bar": 0.6683000183105469, + "Acc.arcade machine": 0.3672999954223633, + "Acc.hovel": 0.49340000152587893, 
+ "Acc.bus": 0.9219999694824219, + "Acc.towel": 0.7112000274658203, + "Acc.light": 0.3340000152587891, + "Acc.truck": 0.29520000457763673, + "Acc.tower": 0.40880001068115235, + "Acc.chandelier": 0.7608999633789062, + "Acc.awning": 0.278799991607666, + "Acc.streetlight": 0.14350000381469727, + "Acc.booth": 0.4631999969482422, + "Acc.television receiver": 0.7455000305175781, + "Acc.airplane": 0.6336999893188476, + "Acc.dirt track": 0.18350000381469728, + "Acc.apparel": 0.44220001220703126, + "Acc.pole": 0.16040000915527344, + "Acc.land": 0.0012999999523162842, + "Acc.bannister": 0.07489999771118164, + "Acc.escalator": 0.7180000305175781, + "Acc.ottoman": 0.586500015258789, + "Acc.bottle": 0.4515999984741211, + "Acc.buffet": 0.41639999389648436, + "Acc.poster": 0.11319999694824219, + "Acc.stage": 0.2740999984741211, + "Acc.van": 0.47509998321533203, + "Acc.ship": 0.8487999725341797, + "Acc.fountain": 0.21309999465942384, + "Acc.conveyer belt": 0.8833999633789062, + "Acc.canopy": 0.36900001525878906, + "Acc.washer": 0.7333000183105469, + "Acc.plaything": 0.3490999984741211, + "Acc.swimming pool": 0.8586000061035156, + "Acc.stool": 0.27760000228881837, + "Acc.barrel": 0.6448000335693359, + "Acc.basket": 0.2769000053405762, + "Acc.waterfall": 0.8352999877929688, + "Acc.tent": 0.9862999725341797, + "Acc.bag": 0.120600004196167, + "Acc.minibike": 0.7608999633789062, + "Acc.cradle": 0.9666999816894531, + "Acc.oven": 0.5270000076293946, + "Acc.ball": 0.37439998626708987, + "Acc.food": 0.6181999969482422, + "Acc.step": 0.08109999656677246, + "Acc.tank": 0.6469999694824219, + "Acc.trade name": 0.2929000091552734, + "Acc.microwave": 0.46880001068115235, + "Acc.pot": 0.46380001068115234, + "Acc.animal": 0.6222999954223633, + "Acc.bicycle": 0.6904000091552734, + "Acc.lake": 0.41819999694824217, + "Acc.dishwasher": 0.6755000305175781, + "Acc.screen": 0.8433999633789062, + "Acc.blanket": 0.05389999866485596, + "Acc.sculpture": 0.6386000061035156, + "Acc.hood": 0.5931999969482422, + "Acc.sconce": 0.43090000152587893, + "Acc.vase": 0.33630001068115234, + "Acc.traffic light": 0.3325, + "Acc.tray": 0.046799998283386234, + "Acc.ashcan": 0.4891999816894531, + "Acc.fan": 0.5520000076293945, + "Acc.pier": 0.5609999847412109, + "Acc.crt screen": 0.03720000028610229, + "Acc.plate": 0.6284999847412109, + "Acc.monitor": 0.195, + "Acc.bulletin board": 0.5656000137329101, + "Acc.shower": 0.014600000381469726, + "Acc.radiator": 0.6448999786376953, + "Acc.glass": 0.06590000152587891, + "Acc.clock": 0.15640000343322755, + "Acc.flag": 0.36700000762939455 + } + }, + "116": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8247, + "mIoU": 0.45909999999999995, + "mAcc": 0.5704, + "IoU.wall": 0.7670999908447266, + "IoU.building": 0.8258000183105468, + "IoU.sky": 0.9376999664306641, + "IoU.floor": 0.8062000274658203, + "IoU.tree": 0.7470999908447266, + "IoU.ceiling": 0.8316999816894531, + "IoU.road": 0.8258000183105468, + "IoU.bed ": 0.8711000061035157, + "IoU.windowpane": 0.6136999893188476, + "IoU.grass": 0.7051000213623047, + "IoU.cabinet": 0.5958000183105469, + "IoU.sidewalk": 0.6412000274658203, + "IoU.person": 0.7841000366210937, + "IoU.earth": 0.3591999816894531, + "IoU.door": 0.48529998779296873, + "IoU.table": 0.5543000030517579, + "IoU.mountain": 0.5756000137329101, + "IoU.plant": 0.5288999938964843, + "IoU.curtain": 0.7140000152587891, + "IoU.chair": 0.5356000137329101, + "IoU.car": 0.8308000183105468, + "IoU.water": 
0.5718000030517578, + "IoU.painting": 0.6797000122070312, + "IoU.sofa": 0.647300033569336, + "IoU.shelf": 0.4059000015258789, + "IoU.house": 0.5225, + "IoU.sea": 0.6841000366210938, + "IoU.mirror": 0.6481999969482422, + "IoU.rug": 0.6168000030517579, + "IoU.field": 0.2915999984741211, + "IoU.armchair": 0.42939998626708986, + "IoU.seat": 0.6068999862670899, + "IoU.fence": 0.42450000762939455, + "IoU.desk": 0.47209999084472654, + "IoU.rock": 0.45810001373291015, + "IoU.wardrobe": 0.5413000106811523, + "IoU.lamp": 0.5347000122070312, + "IoU.bathtub": 0.7951000213623047, + "IoU.railing": 0.3295999908447266, + "IoU.cushion": 0.542400016784668, + "IoU.base": 0.28459999084472654, + "IoU.box": 0.2140999984741211, + "IoU.column": 0.4383000183105469, + "IoU.signboard": 0.3252000045776367, + "IoU.chest of drawers": 0.33990001678466797, + "IoU.counter": 0.36389999389648436, + "IoU.sand": 0.37799999237060544, + "IoU.sink": 0.6483000183105468, + "IoU.skyscraper": 0.5377000045776367, + "IoU.fireplace": 0.7208000183105469, + "IoU.refrigerator": 0.7673999786376953, + "IoU.grandstand": 0.505, + "IoU.path": 0.25790000915527345, + "IoU.stairs": 0.2504999923706055, + "IoU.runway": 0.5738000106811524, + "IoU.case": 0.5752999877929688, + "IoU.pool table": 0.9254000091552734, + "IoU.pillow": 0.5195000076293945, + "IoU.screen door": 0.6566999816894531, + "IoU.stairway": 0.3043000030517578, + "IoU.river": 0.19489999771118163, + "IoU.bridge": 0.6268999862670899, + "IoU.bookcase": 0.3425, + "IoU.blind": 0.40049999237060546, + "IoU.coffee table": 0.5593999862670899, + "IoU.toilet": 0.7562000274658203, + "IoU.flower": 0.3308000183105469, + "IoU.book": 0.4243000030517578, + "IoU.hill": 0.1340999984741211, + "IoU.bench": 0.42830001831054687, + "IoU.countertop": 0.5713000106811523, + "IoU.stove": 0.6770999908447266, + "IoU.palm": 0.45549999237060546, + "IoU.kitchen island": 0.43290000915527344, + "IoU.computer": 0.6104999923706055, + "IoU.swivel chair": 0.5054000091552734, + "IoU.boat": 0.7033999633789062, + "IoU.bar": 0.5208000183105469, + "IoU.arcade machine": 0.4340999984741211, + "IoU.hovel": 0.42470001220703124, + "IoU.bus": 0.862699966430664, + "IoU.towel": 0.5702999877929688, + "IoU.light": 0.29100000381469726, + "IoU.truck": 0.19809999465942382, + "IoU.tower": 0.2638999938964844, + "IoU.chandelier": 0.6097999954223633, + "IoU.awning": 0.22260000228881835, + "IoU.streetlight": 0.13279999732971193, + "IoU.booth": 0.45560001373291015, + "IoU.television receiver": 0.6537999725341797, + "IoU.airplane": 0.5802999877929688, + "IoU.dirt track": 0.02259999990463257, + "IoU.apparel": 0.31979999542236326, + "IoU.pole": 0.14989999771118165, + "IoU.land": 0.03880000114440918, + "IoU.bannister": 0.08100000381469727, + "IoU.escalator": 0.5138999938964843, + "IoU.ottoman": 0.45610000610351564, + "IoU.bottle": 0.332599983215332, + "IoU.buffet": 0.30670000076293946, + "IoU.poster": 0.15479999542236328, + "IoU.stage": 0.19200000762939454, + "IoU.van": 0.3843000030517578, + "IoU.ship": 0.256200008392334, + "IoU.fountain": 0.20610000610351562, + "IoU.conveyer belt": 0.6562999725341797, + "IoU.canopy": 0.2340999984741211, + "IoU.washer": 0.6820999908447266, + "IoU.plaything": 0.23569999694824217, + "IoU.swimming pool": 0.6898999786376954, + "IoU.stool": 0.21059999465942383, + "IoU.barrel": 0.48189998626708985, + "IoU.basket": 0.2556999969482422, + "IoU.waterfall": 0.785, + "IoU.tent": 0.9109999847412109, + "IoU.bag": 0.10380000114440918, + "IoU.minibike": 0.5916999816894531, + "IoU.cradle": 0.7576000213623046, + "IoU.oven": 
0.4640000152587891, + "IoU.ball": 0.24450000762939453, + "IoU.food": 0.5588999938964844, + "IoU.step": 0.055500001907348634, + "IoU.tank": 0.5622999954223633, + "IoU.trade name": 0.27149999618530274, + "IoU.microwave": 0.6679000091552735, + "IoU.pot": 0.3659999847412109, + "IoU.animal": 0.5370999908447266, + "IoU.bicycle": 0.5125999832153321, + "IoU.lake": 0.05380000114440918, + "IoU.dishwasher": 0.4741999816894531, + "IoU.screen": 0.46380001068115234, + "IoU.blanket": 0.05989999771118164, + "IoU.sculpture": 0.5261999893188477, + "IoU.hood": 0.5418999862670898, + "IoU.sconce": 0.2996999931335449, + "IoU.vase": 0.27170000076293943, + "IoU.traffic light": 0.22719999313354491, + "IoU.tray": 0.011299999952316285, + "IoU.ashcan": 0.3502000045776367, + "IoU.fan": 0.4190999984741211, + "IoU.pier": 0.33610000610351565, + "IoU.crt screen": 0.004699999988079071, + "IoU.plate": 0.4665999984741211, + "IoU.monitor": 0.12180000305175781, + "IoU.bulletin board": 0.18190000534057618, + "IoU.shower": 0.018799999952316283, + "IoU.radiator": 0.46450000762939453, + "IoU.glass": 0.09619999885559082, + "IoU.clock": 0.198799991607666, + "IoU.flag": 0.3258000183105469, + "Acc.wall": 0.8847000122070312, + "Acc.building": 0.9216999816894531, + "Acc.sky": 0.9743000030517578, + "Acc.floor": 0.9048000335693359, + "Acc.tree": 0.8718000030517579, + "Acc.ceiling": 0.9219999694824219, + "Acc.road": 0.8998999786376953, + "Acc.bed ": 0.9433999633789063, + "Acc.windowpane": 0.765199966430664, + "Acc.grass": 0.8301000213623047, + "Acc.cabinet": 0.6994999694824219, + "Acc.sidewalk": 0.7838999938964843, + "Acc.person": 0.9098000335693359, + "Acc.earth": 0.544099998474121, + "Acc.door": 0.6581999969482422, + "Acc.table": 0.7170999908447265, + "Acc.mountain": 0.6930999755859375, + "Acc.plant": 0.6509999847412109, + "Acc.curtain": 0.8337000274658203, + "Acc.chair": 0.6712999725341797, + "Acc.car": 0.9226999664306641, + "Acc.water": 0.7498000335693359, + "Acc.painting": 0.8393000030517578, + "Acc.sofa": 0.7916000366210938, + "Acc.shelf": 0.5975, + "Acc.house": 0.7126000213623047, + "Acc.sea": 0.8690000152587891, + "Acc.mirror": 0.7444000244140625, + "Acc.rug": 0.7019000244140625, + "Acc.field": 0.44349998474121094, + "Acc.armchair": 0.659000015258789, + "Acc.seat": 0.7748999786376953, + "Acc.fence": 0.5768999862670898, + "Acc.desk": 0.686500015258789, + "Acc.rock": 0.6679000091552735, + "Acc.wardrobe": 0.6852999877929687, + "Acc.lamp": 0.6672000122070313, + "Acc.bathtub": 0.8891000366210937, + "Acc.railing": 0.47150001525878904, + "Acc.cushion": 0.7081999969482422, + "Acc.base": 0.5016999816894532, + "Acc.box": 0.2880999946594238, + "Acc.column": 0.5668999862670898, + "Acc.signboard": 0.4311000061035156, + "Acc.chest of drawers": 0.5761999893188476, + "Acc.counter": 0.4840999984741211, + "Acc.sand": 0.5129000091552735, + "Acc.sink": 0.7245999908447266, + "Acc.skyscraper": 0.6295999908447265, + "Acc.fireplace": 0.8827999877929688, + "Acc.refrigerator": 0.8683000183105469, + "Acc.grandstand": 0.7169000244140625, + "Acc.path": 0.3477999877929687, + "Acc.stairs": 0.3295000076293945, + "Acc.runway": 0.7388999938964844, + "Acc.case": 0.7291999816894531, + "Acc.pool table": 0.9661000061035157, + "Acc.pillow": 0.6293000030517578, + "Acc.screen door": 0.7273999786376953, + "Acc.stairway": 0.40560001373291016, + "Acc.river": 0.3171999931335449, + "Acc.bridge": 0.7462999725341797, + "Acc.bookcase": 0.5916999816894531, + "Acc.blind": 0.4615000152587891, + "Acc.coffee table": 0.7755000305175781, + "Acc.toilet": 0.8861000061035156, + 
"Acc.flower": 0.5352999877929687, + "Acc.book": 0.5756999969482421, + "Acc.hill": 0.23340000152587892, + "Acc.bench": 0.5327999877929688, + "Acc.countertop": 0.7355999755859375, + "Acc.stove": 0.7875, + "Acc.palm": 0.6483000183105468, + "Acc.kitchen island": 0.7062000274658203, + "Acc.computer": 0.7369000244140625, + "Acc.swivel chair": 0.6648999786376953, + "Acc.boat": 0.8241000366210938, + "Acc.bar": 0.6268999862670899, + "Acc.arcade machine": 0.4856000137329102, + "Acc.hovel": 0.49189998626708986, + "Acc.bus": 0.9115000152587891, + "Acc.towel": 0.709800033569336, + "Acc.light": 0.3129999923706055, + "Acc.truck": 0.2680999946594238, + "Acc.tower": 0.3745000076293945, + "Acc.chandelier": 0.7818000030517578, + "Acc.awning": 0.28709999084472654, + "Acc.streetlight": 0.1540999984741211, + "Acc.booth": 0.5059000015258789, + "Acc.television receiver": 0.7606999969482422, + "Acc.airplane": 0.663499984741211, + "Acc.dirt track": 0.03369999885559082, + "Acc.apparel": 0.44599998474121094, + "Acc.pole": 0.19530000686645507, + "Acc.land": 0.05300000190734863, + "Acc.bannister": 0.11859999656677246, + "Acc.escalator": 0.7236000061035156, + "Acc.ottoman": 0.6170000076293946, + "Acc.bottle": 0.5727999877929687, + "Acc.buffet": 0.3390999984741211, + "Acc.poster": 0.20149999618530273, + "Acc.stage": 0.3111000061035156, + "Acc.van": 0.4606999969482422, + "Acc.ship": 0.286299991607666, + "Acc.fountain": 0.21110000610351562, + "Acc.conveyer belt": 0.8876000213623046, + "Acc.canopy": 0.35150001525878904, + "Acc.washer": 0.734800033569336, + "Acc.plaything": 0.37650001525878907, + "Acc.swimming pool": 0.8313999938964843, + "Acc.stool": 0.24579999923706056, + "Acc.barrel": 0.6236000061035156, + "Acc.basket": 0.3071999931335449, + "Acc.waterfall": 0.8680999755859375, + "Acc.tent": 0.9881999969482422, + "Acc.bag": 0.11609999656677246, + "Acc.minibike": 0.6616000366210938, + "Acc.cradle": 0.969800033569336, + "Acc.oven": 0.5788999938964844, + "Acc.ball": 0.2543000030517578, + "Acc.food": 0.6516999816894531, + "Acc.step": 0.06710000038146972, + "Acc.tank": 0.6444000244140625, + "Acc.trade name": 0.30840000152587893, + "Acc.microwave": 0.7491999816894531, + "Acc.pot": 0.4190999984741211, + "Acc.animal": 0.5983000183105469, + "Acc.bicycle": 0.7044999694824219, + "Acc.lake": 0.05460000038146973, + "Acc.dishwasher": 0.5734999847412109, + "Acc.screen": 0.765199966430664, + "Acc.blanket": 0.06539999961853027, + "Acc.sculpture": 0.6611000061035156, + "Acc.hood": 0.6409999847412109, + "Acc.sconce": 0.38119998931884763, + "Acc.vase": 0.35959999084472655, + "Acc.traffic light": 0.33779998779296877, + "Acc.tray": 0.014500000476837159, + "Acc.ashcan": 0.49130001068115237, + "Acc.fan": 0.5045000076293945, + "Acc.pier": 0.47220001220703123, + "Acc.crt screen": 0.013899999856948852, + "Acc.plate": 0.6211000061035157, + "Acc.monitor": 0.21069999694824218, + "Acc.bulletin board": 0.26860000610351564, + "Acc.shower": 0.055199999809265134, + "Acc.radiator": 0.5022000122070313, + "Acc.glass": 0.10189999580383301, + "Acc.clock": 0.21719999313354493, + "Acc.flag": 0.35630001068115236 + } + }, + "117": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8256, + "mIoU": 0.4667, + "mAcc": 0.5726, + "IoU.wall": 0.7672000122070313, + "IoU.building": 0.8244000244140625, + "IoU.sky": 0.9366999816894531, + "IoU.floor": 0.7975, + "IoU.tree": 0.7466000366210938, + "IoU.ceiling": 0.8283000183105469, + "IoU.road": 0.8258000183105468, + "IoU.bed ": 
0.8748000335693359, + "IoU.windowpane": 0.606500015258789, + "IoU.grass": 0.7011000061035156, + "IoU.cabinet": 0.6045999908447266, + "IoU.sidewalk": 0.6418000030517578, + "IoU.person": 0.7894000244140625, + "IoU.earth": 0.3791999816894531, + "IoU.door": 0.48150001525878905, + "IoU.table": 0.5697999954223633, + "IoU.mountain": 0.5641999816894532, + "IoU.plant": 0.5172999954223633, + "IoU.curtain": 0.7181999969482422, + "IoU.chair": 0.539000015258789, + "IoU.car": 0.8320999908447265, + "IoU.water": 0.5395000076293945, + "IoU.painting": 0.6877999877929688, + "IoU.sofa": 0.6547000122070312, + "IoU.shelf": 0.4134000015258789, + "IoU.house": 0.5179999923706055, + "IoU.sea": 0.64, + "IoU.mirror": 0.6598999786376953, + "IoU.rug": 0.6286999893188476, + "IoU.field": 0.3313999938964844, + "IoU.armchair": 0.42020000457763673, + "IoU.seat": 0.6186999893188476, + "IoU.fence": 0.4297999954223633, + "IoU.desk": 0.482400016784668, + "IoU.rock": 0.46669998168945315, + "IoU.wardrobe": 0.5231000137329102, + "IoU.lamp": 0.5393000030517578, + "IoU.bathtub": 0.8362000274658203, + "IoU.railing": 0.3406999969482422, + "IoU.cushion": 0.5586000061035157, + "IoU.base": 0.29329999923706057, + "IoU.box": 0.19920000076293945, + "IoU.column": 0.45849998474121095, + "IoU.signboard": 0.344900016784668, + "IoU.chest of drawers": 0.3768000030517578, + "IoU.counter": 0.3840999984741211, + "IoU.sand": 0.3738999938964844, + "IoU.sink": 0.6630000305175782, + "IoU.skyscraper": 0.535, + "IoU.fireplace": 0.7283999633789062, + "IoU.refrigerator": 0.7631999969482421, + "IoU.grandstand": 0.475, + "IoU.path": 0.26239999771118167, + "IoU.stairs": 0.25360000610351563, + "IoU.runway": 0.5720000076293945, + "IoU.case": 0.5822999954223633, + "IoU.pool table": 0.9295999908447266, + "IoU.pillow": 0.5309999847412109, + "IoU.screen door": 0.6615000152587891, + "IoU.stairway": 0.32529998779296876, + "IoU.river": 0.16049999237060547, + "IoU.bridge": 0.6711000061035156, + "IoU.bookcase": 0.36470001220703124, + "IoU.blind": 0.41259998321533203, + "IoU.coffee table": 0.5822999954223633, + "IoU.toilet": 0.7730000305175782, + "IoU.flower": 0.31739999771118166, + "IoU.book": 0.40950000762939454, + "IoU.hill": 0.07139999866485595, + "IoU.bench": 0.415, + "IoU.countertop": 0.5884999847412109, + "IoU.stove": 0.7204000091552735, + "IoU.palm": 0.4765999984741211, + "IoU.kitchen island": 0.40330001831054685, + "IoU.computer": 0.5991999816894531, + "IoU.swivel chair": 0.4768000030517578, + "IoU.boat": 0.7255000305175782, + "IoU.bar": 0.46299999237060546, + "IoU.arcade machine": 0.42759998321533205, + "IoU.hovel": 0.2904000091552734, + "IoU.bus": 0.8927999877929688, + "IoU.towel": 0.625099983215332, + "IoU.light": 0.25129999160766603, + "IoU.truck": 0.16870000839233398, + "IoU.tower": 0.258700008392334, + "IoU.chandelier": 0.6161999893188477, + "IoU.awning": 0.2297999954223633, + "IoU.streetlight": 0.138100004196167, + "IoU.booth": 0.3925, + "IoU.television receiver": 0.6505999755859375, + "IoU.airplane": 0.565, + "IoU.dirt track": 0.004900000095367432, + "IoU.apparel": 0.32369998931884764, + "IoU.pole": 0.12579999923706053, + "IoU.land": 0.057699999809265136, + "IoU.bannister": 0.03319999933242798, + "IoU.escalator": 0.4231999969482422, + "IoU.ottoman": 0.4779999923706055, + "IoU.bottle": 0.3443000030517578, + "IoU.buffet": 0.49639999389648437, + "IoU.poster": 0.2184000015258789, + "IoU.stage": 0.151899995803833, + "IoU.van": 0.40970001220703123, + "IoU.ship": 0.5247000122070312, + "IoU.fountain": 0.21069999694824218, + "IoU.conveyer belt": 
0.6926000213623047, + "IoU.canopy": 0.21040000915527343, + "IoU.washer": 0.7197000122070313, + "IoU.plaything": 0.27850000381469725, + "IoU.swimming pool": 0.7608000183105469, + "IoU.stool": 0.29389999389648436, + "IoU.barrel": 0.4766999816894531, + "IoU.basket": 0.2427000045776367, + "IoU.waterfall": 0.7455999755859375, + "IoU.tent": 0.8901000213623047, + "IoU.bag": 0.09529999732971191, + "IoU.minibike": 0.6975, + "IoU.cradle": 0.7897000122070312, + "IoU.oven": 0.4002000045776367, + "IoU.ball": 0.3631999969482422, + "IoU.food": 0.5054000091552734, + "IoU.step": 0.05389999866485596, + "IoU.tank": 0.5634000015258789, + "IoU.trade name": 0.25409999847412107, + "IoU.microwave": 0.7837999725341797, + "IoU.pot": 0.39610000610351564, + "IoU.animal": 0.5711000061035156, + "IoU.bicycle": 0.5729999923706055, + "IoU.lake": 0.003799999952316284, + "IoU.dishwasher": 0.5277999877929688, + "IoU.screen": 0.5681000137329102, + "IoU.blanket": 0.11800000190734863, + "IoU.sculpture": 0.6140999984741211, + "IoU.hood": 0.45360000610351564, + "IoU.sconce": 0.2627000045776367, + "IoU.vase": 0.2865999984741211, + "IoU.traffic light": 0.22370000839233398, + "IoU.tray": 0.016399999856948854, + "IoU.ashcan": 0.33189998626708983, + "IoU.fan": 0.3661999893188477, + "IoU.pier": 0.24559999465942384, + "IoU.crt screen": 0.003799999952316284, + "IoU.plate": 0.49209999084472655, + "IoU.monitor": 0.14699999809265138, + "IoU.bulletin board": 0.2984000015258789, + "IoU.shower": 0.009200000166893006, + "IoU.radiator": 0.5020999908447266, + "IoU.glass": 0.08170000076293946, + "IoU.clock": 0.22049999237060547, + "IoU.flag": 0.38369998931884763, + "Acc.wall": 0.8923999786376953, + "Acc.building": 0.9212000274658203, + "Acc.sky": 0.9768000030517578, + "Acc.floor": 0.9, + "Acc.tree": 0.8725, + "Acc.ceiling": 0.9193000030517579, + "Acc.road": 0.8966999816894531, + "Acc.bed ": 0.9525, + "Acc.windowpane": 0.7512999725341797, + "Acc.grass": 0.8194999694824219, + "Acc.cabinet": 0.7187000274658203, + "Acc.sidewalk": 0.7880000305175782, + "Acc.person": 0.9072000122070313, + "Acc.earth": 0.5520999908447266, + "Acc.door": 0.6437999725341796, + "Acc.table": 0.725, + "Acc.mountain": 0.6797000122070312, + "Acc.plant": 0.660199966430664, + "Acc.curtain": 0.8344000244140625, + "Acc.chair": 0.6743000030517579, + "Acc.car": 0.9222000122070313, + "Acc.water": 0.7055000305175781, + "Acc.painting": 0.82, + "Acc.sofa": 0.8145999908447266, + "Acc.shelf": 0.5952000045776367, + "Acc.house": 0.7106999969482422, + "Acc.sea": 0.8183000183105469, + "Acc.mirror": 0.7419999694824219, + "Acc.rug": 0.7030000305175781, + "Acc.field": 0.5018999862670899, + "Acc.armchair": 0.610999984741211, + "Acc.seat": 0.8194999694824219, + "Acc.fence": 0.5756000137329101, + "Acc.desk": 0.6987999725341797, + "Acc.rock": 0.6776999664306641, + "Acc.wardrobe": 0.6795999908447266, + "Acc.lamp": 0.6566000366210938, + "Acc.bathtub": 0.9038999938964843, + "Acc.railing": 0.47630001068115235, + "Acc.cushion": 0.7284999847412109, + "Acc.base": 0.5125999832153321, + "Acc.box": 0.25360000610351563, + "Acc.column": 0.5734000015258789, + "Acc.signboard": 0.4545999908447266, + "Acc.chest of drawers": 0.5559999847412109, + "Acc.counter": 0.5131000137329101, + "Acc.sand": 0.5022999954223633, + "Acc.sink": 0.7584999847412109, + "Acc.skyscraper": 0.6293000030517578, + "Acc.fireplace": 0.8920999908447266, + "Acc.refrigerator": 0.8433000183105469, + "Acc.grandstand": 0.750999984741211, + "Acc.path": 0.3609000015258789, + "Acc.stairs": 0.34599998474121096, + "Acc.runway": 0.7347000122070313, + 
"Acc.case": 0.7270999908447265, + "Acc.pool table": 0.9594999694824219, + "Acc.pillow": 0.6052999877929688, + "Acc.screen door": 0.7240000152587891, + "Acc.stairway": 0.4229000091552734, + "Acc.river": 0.3745000076293945, + "Acc.bridge": 0.7912999725341797, + "Acc.bookcase": 0.6236000061035156, + "Acc.blind": 0.47580001831054686, + "Acc.coffee table": 0.7466999816894532, + "Acc.toilet": 0.8833999633789062, + "Acc.flower": 0.4988999938964844, + "Acc.book": 0.5495999908447265, + "Acc.hill": 0.10960000038146972, + "Acc.bench": 0.5083000183105468, + "Acc.countertop": 0.7383000183105469, + "Acc.stove": 0.7969000244140625, + "Acc.palm": 0.6641999816894532, + "Acc.kitchen island": 0.5620000076293945, + "Acc.computer": 0.7229000091552734, + "Acc.swivel chair": 0.610999984741211, + "Acc.boat": 0.8120999908447266, + "Acc.bar": 0.5634000015258789, + "Acc.arcade machine": 0.46849998474121096, + "Acc.hovel": 0.3445999908447266, + "Acc.bus": 0.941500015258789, + "Acc.towel": 0.7484999847412109, + "Acc.light": 0.2625, + "Acc.truck": 0.22549999237060547, + "Acc.tower": 0.3725, + "Acc.chandelier": 0.7551999664306641, + "Acc.awning": 0.28280000686645507, + "Acc.streetlight": 0.1625, + "Acc.booth": 0.44979999542236326, + "Acc.television receiver": 0.7351999664306641, + "Acc.airplane": 0.6505000305175781, + "Acc.dirt track": 0.008399999737739562, + "Acc.apparel": 0.4375, + "Acc.pole": 0.16110000610351563, + "Acc.land": 0.07309999942779541, + "Acc.bannister": 0.043600001335144044, + "Acc.escalator": 0.5220999908447266, + "Acc.ottoman": 0.6136000061035156, + "Acc.bottle": 0.5652000045776367, + "Acc.buffet": 0.5236000061035156, + "Acc.poster": 0.34709999084472654, + "Acc.stage": 0.25540000915527344, + "Acc.van": 0.4791999816894531, + "Acc.ship": 0.5520000076293945, + "Acc.fountain": 0.21299999237060546, + "Acc.conveyer belt": 0.9002999877929687, + "Acc.canopy": 0.3163999938964844, + "Acc.washer": 0.7468000030517579, + "Acc.plaything": 0.43130001068115237, + "Acc.swimming pool": 0.8523000335693359, + "Acc.stool": 0.332599983215332, + "Acc.barrel": 0.6368999862670899, + "Acc.basket": 0.30559999465942383, + "Acc.waterfall": 0.8283000183105469, + "Acc.tent": 0.9812000274658204, + "Acc.bag": 0.11149999618530274, + "Acc.minibike": 0.782300033569336, + "Acc.cradle": 0.9619000244140625, + "Acc.oven": 0.45540000915527346, + "Acc.ball": 0.3833000183105469, + "Acc.food": 0.6145000076293945, + "Acc.step": 0.07019999980926514, + "Acc.tank": 0.6047999954223633, + "Acc.trade name": 0.283799991607666, + "Acc.microwave": 0.8836000061035156, + "Acc.pot": 0.44619998931884763, + "Acc.animal": 0.6108000183105469, + "Acc.bicycle": 0.6873999786376953, + "Acc.lake": 0.0038999998569488525, + "Acc.dishwasher": 0.6645999908447265, + "Acc.screen": 0.8801999664306641, + "Acc.blanket": 0.12789999961853027, + "Acc.sculpture": 0.7691000366210937, + "Acc.hood": 0.5984000015258789, + "Acc.sconce": 0.3281999969482422, + "Acc.vase": 0.3908000183105469, + "Acc.traffic light": 0.33220001220703127, + "Acc.tray": 0.01990000009536743, + "Acc.ashcan": 0.46130001068115234, + "Acc.fan": 0.4118000030517578, + "Acc.pier": 0.42529998779296874, + "Acc.crt screen": 0.008199999928474427, + "Acc.plate": 0.6445999908447265, + "Acc.monitor": 0.265, + "Acc.bulletin board": 0.40919998168945315, + "Acc.shower": 0.05, + "Acc.radiator": 0.5466999816894531, + "Acc.glass": 0.08520000457763671, + "Acc.clock": 0.2340999984741211, + "Acc.flag": 0.4184999847412109 + } + }, + "118": { + "config": 
"configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8251999999999999, + "mIoU": 0.46149999999999997, + "mAcc": 0.5608, + "IoU.wall": 0.7637000274658203, + "IoU.building": 0.8241999816894531, + "IoU.sky": 0.9362999725341797, + "IoU.floor": 0.7988999938964844, + "IoU.tree": 0.7443000030517578, + "IoU.ceiling": 0.8298999786376953, + "IoU.road": 0.83, + "IoU.bed ": 0.8833999633789062, + "IoU.windowpane": 0.6047000122070313, + "IoU.grass": 0.6925, + "IoU.cabinet": 0.6138999938964844, + "IoU.sidewalk": 0.6441000366210937, + "IoU.person": 0.7904000091552734, + "IoU.earth": 0.3636000061035156, + "IoU.door": 0.48020000457763673, + "IoU.table": 0.5766999816894531, + "IoU.mountain": 0.5702000045776368, + "IoU.plant": 0.5145999908447265, + "IoU.curtain": 0.7144000244140625, + "IoU.chair": 0.5252000045776367, + "IoU.car": 0.8330999755859375, + "IoU.water": 0.5384000015258789, + "IoU.painting": 0.6983000183105469, + "IoU.sofa": 0.6594999694824218, + "IoU.shelf": 0.41580001831054686, + "IoU.house": 0.47970001220703123, + "IoU.sea": 0.6345000076293945, + "IoU.mirror": 0.6623999786376953, + "IoU.rug": 0.6188999938964844, + "IoU.field": 0.3015999984741211, + "IoU.armchair": 0.4186000061035156, + "IoU.seat": 0.6388000106811523, + "IoU.fence": 0.3731999969482422, + "IoU.desk": 0.5084999847412109, + "IoU.rock": 0.43810001373291013, + "IoU.wardrobe": 0.5008000183105469, + "IoU.lamp": 0.5468000030517578, + "IoU.bathtub": 0.8351999664306641, + "IoU.railing": 0.36, + "IoU.cushion": 0.5618000030517578, + "IoU.base": 0.3078000068664551, + "IoU.box": 0.21260000228881837, + "IoU.column": 0.4622000122070313, + "IoU.signboard": 0.3452999877929688, + "IoU.chest of drawers": 0.35770000457763673, + "IoU.counter": 0.37439998626708987, + "IoU.sand": 0.41119998931884766, + "IoU.sink": 0.6701000213623047, + "IoU.skyscraper": 0.5488999938964844, + "IoU.fireplace": 0.7388999938964844, + "IoU.refrigerator": 0.768499984741211, + "IoU.grandstand": 0.4768000030517578, + "IoU.path": 0.24950000762939453, + "IoU.stairs": 0.19190000534057616, + "IoU.runway": 0.5920000076293945, + "IoU.case": 0.5606000137329101, + "IoU.pool table": 0.9212000274658203, + "IoU.pillow": 0.5381000137329102, + "IoU.screen door": 0.720999984741211, + "IoU.stairway": 0.2865999984741211, + "IoU.river": 0.19170000076293944, + "IoU.bridge": 0.5195000076293945, + "IoU.bookcase": 0.3622999954223633, + "IoU.blind": 0.3975, + "IoU.coffee table": 0.5972999954223632, + "IoU.toilet": 0.8129000091552734, + "IoU.flower": 0.29450000762939454, + "IoU.book": 0.4388000106811523, + "IoU.hill": 0.0584000015258789, + "IoU.bench": 0.4040999984741211, + "IoU.countertop": 0.576500015258789, + "IoU.stove": 0.6916999816894531, + "IoU.palm": 0.49630001068115237, + "IoU.kitchen island": 0.41700000762939454, + "IoU.computer": 0.6163000106811524, + "IoU.swivel chair": 0.44729999542236326, + "IoU.boat": 0.7301000213623047, + "IoU.bar": 0.409900016784668, + "IoU.arcade machine": 0.5127999877929688, + "IoU.hovel": 0.21540000915527344, + "IoU.bus": 0.8833000183105468, + "IoU.towel": 0.5884999847412109, + "IoU.light": 0.24700000762939453, + "IoU.truck": 0.1843000030517578, + "IoU.tower": 0.1834000015258789, + "IoU.chandelier": 0.6329999923706054, + "IoU.awning": 0.21760000228881837, + "IoU.streetlight": 0.1397999954223633, + "IoU.booth": 0.36389999389648436, + "IoU.television receiver": 0.6780000305175782, + "IoU.airplane": 0.5602999877929687, + "IoU.dirt track": 0.20469999313354492, + "IoU.apparel": 
0.35939998626708985, + "IoU.pole": 0.116899995803833, + "IoU.land": 0.034200000762939456, + "IoU.bannister": 0.03589999914169312, + "IoU.escalator": 0.567400016784668, + "IoU.ottoman": 0.4538999938964844, + "IoU.bottle": 0.33529998779296877, + "IoU.buffet": 0.42150001525878905, + "IoU.poster": 0.18799999237060547, + "IoU.stage": 0.17120000839233399, + "IoU.van": 0.42270000457763673, + "IoU.ship": 0.08829999923706054, + "IoU.fountain": 0.11239999771118164, + "IoU.conveyer belt": 0.6781999969482422, + "IoU.canopy": 0.21360000610351562, + "IoU.washer": 0.7252999877929688, + "IoU.plaything": 0.26110000610351564, + "IoU.swimming pool": 0.7809999847412109, + "IoU.stool": 0.33880001068115234, + "IoU.barrel": 0.5497000122070312, + "IoU.basket": 0.23989999771118165, + "IoU.waterfall": 0.504900016784668, + "IoU.tent": 0.8905000305175781, + "IoU.bag": 0.10850000381469727, + "IoU.minibike": 0.6827999877929688, + "IoU.cradle": 0.7904000091552734, + "IoU.oven": 0.19329999923706054, + "IoU.ball": 0.45240001678466796, + "IoU.food": 0.48709999084472655, + "IoU.step": 0.08170000076293946, + "IoU.tank": 0.5452000045776367, + "IoU.trade name": 0.2525, + "IoU.microwave": 0.504000015258789, + "IoU.pot": 0.3906999969482422, + "IoU.animal": 0.5727000045776367, + "IoU.bicycle": 0.5693000030517578, + "IoU.lake": 0.12760000228881835, + "IoU.dishwasher": 0.575999984741211, + "IoU.screen": 0.5590999984741211, + "IoU.blanket": 0.06579999923706055, + "IoU.sculpture": 0.6090999984741211, + "IoU.hood": 0.4804000091552734, + "IoU.sconce": 0.2659000015258789, + "IoU.vase": 0.3, + "IoU.traffic light": 0.2306999969482422, + "IoU.tray": 0.034500000476837156, + "IoU.ashcan": 0.36580001831054687, + "IoU.fan": 0.3777000045776367, + "IoU.pier": 0.2306999969482422, + "IoU.crt screen": 0.01440000057220459, + "IoU.plate": 0.5095000076293945, + "IoU.monitor": 0.40599998474121096, + "IoU.bulletin board": 0.35569999694824217, + "IoU.shower": 0.03640000104904175, + "IoU.radiator": 0.4956999969482422, + "IoU.glass": 0.04429999828338623, + "IoU.clock": 0.18260000228881837, + "IoU.flag": 0.30889999389648437, + "Acc.wall": 0.9012000274658203, + "Acc.building": 0.9255999755859375, + "Acc.sky": 0.9769000244140625, + "Acc.floor": 0.9087000274658203, + "Acc.tree": 0.8776000213623046, + "Acc.ceiling": 0.9268000030517578, + "Acc.road": 0.9095999908447265, + "Acc.bed ": 0.9529000091552734, + "Acc.windowpane": 0.7423999786376954, + "Acc.grass": 0.8229000091552734, + "Acc.cabinet": 0.725, + "Acc.sidewalk": 0.7738999938964843, + "Acc.person": 0.9083999633789063, + "Acc.earth": 0.5238999938964843, + "Acc.door": 0.6247000122070312, + "Acc.table": 0.7330000305175781, + "Acc.mountain": 0.7120999908447265, + "Acc.plant": 0.6365999984741211, + "Acc.curtain": 0.8304000091552735, + "Acc.chair": 0.6372999954223633, + "Acc.car": 0.92, + "Acc.water": 0.7106999969482422, + "Acc.painting": 0.8230999755859375, + "Acc.sofa": 0.8568000030517579, + "Acc.shelf": 0.6143999862670898, + "Acc.house": 0.6136999893188476, + "Acc.sea": 0.8120999908447266, + "Acc.mirror": 0.7308999633789063, + "Acc.rug": 0.7093000030517578, + "Acc.field": 0.47720001220703123, + "Acc.armchair": 0.5938999938964844, + "Acc.seat": 0.8238999938964844, + "Acc.fence": 0.5025, + "Acc.desk": 0.7025, + "Acc.rock": 0.5831000137329102, + "Acc.wardrobe": 0.6036000061035156, + "Acc.lamp": 0.6404000091552734, + "Acc.bathtub": 0.9062000274658203, + "Acc.railing": 0.4988000106811523, + "Acc.cushion": 0.6983000183105469, + "Acc.base": 0.48069999694824217, + "Acc.box": 0.26979999542236327, + "Acc.column": 
0.5633000183105469, + "Acc.signboard": 0.42950000762939455, + "Acc.chest of drawers": 0.5413999938964844, + "Acc.counter": 0.5159999847412109, + "Acc.sand": 0.5829000091552734, + "Acc.sink": 0.7525, + "Acc.skyscraper": 0.6209000015258789, + "Acc.fireplace": 0.8522000122070312, + "Acc.refrigerator": 0.8184999847412109, + "Acc.grandstand": 0.7341000366210938, + "Acc.path": 0.35369998931884766, + "Acc.stairs": 0.25479999542236326, + "Acc.runway": 0.7597000122070312, + "Acc.case": 0.7338999938964844, + "Acc.pool table": 0.9583999633789062, + "Acc.pillow": 0.6202000045776367, + "Acc.screen door": 0.8079000091552735, + "Acc.stairway": 0.42830001831054687, + "Acc.river": 0.43279998779296874, + "Acc.bridge": 0.587599983215332, + "Acc.bookcase": 0.6, + "Acc.blind": 0.46240001678466797, + "Acc.coffee table": 0.7523999786376954, + "Acc.toilet": 0.8819999694824219, + "Acc.flower": 0.44849998474121094, + "Acc.book": 0.5688999938964844, + "Acc.hill": 0.08369999885559082, + "Acc.bench": 0.4752999877929687, + "Acc.countertop": 0.7080000305175781, + "Acc.stove": 0.7755000305175781, + "Acc.palm": 0.6758000183105469, + "Acc.kitchen island": 0.5804000091552735, + "Acc.computer": 0.7113999938964843, + "Acc.swivel chair": 0.6127000045776367, + "Acc.boat": 0.8037999725341797, + "Acc.bar": 0.46549999237060546, + "Acc.arcade machine": 0.550099983215332, + "Acc.hovel": 0.2275, + "Acc.bus": 0.9266999816894531, + "Acc.towel": 0.7223000335693359, + "Acc.light": 0.2643000030517578, + "Acc.truck": 0.23370000839233399, + "Acc.tower": 0.316200008392334, + "Acc.chandelier": 0.7783000183105468, + "Acc.awning": 0.24850000381469728, + "Acc.streetlight": 0.16430000305175782, + "Acc.booth": 0.38610000610351564, + "Acc.television receiver": 0.745, + "Acc.airplane": 0.6302000045776367, + "Acc.dirt track": 0.2781999969482422, + "Acc.apparel": 0.4868000030517578, + "Acc.pole": 0.1452000045776367, + "Acc.land": 0.041399998664855955, + "Acc.bannister": 0.042600002288818356, + "Acc.escalator": 0.7175, + "Acc.ottoman": 0.5718999862670898, + "Acc.bottle": 0.5045000076293945, + "Acc.buffet": 0.47900001525878905, + "Acc.poster": 0.3502000045776367, + "Acc.stage": 0.23030000686645508, + "Acc.van": 0.49259998321533205, + "Acc.ship": 0.09289999961853028, + "Acc.fountain": 0.11350000381469727, + "Acc.conveyer belt": 0.8948000335693359, + "Acc.canopy": 0.2578000068664551, + "Acc.washer": 0.7395999908447266, + "Acc.plaything": 0.3479000091552734, + "Acc.swimming pool": 0.8783000183105468, + "Acc.stool": 0.3813999938964844, + "Acc.barrel": 0.6466999816894531, + "Acc.basket": 0.29139999389648436, + "Acc.waterfall": 0.5795999908447266, + "Acc.tent": 0.9755000305175782, + "Acc.bag": 0.12390000343322755, + "Acc.minibike": 0.7787000274658203, + "Acc.cradle": 0.9558999633789063, + "Acc.oven": 0.4284999847412109, + "Acc.ball": 0.48639999389648436, + "Acc.food": 0.5679000091552734, + "Acc.step": 0.09970000267028808, + "Acc.tank": 0.638400001525879, + "Acc.trade name": 0.2815999984741211, + "Acc.microwave": 0.5577999877929688, + "Acc.pot": 0.4284000015258789, + "Acc.animal": 0.5972000122070312, + "Acc.bicycle": 0.6659999847412109, + "Acc.lake": 0.13020000457763672, + "Acc.dishwasher": 0.645, + "Acc.screen": 0.7404000091552735, + "Acc.blanket": 0.0696999979019165, + "Acc.sculpture": 0.6969000244140625, + "Acc.hood": 0.6402999877929687, + "Acc.sconce": 0.3213999938964844, + "Acc.vase": 0.365, + "Acc.traffic light": 0.3258000183105469, + "Acc.tray": 0.04300000190734863, + "Acc.ashcan": 0.5047999954223633, + "Acc.fan": 0.42779998779296874, + "Acc.pier": 
0.40189998626708984, + "Acc.crt screen": 0.021500000953674315, + "Acc.plate": 0.632400016784668, + "Acc.monitor": 0.6569999694824219, + "Acc.bulletin board": 0.42470001220703124, + "Acc.shower": 0.047699999809265134, + "Acc.radiator": 0.5368999862670898, + "Acc.glass": 0.04559999942779541, + "Acc.clock": 0.19190000534057616, + "Acc.flag": 0.3270000076293945 + } + }, + "119": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8228, + "mIoU": 0.45289999999999997, + "mAcc": 0.5423, + "IoU.wall": 0.7620999908447266, + "IoU.building": 0.8173999786376953, + "IoU.sky": 0.9290000152587891, + "IoU.floor": 0.7918000030517578, + "IoU.tree": 0.7281999969482422, + "IoU.ceiling": 0.8266000366210937, + "IoU.road": 0.8211000061035156, + "IoU.bed ": 0.8761000061035156, + "IoU.windowpane": 0.6079000091552734, + "IoU.grass": 0.6804000091552734, + "IoU.cabinet": 0.6172000122070312, + "IoU.sidewalk": 0.6356000137329102, + "IoU.person": 0.7887999725341797, + "IoU.earth": 0.37509998321533206, + "IoU.door": 0.4622000122070313, + "IoU.table": 0.5718999862670898, + "IoU.mountain": 0.5634000015258789, + "IoU.plant": 0.5156999969482422, + "IoU.curtain": 0.7095999908447266, + "IoU.chair": 0.5202000045776367, + "IoU.car": 0.8295999908447266, + "IoU.water": 0.5543000030517579, + "IoU.painting": 0.6937999725341797, + "IoU.sofa": 0.6493000030517578, + "IoU.shelf": 0.41819999694824217, + "IoU.house": 0.43529998779296875, + "IoU.sea": 0.6504000091552734, + "IoU.mirror": 0.6455999755859375, + "IoU.rug": 0.6079000091552734, + "IoU.field": 0.3046999931335449, + "IoU.armchair": 0.4102999877929687, + "IoU.seat": 0.6136999893188476, + "IoU.fence": 0.3686000061035156, + "IoU.desk": 0.4943000030517578, + "IoU.rock": 0.4463999938964844, + "IoU.wardrobe": 0.525, + "IoU.lamp": 0.5229999923706055, + "IoU.bathtub": 0.8263999938964843, + "IoU.railing": 0.3522999954223633, + "IoU.cushion": 0.5386000061035157, + "IoU.base": 0.28049999237060547, + "IoU.box": 0.22239999771118163, + "IoU.column": 0.44459999084472657, + "IoU.signboard": 0.32029998779296875, + "IoU.chest of drawers": 0.3791999816894531, + "IoU.counter": 0.3865999984741211, + "IoU.sand": 0.4490000152587891, + "IoU.sink": 0.6665000152587891, + "IoU.skyscraper": 0.5309000015258789, + "IoU.fireplace": 0.7301999664306641, + "IoU.refrigerator": 0.7280999755859375, + "IoU.grandstand": 0.4622000122070313, + "IoU.path": 0.25760000228881835, + "IoU.stairs": 0.12939999580383302, + "IoU.runway": 0.5888999938964844, + "IoU.case": 0.5086000061035156, + "IoU.pool table": 0.9233000183105469, + "IoU.pillow": 0.47200000762939454, + "IoU.screen door": 0.714800033569336, + "IoU.stairway": 0.2543000030517578, + "IoU.river": 0.1477000045776367, + "IoU.bridge": 0.6593000030517578, + "IoU.bookcase": 0.3288999938964844, + "IoU.blind": 0.41049999237060547, + "IoU.coffee table": 0.5984000015258789, + "IoU.toilet": 0.8186000061035156, + "IoU.flower": 0.3192000007629395, + "IoU.book": 0.40799999237060547, + "IoU.hill": 0.050399999618530276, + "IoU.bench": 0.40650001525878904, + "IoU.countertop": 0.5668999862670898, + "IoU.stove": 0.6880999755859375, + "IoU.palm": 0.44729999542236326, + "IoU.kitchen island": 0.44740001678466795, + "IoU.computer": 0.617599983215332, + "IoU.swivel chair": 0.4697000122070312, + "IoU.boat": 0.7195999908447266, + "IoU.bar": 0.45060001373291014, + "IoU.arcade machine": 0.3406999969482422, + "IoU.hovel": 0.15289999961853026, + "IoU.bus": 0.8733999633789062, + "IoU.towel": 
0.5979000091552734, + "IoU.light": 0.24700000762939453, + "IoU.truck": 0.15989999771118163, + "IoU.tower": 0.163799991607666, + "IoU.chandelier": 0.5961999893188477, + "IoU.awning": 0.20120000839233398, + "IoU.streetlight": 0.14640000343322754, + "IoU.booth": 0.35639999389648436, + "IoU.television receiver": 0.6706999969482422, + "IoU.airplane": 0.5645999908447266, + "IoU.dirt track": 0.16959999084472657, + "IoU.apparel": 0.313700008392334, + "IoU.pole": 0.0703000020980835, + "IoU.land": 0.06940000057220459, + "IoU.bannister": 0.06, + "IoU.escalator": 0.485, + "IoU.ottoman": 0.40130001068115234, + "IoU.bottle": 0.33380001068115234, + "IoU.buffet": 0.43150001525878906, + "IoU.poster": 0.12289999961853028, + "IoU.stage": 0.16790000915527345, + "IoU.van": 0.3813999938964844, + "IoU.ship": 0.09720000267028808, + "IoU.fountain": 0.18780000686645507, + "IoU.conveyer belt": 0.700999984741211, + "IoU.canopy": 0.18600000381469728, + "IoU.washer": 0.6793000030517579, + "IoU.plaything": 0.27600000381469725, + "IoU.swimming pool": 0.7558000183105469, + "IoU.stool": 0.3338999938964844, + "IoU.barrel": 0.5536999893188477, + "IoU.basket": 0.29450000762939454, + "IoU.waterfall": 0.49080001831054687, + "IoU.tent": 0.8933999633789063, + "IoU.bag": 0.12949999809265136, + "IoU.minibike": 0.5893000030517578, + "IoU.cradle": 0.7852999877929687, + "IoU.oven": 0.21360000610351562, + "IoU.ball": 0.4484000015258789, + "IoU.food": 0.48400001525878905, + "IoU.step": 0.0665999984741211, + "IoU.tank": 0.5327000045776367, + "IoU.trade name": 0.19350000381469726, + "IoU.microwave": 0.6790000152587891, + "IoU.pot": 0.40380001068115234, + "IoU.animal": 0.5506000137329101, + "IoU.bicycle": 0.51, + "IoU.lake": 0.04630000114440918, + "IoU.dishwasher": 0.5493000030517579, + "IoU.screen": 0.601599998474121, + "IoU.blanket": 0.09220000267028809, + "IoU.sculpture": 0.5884000015258789, + "IoU.hood": 0.4684000015258789, + "IoU.sconce": 0.2525, + "IoU.vase": 0.2978000068664551, + "IoU.traffic light": 0.22920000076293945, + "IoU.tray": 0.04829999923706055, + "IoU.ashcan": 0.36200000762939455, + "IoU.fan": 0.38790000915527345, + "IoU.pier": 0.2603000068664551, + "IoU.crt screen": 0.0044999998807907105, + "IoU.plate": 0.47569999694824217, + "IoU.monitor": 0.27920000076293944, + "IoU.bulletin board": 0.19899999618530273, + "IoU.shower": 0.023399999141693117, + "IoU.radiator": 0.5054999923706055, + "IoU.glass": 0.08449999809265137, + "IoU.clock": 0.2231999969482422, + "IoU.flag": 0.35560001373291017, + "Acc.wall": 0.8994999694824218, + "Acc.building": 0.9393000030517578, + "Acc.sky": 0.9833999633789062, + "Acc.floor": 0.9181999969482422, + "Acc.tree": 0.8311000061035156, + "Acc.ceiling": 0.9309999847412109, + "Acc.road": 0.909000015258789, + "Acc.bed ": 0.9537999725341797, + "Acc.windowpane": 0.7763999938964844, + "Acc.grass": 0.8419999694824218, + "Acc.cabinet": 0.7505000305175781, + "Acc.sidewalk": 0.7776999664306641, + "Acc.person": 0.8801000213623047, + "Acc.earth": 0.5383000183105469, + "Acc.door": 0.5711000061035156, + "Acc.table": 0.7562999725341797, + "Acc.mountain": 0.7206999969482422, + "Acc.plant": 0.6033000183105469, + "Acc.curtain": 0.830999984741211, + "Acc.chair": 0.6136000061035156, + "Acc.car": 0.895199966430664, + "Acc.water": 0.7529000091552734, + "Acc.painting": 0.8012000274658203, + "Acc.sofa": 0.8612999725341797, + "Acc.shelf": 0.6079999923706054, + "Acc.house": 0.5425, + "Acc.sea": 0.8413999938964843, + "Acc.mirror": 0.7083999633789062, + "Acc.rug": 0.6904000091552734, + "Acc.field": 0.46049999237060546, + 
"Acc.armchair": 0.504900016784668, + "Acc.seat": 0.8062999725341797, + "Acc.fence": 0.4840999984741211, + "Acc.desk": 0.644000015258789, + "Acc.rock": 0.5758000183105468, + "Acc.wardrobe": 0.6619999694824219, + "Acc.lamp": 0.6006999969482422, + "Acc.bathtub": 0.8779000091552734, + "Acc.railing": 0.4988999938964844, + "Acc.cushion": 0.6588999938964843, + "Acc.base": 0.37729999542236325, + "Acc.box": 0.27510000228881837, + "Acc.column": 0.5206000137329102, + "Acc.signboard": 0.4190999984741211, + "Acc.chest of drawers": 0.5352999877929687, + "Acc.counter": 0.542400016784668, + "Acc.sand": 0.5788000106811524, + "Acc.sink": 0.7390000152587891, + "Acc.skyscraper": 0.591500015258789, + "Acc.fireplace": 0.8233999633789062, + "Acc.refrigerator": 0.7609999847412109, + "Acc.grandstand": 0.7263999938964844, + "Acc.path": 0.3511000061035156, + "Acc.stairs": 0.16350000381469726, + "Acc.runway": 0.7787000274658203, + "Acc.case": 0.6941000366210938, + "Acc.pool table": 0.9573000335693359, + "Acc.pillow": 0.5386000061035157, + "Acc.screen door": 0.7883999633789063, + "Acc.stairway": 0.42009998321533204, + "Acc.river": 0.276299991607666, + "Acc.bridge": 0.7677999877929688, + "Acc.bookcase": 0.5279000091552735, + "Acc.blind": 0.4565000152587891, + "Acc.coffee table": 0.7493000030517578, + "Acc.toilet": 0.8655000305175782, + "Acc.flower": 0.4784000015258789, + "Acc.book": 0.5652999877929688, + "Acc.hill": 0.0705999994277954, + "Acc.bench": 0.44740001678466795, + "Acc.countertop": 0.7276000213623047, + "Acc.stove": 0.7558999633789063, + "Acc.palm": 0.5461999893188476, + "Acc.kitchen island": 0.6630999755859375, + "Acc.computer": 0.7031999969482422, + "Acc.swivel chair": 0.5933000183105469, + "Acc.boat": 0.7818000030517578, + "Acc.bar": 0.5086999893188476, + "Acc.arcade machine": 0.35970001220703124, + "Acc.hovel": 0.16030000686645507, + "Acc.bus": 0.9266000366210938, + "Acc.towel": 0.7547000122070312, + "Acc.light": 0.2688999938964844, + "Acc.truck": 0.19180000305175782, + "Acc.tower": 0.22690000534057617, + "Acc.chandelier": 0.7601000213623047, + "Acc.awning": 0.22450000762939454, + "Acc.streetlight": 0.18069999694824218, + "Acc.booth": 0.3833000183105469, + "Acc.television receiver": 0.6969999694824218, + "Acc.airplane": 0.6152999877929688, + "Acc.dirt track": 0.17959999084472655, + "Acc.apparel": 0.39740001678466796, + "Acc.pole": 0.08390000343322754, + "Acc.land": 0.08539999961853027, + "Acc.bannister": 0.07550000190734864, + "Acc.escalator": 0.5904000091552735, + "Acc.ottoman": 0.5129000091552735, + "Acc.bottle": 0.46060001373291015, + "Acc.buffet": 0.5004999923706055, + "Acc.poster": 0.17719999313354493, + "Acc.stage": 0.208700008392334, + "Acc.van": 0.4336000061035156, + "Acc.ship": 0.09970000267028808, + "Acc.fountain": 0.18989999771118163, + "Acc.conveyer belt": 0.904000015258789, + "Acc.canopy": 0.20350000381469727, + "Acc.washer": 0.7361000061035157, + "Acc.plaything": 0.37810001373291013, + "Acc.swimming pool": 0.8519000244140625, + "Acc.stool": 0.3770000076293945, + "Acc.barrel": 0.6277999877929688, + "Acc.basket": 0.3666999816894531, + "Acc.waterfall": 0.5806999969482421, + "Acc.tent": 0.9637000274658203, + "Acc.bag": 0.14829999923706055, + "Acc.minibike": 0.6213999938964844, + "Acc.cradle": 0.9487000274658203, + "Acc.oven": 0.31760000228881835, + "Acc.ball": 0.4847999954223633, + "Acc.food": 0.5484000015258789, + "Acc.step": 0.07650000095367432, + "Acc.tank": 0.5791999816894531, + "Acc.trade name": 0.20719999313354492, + "Acc.microwave": 0.7494000244140625, + "Acc.pot": 0.4341999816894531, + 
"Acc.animal": 0.5734000015258789, + "Acc.bicycle": 0.5990000152587891, + "Acc.lake": 0.048000001907348634, + "Acc.dishwasher": 0.6454000091552734, + "Acc.screen": 0.8241000366210938, + "Acc.blanket": 0.10069999694824219, + "Acc.sculpture": 0.6197999954223633, + "Acc.hood": 0.5697000122070313, + "Acc.sconce": 0.30260000228881834, + "Acc.vase": 0.3675, + "Acc.traffic light": 0.3047999954223633, + "Acc.tray": 0.06639999866485596, + "Acc.ashcan": 0.4702999877929688, + "Acc.fan": 0.43130001068115237, + "Acc.pier": 0.38150001525878907, + "Acc.crt screen": 0.008700000047683716, + "Acc.plate": 0.595, + "Acc.monitor": 0.4502000045776367, + "Acc.bulletin board": 0.24479999542236328, + "Acc.shower": 0.0496999979019165, + "Acc.radiator": 0.5722000122070312, + "Acc.glass": 0.08949999809265137, + "Acc.clock": 0.23649999618530274, + "Acc.flag": 0.37729999542236325 + } + }, + "120": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8122, + "mIoU": 0.44020000000000004, + "mAcc": 0.5535, + "IoU.wall": 0.7481999969482422, + "IoU.building": 0.810199966430664, + "IoU.sky": 0.9355999755859375, + "IoU.floor": 0.7876000213623047, + "IoU.tree": 0.7322000122070312, + "IoU.ceiling": 0.8177999877929687, + "IoU.road": 0.814800033569336, + "IoU.bed ": 0.855999984741211, + "IoU.windowpane": 0.6068000030517579, + "IoU.grass": 0.6844999694824219, + "IoU.cabinet": 0.5706999969482421, + "IoU.sidewalk": 0.6256999969482422, + "IoU.person": 0.7625, + "IoU.earth": 0.3504999923706055, + "IoU.door": 0.43470001220703125, + "IoU.table": 0.5259000015258789, + "IoU.mountain": 0.6093000030517578, + "IoU.plant": 0.49220001220703125, + "IoU.curtain": 0.7037000274658203, + "IoU.chair": 0.48770000457763674, + "IoU.car": 0.7976999664306641, + "IoU.water": 0.5045000076293945, + "IoU.painting": 0.669000015258789, + "IoU.sofa": 0.6052999877929688, + "IoU.shelf": 0.4184999847412109, + "IoU.house": 0.5593999862670899, + "IoU.sea": 0.5565000152587891, + "IoU.mirror": 0.611500015258789, + "IoU.rug": 0.6136000061035156, + "IoU.field": 0.22059999465942381, + "IoU.armchair": 0.3920000076293945, + "IoU.seat": 0.6077999877929687, + "IoU.fence": 0.3409000015258789, + "IoU.desk": 0.46450000762939453, + "IoU.rock": 0.4470000076293945, + "IoU.wardrobe": 0.5213999938964844, + "IoU.lamp": 0.5227000045776368, + "IoU.bathtub": 0.7106999969482422, + "IoU.railing": 0.3345999908447266, + "IoU.cushion": 0.4866999816894531, + "IoU.base": 0.2677000045776367, + "IoU.box": 0.20780000686645508, + "IoU.column": 0.4409000015258789, + "IoU.signboard": 0.3427000045776367, + "IoU.chest of drawers": 0.33860000610351565, + "IoU.counter": 0.29489999771118164, + "IoU.sand": 0.4109000015258789, + "IoU.sink": 0.6156000137329102, + "IoU.skyscraper": 0.5693999862670899, + "IoU.fireplace": 0.6263999938964844, + "IoU.refrigerator": 0.6866000366210937, + "IoU.grandstand": 0.39599998474121095, + "IoU.path": 0.24159999847412109, + "IoU.stairs": 0.21600000381469728, + "IoU.runway": 0.7251000213623047, + "IoU.case": 0.5518000030517578, + "IoU.pool table": 0.9119000244140625, + "IoU.pillow": 0.5022000122070313, + "IoU.screen door": 0.5463000106811523, + "IoU.stairway": 0.32869998931884764, + "IoU.river": 0.2325, + "IoU.bridge": 0.575900001525879, + "IoU.bookcase": 0.3127000045776367, + "IoU.blind": 0.3959000015258789, + "IoU.coffee table": 0.5597000122070312, + "IoU.toilet": 0.7869000244140625, + "IoU.flower": 0.2993000030517578, + "IoU.book": 0.4416999816894531, + "IoU.hill": 
0.10359999656677246, + "IoU.bench": 0.4015999984741211, + "IoU.countertop": 0.5363000106811523, + "IoU.stove": 0.6969999694824218, + "IoU.palm": 0.4365999984741211, + "IoU.kitchen island": 0.3427999877929688, + "IoU.computer": 0.5715999984741211, + "IoU.swivel chair": 0.41619998931884766, + "IoU.boat": 0.6795999908447266, + "IoU.bar": 0.5020999908447266, + "IoU.arcade machine": 0.33689998626708983, + "IoU.hovel": 0.31610000610351563, + "IoU.bus": 0.8180000305175781, + "IoU.towel": 0.4722999954223633, + "IoU.light": 0.3078000068664551, + "IoU.truck": 0.1534000015258789, + "IoU.tower": 0.29670000076293945, + "IoU.chandelier": 0.557599983215332, + "IoU.awning": 0.2830999946594238, + "IoU.streetlight": 0.114399995803833, + "IoU.booth": 0.3395999908447266, + "IoU.television receiver": 0.6095999908447266, + "IoU.airplane": 0.5820000076293945, + "IoU.dirt track": 0.11960000038146973, + "IoU.apparel": 0.3436000061035156, + "IoU.pole": 0.09479999542236328, + "IoU.land": 0.018200000524520876, + "IoU.bannister": 0.08890000343322754, + "IoU.escalator": 0.3758000183105469, + "IoU.ottoman": 0.38470001220703126, + "IoU.bottle": 0.17090000152587892, + "IoU.buffet": 0.29620000839233396, + "IoU.poster": 0.23610000610351561, + "IoU.stage": 0.1372999954223633, + "IoU.van": 0.17489999771118164, + "IoU.ship": 0.645, + "IoU.fountain": 0.18420000076293946, + "IoU.conveyer belt": 0.6994999694824219, + "IoU.canopy": 0.20290000915527343, + "IoU.washer": 0.6944999694824219, + "IoU.plaything": 0.223799991607666, + "IoU.swimming pool": 0.7631999969482421, + "IoU.stool": 0.2595000076293945, + "IoU.barrel": 0.5702999877929688, + "IoU.basket": 0.16780000686645508, + "IoU.waterfall": 0.6088000106811523, + "IoU.tent": 0.9230999755859375, + "IoU.bag": 0.09920000076293946, + "IoU.minibike": 0.6377999877929688, + "IoU.cradle": 0.7820999908447266, + "IoU.oven": 0.24420000076293946, + "IoU.ball": 0.39470001220703127, + "IoU.food": 0.543499984741211, + "IoU.step": 0.058699998855590824, + "IoU.tank": 0.4711000061035156, + "IoU.trade name": 0.2459000015258789, + "IoU.microwave": 0.31959999084472657, + "IoU.pot": 0.31510000228881835, + "IoU.animal": 0.5429000091552735, + "IoU.bicycle": 0.5368999862670898, + "IoU.lake": 0.0, + "IoU.dishwasher": 0.4791999816894531, + "IoU.screen": 0.5293999862670898, + "IoU.blanket": 0.0678000020980835, + "IoU.sculpture": 0.41630001068115235, + "IoU.hood": 0.47880001068115235, + "IoU.sconce": 0.27700000762939453, + "IoU.vase": 0.23450000762939452, + "IoU.traffic light": 0.21969999313354494, + "IoU.tray": 0.014500000476837159, + "IoU.ashcan": 0.32310001373291014, + "IoU.fan": 0.4515999984741211, + "IoU.pier": 0.44869998931884764, + "IoU.crt screen": 9.999999776482581e-05, + "IoU.plate": 0.45060001373291014, + "IoU.monitor": 0.15079999923706056, + "IoU.bulletin board": 0.35650001525878905, + "IoU.shower": 0.00699999988079071, + "IoU.radiator": 0.5329999923706055, + "IoU.glass": 0.0640999984741211, + "IoU.clock": 0.14350000381469727, + "IoU.flag": 0.36459999084472655, + "Acc.wall": 0.8662000274658204, + "Acc.building": 0.9181999969482422, + "Acc.sky": 0.9761000061035157, + "Acc.floor": 0.9048999786376953, + "Acc.tree": 0.8638999938964844, + "Acc.ceiling": 0.9183999633789063, + "Acc.road": 0.8898999786376953, + "Acc.bed ": 0.9430000305175781, + "Acc.windowpane": 0.7622000122070313, + "Acc.grass": 0.8041000366210938, + "Acc.cabinet": 0.6772000122070313, + "Acc.sidewalk": 0.777300033569336, + "Acc.person": 0.9065000152587891, + "Acc.earth": 0.5008000183105469, + "Acc.door": 0.6129999923706054, + 
"Acc.table": 0.6931999969482422, + "Acc.mountain": 0.759000015258789, + "Acc.plant": 0.6108000183105469, + "Acc.curtain": 0.8187999725341797, + "Acc.chair": 0.6131000137329101, + "Acc.car": 0.9127999877929688, + "Acc.water": 0.6877999877929688, + "Acc.painting": 0.8458000183105469, + "Acc.sofa": 0.8016000366210938, + "Acc.shelf": 0.6302000045776367, + "Acc.house": 0.6654000091552734, + "Acc.sea": 0.8193000030517578, + "Acc.mirror": 0.7025, + "Acc.rug": 0.6791000366210938, + "Acc.field": 0.41630001068115235, + "Acc.armchair": 0.5813999938964843, + "Acc.seat": 0.7919999694824219, + "Acc.fence": 0.45310001373291015, + "Acc.desk": 0.6579000091552735, + "Acc.rock": 0.6725, + "Acc.wardrobe": 0.6458000183105469, + "Acc.lamp": 0.6612000274658203, + "Acc.bathtub": 0.8080000305175781, + "Acc.railing": 0.49400001525878906, + "Acc.cushion": 0.6265000152587891, + "Acc.base": 0.42630001068115236, + "Acc.box": 0.2953000068664551, + "Acc.column": 0.5443000030517579, + "Acc.signboard": 0.4490999984741211, + "Acc.chest of drawers": 0.6419000244140625, + "Acc.counter": 0.3956999969482422, + "Acc.sand": 0.5715999984741211, + "Acc.sink": 0.7131999969482422, + "Acc.skyscraper": 0.6675, + "Acc.fireplace": 0.8923999786376953, + "Acc.refrigerator": 0.7908999633789062, + "Acc.grandstand": 0.721500015258789, + "Acc.path": 0.32169998168945313, + "Acc.stairs": 0.278799991607666, + "Acc.runway": 0.8777999877929688, + "Acc.case": 0.6958999633789062, + "Acc.pool table": 0.9573000335693359, + "Acc.pillow": 0.5891999816894531, + "Acc.screen door": 0.6191999816894531, + "Acc.stairway": 0.45799999237060546, + "Acc.river": 0.3114999961853027, + "Acc.bridge": 0.7248999786376953, + "Acc.bookcase": 0.4693000030517578, + "Acc.blind": 0.4556999969482422, + "Acc.coffee table": 0.7912999725341797, + "Acc.toilet": 0.8694999694824219, + "Acc.flower": 0.4777000045776367, + "Acc.book": 0.6104000091552735, + "Acc.hill": 0.16559999465942382, + "Acc.bench": 0.49689998626708987, + "Acc.countertop": 0.6723999786376953, + "Acc.stove": 0.778499984741211, + "Acc.palm": 0.6140999984741211, + "Acc.kitchen island": 0.6623999786376953, + "Acc.computer": 0.6852999877929687, + "Acc.swivel chair": 0.6011000061035157, + "Acc.boat": 0.8243000030517578, + "Acc.bar": 0.6513999938964844, + "Acc.arcade machine": 0.35959999084472655, + "Acc.hovel": 0.42270000457763673, + "Acc.bus": 0.8963999938964844, + "Acc.towel": 0.6870999908447266, + "Acc.light": 0.3338999938964844, + "Acc.truck": 0.19790000915527345, + "Acc.tower": 0.35900001525878905, + "Acc.chandelier": 0.7376000213623047, + "Acc.awning": 0.3375, + "Acc.streetlight": 0.13359999656677246, + "Acc.booth": 0.382599983215332, + "Acc.television receiver": 0.7158000183105468, + "Acc.airplane": 0.6616999816894531, + "Acc.dirt track": 0.19229999542236328, + "Acc.apparel": 0.5188999938964843, + "Acc.pole": 0.11869999885559082, + "Acc.land": 0.02430000066757202, + "Acc.bannister": 0.1322999954223633, + "Acc.escalator": 0.5188000106811523, + "Acc.ottoman": 0.527400016784668, + "Acc.bottle": 0.22229999542236328, + "Acc.buffet": 0.335, + "Acc.poster": 0.3206999969482422, + "Acc.stage": 0.26440000534057617, + "Acc.van": 0.21110000610351562, + "Acc.ship": 0.7461000061035157, + "Acc.fountain": 0.2134000015258789, + "Acc.conveyer belt": 0.8888999938964843, + "Acc.canopy": 0.31290000915527344, + "Acc.washer": 0.7004000091552735, + "Acc.plaything": 0.30739999771118165, + "Acc.swimming pool": 0.8443000030517578, + "Acc.stool": 0.3028000068664551, + "Acc.barrel": 0.6231999969482422, + "Acc.basket": 0.22180000305175782, + 
"Acc.waterfall": 0.6644000244140625, + "Acc.tent": 0.9902999877929688, + "Acc.bag": 0.11470000267028808, + "Acc.minibike": 0.7627999877929688, + "Acc.cradle": 0.9690000152587891, + "Acc.oven": 0.40849998474121096, + "Acc.ball": 0.43259998321533205, + "Acc.food": 0.6769000244140625, + "Acc.step": 0.07559999942779541, + "Acc.tank": 0.5334000015258789, + "Acc.trade name": 0.28530000686645507, + "Acc.microwave": 0.3591999816894531, + "Acc.pot": 0.34950000762939454, + "Acc.animal": 0.6006999969482422, + "Acc.bicycle": 0.7169999694824218, + "Acc.lake": 0.0, + "Acc.dishwasher": 0.5702000045776368, + "Acc.screen": 0.7295999908447266, + "Acc.blanket": 0.0815999984741211, + "Acc.sculpture": 0.5733000183105469, + "Acc.hood": 0.5845000076293946, + "Acc.sconce": 0.3490999984741211, + "Acc.vase": 0.3202000045776367, + "Acc.traffic light": 0.32669998168945313, + "Acc.tray": 0.021700000762939452, + "Acc.ashcan": 0.48580001831054687, + "Acc.fan": 0.59, + "Acc.pier": 0.8504000091552735, + "Acc.crt screen": 0.00019999999552965163, + "Acc.plate": 0.6295999908447265, + "Acc.monitor": 0.2603000068664551, + "Acc.bulletin board": 0.5011000061035156, + "Acc.shower": 0.04980000019073486, + "Acc.radiator": 0.625099983215332, + "Acc.glass": 0.0680999994277954, + "Acc.clock": 0.15699999809265136, + "Acc.flag": 0.4002000045776367 + } + }, + "121": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8176000000000001, + "mIoU": 0.4415, + "mAcc": 0.5533, + "IoU.wall": 0.7566999816894531, + "IoU.building": 0.8216999816894531, + "IoU.sky": 0.9368000030517578, + "IoU.floor": 0.7987999725341797, + "IoU.tree": 0.7427999877929687, + "IoU.ceiling": 0.8245999908447266, + "IoU.road": 0.821500015258789, + "IoU.bed ": 0.8618000030517579, + "IoU.windowpane": 0.601599998474121, + "IoU.grass": 0.6966999816894531, + "IoU.cabinet": 0.5754999923706055, + "IoU.sidewalk": 0.6290999984741211, + "IoU.person": 0.7780000305175782, + "IoU.earth": 0.3616999816894531, + "IoU.door": 0.43939998626708987, + "IoU.table": 0.5363000106811523, + "IoU.mountain": 0.5877999877929687, + "IoU.plant": 0.509000015258789, + "IoU.curtain": 0.7073000335693359, + "IoU.chair": 0.5129000091552735, + "IoU.car": 0.8194999694824219, + "IoU.water": 0.584900016784668, + "IoU.painting": 0.6648999786376953, + "IoU.sofa": 0.6252000045776367, + "IoU.shelf": 0.41619998931884766, + "IoU.house": 0.5465000152587891, + "IoU.sea": 0.6608000183105469, + "IoU.mirror": 0.6320999908447266, + "IoU.rug": 0.627400016784668, + "IoU.field": 0.23719999313354492, + "IoU.armchair": 0.3997000122070313, + "IoU.seat": 0.607599983215332, + "IoU.fence": 0.3809000015258789, + "IoU.desk": 0.4675, + "IoU.rock": 0.45299999237060545, + "IoU.wardrobe": 0.49790000915527344, + "IoU.lamp": 0.5209999847412109, + "IoU.bathtub": 0.7691000366210937, + "IoU.railing": 0.30459999084472655, + "IoU.cushion": 0.5172999954223633, + "IoU.base": 0.2590999984741211, + "IoU.box": 0.20489999771118164, + "IoU.column": 0.43029998779296874, + "IoU.signboard": 0.34619998931884766, + "IoU.chest of drawers": 0.3256999969482422, + "IoU.counter": 0.3021999931335449, + "IoU.sand": 0.34950000762939454, + "IoU.sink": 0.6031000137329101, + "IoU.skyscraper": 0.5147000122070312, + "IoU.fireplace": 0.7025, + "IoU.refrigerator": 0.6912999725341797, + "IoU.grandstand": 0.49220001220703125, + "IoU.path": 0.2531999969482422, + "IoU.stairs": 0.23170000076293945, + "IoU.runway": 0.6466000366210938, + "IoU.case": 0.5109999847412109, + "IoU.pool table": 
0.9226999664306641, + "IoU.pillow": 0.5025, + "IoU.screen door": 0.5666999816894531, + "IoU.stairway": 0.295, + "IoU.river": 0.21, + "IoU.bridge": 0.6480000305175782, + "IoU.bookcase": 0.3039999961853027, + "IoU.blind": 0.4316999816894531, + "IoU.coffee table": 0.5518999862670898, + "IoU.toilet": 0.7838999938964843, + "IoU.flower": 0.3164999961853027, + "IoU.book": 0.419900016784668, + "IoU.hill": 0.13340000152587891, + "IoU.bench": 0.33180000305175783, + "IoU.countertop": 0.5329000091552735, + "IoU.stove": 0.6862999725341797, + "IoU.palm": 0.43990001678466795, + "IoU.kitchen island": 0.37029998779296874, + "IoU.computer": 0.5881999969482422, + "IoU.swivel chair": 0.45040000915527345, + "IoU.boat": 0.6063999938964844, + "IoU.bar": 0.3718000030517578, + "IoU.arcade machine": 0.3683000183105469, + "IoU.hovel": 0.4520999908447266, + "IoU.bus": 0.813499984741211, + "IoU.towel": 0.4740999984741211, + "IoU.light": 0.27799999237060546, + "IoU.truck": 0.15220000267028808, + "IoU.tower": 0.2802000045776367, + "IoU.chandelier": 0.5631000137329102, + "IoU.awning": 0.2610000038146973, + "IoU.streetlight": 0.12409999847412109, + "IoU.booth": 0.48869998931884767, + "IoU.television receiver": 0.6731999969482422, + "IoU.airplane": 0.5047000122070312, + "IoU.dirt track": 0.08850000381469726, + "IoU.apparel": 0.36459999084472655, + "IoU.pole": 0.10329999923706054, + "IoU.land": 0.06590000152587891, + "IoU.bannister": 0.08039999961853027, + "IoU.escalator": 0.2953000068664551, + "IoU.ottoman": 0.44470001220703126, + "IoU.bottle": 0.2468000030517578, + "IoU.buffet": 0.29700000762939455, + "IoU.poster": 0.15520000457763672, + "IoU.stage": 0.18950000762939453, + "IoU.van": 0.3525, + "IoU.ship": 0.5863000106811523, + "IoU.fountain": 0.1868000030517578, + "IoU.conveyer belt": 0.6020000076293945, + "IoU.canopy": 0.22049999237060547, + "IoU.washer": 0.7037000274658203, + "IoU.plaything": 0.21829999923706056, + "IoU.swimming pool": 0.6991000366210938, + "IoU.stool": 0.27170000076293943, + "IoU.barrel": 0.28329999923706056, + "IoU.basket": 0.17909999847412109, + "IoU.waterfall": 0.725999984741211, + "IoU.tent": 0.9116999816894531, + "IoU.bag": 0.09680000305175782, + "IoU.minibike": 0.5791999816894531, + "IoU.cradle": 0.8008000183105469, + "IoU.oven": 0.16059999465942382, + "IoU.ball": 0.25129999160766603, + "IoU.food": 0.5468000030517578, + "IoU.step": 0.06320000171661377, + "IoU.tank": 0.47470001220703123, + "IoU.trade name": 0.24879999160766603, + "IoU.microwave": 0.40099998474121096, + "IoU.pot": 0.3295999908447266, + "IoU.animal": 0.5206000137329102, + "IoU.bicycle": 0.49689998626708987, + "IoU.lake": 0.0, + "IoU.dishwasher": 0.5277000045776368, + "IoU.screen": 0.514000015258789, + "IoU.blanket": 0.10359999656677246, + "IoU.sculpture": 0.46830001831054685, + "IoU.hood": 0.514900016784668, + "IoU.sconce": 0.27180000305175783, + "IoU.vase": 0.255, + "IoU.traffic light": 0.225, + "IoU.tray": 0.008899999856948853, + "IoU.ashcan": 0.37799999237060544, + "IoU.fan": 0.39860000610351565, + "IoU.pier": 0.5059999847412109, + "IoU.crt screen": 0.022400000095367432, + "IoU.plate": 0.3813999938964844, + "IoU.monitor": 0.0856999969482422, + "IoU.bulletin board": 0.18049999237060546, + "IoU.shower": 0.01100000023841858, + "IoU.radiator": 0.5006000137329102, + "IoU.glass": 0.0746999979019165, + "IoU.clock": 0.23920000076293946, + "IoU.flag": 0.31610000610351563, + "Acc.wall": 0.8755999755859375, + "Acc.building": 0.9209999847412109, + "Acc.sky": 0.9741999816894531, + "Acc.floor": 0.9076000213623047, + "Acc.tree": 
0.8680999755859375, + "Acc.ceiling": 0.9231999969482422, + "Acc.road": 0.8948999786376953, + "Acc.bed ": 0.9455000305175781, + "Acc.windowpane": 0.7622000122070313, + "Acc.grass": 0.8301000213623047, + "Acc.cabinet": 0.6826000213623047, + "Acc.sidewalk": 0.7812000274658203, + "Acc.person": 0.9080000305175782, + "Acc.earth": 0.518400001525879, + "Acc.door": 0.6058000183105469, + "Acc.table": 0.6983999633789062, + "Acc.mountain": 0.7183999633789062, + "Acc.plant": 0.6512999725341797, + "Acc.curtain": 0.8273000335693359, + "Acc.chair": 0.6472000122070313, + "Acc.car": 0.9180000305175782, + "Acc.water": 0.7869000244140625, + "Acc.painting": 0.8340000152587891, + "Acc.sofa": 0.8, + "Acc.shelf": 0.6254999923706055, + "Acc.house": 0.710999984741211, + "Acc.sea": 0.8237000274658203, + "Acc.mirror": 0.726500015258789, + "Acc.rug": 0.6981999969482422, + "Acc.field": 0.4015999984741211, + "Acc.armchair": 0.6088000106811523, + "Acc.seat": 0.7705000305175781, + "Acc.fence": 0.5127999877929688, + "Acc.desk": 0.6890000152587891, + "Acc.rock": 0.66, + "Acc.wardrobe": 0.6365000152587891, + "Acc.lamp": 0.657300033569336, + "Acc.bathtub": 0.8418000030517578, + "Acc.railing": 0.4590999984741211, + "Acc.cushion": 0.6736000061035157, + "Acc.base": 0.4520000076293945, + "Acc.box": 0.28639999389648435, + "Acc.column": 0.5583000183105469, + "Acc.signboard": 0.43040000915527343, + "Acc.chest of drawers": 0.5761000061035156, + "Acc.counter": 0.3988999938964844, + "Acc.sand": 0.5072999954223633, + "Acc.sink": 0.6819000244140625, + "Acc.skyscraper": 0.6156000137329102, + "Acc.fireplace": 0.8943000030517578, + "Acc.refrigerator": 0.8233999633789062, + "Acc.grandstand": 0.707699966430664, + "Acc.path": 0.3268000030517578, + "Acc.stairs": 0.30040000915527343, + "Acc.runway": 0.7630999755859375, + "Acc.case": 0.6515000152587891, + "Acc.pool table": 0.961500015258789, + "Acc.pillow": 0.5868000030517578, + "Acc.screen door": 0.629900016784668, + "Acc.stairway": 0.402599983215332, + "Acc.river": 0.30059999465942383, + "Acc.bridge": 0.8112999725341797, + "Acc.bookcase": 0.5179999923706055, + "Acc.blind": 0.48319999694824217, + "Acc.coffee table": 0.7893000030517578, + "Acc.toilet": 0.885, + "Acc.flower": 0.49029998779296874, + "Acc.book": 0.5686999893188477, + "Acc.hill": 0.22430000305175782, + "Acc.bench": 0.4386999893188477, + "Acc.countertop": 0.6506999969482422, + "Acc.stove": 0.790199966430664, + "Acc.palm": 0.6018999862670898, + "Acc.kitchen island": 0.7037000274658203, + "Acc.computer": 0.7027999877929687, + "Acc.swivel chair": 0.5627999877929688, + "Acc.boat": 0.8033999633789063, + "Acc.bar": 0.5122000122070313, + "Acc.arcade machine": 0.4125, + "Acc.hovel": 0.48450000762939455, + "Acc.bus": 0.8825, + "Acc.towel": 0.6773999786376953, + "Acc.light": 0.2953000068664551, + "Acc.truck": 0.21760000228881837, + "Acc.tower": 0.3781999969482422, + "Acc.chandelier": 0.7287999725341797, + "Acc.awning": 0.3263999938964844, + "Acc.streetlight": 0.1472000026702881, + "Acc.booth": 0.5304000091552734, + "Acc.television receiver": 0.7780000305175782, + "Acc.airplane": 0.6719999694824219, + "Acc.dirt track": 0.1759000015258789, + "Acc.apparel": 0.5309000015258789, + "Acc.pole": 0.13520000457763673, + "Acc.land": 0.085, + "Acc.bannister": 0.12550000190734864, + "Acc.escalator": 0.38459999084472657, + "Acc.ottoman": 0.6143999862670898, + "Acc.bottle": 0.30739999771118165, + "Acc.buffet": 0.3438999938964844, + "Acc.poster": 0.23219999313354492, + "Acc.stage": 0.3484000015258789, + "Acc.van": 0.4234000015258789, + "Acc.ship": 
0.624000015258789, + "Acc.fountain": 0.20799999237060546, + "Acc.conveyer belt": 0.872699966430664, + "Acc.canopy": 0.3333000183105469, + "Acc.washer": 0.7127999877929687, + "Acc.plaything": 0.3215000152587891, + "Acc.swimming pool": 0.8019999694824219, + "Acc.stool": 0.33860000610351565, + "Acc.barrel": 0.4790999984741211, + "Acc.basket": 0.2231999969482422, + "Acc.waterfall": 0.8105999755859375, + "Acc.tent": 0.9916000366210938, + "Acc.bag": 0.11020000457763672, + "Acc.minibike": 0.6516999816894531, + "Acc.cradle": 0.96, + "Acc.oven": 0.3859000015258789, + "Acc.ball": 0.2770999908447266, + "Acc.food": 0.6722000122070313, + "Acc.step": 0.08460000038146973, + "Acc.tank": 0.5331000137329102, + "Acc.trade name": 0.2879999923706055, + "Acc.microwave": 0.4459000015258789, + "Acc.pot": 0.37270000457763675, + "Acc.animal": 0.5800999832153321, + "Acc.bicycle": 0.7037000274658203, + "Acc.lake": 0.0, + "Acc.dishwasher": 0.6006999969482422, + "Acc.screen": 0.7298999786376953, + "Acc.blanket": 0.11869999885559082, + "Acc.sculpture": 0.615099983215332, + "Acc.hood": 0.6161000061035157, + "Acc.sconce": 0.34650001525878904, + "Acc.vase": 0.3509000015258789, + "Acc.traffic light": 0.293799991607666, + "Acc.tray": 0.015399999618530273, + "Acc.ashcan": 0.5479999923706055, + "Acc.fan": 0.474900016784668, + "Acc.pier": 0.8444999694824219, + "Acc.crt screen": 0.07480000019073486, + "Acc.plate": 0.49029998779296874, + "Acc.monitor": 0.14300000190734863, + "Acc.bulletin board": 0.25739999771118166, + "Acc.shower": 0.05, + "Acc.radiator": 0.5488999938964844, + "Acc.glass": 0.08029999732971191, + "Acc.clock": 0.2688999938964844, + "Acc.flag": 0.3454000091552734 + } + }, + "122": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8188, + "mIoU": 0.44630000000000003, + "mAcc": 0.5540999999999999, + "IoU.wall": 0.757300033569336, + "IoU.building": 0.8208999633789062, + "IoU.sky": 0.9358999633789062, + "IoU.floor": 0.7933000183105469, + "IoU.tree": 0.7391000366210938, + "IoU.ceiling": 0.8240000152587891, + "IoU.road": 0.8173999786376953, + "IoU.bed ": 0.8683999633789062, + "IoU.windowpane": 0.599900016784668, + "IoU.grass": 0.7141000366210938, + "IoU.cabinet": 0.5895999908447266, + "IoU.sidewalk": 0.6361999893188477, + "IoU.person": 0.7795999908447265, + "IoU.earth": 0.382599983215332, + "IoU.door": 0.42880001068115237, + "IoU.table": 0.5604000091552734, + "IoU.mountain": 0.5706000137329101, + "IoU.plant": 0.5127999877929688, + "IoU.curtain": 0.7044999694824219, + "IoU.chair": 0.5211000061035156, + "IoU.car": 0.8152999877929688, + "IoU.water": 0.5597999954223633, + "IoU.painting": 0.6751000213623047, + "IoU.sofa": 0.6188999938964844, + "IoU.shelf": 0.41880001068115236, + "IoU.house": 0.5129000091552735, + "IoU.sea": 0.6598000335693359, + "IoU.mirror": 0.6377000045776368, + "IoU.rug": 0.6479000091552735, + "IoU.field": 0.28600000381469726, + "IoU.armchair": 0.38529998779296876, + "IoU.seat": 0.6213999938964844, + "IoU.fence": 0.35520000457763673, + "IoU.desk": 0.46619998931884765, + "IoU.rock": 0.4533000183105469, + "IoU.wardrobe": 0.52, + "IoU.lamp": 0.5218000030517578, + "IoU.bathtub": 0.7787999725341797, + "IoU.railing": 0.3275, + "IoU.cushion": 0.5318999862670899, + "IoU.base": 0.255, + "IoU.box": 0.18829999923706053, + "IoU.column": 0.4393000030517578, + "IoU.signboard": 0.3313999938964844, + "IoU.chest of drawers": 0.31959999084472657, + "IoU.counter": 0.3145000076293945, + "IoU.sand": 0.37090000152587893, + "IoU.sink": 
0.6419999694824219, + "IoU.skyscraper": 0.5433000183105469, + "IoU.fireplace": 0.6905999755859376, + "IoU.refrigerator": 0.6944999694824219, + "IoU.grandstand": 0.45689998626708983, + "IoU.path": 0.18229999542236328, + "IoU.stairs": 0.20719999313354492, + "IoU.runway": 0.6530999755859375, + "IoU.case": 0.548499984741211, + "IoU.pool table": 0.9255999755859375, + "IoU.pillow": 0.5116999816894531, + "IoU.screen door": 0.5706000137329101, + "IoU.stairway": 0.30639999389648437, + "IoU.river": 0.21079999923706055, + "IoU.bridge": 0.7056999969482421, + "IoU.bookcase": 0.33310001373291015, + "IoU.blind": 0.35450000762939454, + "IoU.coffee table": 0.5643999862670899, + "IoU.toilet": 0.8152999877929688, + "IoU.flower": 0.3806999969482422, + "IoU.book": 0.405099983215332, + "IoU.hill": 0.07119999885559082, + "IoU.bench": 0.3591999816894531, + "IoU.countertop": 0.5433000183105469, + "IoU.stove": 0.6648000335693359, + "IoU.palm": 0.47459999084472654, + "IoU.kitchen island": 0.38349998474121094, + "IoU.computer": 0.5716999816894531, + "IoU.swivel chair": 0.48950000762939455, + "IoU.boat": 0.6438999938964843, + "IoU.bar": 0.34060001373291016, + "IoU.arcade machine": 0.32549999237060545, + "IoU.hovel": 0.14199999809265137, + "IoU.bus": 0.8583999633789062, + "IoU.towel": 0.5306000137329101, + "IoU.light": 0.24450000762939453, + "IoU.truck": 0.1427999973297119, + "IoU.tower": 0.3159000015258789, + "IoU.chandelier": 0.5777000045776367, + "IoU.awning": 0.2606999969482422, + "IoU.streetlight": 0.13770000457763673, + "IoU.booth": 0.42759998321533205, + "IoU.television receiver": 0.6555999755859375, + "IoU.airplane": 0.5570999908447266, + "IoU.dirt track": 0.1140999984741211, + "IoU.apparel": 0.33430000305175783, + "IoU.pole": 0.1090999984741211, + "IoU.land": 0.05809999942779541, + "IoU.bannister": 0.05409999847412109, + "IoU.escalator": 0.25559999465942385, + "IoU.ottoman": 0.40400001525878904, + "IoU.bottle": 0.14479999542236327, + "IoU.buffet": 0.3995000076293945, + "IoU.poster": 0.20549999237060546, + "IoU.stage": 0.12020000457763672, + "IoU.van": 0.35380001068115235, + "IoU.ship": 0.6027000045776367, + "IoU.fountain": 0.20120000839233398, + "IoU.conveyer belt": 0.6036000061035156, + "IoU.canopy": 0.21979999542236328, + "IoU.washer": 0.7320999908447265, + "IoU.plaything": 0.26299999237060545, + "IoU.swimming pool": 0.706500015258789, + "IoU.stool": 0.29850000381469727, + "IoU.barrel": 0.5315999984741211, + "IoU.basket": 0.17489999771118164, + "IoU.waterfall": 0.6266999816894532, + "IoU.tent": 0.905199966430664, + "IoU.bag": 0.07179999828338623, + "IoU.minibike": 0.6916999816894531, + "IoU.cradle": 0.7981999969482422, + "IoU.oven": 0.1997999954223633, + "IoU.ball": 0.3584000015258789, + "IoU.food": 0.5143999862670898, + "IoU.step": 0.048899998664855955, + "IoU.tank": 0.5315000152587891, + "IoU.trade name": 0.2553000068664551, + "IoU.microwave": 0.595999984741211, + "IoU.pot": 0.31670000076293947, + "IoU.animal": 0.586500015258789, + "IoU.bicycle": 0.5625, + "IoU.lake": 0.0, + "IoU.dishwasher": 0.4693000030517578, + "IoU.screen": 0.6441999816894531, + "IoU.blanket": 0.12399999618530273, + "IoU.sculpture": 0.5075, + "IoU.hood": 0.4445000076293945, + "IoU.sconce": 0.2409000015258789, + "IoU.vase": 0.24690000534057618, + "IoU.traffic light": 0.21389999389648437, + "IoU.tray": 0.01940000057220459, + "IoU.ashcan": 0.3484999847412109, + "IoU.fan": 0.3634999847412109, + "IoU.pier": 0.27170000076293943, + "IoU.crt screen": 0.0, + "IoU.plate": 0.46430000305175784, + "IoU.monitor": 0.06309999942779541, + "IoU.bulletin 
board": 0.35310001373291017, + "IoU.shower": 0.009599999785423278, + "IoU.radiator": 0.5518000030517578, + "IoU.glass": 0.06039999961853027, + "IoU.clock": 0.18620000839233397, + "IoU.flag": 0.41619998931884766, + "Acc.wall": 0.8825, + "Acc.building": 0.9187000274658204, + "Acc.sky": 0.9762000274658204, + "Acc.floor": 0.9006999969482422, + "Acc.tree": 0.8694000244140625, + "Acc.ceiling": 0.9198999786376953, + "Acc.road": 0.8837999725341796, + "Acc.bed ": 0.9534999847412109, + "Acc.windowpane": 0.767300033569336, + "Acc.grass": 0.8355000305175782, + "Acc.cabinet": 0.707699966430664, + "Acc.sidewalk": 0.7898000335693359, + "Acc.person": 0.9070999908447266, + "Acc.earth": 0.5452000045776367, + "Acc.door": 0.5854999923706055, + "Acc.table": 0.7162000274658203, + "Acc.mountain": 0.7037999725341797, + "Acc.plant": 0.6637999725341797, + "Acc.curtain": 0.8261000061035156, + "Acc.chair": 0.6529000091552735, + "Acc.car": 0.9219000244140625, + "Acc.water": 0.7406999969482422, + "Acc.painting": 0.8208000183105468, + "Acc.sofa": 0.8073999786376953, + "Acc.shelf": 0.6195000076293945, + "Acc.house": 0.694800033569336, + "Acc.sea": 0.8534999847412109, + "Acc.mirror": 0.7138999938964844, + "Acc.rug": 0.7262000274658204, + "Acc.field": 0.4184000015258789, + "Acc.armchair": 0.5620000076293945, + "Acc.seat": 0.8198999786376953, + "Acc.fence": 0.4765999984741211, + "Acc.desk": 0.6762999725341797, + "Acc.rock": 0.6436000061035156, + "Acc.wardrobe": 0.6444999694824218, + "Acc.lamp": 0.6445999908447265, + "Acc.bathtub": 0.8430000305175781, + "Acc.railing": 0.474900016784668, + "Acc.cushion": 0.7006999969482421, + "Acc.base": 0.46049999237060546, + "Acc.box": 0.24879999160766603, + "Acc.column": 0.5595000076293946, + "Acc.signboard": 0.4234000015258789, + "Acc.chest of drawers": 0.5529999923706055, + "Acc.counter": 0.4361000061035156, + "Acc.sand": 0.5338000106811523, + "Acc.sink": 0.7380000305175781, + "Acc.skyscraper": 0.6416999816894531, + "Acc.fireplace": 0.8925, + "Acc.refrigerator": 0.8116999816894531, + "Acc.grandstand": 0.7694000244140625, + "Acc.path": 0.25040000915527344, + "Acc.stairs": 0.29739999771118164, + "Acc.runway": 0.8316999816894531, + "Acc.case": 0.7047000122070313, + "Acc.pool table": 0.955, + "Acc.pillow": 0.58, + "Acc.screen door": 0.6165999984741211, + "Acc.stairway": 0.4075, + "Acc.river": 0.3691999816894531, + "Acc.bridge": 0.8569999694824219, + "Acc.bookcase": 0.5643999862670899, + "Acc.blind": 0.3840999984741211, + "Acc.coffee table": 0.7522000122070313, + "Acc.toilet": 0.8786000061035156, + "Acc.flower": 0.5366999816894531, + "Acc.book": 0.5452999877929687, + "Acc.hill": 0.11630000114440918, + "Acc.bench": 0.4627000045776367, + "Acc.countertop": 0.6687000274658204, + "Acc.stove": 0.7505999755859375, + "Acc.palm": 0.654000015258789, + "Acc.kitchen island": 0.6636000061035157, + "Acc.computer": 0.6894999694824219, + "Acc.swivel chair": 0.6365000152587891, + "Acc.boat": 0.8255999755859375, + "Acc.bar": 0.46919998168945315, + "Acc.arcade machine": 0.35630001068115236, + "Acc.hovel": 0.1477999973297119, + "Acc.bus": 0.9258000183105469, + "Acc.towel": 0.6929000091552734, + "Acc.light": 0.25360000610351563, + "Acc.truck": 0.19590000152587891, + "Acc.tower": 0.43090000152587893, + "Acc.chandelier": 0.7270999908447265, + "Acc.awning": 0.33540000915527346, + "Acc.streetlight": 0.16639999389648438, + "Acc.booth": 0.4961999893188477, + "Acc.television receiver": 0.7426000213623047, + "Acc.airplane": 0.6498999786376953, + "Acc.dirt track": 0.35639999389648436, + "Acc.apparel": 0.46689998626708984, 
+ "Acc.pole": 0.14109999656677247, + "Acc.land": 0.07639999866485596, + "Acc.bannister": 0.08449999809265137, + "Acc.escalator": 0.2925, + "Acc.ottoman": 0.5363999938964844, + "Acc.bottle": 0.1722999954223633, + "Acc.buffet": 0.4308000183105469, + "Acc.poster": 0.33689998626708983, + "Acc.stage": 0.27799999237060546, + "Acc.van": 0.412400016784668, + "Acc.ship": 0.6145999908447266, + "Acc.fountain": 0.20379999160766601, + "Acc.conveyer belt": 0.8605000305175782, + "Acc.canopy": 0.3313999938964844, + "Acc.washer": 0.7452999877929688, + "Acc.plaything": 0.39740001678466796, + "Acc.swimming pool": 0.8168000030517578, + "Acc.stool": 0.357400016784668, + "Acc.barrel": 0.6334000015258789, + "Acc.basket": 0.22399999618530272, + "Acc.waterfall": 0.6934999847412109, + "Acc.tent": 0.9870999908447265, + "Acc.bag": 0.085, + "Acc.minibike": 0.7805000305175781, + "Acc.cradle": 0.9641000366210938, + "Acc.oven": 0.36790000915527343, + "Acc.ball": 0.39369998931884764, + "Acc.food": 0.6127999877929687, + "Acc.step": 0.06219999790191651, + "Acc.tank": 0.5665999984741211, + "Acc.trade name": 0.293700008392334, + "Acc.microwave": 0.665199966430664, + "Acc.pot": 0.3559999847412109, + "Acc.animal": 0.6272000122070313, + "Acc.bicycle": 0.7006999969482421, + "Acc.lake": 0.0, + "Acc.dishwasher": 0.5625, + "Acc.screen": 0.8619999694824219, + "Acc.blanket": 0.13630000114440918, + "Acc.sculpture": 0.6637000274658204, + "Acc.hood": 0.5731000137329102, + "Acc.sconce": 0.31239999771118165, + "Acc.vase": 0.34569999694824216, + "Acc.traffic light": 0.2876000022888184, + "Acc.tray": 0.025999999046325682, + "Acc.ashcan": 0.524900016784668, + "Acc.fan": 0.40299999237060546, + "Acc.pier": 0.5197999954223633, + "Acc.crt screen": 0.0, + "Acc.plate": 0.6081000137329101, + "Acc.monitor": 0.10720000267028809, + "Acc.bulletin board": 0.42689998626708986, + "Acc.shower": 0.05, + "Acc.radiator": 0.5986000061035156, + "Acc.glass": 0.06289999961853027, + "Acc.clock": 0.20610000610351562, + "Acc.flag": 0.44470001220703126 + } + }, + "123": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8201, + "mIoU": 0.4473, + "mAcc": 0.547, + "IoU.wall": 0.7555999755859375, + "IoU.building": 0.8213999938964843, + "IoU.sky": 0.9362999725341797, + "IoU.floor": 0.7958999633789062, + "IoU.tree": 0.7395999908447266, + "IoU.ceiling": 0.8245999908447266, + "IoU.road": 0.8262999725341796, + "IoU.bed ": 0.8716000366210938, + "IoU.windowpane": 0.6029000091552734, + "IoU.grass": 0.7019000244140625, + "IoU.cabinet": 0.5870999908447265, + "IoU.sidewalk": 0.6363999938964844, + "IoU.person": 0.7815000152587891, + "IoU.earth": 0.3625, + "IoU.door": 0.43810001373291013, + "IoU.table": 0.5629999923706055, + "IoU.mountain": 0.5891999816894531, + "IoU.plant": 0.5147999954223633, + "IoU.curtain": 0.707699966430664, + "IoU.chair": 0.5084999847412109, + "IoU.car": 0.8180999755859375, + "IoU.water": 0.558499984741211, + "IoU.painting": 0.6958000183105468, + "IoU.sofa": 0.6329999923706054, + "IoU.shelf": 0.41380001068115235, + "IoU.house": 0.5131000137329101, + "IoU.sea": 0.5852000045776368, + "IoU.mirror": 0.6454000091552734, + "IoU.rug": 0.624000015258789, + "IoU.field": 0.2709000015258789, + "IoU.armchair": 0.40119998931884765, + "IoU.seat": 0.6393999862670898, + "IoU.fence": 0.37090000152587893, + "IoU.desk": 0.47560001373291017, + "IoU.rock": 0.4341999816894531, + "IoU.wardrobe": 0.5177000045776368, + "IoU.lamp": 0.5293000030517578, + "IoU.bathtub": 0.8156999969482421, + 
"IoU.railing": 0.34450000762939453, + "IoU.cushion": 0.5427000045776367, + "IoU.base": 0.28399999618530275, + "IoU.box": 0.20700000762939452, + "IoU.column": 0.44919998168945313, + "IoU.signboard": 0.3390999984741211, + "IoU.chest of drawers": 0.3302000045776367, + "IoU.counter": 0.3661000061035156, + "IoU.sand": 0.3616999816894531, + "IoU.sink": 0.655199966430664, + "IoU.skyscraper": 0.5222000122070313, + "IoU.fireplace": 0.7319000244140625, + "IoU.refrigerator": 0.6938999938964844, + "IoU.grandstand": 0.4679000091552734, + "IoU.path": 0.2, + "IoU.stairs": 0.18440000534057618, + "IoU.runway": 0.6386999893188476, + "IoU.case": 0.5270000076293946, + "IoU.pool table": 0.9154000091552734, + "IoU.pillow": 0.5159000015258789, + "IoU.screen door": 0.6170999908447266, + "IoU.stairway": 0.3009000015258789, + "IoU.river": 0.19040000915527344, + "IoU.bridge": 0.6636000061035157, + "IoU.bookcase": 0.33369998931884765, + "IoU.blind": 0.34110000610351565, + "IoU.coffee table": 0.5981000137329101, + "IoU.toilet": 0.8152999877929688, + "IoU.flower": 0.35770000457763673, + "IoU.book": 0.41709999084472654, + "IoU.hill": 0.06320000171661377, + "IoU.bench": 0.35770000457763673, + "IoU.countertop": 0.5236000061035156, + "IoU.stove": 0.6687000274658204, + "IoU.palm": 0.4875, + "IoU.kitchen island": 0.35959999084472655, + "IoU.computer": 0.601599998474121, + "IoU.swivel chair": 0.42130001068115236, + "IoU.boat": 0.7284999847412109, + "IoU.bar": 0.4961999893188477, + "IoU.arcade machine": 0.33169998168945314, + "IoU.hovel": 0.12789999961853027, + "IoU.bus": 0.8644000244140625, + "IoU.towel": 0.5318000030517578, + "IoU.light": 0.26479999542236327, + "IoU.truck": 0.13789999961853028, + "IoU.tower": 0.2052000045776367, + "IoU.chandelier": 0.5893000030517578, + "IoU.awning": 0.2506999969482422, + "IoU.streetlight": 0.13789999961853028, + "IoU.booth": 0.3890000152587891, + "IoU.television receiver": 0.6575, + "IoU.airplane": 0.5529000091552735, + "IoU.dirt track": 0.32479999542236326, + "IoU.apparel": 0.36700000762939455, + "IoU.pole": 0.09739999771118164, + "IoU.land": 0.02440000057220459, + "IoU.bannister": 0.05860000133514404, + "IoU.escalator": 0.38419998168945313, + "IoU.ottoman": 0.4072999954223633, + "IoU.bottle": 0.11279999732971191, + "IoU.buffet": 0.3395999908447266, + "IoU.poster": 0.18870000839233397, + "IoU.stage": 0.22620000839233398, + "IoU.van": 0.36939998626708986, + "IoU.ship": 0.48369998931884767, + "IoU.fountain": 0.07880000114440917, + "IoU.conveyer belt": 0.6179000091552734, + "IoU.canopy": 0.23079999923706054, + "IoU.washer": 0.7381999969482422, + "IoU.plaything": 0.22239999771118163, + "IoU.swimming pool": 0.7531999969482421, + "IoU.stool": 0.32799999237060545, + "IoU.barrel": 0.47169998168945315, + "IoU.basket": 0.22040000915527344, + "IoU.waterfall": 0.5063999938964844, + "IoU.tent": 0.9202999877929687, + "IoU.bag": 0.07050000190734863, + "IoU.minibike": 0.6791999816894532, + "IoU.cradle": 0.7986000061035157, + "IoU.oven": 0.1743000030517578, + "IoU.ball": 0.5022000122070313, + "IoU.food": 0.5006000137329102, + "IoU.step": 0.07730000019073487, + "IoU.tank": 0.45029998779296876, + "IoU.trade name": 0.25260000228881835, + "IoU.microwave": 0.4234999847412109, + "IoU.pot": 0.36209999084472655, + "IoU.animal": 0.5577000045776367, + "IoU.bicycle": 0.5738000106811524, + "IoU.lake": 0.09949999809265136, + "IoU.dishwasher": 0.5102000045776367, + "IoU.screen": 0.575999984741211, + "IoU.blanket": 0.09840000152587891, + "IoU.sculpture": 0.5563000106811523, + "IoU.hood": 0.4570999908447266, + 
"IoU.sconce": 0.22309999465942382, + "IoU.vase": 0.26399999618530273, + "IoU.traffic light": 0.21879999160766603, + "IoU.tray": 0.034000000953674316, + "IoU.ashcan": 0.35220001220703123, + "IoU.fan": 0.35889999389648436, + "IoU.pier": 0.23049999237060548, + "IoU.crt screen": 0.002800000011920929, + "IoU.plate": 0.477400016784668, + "IoU.monitor": 0.20010000228881836, + "IoU.bulletin board": 0.33930000305175784, + "IoU.shower": 0.015199999809265136, + "IoU.radiator": 0.5027999877929688, + "IoU.glass": 0.03430000066757202, + "IoU.clock": 0.19600000381469726, + "IoU.flag": 0.2902000045776367, + "Acc.wall": 0.8941999816894531, + "Acc.building": 0.9202999877929687, + "Acc.sky": 0.9752999877929688, + "Acc.floor": 0.9102999877929687, + "Acc.tree": 0.8769999694824219, + "Acc.ceiling": 0.9256999969482422, + "Acc.road": 0.8991999816894531, + "Acc.bed ": 0.9530000305175781, + "Acc.windowpane": 0.76, + "Acc.grass": 0.8223000335693359, + "Acc.cabinet": 0.6984999847412109, + "Acc.sidewalk": 0.7837000274658203, + "Acc.person": 0.9066000366210938, + "Acc.earth": 0.5247000122070312, + "Acc.door": 0.5741999816894531, + "Acc.table": 0.7276000213623047, + "Acc.mountain": 0.7419000244140626, + "Acc.plant": 0.6456999969482422, + "Acc.curtain": 0.8183000183105469, + "Acc.chair": 0.6266999816894532, + "Acc.car": 0.9198999786376953, + "Acc.water": 0.7601000213623047, + "Acc.painting": 0.8336000061035156, + "Acc.sofa": 0.8487000274658203, + "Acc.shelf": 0.6181000137329101, + "Acc.house": 0.6595999908447265, + "Acc.sea": 0.7316999816894532, + "Acc.mirror": 0.7095999908447266, + "Acc.rug": 0.7005000305175781, + "Acc.field": 0.41259998321533203, + "Acc.armchair": 0.5611000061035156, + "Acc.seat": 0.7961000061035156, + "Acc.fence": 0.49990001678466794, + "Acc.desk": 0.6683000183105469, + "Acc.rock": 0.5800999832153321, + "Acc.wardrobe": 0.6183000183105469, + "Acc.lamp": 0.6293999862670898, + "Acc.bathtub": 0.8758000183105469, + "Acc.railing": 0.48520000457763673, + "Acc.cushion": 0.6891999816894532, + "Acc.base": 0.45310001373291015, + "Acc.box": 0.2759000015258789, + "Acc.column": 0.5629000091552734, + "Acc.signboard": 0.4109999847412109, + "Acc.chest of drawers": 0.5711999893188476, + "Acc.counter": 0.5058000183105469, + "Acc.sand": 0.5220999908447266, + "Acc.sink": 0.7331999969482422, + "Acc.skyscraper": 0.5836999893188477, + "Acc.fireplace": 0.8575, + "Acc.refrigerator": 0.7691000366210937, + "Acc.grandstand": 0.7427999877929687, + "Acc.path": 0.2809000015258789, + "Acc.stairs": 0.25860000610351563, + "Acc.runway": 0.8180000305175781, + "Acc.case": 0.7168000030517578, + "Acc.pool table": 0.9573000335693359, + "Acc.pillow": 0.5802000045776368, + "Acc.screen door": 0.6708000183105469, + "Acc.stairway": 0.43849998474121094, + "Acc.river": 0.38669998168945313, + "Acc.bridge": 0.7794000244140625, + "Acc.bookcase": 0.57, + "Acc.blind": 0.3715999984741211, + "Acc.coffee table": 0.7708000183105469, + "Acc.toilet": 0.8795999908447265, + "Acc.flower": 0.4875, + "Acc.book": 0.545099983215332, + "Acc.hill": 0.09079999923706054, + "Acc.bench": 0.4336000061035156, + "Acc.countertop": 0.6451000213623047, + "Acc.stove": 0.7463999938964844, + "Acc.palm": 0.6922000122070312, + "Acc.kitchen island": 0.590900001525879, + "Acc.computer": 0.6888999938964844, + "Acc.swivel chair": 0.5784000015258789, + "Acc.boat": 0.8294000244140625, + "Acc.bar": 0.5950999832153321, + "Acc.arcade machine": 0.3522999954223633, + "Acc.hovel": 0.12880000114440918, + "Acc.bus": 0.9116000366210938, + "Acc.towel": 0.6812000274658203, + "Acc.light": 
0.27829999923706056, + "Acc.truck": 0.18059999465942383, + "Acc.tower": 0.3284000015258789, + "Acc.chandelier": 0.7320999908447265, + "Acc.awning": 0.2982999992370605, + "Acc.streetlight": 0.16430000305175782, + "Acc.booth": 0.40810001373291016, + "Acc.television receiver": 0.7329000091552734, + "Acc.airplane": 0.6354000091552734, + "Acc.dirt track": 0.48080001831054686, + "Acc.apparel": 0.5072999954223633, + "Acc.pole": 0.12159999847412109, + "Acc.land": 0.030399999618530273, + "Acc.bannister": 0.07230000019073486, + "Acc.escalator": 0.46799999237060547, + "Acc.ottoman": 0.5309000015258789, + "Acc.bottle": 0.12779999732971192, + "Acc.buffet": 0.38290000915527345, + "Acc.poster": 0.364900016784668, + "Acc.stage": 0.30059999465942383, + "Acc.van": 0.43509998321533205, + "Acc.ship": 0.4997999954223633, + "Acc.fountain": 0.081899995803833, + "Acc.conveyer belt": 0.8705999755859375, + "Acc.canopy": 0.2954999923706055, + "Acc.washer": 0.7473999786376954, + "Acc.plaything": 0.3004999923706055, + "Acc.swimming pool": 0.8501999664306641, + "Acc.stool": 0.38310001373291014, + "Acc.barrel": 0.6327000045776368, + "Acc.basket": 0.26440000534057617, + "Acc.waterfall": 0.5847999954223633, + "Acc.tent": 0.9823999786376953, + "Acc.bag": 0.08010000228881836, + "Acc.minibike": 0.7691999816894531, + "Acc.cradle": 0.9605000305175782, + "Acc.oven": 0.43990001678466795, + "Acc.ball": 0.5538000106811524, + "Acc.food": 0.5947000122070313, + "Acc.step": 0.09449999809265136, + "Acc.tank": 0.5252999877929687, + "Acc.trade name": 0.2815999984741211, + "Acc.microwave": 0.4672999954223633, + "Acc.pot": 0.39610000610351564, + "Acc.animal": 0.5802999877929688, + "Acc.bicycle": 0.6923999786376953, + "Acc.lake": 0.10899999618530273, + "Acc.dishwasher": 0.5620999908447266, + "Acc.screen": 0.7241000366210938, + "Acc.blanket": 0.10630000114440918, + "Acc.sculpture": 0.6526000213623047, + "Acc.hood": 0.5968999862670898, + "Acc.sconce": 0.28280000686645507, + "Acc.vase": 0.3359000015258789, + "Acc.traffic light": 0.29700000762939455, + "Acc.tray": 0.04230000019073486, + "Acc.ashcan": 0.5065999984741211, + "Acc.fan": 0.4027000045776367, + "Acc.pier": 0.39599998474121095, + "Acc.crt screen": 0.006000000238418579, + "Acc.plate": 0.5854999923706055, + "Acc.monitor": 0.3258000183105469, + "Acc.bulletin board": 0.4022999954223633, + "Acc.shower": 0.05289999961853027, + "Acc.radiator": 0.5486000061035157, + "Acc.glass": 0.03519999980926514, + "Acc.clock": 0.2075, + "Acc.flag": 0.3111000061035156 + } + }, + "124": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8181999999999999, + "mIoU": 0.441, + "mAcc": 0.5313, + "IoU.wall": 0.7562999725341797, + "IoU.building": 0.8166000366210937, + "IoU.sky": 0.9312000274658203, + "IoU.floor": 0.7912999725341797, + "IoU.tree": 0.7262999725341797, + "IoU.ceiling": 0.824800033569336, + "IoU.road": 0.8175, + "IoU.bed ": 0.8629000091552734, + "IoU.windowpane": 0.5947999954223633, + "IoU.grass": 0.6959999847412109, + "IoU.cabinet": 0.5863999938964843, + "IoU.sidewalk": 0.6254000091552734, + "IoU.person": 0.7820999908447266, + "IoU.earth": 0.37509998321533206, + "IoU.door": 0.417400016784668, + "IoU.table": 0.5525, + "IoU.mountain": 0.5854000091552735, + "IoU.plant": 0.5022000122070313, + "IoU.curtain": 0.7038999938964844, + "IoU.chair": 0.5006000137329102, + "IoU.car": 0.8145999908447266, + "IoU.water": 0.5713999938964843, + "IoU.painting": 0.6887999725341797, + "IoU.sofa": 0.6309000015258789, + "IoU.shelf": 
0.41830001831054686, + "IoU.house": 0.5061000061035156, + "IoU.sea": 0.5981999969482422, + "IoU.mirror": 0.6088000106811523, + "IoU.rug": 0.6402999877929687, + "IoU.field": 0.2826000022888184, + "IoU.armchair": 0.36150001525878905, + "IoU.seat": 0.6034000015258789, + "IoU.fence": 0.37459999084472656, + "IoU.desk": 0.48310001373291017, + "IoU.rock": 0.43540000915527344, + "IoU.wardrobe": 0.5136999893188476, + "IoU.lamp": 0.5118000030517578, + "IoU.bathtub": 0.7973999786376953, + "IoU.railing": 0.35380001068115235, + "IoU.cushion": 0.5365000152587891, + "IoU.base": 0.27100000381469724, + "IoU.box": 0.21540000915527344, + "IoU.column": 0.4366999816894531, + "IoU.signboard": 0.3110000038146973, + "IoU.chest of drawers": 0.33240001678466796, + "IoU.counter": 0.3661000061035156, + "IoU.sand": 0.4204000091552734, + "IoU.sink": 0.6538999938964843, + "IoU.skyscraper": 0.5215000152587891, + "IoU.fireplace": 0.717699966430664, + "IoU.refrigerator": 0.6780999755859375, + "IoU.grandstand": 0.4697000122070312, + "IoU.path": 0.18510000228881837, + "IoU.stairs": 0.1875, + "IoU.runway": 0.6158000183105469, + "IoU.case": 0.48520000457763673, + "IoU.pool table": 0.9231999969482422, + "IoU.pillow": 0.46700000762939453, + "IoU.screen door": 0.5881999969482422, + "IoU.stairway": 0.2670000076293945, + "IoU.river": 0.18229999542236328, + "IoU.bridge": 0.6643000030517578, + "IoU.bookcase": 0.31409999847412107, + "IoU.blind": 0.30670000076293946, + "IoU.coffee table": 0.5852999877929688, + "IoU.toilet": 0.8212999725341796, + "IoU.flower": 0.3768000030517578, + "IoU.book": 0.4116999816894531, + "IoU.hill": 0.050999999046325684, + "IoU.bench": 0.3777000045776367, + "IoU.countertop": 0.49810001373291013, + "IoU.stove": 0.6655999755859375, + "IoU.palm": 0.4506999969482422, + "IoU.kitchen island": 0.4047999954223633, + "IoU.computer": 0.6068000030517579, + "IoU.swivel chair": 0.4481999969482422, + "IoU.boat": 0.6556999969482422, + "IoU.bar": 0.4595000076293945, + "IoU.arcade machine": 0.355, + "IoU.hovel": 0.12729999542236328, + "IoU.bus": 0.8480999755859375, + "IoU.towel": 0.5429000091552735, + "IoU.light": 0.2646999931335449, + "IoU.truck": 0.10550000190734864, + "IoU.tower": 0.2031999969482422, + "IoU.chandelier": 0.5781000137329102, + "IoU.awning": 0.22620000839233398, + "IoU.streetlight": 0.13859999656677247, + "IoU.booth": 0.3384000015258789, + "IoU.television receiver": 0.6758999633789062, + "IoU.airplane": 0.5645000076293946, + "IoU.dirt track": 0.26110000610351564, + "IoU.apparel": 0.3181999969482422, + "IoU.pole": 0.06639999866485596, + "IoU.land": 0.0475, + "IoU.bannister": 0.0571999979019165, + "IoU.escalator": 0.27649999618530274, + "IoU.ottoman": 0.39119998931884764, + "IoU.bottle": 0.1115999984741211, + "IoU.buffet": 0.3114999961853027, + "IoU.poster": 0.13520000457763673, + "IoU.stage": 0.19139999389648438, + "IoU.van": 0.29450000762939454, + "IoU.ship": 0.46669998168945315, + "IoU.fountain": 0.11970000267028809, + "IoU.conveyer belt": 0.6480000305175782, + "IoU.canopy": 0.20729999542236327, + "IoU.washer": 0.7204000091552735, + "IoU.plaything": 0.23260000228881836, + "IoU.swimming pool": 0.712699966430664, + "IoU.stool": 0.3252000045776367, + "IoU.barrel": 0.5531999969482422, + "IoU.basket": 0.25459999084472656, + "IoU.waterfall": 0.5631999969482422, + "IoU.tent": 0.9390000152587891, + "IoU.bag": 0.10930000305175781, + "IoU.minibike": 0.5422999954223633, + "IoU.cradle": 0.7929000091552735, + "IoU.oven": 0.20180000305175783, + "IoU.ball": 0.4768000030517578, + "IoU.food": 0.4588999938964844, + 
"IoU.step": 0.08380000114440918, + "IoU.tank": 0.43029998779296874, + "IoU.trade name": 0.19440000534057617, + "IoU.microwave": 0.6591000366210937, + "IoU.pot": 0.3706999969482422, + "IoU.animal": 0.5695999908447266, + "IoU.bicycle": 0.5027999877929688, + "IoU.lake": 0.037799999713897706, + "IoU.dishwasher": 0.4931999969482422, + "IoU.screen": 0.5740000152587891, + "IoU.blanket": 0.08529999732971191, + "IoU.sculpture": 0.53, + "IoU.hood": 0.47720001220703123, + "IoU.sconce": 0.22399999618530272, + "IoU.vase": 0.2646999931335449, + "IoU.traffic light": 0.2027000045776367, + "IoU.tray": 0.04519999980926514, + "IoU.ashcan": 0.37009998321533205, + "IoU.fan": 0.39099998474121095, + "IoU.pier": 0.2593000030517578, + "IoU.crt screen": 0.0005000000074505806, + "IoU.plate": 0.45369998931884764, + "IoU.monitor": 0.21989999771118163, + "IoU.bulletin board": 0.21309999465942384, + "IoU.shower": 0.01, + "IoU.radiator": 0.5268000030517578, + "IoU.glass": 0.06440000057220459, + "IoU.clock": 0.2297999954223633, + "IoU.flag": 0.335099983215332, + "Acc.wall": 0.8926999664306641, + "Acc.building": 0.936500015258789, + "Acc.sky": 0.9806999969482422, + "Acc.floor": 0.9169000244140625, + "Acc.tree": 0.8355999755859375, + "Acc.ceiling": 0.9319000244140625, + "Acc.road": 0.8991000366210937, + "Acc.bed ": 0.9527999877929687, + "Acc.windowpane": 0.790199966430664, + "Acc.grass": 0.8387999725341797, + "Acc.cabinet": 0.7180999755859375, + "Acc.sidewalk": 0.7888999938964844, + "Acc.person": 0.8794999694824219, + "Acc.earth": 0.5458000183105469, + "Acc.door": 0.5213000106811524, + "Acc.table": 0.7433000183105469, + "Acc.mountain": 0.7501000213623047, + "Acc.plant": 0.6040999984741211, + "Acc.curtain": 0.8220999908447265, + "Acc.chair": 0.6040000152587891, + "Acc.car": 0.8938999938964843, + "Acc.water": 0.7863999938964844, + "Acc.painting": 0.8091999816894532, + "Acc.sofa": 0.8688999938964844, + "Acc.shelf": 0.6118999862670899, + "Acc.house": 0.6386000061035156, + "Acc.sea": 0.7591000366210937, + "Acc.mirror": 0.6686000061035157, + "Acc.rug": 0.7187000274658203, + "Acc.field": 0.42009998321533204, + "Acc.armchair": 0.44779998779296876, + "Acc.seat": 0.7691000366210937, + "Acc.fence": 0.48790000915527343, + "Acc.desk": 0.6423999786376953, + "Acc.rock": 0.5629999923706055, + "Acc.wardrobe": 0.6405000305175781, + "Acc.lamp": 0.5927000045776367, + "Acc.bathtub": 0.8343000030517578, + "Acc.railing": 0.489900016784668, + "Acc.cushion": 0.6656999969482422, + "Acc.base": 0.3634999847412109, + "Acc.box": 0.27899999618530275, + "Acc.column": 0.5327999877929688, + "Acc.signboard": 0.40119998931884765, + "Acc.chest of drawers": 0.5595999908447266, + "Acc.counter": 0.5161000061035156, + "Acc.sand": 0.5593999862670899, + "Acc.sink": 0.7166999816894531, + "Acc.skyscraper": 0.5661000061035156, + "Acc.fireplace": 0.7987999725341797, + "Acc.refrigerator": 0.7277999877929687, + "Acc.grandstand": 0.7272000122070312, + "Acc.path": 0.2518000030517578, + "Acc.stairs": 0.24850000381469728, + "Acc.runway": 0.8055000305175781, + "Acc.case": 0.6816999816894531, + "Acc.pool table": 0.9523999786376953, + "Acc.pillow": 0.5181000137329101, + "Acc.screen door": 0.6279999923706054, + "Acc.stairway": 0.4115999984741211, + "Acc.river": 0.31209999084472656, + "Acc.bridge": 0.8291999816894531, + "Acc.bookcase": 0.5175999832153321, + "Acc.blind": 0.32990001678466796, + "Acc.coffee table": 0.7498000335693359, + "Acc.toilet": 0.8584999847412109, + "Acc.flower": 0.5379999923706055, + "Acc.book": 0.5636999893188477, + "Acc.hill": 0.07710000038146973, + 
"Acc.bench": 0.4247999954223633, + "Acc.countertop": 0.6359000015258789, + "Acc.stove": 0.7169000244140625, + "Acc.palm": 0.5593000030517579, + "Acc.kitchen island": 0.7058000183105468, + "Acc.computer": 0.6858999633789062, + "Acc.swivel chair": 0.5568999862670898, + "Acc.boat": 0.7412000274658204, + "Acc.bar": 0.5647000122070313, + "Acc.arcade machine": 0.3747999954223633, + "Acc.hovel": 0.128100004196167, + "Acc.bus": 0.9151000213623047, + "Acc.towel": 0.7208000183105469, + "Acc.light": 0.28829999923706057, + "Acc.truck": 0.13020000457763672, + "Acc.tower": 0.2698999977111816, + "Acc.chandelier": 0.75, + "Acc.awning": 0.25290000915527344, + "Acc.streetlight": 0.16770000457763673, + "Acc.booth": 0.3788000106811523, + "Acc.television receiver": 0.7161000061035157, + "Acc.airplane": 0.6191999816894531, + "Acc.dirt track": 0.30889999389648437, + "Acc.apparel": 0.4068000030517578, + "Acc.pole": 0.07889999866485596, + "Acc.land": 0.05849999904632568, + "Acc.bannister": 0.07760000228881836, + "Acc.escalator": 0.332599983215332, + "Acc.ottoman": 0.4995000076293945, + "Acc.bottle": 0.12279999732971192, + "Acc.buffet": 0.3656000137329102, + "Acc.poster": 0.20760000228881836, + "Acc.stage": 0.2468000030517578, + "Acc.van": 0.3345999908447266, + "Acc.ship": 0.4958000183105469, + "Acc.fountain": 0.1225, + "Acc.conveyer belt": 0.8794999694824219, + "Acc.canopy": 0.2384000015258789, + "Acc.washer": 0.7297000122070313, + "Acc.plaything": 0.32299999237060545, + "Acc.swimming pool": 0.8419000244140625, + "Acc.stool": 0.37979999542236326, + "Acc.barrel": 0.6197999954223633, + "Acc.basket": 0.31170000076293947, + "Acc.waterfall": 0.6706999969482422, + "Acc.tent": 0.9726000213623047, + "Acc.bag": 0.125, + "Acc.minibike": 0.5706999969482421, + "Acc.cradle": 0.9523999786376953, + "Acc.oven": 0.3163999938964844, + "Acc.ball": 0.528400001525879, + "Acc.food": 0.5177999877929688, + "Acc.step": 0.10010000228881837, + "Acc.tank": 0.46950000762939453, + "Acc.trade name": 0.20829999923706055, + "Acc.microwave": 0.7316000366210937, + "Acc.pot": 0.3977000045776367, + "Acc.animal": 0.5947999954223633, + "Acc.bicycle": 0.6238999938964844, + "Acc.lake": 0.041700000762939456, + "Acc.dishwasher": 0.5556999969482422, + "Acc.screen": 0.7330999755859375, + "Acc.blanket": 0.09470000267028808, + "Acc.sculpture": 0.5709999847412109, + "Acc.hood": 0.5336000061035157, + "Acc.sconce": 0.2646999931335449, + "Acc.vase": 0.34950000762939454, + "Acc.traffic light": 0.25010000228881835, + "Acc.tray": 0.0625, + "Acc.ashcan": 0.5283000183105468, + "Acc.fan": 0.43650001525878906, + "Acc.pier": 0.40849998474121096, + "Acc.crt screen": 0.0010999999940395355, + "Acc.plate": 0.5597000122070312, + "Acc.monitor": 0.36200000762939455, + "Acc.bulletin board": 0.27420000076293943, + "Acc.shower": 0.052100000381469724, + "Acc.radiator": 0.6029999923706054, + "Acc.glass": 0.06829999923706055, + "Acc.clock": 0.24579999923706056, + "Acc.flag": 0.35569999694824217 + } + }, + "125": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8099, + "mIoU": 0.4251, + "mAcc": 0.5376, + "IoU.wall": 0.7458000183105469, + "IoU.building": 0.8133999633789063, + "IoU.sky": 0.9359999847412109, + "IoU.floor": 0.7937999725341797, + "IoU.tree": 0.731500015258789, + "IoU.ceiling": 0.8195999908447266, + "IoU.road": 0.8127999877929688, + "IoU.bed ": 0.8584999847412109, + "IoU.windowpane": 0.601599998474121, + "IoU.grass": 0.6631999969482422, + "IoU.cabinet": 0.5761999893188476, + 
"IoU.sidewalk": 0.6316999816894531, + "IoU.person": 0.7637999725341796, + "IoU.earth": 0.33299999237060546, + "IoU.door": 0.4377000045776367, + "IoU.table": 0.530099983215332, + "IoU.mountain": 0.5890000152587891, + "IoU.plant": 0.5018000030517578, + "IoU.curtain": 0.7156999969482422, + "IoU.chair": 0.4997999954223633, + "IoU.car": 0.8030999755859375, + "IoU.water": 0.48959999084472655, + "IoU.painting": 0.6529000091552735, + "IoU.sofa": 0.6184999847412109, + "IoU.shelf": 0.4072999954223633, + "IoU.house": 0.532400016784668, + "IoU.sea": 0.5527000045776367, + "IoU.mirror": 0.6266999816894532, + "IoU.rug": 0.6083000183105469, + "IoU.field": 0.25360000610351563, + "IoU.armchair": 0.39310001373291015, + "IoU.seat": 0.6288000106811523, + "IoU.fence": 0.33930000305175784, + "IoU.desk": 0.4463999938964844, + "IoU.rock": 0.4452000045776367, + "IoU.wardrobe": 0.4909000015258789, + "IoU.lamp": 0.5054000091552734, + "IoU.bathtub": 0.7765000152587891, + "IoU.railing": 0.27670000076293944, + "IoU.cushion": 0.4811000061035156, + "IoU.base": 0.2595000076293945, + "IoU.box": 0.20120000839233398, + "IoU.column": 0.43259998321533205, + "IoU.signboard": 0.3308000183105469, + "IoU.chest of drawers": 0.32479999542236326, + "IoU.counter": 0.28719999313354494, + "IoU.sand": 0.3756999969482422, + "IoU.sink": 0.6136000061035156, + "IoU.skyscraper": 0.590999984741211, + "IoU.fireplace": 0.6869000244140625, + "IoU.refrigerator": 0.611599998474121, + "IoU.grandstand": 0.4427000045776367, + "IoU.path": 0.1965999984741211, + "IoU.stairs": 0.18440000534057618, + "IoU.runway": 0.6047999954223633, + "IoU.case": 0.48619998931884767, + "IoU.pool table": 0.91, + "IoU.pillow": 0.48200000762939454, + "IoU.screen door": 0.5193000030517578, + "IoU.stairway": 0.2375, + "IoU.river": 0.09579999923706055, + "IoU.bridge": 0.512400016784668, + "IoU.bookcase": 0.3218000030517578, + "IoU.blind": 0.44990001678466796, + "IoU.coffee table": 0.5602000045776367, + "IoU.toilet": 0.7987000274658204, + "IoU.flower": 0.35720001220703124, + "IoU.book": 0.39229999542236327, + "IoU.hill": 0.11300000190734863, + "IoU.bench": 0.3958000183105469, + "IoU.countertop": 0.46560001373291016, + "IoU.stove": 0.657300033569336, + "IoU.palm": 0.424900016784668, + "IoU.kitchen island": 0.36529998779296874, + "IoU.computer": 0.5559999847412109, + "IoU.swivel chair": 0.38279998779296875, + "IoU.boat": 0.5934000015258789, + "IoU.bar": 0.26049999237060545, + "IoU.arcade machine": 0.3816999816894531, + "IoU.hovel": 0.4804999923706055, + "IoU.bus": 0.7723999786376953, + "IoU.towel": 0.4766999816894531, + "IoU.light": 0.23440000534057617, + "IoU.truck": 0.13520000457763673, + "IoU.tower": 0.17940000534057618, + "IoU.chandelier": 0.5758000183105468, + "IoU.awning": 0.22209999084472656, + "IoU.streetlight": 0.10029999732971191, + "IoU.booth": 0.40400001525878904, + "IoU.television receiver": 0.6163999938964844, + "IoU.airplane": 0.5686999893188477, + "IoU.dirt track": 0.16309999465942382, + "IoU.apparel": 0.3159000015258789, + "IoU.pole": 0.2075, + "IoU.land": 0.057899999618530276, + "IoU.bannister": 0.048000001907348634, + "IoU.escalator": 0.19520000457763673, + "IoU.ottoman": 0.42779998779296874, + "IoU.bottle": 0.32470001220703126, + "IoU.buffet": 0.35450000762939454, + "IoU.poster": 0.2575, + "IoU.stage": 0.146899995803833, + "IoU.van": 0.3479000091552734, + "IoU.ship": 0.7088999938964844, + "IoU.fountain": 0.09470000267028808, + "IoU.conveyer belt": 0.5206999969482422, + "IoU.canopy": 0.15899999618530272, + "IoU.washer": 0.6311999893188477, + "IoU.plaything": 
0.22799999237060548, + "IoU.swimming pool": 0.5902999877929688, + "IoU.stool": 0.19079999923706054, + "IoU.barrel": 0.17819999694824218, + "IoU.basket": 0.14039999961853028, + "IoU.waterfall": 0.6813999938964844, + "IoU.tent": 0.8868000030517578, + "IoU.bag": 0.08869999885559082, + "IoU.minibike": 0.5852999877929688, + "IoU.cradle": 0.7722000122070313, + "IoU.oven": 0.25040000915527344, + "IoU.ball": 0.44209999084472656, + "IoU.food": 0.5081000137329101, + "IoU.step": 0.09710000038146972, + "IoU.tank": 0.4791999816894531, + "IoU.trade name": 0.2, + "IoU.microwave": 0.3370000076293945, + "IoU.pot": 0.32970001220703127, + "IoU.animal": 0.47779998779296873, + "IoU.bicycle": 0.4241999816894531, + "IoU.lake": 0.015099999904632568, + "IoU.dishwasher": 0.529000015258789, + "IoU.screen": 0.5522000122070313, + "IoU.blanket": 0.07949999809265136, + "IoU.sculpture": 0.46630001068115234, + "IoU.hood": 0.45669998168945314, + "IoU.sconce": 0.24979999542236328, + "IoU.vase": 0.23940000534057618, + "IoU.traffic light": 0.22459999084472657, + "IoU.tray": 0.009399999976158141, + "IoU.ashcan": 0.35689998626708985, + "IoU.fan": 0.37290000915527344, + "IoU.pier": 0.505, + "IoU.crt screen": 0.027899999618530274, + "IoU.plate": 0.33180000305175783, + "IoU.monitor": 0.049000000953674315, + "IoU.bulletin board": 0.17450000762939452, + "IoU.shower": 0.005600000023841858, + "IoU.radiator": 0.48779998779296874, + "IoU.glass": 0.031700000762939454, + "IoU.clock": 0.25579999923706054, + "IoU.flag": 0.17940000534057618, + "Acc.wall": 0.8695999908447266, + "Acc.building": 0.9111000061035156, + "Acc.sky": 0.9744999694824219, + "Acc.floor": 0.903499984741211, + "Acc.tree": 0.8616999816894532, + "Acc.ceiling": 0.9241000366210937, + "Acc.road": 0.8898999786376953, + "Acc.bed ": 0.9394000244140625, + "Acc.windowpane": 0.7591999816894531, + "Acc.grass": 0.7997000122070312, + "Acc.cabinet": 0.6904000091552734, + "Acc.sidewalk": 0.7837999725341797, + "Acc.person": 0.9066999816894531, + "Acc.earth": 0.4752000045776367, + "Acc.door": 0.6281999969482421, + "Acc.table": 0.6883999633789063, + "Acc.mountain": 0.722699966430664, + "Acc.plant": 0.6494000244140625, + "Acc.curtain": 0.8358000183105468, + "Acc.chair": 0.6336000061035156, + "Acc.car": 0.9008000183105469, + "Acc.water": 0.6458000183105469, + "Acc.painting": 0.8148999786376954, + "Acc.sofa": 0.8105999755859375, + "Acc.shelf": 0.601500015258789, + "Acc.house": 0.7548999786376953, + "Acc.sea": 0.8276000213623047, + "Acc.mirror": 0.7273999786376953, + "Acc.rug": 0.6648999786376953, + "Acc.field": 0.45720001220703127, + "Acc.armchair": 0.5777999877929687, + "Acc.seat": 0.7754000091552734, + "Acc.fence": 0.4656999969482422, + "Acc.desk": 0.6736000061035157, + "Acc.rock": 0.6641000366210937, + "Acc.wardrobe": 0.6177000045776367, + "Acc.lamp": 0.6405999755859375, + "Acc.bathtub": 0.8619999694824219, + "Acc.railing": 0.4215999984741211, + "Acc.cushion": 0.6372999954223633, + "Acc.base": 0.43220001220703125, + "Acc.box": 0.2836000061035156, + "Acc.column": 0.5609000015258789, + "Acc.signboard": 0.42959999084472655, + "Acc.chest of drawers": 0.5688000106811524, + "Acc.counter": 0.47069999694824216, + "Acc.sand": 0.48380001068115236, + "Acc.sink": 0.7018000030517578, + "Acc.skyscraper": 0.706500015258789, + "Acc.fireplace": 0.8645999908447266, + "Acc.refrigerator": 0.7865000152587891, + "Acc.grandstand": 0.6809999847412109, + "Acc.path": 0.283799991607666, + "Acc.stairs": 0.24040000915527343, + "Acc.runway": 0.7779000091552735, + "Acc.case": 0.6759999847412109, + "Acc.pool table": 
0.9590000152587891, + "Acc.pillow": 0.5693999862670899, + "Acc.screen door": 0.6197000122070313, + "Acc.stairway": 0.31790000915527344, + "Acc.river": 0.16559999465942382, + "Acc.bridge": 0.6455000305175781, + "Acc.bookcase": 0.52, + "Acc.blind": 0.5368999862670898, + "Acc.coffee table": 0.7872000122070313, + "Acc.toilet": 0.8708999633789063, + "Acc.flower": 0.49470001220703125, + "Acc.book": 0.534099998474121, + "Acc.hill": 0.2043000030517578, + "Acc.bench": 0.48830001831054687, + "Acc.countertop": 0.5843000030517578, + "Acc.stove": 0.7723999786376953, + "Acc.palm": 0.5883000183105469, + "Acc.kitchen island": 0.6708000183105469, + "Acc.computer": 0.6630999755859375, + "Acc.swivel chair": 0.4822999954223633, + "Acc.boat": 0.7954000091552734, + "Acc.bar": 0.3306999969482422, + "Acc.arcade machine": 0.462599983215332, + "Acc.hovel": 0.5615999984741211, + "Acc.bus": 0.8951000213623047, + "Acc.towel": 0.6623999786376953, + "Acc.light": 0.24350000381469727, + "Acc.truck": 0.18549999237060547, + "Acc.tower": 0.21879999160766603, + "Acc.chandelier": 0.7401000213623047, + "Acc.awning": 0.2695999908447266, + "Acc.streetlight": 0.10649999618530273, + "Acc.booth": 0.45310001373291015, + "Acc.television receiver": 0.6995999908447266, + "Acc.airplane": 0.7001999664306641, + "Acc.dirt track": 0.23799999237060546, + "Acc.apparel": 0.46110000610351565, + "Acc.pole": 0.25159999847412107, + "Acc.land": 0.09470000267028808, + "Acc.bannister": 0.06599999904632568, + "Acc.escalator": 0.2159000015258789, + "Acc.ottoman": 0.6004000091552735, + "Acc.bottle": 0.45689998626708983, + "Acc.buffet": 0.38369998931884763, + "Acc.poster": 0.34549999237060547, + "Acc.stage": 0.3295999908447266, + "Acc.van": 0.5113000106811524, + "Acc.ship": 0.8548999786376953, + "Acc.fountain": 0.10350000381469726, + "Acc.conveyer belt": 0.7922000122070313, + "Acc.canopy": 0.21329999923706056, + "Acc.washer": 0.658499984741211, + "Acc.plaything": 0.3306999969482422, + "Acc.swimming pool": 0.7694999694824218, + "Acc.stool": 0.23040000915527345, + "Acc.barrel": 0.21860000610351563, + "Acc.basket": 0.17190000534057617, + "Acc.waterfall": 0.7662999725341797, + "Acc.tent": 0.9929000091552734, + "Acc.bag": 0.10670000076293945, + "Acc.minibike": 0.6631999969482422, + "Acc.cradle": 0.9579000091552734, + "Acc.oven": 0.4875, + "Acc.ball": 0.5029999923706054, + "Acc.food": 0.644000015258789, + "Acc.step": 0.12449999809265137, + "Acc.tank": 0.5518000030517578, + "Acc.trade name": 0.22370000839233398, + "Acc.microwave": 0.36029998779296873, + "Acc.pot": 0.3711999893188477, + "Acc.animal": 0.5404999923706054, + "Acc.bicycle": 0.6213000106811524, + "Acc.lake": 0.016799999475479124, + "Acc.dishwasher": 0.606599998474121, + "Acc.screen": 0.8748999786376953, + "Acc.blanket": 0.08680000305175781, + "Acc.sculpture": 0.5958000183105469, + "Acc.hood": 0.5211999893188477, + "Acc.sconce": 0.2890999984741211, + "Acc.vase": 0.3175, + "Acc.traffic light": 0.3171999931335449, + "Acc.tray": 0.01590000033378601, + "Acc.ashcan": 0.45970001220703127, + "Acc.fan": 0.4402000045776367, + "Acc.pier": 0.8666000366210938, + "Acc.crt screen": 0.08510000228881837, + "Acc.plate": 0.407400016784668, + "Acc.monitor": 0.07260000228881835, + "Acc.bulletin board": 0.2206999969482422, + "Acc.shower": 0.04190000057220459, + "Acc.radiator": 0.5384000015258789, + "Acc.glass": 0.032400000095367434, + "Acc.clock": 0.2795999908447266, + "Acc.flag": 0.203700008392334 + } + }, + "126": { + "config": 
"configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8123, + "mIoU": 0.4303, + "mAcc": 0.54, + "IoU.wall": 0.7497000122070312, + "IoU.building": 0.8161000061035156, + "IoU.sky": 0.9355999755859375, + "IoU.floor": 0.7912000274658203, + "IoU.tree": 0.7277999877929687, + "IoU.ceiling": 0.8237999725341797, + "IoU.road": 0.8105000305175781, + "IoU.bed ": 0.8619000244140625, + "IoU.windowpane": 0.6002000045776367, + "IoU.grass": 0.6736000061035157, + "IoU.cabinet": 0.5831000137329102, + "IoU.sidewalk": 0.6259000015258789, + "IoU.person": 0.7673999786376953, + "IoU.earth": 0.34060001373291016, + "IoU.door": 0.43540000915527344, + "IoU.table": 0.5427000045776367, + "IoU.mountain": 0.5784999847412109, + "IoU.plant": 0.5034999847412109, + "IoU.curtain": 0.7205000305175782, + "IoU.chair": 0.4988000106811523, + "IoU.car": 0.8073999786376953, + "IoU.water": 0.4822999954223633, + "IoU.painting": 0.6538999938964843, + "IoU.sofa": 0.5877000045776367, + "IoU.shelf": 0.4311999893188477, + "IoU.house": 0.5429000091552735, + "IoU.sea": 0.5786000061035156, + "IoU.mirror": 0.645199966430664, + "IoU.rug": 0.6206000137329102, + "IoU.field": 0.27780000686645506, + "IoU.armchair": 0.35259998321533204, + "IoU.seat": 0.6095999908447266, + "IoU.fence": 0.2738999938964844, + "IoU.desk": 0.4484000015258789, + "IoU.rock": 0.46849998474121096, + "IoU.wardrobe": 0.5349000167846679, + "IoU.lamp": 0.5059999847412109, + "IoU.bathtub": 0.7980000305175782, + "IoU.railing": 0.29520000457763673, + "IoU.cushion": 0.5041999816894531, + "IoU.base": 0.27790000915527346, + "IoU.box": 0.18479999542236328, + "IoU.column": 0.4483000183105469, + "IoU.signboard": 0.33869998931884765, + "IoU.chest of drawers": 0.3163999938964844, + "IoU.counter": 0.3422000122070312, + "IoU.sand": 0.34330001831054685, + "IoU.sink": 0.6402999877929687, + "IoU.skyscraper": 0.6455999755859375, + "IoU.fireplace": 0.6502999877929687, + "IoU.refrigerator": 0.6172000122070312, + "IoU.grandstand": 0.3877000045776367, + "IoU.path": 0.20299999237060548, + "IoU.stairs": 0.18549999237060547, + "IoU.runway": 0.6195000076293945, + "IoU.case": 0.4884000015258789, + "IoU.pool table": 0.9205999755859375, + "IoU.pillow": 0.5063999938964844, + "IoU.screen door": 0.6368999862670899, + "IoU.stairway": 0.24829999923706056, + "IoU.river": 0.15829999923706053, + "IoU.bridge": 0.5377000045776367, + "IoU.bookcase": 0.3315999984741211, + "IoU.blind": 0.35759998321533204, + "IoU.coffee table": 0.5477000045776367, + "IoU.toilet": 0.779000015258789, + "IoU.flower": 0.349900016784668, + "IoU.book": 0.3840000152587891, + "IoU.hill": 0.05920000076293945, + "IoU.bench": 0.37659999847412107, + "IoU.countertop": 0.49009998321533205, + "IoU.stove": 0.6805000305175781, + "IoU.palm": 0.4358000183105469, + "IoU.kitchen island": 0.33029998779296876, + "IoU.computer": 0.5379999923706055, + "IoU.swivel chair": 0.4, + "IoU.boat": 0.4929999923706055, + "IoU.bar": 0.3740999984741211, + "IoU.arcade machine": 0.33860000610351565, + "IoU.hovel": 0.3856999969482422, + "IoU.bus": 0.8556999969482422, + "IoU.towel": 0.5108000183105469, + "IoU.light": 0.20479999542236327, + "IoU.truck": 0.225, + "IoU.tower": 0.2552000045776367, + "IoU.chandelier": 0.5609000015258789, + "IoU.awning": 0.246200008392334, + "IoU.streetlight": 0.12779999732971192, + "IoU.booth": 0.32060001373291014, + "IoU.television receiver": 0.6195000076293945, + "IoU.airplane": 0.5886000061035156, + "IoU.dirt track": 0.18350000381469728, + "IoU.apparel": 
0.3365000152587891, + "IoU.pole": 0.18299999237060546, + "IoU.land": 0.049200000762939455, + "IoU.bannister": 0.027799999713897704, + "IoU.escalator": 0.1981999969482422, + "IoU.ottoman": 0.3984000015258789, + "IoU.bottle": 0.105, + "IoU.buffet": 0.43470001220703125, + "IoU.poster": 0.2602000045776367, + "IoU.stage": 0.1356999969482422, + "IoU.van": 0.26110000610351564, + "IoU.ship": 0.7151999664306641, + "IoU.fountain": 0.06699999809265136, + "IoU.conveyer belt": 0.6009000015258789, + "IoU.canopy": 0.1534000015258789, + "IoU.washer": 0.658499984741211, + "IoU.plaything": 0.23149999618530273, + "IoU.swimming pool": 0.5813000106811523, + "IoU.stool": 0.2559000015258789, + "IoU.barrel": 0.36279998779296874, + "IoU.basket": 0.13640000343322753, + "IoU.waterfall": 0.5311000061035156, + "IoU.tent": 0.8869000244140625, + "IoU.bag": 0.07679999828338623, + "IoU.minibike": 0.6576999664306641, + "IoU.cradle": 0.7708999633789062, + "IoU.oven": 0.268700008392334, + "IoU.ball": 0.4886000061035156, + "IoU.food": 0.5288000106811523, + "IoU.step": 0.047800002098083494, + "IoU.tank": 0.5411000061035156, + "IoU.trade name": 0.2025, + "IoU.microwave": 0.33419998168945314, + "IoU.pot": 0.3522999954223633, + "IoU.animal": 0.5, + "IoU.bicycle": 0.49599998474121093, + "IoU.lake": 0.2015999984741211, + "IoU.dishwasher": 0.49740001678466794, + "IoU.screen": 0.5938000106811523, + "IoU.blanket": 0.12260000228881836, + "IoU.sculpture": 0.5066999816894531, + "IoU.hood": 0.47439998626708985, + "IoU.sconce": 0.20920000076293946, + "IoU.vase": 0.2534000015258789, + "IoU.traffic light": 0.1815999984741211, + "IoU.tray": 0.013600000143051148, + "IoU.ashcan": 0.33099998474121095, + "IoU.fan": 0.34709999084472654, + "IoU.pier": 0.3940000152587891, + "IoU.crt screen": 0.0023000000417232516, + "IoU.plate": 0.4266999816894531, + "IoU.monitor": 0.03299999952316284, + "IoU.bulletin board": 0.2813999938964844, + "IoU.shower": 0.017200000286102295, + "IoU.radiator": 0.5002999877929688, + "IoU.glass": 0.03940000057220459, + "IoU.clock": 0.1825, + "IoU.flag": 0.23959999084472655, + "Acc.wall": 0.8801999664306641, + "Acc.building": 0.912300033569336, + "Acc.sky": 0.976500015258789, + "Acc.floor": 0.8994999694824218, + "Acc.tree": 0.8612000274658204, + "Acc.ceiling": 0.9219000244140625, + "Acc.road": 0.8794999694824219, + "Acc.bed ": 0.9480000305175781, + "Acc.windowpane": 0.7669999694824219, + "Acc.grass": 0.8169999694824219, + "Acc.cabinet": 0.7066999816894531, + "Acc.sidewalk": 0.7894999694824218, + "Acc.person": 0.9012999725341797, + "Acc.earth": 0.4834000015258789, + "Acc.door": 0.5991999816894531, + "Acc.table": 0.6930000305175781, + "Acc.mountain": 0.7101000213623047, + "Acc.plant": 0.6572000122070313, + "Acc.curtain": 0.8369000244140625, + "Acc.chair": 0.6383000183105468, + "Acc.car": 0.9198000335693359, + "Acc.water": 0.635, + "Acc.painting": 0.7998999786376954, + "Acc.sofa": 0.7794999694824218, + "Acc.shelf": 0.635099983215332, + "Acc.house": 0.7320999908447265, + "Acc.sea": 0.8487000274658203, + "Acc.mirror": 0.7194000244140625, + "Acc.rug": 0.6806999969482422, + "Acc.field": 0.4356999969482422, + "Acc.armchair": 0.5186000061035156, + "Acc.seat": 0.794000015258789, + "Acc.fence": 0.3758000183105469, + "Acc.desk": 0.6691000366210937, + "Acc.rock": 0.667300033569336, + "Acc.wardrobe": 0.6751000213623047, + "Acc.lamp": 0.6427999877929688, + "Acc.bathtub": 0.8788999938964843, + "Acc.railing": 0.44119998931884763, + "Acc.cushion": 0.6641000366210937, + "Acc.base": 0.5206000137329102, + "Acc.box": 0.23950000762939452, + 
"Acc.column": 0.5502000045776367, + "Acc.signboard": 0.45029998779296876, + "Acc.chest of drawers": 0.5404999923706054, + "Acc.counter": 0.5204000091552734, + "Acc.sand": 0.4861000061035156, + "Acc.sink": 0.7419999694824219, + "Acc.skyscraper": 0.7644000244140625, + "Acc.fireplace": 0.8656999969482422, + "Acc.refrigerator": 0.7883000183105469, + "Acc.grandstand": 0.7452999877929688, + "Acc.path": 0.29219999313354494, + "Acc.stairs": 0.2570000076293945, + "Acc.runway": 0.7944999694824219, + "Acc.case": 0.6848000335693359, + "Acc.pool table": 0.9581999969482422, + "Acc.pillow": 0.5813000106811523, + "Acc.screen door": 0.7387000274658203, + "Acc.stairway": 0.3265999984741211, + "Acc.river": 0.2955999946594238, + "Acc.bridge": 0.6625, + "Acc.bookcase": 0.4986999893188477, + "Acc.blind": 0.3972999954223633, + "Acc.coffee table": 0.725, + "Acc.toilet": 0.8680999755859375, + "Acc.flower": 0.4786000061035156, + "Acc.book": 0.5431000137329102, + "Acc.hill": 0.08270000457763672, + "Acc.bench": 0.48450000762939455, + "Acc.countertop": 0.6413999938964844, + "Acc.stove": 0.7554000091552734, + "Acc.palm": 0.6122000122070312, + "Acc.kitchen island": 0.5395000076293945, + "Acc.computer": 0.6826000213623047, + "Acc.swivel chair": 0.5241999816894531, + "Acc.boat": 0.6366999816894531, + "Acc.bar": 0.45529998779296876, + "Acc.arcade machine": 0.3756999969482422, + "Acc.hovel": 0.44740001678466795, + "Acc.bus": 0.9176000213623047, + "Acc.towel": 0.6781999969482422, + "Acc.light": 0.2140999984741211, + "Acc.truck": 0.31959999084472657, + "Acc.tower": 0.3268999862670898, + "Acc.chandelier": 0.719800033569336, + "Acc.awning": 0.3006999969482422, + "Acc.streetlight": 0.14199999809265137, + "Acc.booth": 0.4484000015258789, + "Acc.television receiver": 0.6891000366210938, + "Acc.airplane": 0.6737999725341797, + "Acc.dirt track": 0.18479999542236328, + "Acc.apparel": 0.4991999816894531, + "Acc.pole": 0.22690000534057617, + "Acc.land": 0.06769999980926514, + "Acc.bannister": 0.037300000190734865, + "Acc.escalator": 0.20399999618530273, + "Acc.ottoman": 0.5656000137329101, + "Acc.bottle": 0.11739999771118165, + "Acc.buffet": 0.4595000076293945, + "Acc.poster": 0.4211000061035156, + "Acc.stage": 0.301299991607666, + "Acc.van": 0.39919998168945314, + "Acc.ship": 0.9180000305175782, + "Acc.fountain": 0.0690999984741211, + "Acc.conveyer belt": 0.8723999786376954, + "Acc.canopy": 0.19739999771118164, + "Acc.washer": 0.7027999877929687, + "Acc.plaything": 0.35939998626708985, + "Acc.swimming pool": 0.7776999664306641, + "Acc.stool": 0.29569999694824217, + "Acc.barrel": 0.6340999984741211, + "Acc.basket": 0.16809999465942382, + "Acc.waterfall": 0.5772999954223633, + "Acc.tent": 0.9925, + "Acc.bag": 0.09149999618530273, + "Acc.minibike": 0.7408999633789063, + "Acc.cradle": 0.9620999908447265, + "Acc.oven": 0.3325, + "Acc.ball": 0.5483000183105469, + "Acc.food": 0.6263999938964844, + "Acc.step": 0.06519999980926514, + "Acc.tank": 0.5815999984741211, + "Acc.trade name": 0.21979999542236328, + "Acc.microwave": 0.37040000915527344, + "Acc.pot": 0.39869998931884765, + "Acc.animal": 0.5311000061035156, + "Acc.bicycle": 0.6352999877929687, + "Acc.lake": 0.20969999313354493, + "Acc.dishwasher": 0.596500015258789, + "Acc.screen": 0.8983000183105468, + "Acc.blanket": 0.13100000381469726, + "Acc.sculpture": 0.7713999938964844, + "Acc.hood": 0.5197000122070312, + "Acc.sconce": 0.26649999618530273, + "Acc.vase": 0.34110000610351565, + "Acc.traffic light": 0.22899999618530273, + "Acc.tray": 0.017300000190734865, + "Acc.ashcan": 
0.42970001220703125, + "Acc.fan": 0.38040000915527344, + "Acc.pier": 0.6341999816894531, + "Acc.crt screen": 0.006399999856948852, + "Acc.plate": 0.5545999908447266, + "Acc.monitor": 0.060900001525878905, + "Acc.bulletin board": 0.3365000152587891, + "Acc.shower": 0.040500001907348634, + "Acc.radiator": 0.5436999893188477, + "Acc.glass": 0.04090000152587891, + "Acc.clock": 0.19639999389648438, + "Acc.flag": 0.2642000007629395 + } + }, + "127": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8157, + "mIoU": 0.4409, + "mAcc": 0.5401, + "IoU.wall": 0.7518000030517578, + "IoU.building": 0.8187000274658203, + "IoU.sky": 0.9362999725341797, + "IoU.floor": 0.7954000091552734, + "IoU.tree": 0.7301999664306641, + "IoU.ceiling": 0.8245999908447266, + "IoU.road": 0.8123000335693359, + "IoU.bed ": 0.8591999816894531, + "IoU.windowpane": 0.6040999984741211, + "IoU.grass": 0.6802999877929687, + "IoU.cabinet": 0.5838999938964844, + "IoU.sidewalk": 0.6306999969482422, + "IoU.person": 0.7715000152587891, + "IoU.earth": 0.35200000762939454, + "IoU.door": 0.43650001525878906, + "IoU.table": 0.5650999832153321, + "IoU.mountain": 0.5886000061035156, + "IoU.plant": 0.5088000106811523, + "IoU.curtain": 0.7258000183105469, + "IoU.chair": 0.49740001678466794, + "IoU.car": 0.8083999633789063, + "IoU.water": 0.4890999984741211, + "IoU.painting": 0.6644000244140625, + "IoU.sofa": 0.605999984741211, + "IoU.shelf": 0.4325, + "IoU.house": 0.5034000015258789, + "IoU.sea": 0.5227999877929688, + "IoU.mirror": 0.6336999893188476, + "IoU.rug": 0.6306999969482422, + "IoU.field": 0.27170000076293943, + "IoU.armchair": 0.3634000015258789, + "IoU.seat": 0.6040000152587891, + "IoU.fence": 0.2827000045776367, + "IoU.desk": 0.44849998474121094, + "IoU.rock": 0.4816999816894531, + "IoU.wardrobe": 0.5206000137329102, + "IoU.lamp": 0.5106999969482422, + "IoU.bathtub": 0.8011000061035156, + "IoU.railing": 0.30489999771118165, + "IoU.cushion": 0.5061000061035156, + "IoU.base": 0.3047999954223633, + "IoU.box": 0.20690000534057618, + "IoU.column": 0.445, + "IoU.signboard": 0.33880001068115234, + "IoU.chest of drawers": 0.33529998779296877, + "IoU.counter": 0.33029998779296876, + "IoU.sand": 0.39169998168945314, + "IoU.sink": 0.6433999633789063, + "IoU.skyscraper": 0.6372000122070313, + "IoU.fireplace": 0.6748999786376954, + "IoU.refrigerator": 0.6969999694824218, + "IoU.grandstand": 0.36200000762939455, + "IoU.path": 0.2347999954223633, + "IoU.stairs": 0.16180000305175782, + "IoU.runway": 0.6129000091552734, + "IoU.case": 0.48029998779296873, + "IoU.pool table": 0.9115000152587891, + "IoU.pillow": 0.47779998779296873, + "IoU.screen door": 0.6769999694824219, + "IoU.stairway": 0.23909999847412108, + "IoU.river": 0.15560000419616699, + "IoU.bridge": 0.5720999908447265, + "IoU.bookcase": 0.34560001373291016, + "IoU.blind": 0.37029998779296874, + "IoU.coffee table": 0.5845000076293946, + "IoU.toilet": 0.8155000305175781, + "IoU.flower": 0.3192000007629395, + "IoU.book": 0.4022999954223633, + "IoU.hill": 0.06230000019073487, + "IoU.bench": 0.3938999938964844, + "IoU.countertop": 0.5122999954223633, + "IoU.stove": 0.6691999816894532, + "IoU.palm": 0.44790000915527345, + "IoU.kitchen island": 0.35520000457763673, + "IoU.computer": 0.5934999847412109, + "IoU.swivel chair": 0.35080001831054686, + "IoU.boat": 0.6316999816894531, + "IoU.bar": 0.4006999969482422, + "IoU.arcade machine": 0.31549999237060544, + "IoU.hovel": 0.4640999984741211, + "IoU.bus": 
0.8291999816894531, + "IoU.towel": 0.5445000076293945, + "IoU.light": 0.25760000228881835, + "IoU.truck": 0.15600000381469725, + "IoU.tower": 0.27399999618530274, + "IoU.chandelier": 0.5666999816894531, + "IoU.awning": 0.22149999618530272, + "IoU.streetlight": 0.14079999923706055, + "IoU.booth": 0.3209000015258789, + "IoU.television receiver": 0.6261000061035156, + "IoU.airplane": 0.5563999938964844, + "IoU.dirt track": 0.21760000228881837, + "IoU.apparel": 0.35220001220703123, + "IoU.pole": 0.155, + "IoU.land": 0.06150000095367432, + "IoU.bannister": 0.03859999895095825, + "IoU.escalator": 0.3345999908447266, + "IoU.ottoman": 0.400099983215332, + "IoU.bottle": 0.1484000015258789, + "IoU.buffet": 0.38119998931884763, + "IoU.poster": 0.22209999084472656, + "IoU.stage": 0.17969999313354493, + "IoU.van": 0.3290999984741211, + "IoU.ship": 0.7763999938964844, + "IoU.fountain": 0.025099999904632568, + "IoU.conveyer belt": 0.7008999633789063, + "IoU.canopy": 0.2986000061035156, + "IoU.washer": 0.664000015258789, + "IoU.plaything": 0.21690000534057619, + "IoU.swimming pool": 0.5802000045776368, + "IoU.stool": 0.26110000610351564, + "IoU.barrel": 0.577400016784668, + "IoU.basket": 0.14359999656677247, + "IoU.waterfall": 0.5202000045776367, + "IoU.tent": 0.9029000091552735, + "IoU.bag": 0.08699999809265137, + "IoU.minibike": 0.6704000091552734, + "IoU.cradle": 0.7762000274658203, + "IoU.oven": 0.276299991607666, + "IoU.ball": 0.5018999862670899, + "IoU.food": 0.5229999923706055, + "IoU.step": 0.09520000457763672, + "IoU.tank": 0.5297000122070312, + "IoU.trade name": 0.23860000610351562, + "IoU.microwave": 0.3315999984741211, + "IoU.pot": 0.35200000762939454, + "IoU.animal": 0.5063000106811524, + "IoU.bicycle": 0.5377999877929688, + "IoU.lake": 0.1752000045776367, + "IoU.dishwasher": 0.4513999938964844, + "IoU.screen": 0.6375999832153321, + "IoU.blanket": 0.08539999961853027, + "IoU.sculpture": 0.5929000091552734, + "IoU.hood": 0.49470001220703125, + "IoU.sconce": 0.18770000457763672, + "IoU.vase": 0.24760000228881837, + "IoU.traffic light": 0.2031999969482422, + "IoU.tray": 0.02259999990463257, + "IoU.ashcan": 0.37689998626708987, + "IoU.fan": 0.36709999084472655, + "IoU.pier": 0.40599998474121096, + "IoU.crt screen": 0.003499999940395355, + "IoU.plate": 0.4490000152587891, + "IoU.monitor": 0.09350000381469727, + "IoU.bulletin board": 0.1815999984741211, + "IoU.shower": 0.029600000381469725, + "IoU.radiator": 0.4884000015258789, + "IoU.glass": 0.02569999933242798, + "IoU.clock": 0.17979999542236327, + "IoU.flag": 0.23739999771118164, + "Acc.wall": 0.8944999694824218, + "Acc.building": 0.9133999633789063, + "Acc.sky": 0.9748999786376953, + "Acc.floor": 0.9104000091552734, + "Acc.tree": 0.8706999969482422, + "Acc.ceiling": 0.9241999816894532, + "Acc.road": 0.8869000244140625, + "Acc.bed ": 0.9458999633789062, + "Acc.windowpane": 0.7530999755859376, + "Acc.grass": 0.8262999725341796, + "Acc.cabinet": 0.7044999694824219, + "Acc.sidewalk": 0.7873999786376953, + "Acc.person": 0.9008999633789062, + "Acc.earth": 0.49220001220703125, + "Acc.door": 0.580900001525879, + "Acc.table": 0.7123999786376953, + "Acc.mountain": 0.7390000152587891, + "Acc.plant": 0.6444999694824218, + "Acc.curtain": 0.8327999877929687, + "Acc.chair": 0.6177000045776367, + "Acc.car": 0.9127999877929688, + "Acc.water": 0.6494999694824218, + "Acc.painting": 0.8045999908447266, + "Acc.sofa": 0.8394999694824219, + "Acc.shelf": 0.648499984741211, + "Acc.house": 0.6601000213623047, + "Acc.sea": 0.7556999969482422, + "Acc.mirror": 
0.6963999938964843, + "Acc.rug": 0.6837999725341797, + "Acc.field": 0.4393000030517578, + "Acc.armchair": 0.5020000076293946, + "Acc.seat": 0.7811000061035156, + "Acc.fence": 0.38150001525878907, + "Acc.desk": 0.6504000091552734, + "Acc.rock": 0.6138999938964844, + "Acc.wardrobe": 0.6494000244140625, + "Acc.lamp": 0.6297999954223633, + "Acc.bathtub": 0.8933999633789063, + "Acc.railing": 0.445099983215332, + "Acc.cushion": 0.6491999816894531, + "Acc.base": 0.4868000030517578, + "Acc.box": 0.2770999908447266, + "Acc.column": 0.5447999954223632, + "Acc.signboard": 0.42029998779296873, + "Acc.chest of drawers": 0.5761000061035156, + "Acc.counter": 0.49020000457763674, + "Acc.sand": 0.5545000076293946, + "Acc.sink": 0.7383000183105469, + "Acc.skyscraper": 0.7519000244140625, + "Acc.fireplace": 0.8319000244140625, + "Acc.refrigerator": 0.7951000213623047, + "Acc.grandstand": 0.7694000244140625, + "Acc.path": 0.3584000015258789, + "Acc.stairs": 0.2159000015258789, + "Acc.runway": 0.7755000305175781, + "Acc.case": 0.6683999633789063, + "Acc.pool table": 0.9579000091552734, + "Acc.pillow": 0.543499984741211, + "Acc.screen door": 0.7743000030517578, + "Acc.stairway": 0.34119998931884765, + "Acc.river": 0.3240999984741211, + "Acc.bridge": 0.6725, + "Acc.bookcase": 0.5404000091552734, + "Acc.blind": 0.4061000061035156, + "Acc.coffee table": 0.7494999694824219, + "Acc.toilet": 0.865, + "Acc.flower": 0.43220001220703125, + "Acc.book": 0.5433000183105469, + "Acc.hill": 0.08729999542236327, + "Acc.bench": 0.4747999954223633, + "Acc.countertop": 0.6483999633789063, + "Acc.stove": 0.7555000305175781, + "Acc.palm": 0.6361999893188477, + "Acc.kitchen island": 0.5522999954223633, + "Acc.computer": 0.6990000152587891, + "Acc.swivel chair": 0.42540000915527343, + "Acc.boat": 0.715, + "Acc.bar": 0.46540000915527346, + "Acc.arcade machine": 0.35939998626708985, + "Acc.hovel": 0.49200000762939455, + "Acc.bus": 0.8944000244140625, + "Acc.towel": 0.7011000061035156, + "Acc.light": 0.2784000015258789, + "Acc.truck": 0.21079999923706055, + "Acc.tower": 0.36270000457763674, + "Acc.chandelier": 0.6858000183105468, + "Acc.awning": 0.24989999771118165, + "Acc.streetlight": 0.15630000114440917, + "Acc.booth": 0.41319999694824217, + "Acc.television receiver": 0.7106999969482422, + "Acc.airplane": 0.6345000076293945, + "Acc.dirt track": 0.21879999160766603, + "Acc.apparel": 0.5070000076293946, + "Acc.pole": 0.1859000015258789, + "Acc.land": 0.08449999809265137, + "Acc.bannister": 0.046900000572204587, + "Acc.escalator": 0.3718000030517578, + "Acc.ottoman": 0.539099998474121, + "Acc.bottle": 0.17120000839233399, + "Acc.buffet": 0.4209999847412109, + "Acc.poster": 0.40490001678466797, + "Acc.stage": 0.2775, + "Acc.van": 0.4179000091552734, + "Acc.ship": 0.9066999816894531, + "Acc.fountain": 0.025499999523162842, + "Acc.conveyer belt": 0.8801000213623047, + "Acc.canopy": 0.344900016784668, + "Acc.washer": 0.6805000305175781, + "Acc.plaything": 0.2929999923706055, + "Acc.swimming pool": 0.7758999633789062, + "Acc.stool": 0.29870000839233396, + "Acc.barrel": 0.6041999816894531, + "Acc.basket": 0.18319999694824218, + "Acc.waterfall": 0.585900001525879, + "Acc.tent": 0.9852999877929688, + "Acc.bag": 0.10289999961853027, + "Acc.minibike": 0.7602999877929687, + "Acc.cradle": 0.9569999694824218, + "Acc.oven": 0.4381999969482422, + "Acc.ball": 0.589900016784668, + "Acc.food": 0.6256999969482422, + "Acc.step": 0.1140999984741211, + "Acc.tank": 0.5952999877929688, + "Acc.trade name": 0.2606999969482422, + "Acc.microwave": 
0.36209999084472655, + "Acc.pot": 0.3940999984741211, + "Acc.animal": 0.5325, + "Acc.bicycle": 0.6580000305175782, + "Acc.lake": 0.19100000381469726, + "Acc.dishwasher": 0.5572999954223633, + "Acc.screen": 0.8644000244140625, + "Acc.blanket": 0.08880000114440918, + "Acc.sculpture": 0.6977999877929687, + "Acc.hood": 0.5606000137329101, + "Acc.sconce": 0.23299999237060548, + "Acc.vase": 0.3159000015258789, + "Acc.traffic light": 0.27010000228881836, + "Acc.tray": 0.027300000190734863, + "Acc.ashcan": 0.4672000122070312, + "Acc.fan": 0.41119998931884766, + "Acc.pier": 0.5652000045776367, + "Acc.crt screen": 0.01, + "Acc.plate": 0.5484000015258789, + "Acc.monitor": 0.15319999694824218, + "Acc.bulletin board": 0.20989999771118165, + "Acc.shower": 0.035499999523162844, + "Acc.radiator": 0.5302000045776367, + "Acc.glass": 0.026600000858306886, + "Acc.clock": 0.18770000457763672, + "Acc.flag": 0.2560000038146973 + } + }, + "128": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.813, + "mIoU": 0.42869999999999997, + "mAcc": 0.5212, + "IoU.wall": 0.7505000305175781, + "IoU.building": 0.813499984741211, + "IoU.sky": 0.93, + "IoU.floor": 0.7847000122070312, + "IoU.tree": 0.7180999755859375, + "IoU.ceiling": 0.8204000091552734, + "IoU.road": 0.8070999908447266, + "IoU.bed ": 0.8531999969482422, + "IoU.windowpane": 0.5963000106811523, + "IoU.grass": 0.6619999694824219, + "IoU.cabinet": 0.5897000122070313, + "IoU.sidewalk": 0.6254000091552734, + "IoU.person": 0.7726999664306641, + "IoU.earth": 0.35259998321533204, + "IoU.door": 0.42069999694824217, + "IoU.table": 0.5425, + "IoU.mountain": 0.5818000030517578, + "IoU.plant": 0.5068999862670899, + "IoU.curtain": 0.7222000122070312, + "IoU.chair": 0.48189998626708985, + "IoU.car": 0.8059999847412109, + "IoU.water": 0.5331000137329102, + "IoU.painting": 0.6655000305175781, + "IoU.sofa": 0.5952000045776367, + "IoU.shelf": 0.4315999984741211, + "IoU.house": 0.500999984741211, + "IoU.sea": 0.5736000061035156, + "IoU.mirror": 0.609900016784668, + "IoU.rug": 0.617599983215332, + "IoU.field": 0.26530000686645505, + "IoU.armchair": 0.29739999771118164, + "IoU.seat": 0.6031999969482422, + "IoU.fence": 0.3220000076293945, + "IoU.desk": 0.44290000915527344, + "IoU.rock": 0.4711000061035156, + "IoU.wardrobe": 0.5241999816894531, + "IoU.lamp": 0.5031999969482421, + "IoU.bathtub": 0.7844000244140625, + "IoU.railing": 0.3363999938964844, + "IoU.cushion": 0.5054000091552734, + "IoU.base": 0.2571999931335449, + "IoU.box": 0.205, + "IoU.column": 0.42689998626708986, + "IoU.signboard": 0.31829999923706054, + "IoU.chest of drawers": 0.3384000015258789, + "IoU.counter": 0.3388999938964844, + "IoU.sand": 0.44299999237060544, + "IoU.sink": 0.6356000137329102, + "IoU.skyscraper": 0.617599983215332, + "IoU.fireplace": 0.6758999633789062, + "IoU.refrigerator": 0.6997000122070313, + "IoU.grandstand": 0.37799999237060544, + "IoU.path": 0.24979999542236328, + "IoU.stairs": 0.11539999961853027, + "IoU.runway": 0.6040000152587891, + "IoU.case": 0.4493000030517578, + "IoU.pool table": 0.915, + "IoU.pillow": 0.4266999816894531, + "IoU.screen door": 0.6805999755859375, + "IoU.stairway": 0.21959999084472656, + "IoU.river": 0.13350000381469726, + "IoU.bridge": 0.5693000030517578, + "IoU.bookcase": 0.3196999931335449, + "IoU.blind": 0.3202000045776367, + "IoU.coffee table": 0.5656999969482421, + "IoU.toilet": 0.8130000305175781, + "IoU.flower": 0.31979999542236326, + "IoU.book": 0.38159999847412107, + 
"IoU.hill": 0.04099999904632568, + "IoU.bench": 0.42790000915527343, + "IoU.countertop": 0.48189998626708985, + "IoU.stove": 0.6488999938964843, + "IoU.palm": 0.40439998626708984, + "IoU.kitchen island": 0.3686000061035156, + "IoU.computer": 0.5950999832153321, + "IoU.swivel chair": 0.35959999084472655, + "IoU.boat": 0.5425, + "IoU.bar": 0.39939998626708983, + "IoU.arcade machine": 0.32299999237060545, + "IoU.hovel": 0.3275, + "IoU.bus": 0.8156999969482421, + "IoU.towel": 0.5347999954223632, + "IoU.light": 0.21770000457763672, + "IoU.truck": 0.1386999988555908, + "IoU.tower": 0.17690000534057618, + "IoU.chandelier": 0.5856999969482422, + "IoU.awning": 0.19760000228881835, + "IoU.streetlight": 0.13579999923706054, + "IoU.booth": 0.36009998321533204, + "IoU.television receiver": 0.6366999816894531, + "IoU.airplane": 0.566500015258789, + "IoU.dirt track": 0.031700000762939454, + "IoU.apparel": 0.31709999084472656, + "IoU.pole": 0.12039999961853028, + "IoU.land": 0.07170000076293945, + "IoU.bannister": 0.04440000057220459, + "IoU.escalator": 0.1803000068664551, + "IoU.ottoman": 0.3436000061035156, + "IoU.bottle": 0.12460000038146973, + "IoU.buffet": 0.42770000457763674, + "IoU.poster": 0.21520000457763672, + "IoU.stage": 0.1284000015258789, + "IoU.van": 0.3477999877929687, + "IoU.ship": 0.7655999755859375, + "IoU.fountain": 0.030199999809265136, + "IoU.conveyer belt": 0.6338999938964843, + "IoU.canopy": 0.22010000228881835, + "IoU.washer": 0.6551000213623047, + "IoU.plaything": 0.20440000534057617, + "IoU.swimming pool": 0.5836000061035156, + "IoU.stool": 0.27719999313354493, + "IoU.barrel": 0.5716999816894531, + "IoU.basket": 0.17030000686645508, + "IoU.waterfall": 0.5045999908447265, + "IoU.tent": 0.9018000030517578, + "IoU.bag": 0.11380000114440918, + "IoU.minibike": 0.49040000915527343, + "IoU.cradle": 0.7747000122070312, + "IoU.oven": 0.18299999237060546, + "IoU.ball": 0.500999984741211, + "IoU.food": 0.41580001831054686, + "IoU.step": 0.11140000343322753, + "IoU.tank": 0.47970001220703123, + "IoU.trade name": 0.18670000076293947, + "IoU.microwave": 0.344900016784668, + "IoU.pot": 0.33119998931884764, + "IoU.animal": 0.513499984741211, + "IoU.bicycle": 0.442599983215332, + "IoU.lake": 0.24290000915527343, + "IoU.dishwasher": 0.42470001220703124, + "IoU.screen": 0.6329000091552734, + "IoU.blanket": 0.07539999961853028, + "IoU.sculpture": 0.5204999923706055, + "IoU.hood": 0.44150001525878907, + "IoU.sconce": 0.16110000610351563, + "IoU.vase": 0.24420000076293946, + "IoU.traffic light": 0.21149999618530274, + "IoU.tray": 0.006600000262260437, + "IoU.ashcan": 0.313700008392334, + "IoU.fan": 0.37509998321533206, + "IoU.pier": 0.5986000061035156, + "IoU.crt screen": 0.0, + "IoU.plate": 0.41080001831054686, + "IoU.monitor": 0.22360000610351563, + "IoU.bulletin board": 0.17040000915527342, + "IoU.shower": 0.027599999904632567, + "IoU.radiator": 0.5243999862670898, + "IoU.glass": 0.051100001335144044, + "IoU.clock": 0.213700008392334, + "IoU.flag": 0.24360000610351562, + "Acc.wall": 0.8909999847412109, + "Acc.building": 0.9298999786376954, + "Acc.sky": 0.9811000061035157, + "Acc.floor": 0.919000015258789, + "Acc.tree": 0.8312000274658203, + "Acc.ceiling": 0.9327999877929688, + "Acc.road": 0.8881999969482421, + "Acc.bed ": 0.9458999633789062, + "Acc.windowpane": 0.7805000305175781, + "Acc.grass": 0.8208000183105468, + "Acc.cabinet": 0.7286000061035156, + "Acc.sidewalk": 0.7919999694824219, + "Acc.person": 0.8762000274658203, + "Acc.earth": 0.5129999923706055, + "Acc.door": 0.5308000183105469, + 
"Acc.table": 0.7219999694824218, + "Acc.mountain": 0.7423999786376954, + "Acc.plant": 0.6122999954223632, + "Acc.curtain": 0.8380000305175781, + "Acc.chair": 0.5761999893188476, + "Acc.car": 0.8876000213623046, + "Acc.water": 0.7125, + "Acc.painting": 0.7880000305175782, + "Acc.sofa": 0.8502999877929688, + "Acc.shelf": 0.6418000030517578, + "Acc.house": 0.635099983215332, + "Acc.sea": 0.7975, + "Acc.mirror": 0.6694000244140625, + "Acc.rug": 0.680199966430664, + "Acc.field": 0.4286999893188477, + "Acc.armchair": 0.37, + "Acc.seat": 0.787300033569336, + "Acc.fence": 0.40849998474121096, + "Acc.desk": 0.6154999923706055, + "Acc.rock": 0.6013000106811524, + "Acc.wardrobe": 0.6743000030517579, + "Acc.lamp": 0.59, + "Acc.bathtub": 0.8443000030517578, + "Acc.railing": 0.47470001220703123, + "Acc.cushion": 0.6377999877929688, + "Acc.base": 0.3381999969482422, + "Acc.box": 0.24639999389648437, + "Acc.column": 0.5136000061035156, + "Acc.signboard": 0.40970001220703123, + "Acc.chest of drawers": 0.5561999893188476, + "Acc.counter": 0.4995000076293945, + "Acc.sand": 0.5359999847412109, + "Acc.sink": 0.7105999755859375, + "Acc.skyscraper": 0.6841999816894532, + "Acc.fireplace": 0.7919000244140625, + "Acc.refrigerator": 0.7526000213623046, + "Acc.grandstand": 0.7587999725341796, + "Acc.path": 0.3516999816894531, + "Acc.stairs": 0.14640000343322754, + "Acc.runway": 0.7847000122070312, + "Acc.case": 0.6619999694824219, + "Acc.pool table": 0.9561000061035156, + "Acc.pillow": 0.4818000030517578, + "Acc.screen door": 0.7551000213623047, + "Acc.stairway": 0.3420999908447266, + "Acc.river": 0.2475, + "Acc.bridge": 0.6970999908447265, + "Acc.bookcase": 0.48069999694824217, + "Acc.blind": 0.3443000030517578, + "Acc.coffee table": 0.7169999694824218, + "Acc.toilet": 0.8568000030517579, + "Acc.flower": 0.447599983215332, + "Acc.book": 0.5479999923706055, + "Acc.hill": 0.05920000076293945, + "Acc.bench": 0.4886000061035156, + "Acc.countertop": 0.640199966430664, + "Acc.stove": 0.7077999877929687, + "Acc.palm": 0.514900016784668, + "Acc.kitchen island": 0.6095999908447266, + "Acc.computer": 0.689800033569336, + "Acc.swivel chair": 0.4384000015258789, + "Acc.boat": 0.6234999847412109, + "Acc.bar": 0.4904999923706055, + "Acc.arcade machine": 0.37, + "Acc.hovel": 0.3429000091552734, + "Acc.bus": 0.9162999725341797, + "Acc.towel": 0.7027999877929687, + "Acc.light": 0.23639999389648436, + "Acc.truck": 0.17219999313354492, + "Acc.tower": 0.22209999084472656, + "Acc.chandelier": 0.7297000122070313, + "Acc.awning": 0.21170000076293946, + "Acc.streetlight": 0.15319999694824218, + "Acc.booth": 0.41400001525878904, + "Acc.television receiver": 0.6715000152587891, + "Acc.airplane": 0.6227999877929687, + "Acc.dirt track": 0.03190000057220459, + "Acc.apparel": 0.4154999923706055, + "Acc.pole": 0.13640000343322753, + "Acc.land": 0.08989999771118164, + "Acc.bannister": 0.06480000019073487, + "Acc.escalator": 0.1859000015258789, + "Acc.ottoman": 0.45430000305175783, + "Acc.bottle": 0.13930000305175783, + "Acc.buffet": 0.5079999923706054, + "Acc.poster": 0.3413999938964844, + "Acc.stage": 0.20379999160766601, + "Acc.van": 0.40169998168945314, + "Acc.ship": 0.9091000366210937, + "Acc.fountain": 0.03069999933242798, + "Acc.conveyer belt": 0.8743000030517578, + "Acc.canopy": 0.24870000839233397, + "Acc.washer": 0.6780999755859375, + "Acc.plaything": 0.28399999618530275, + "Acc.swimming pool": 0.7537000274658203, + "Acc.stool": 0.3134000015258789, + "Acc.barrel": 0.609900016784668, + "Acc.basket": 0.2102000045776367, + "Acc.waterfall": 
0.5836999893188477, + "Acc.tent": 0.9802999877929688, + "Acc.bag": 0.1356999969482422, + "Acc.minibike": 0.5147999954223633, + "Acc.cradle": 0.9445999908447266, + "Acc.oven": 0.3283000183105469, + "Acc.ball": 0.6479000091552735, + "Acc.food": 0.4709000015258789, + "Acc.step": 0.13210000038146974, + "Acc.tank": 0.5465000152587891, + "Acc.trade name": 0.19870000839233398, + "Acc.microwave": 0.37540000915527344, + "Acc.pot": 0.35450000762939454, + "Acc.animal": 0.5395999908447265, + "Acc.bicycle": 0.5961999893188477, + "Acc.lake": 0.25989999771118166, + "Acc.dishwasher": 0.512400016784668, + "Acc.screen": 0.8737999725341797, + "Acc.blanket": 0.07920000076293945, + "Acc.sculpture": 0.5558000183105469, + "Acc.hood": 0.45619998931884764, + "Acc.sconce": 0.18799999237060547, + "Acc.vase": 0.3072999954223633, + "Acc.traffic light": 0.2715999984741211, + "Acc.tray": 0.007799999713897705, + "Acc.ashcan": 0.3977000045776367, + "Acc.fan": 0.41650001525878905, + "Acc.pier": 0.7711000061035156, + "Acc.crt screen": 0.0, + "Acc.plate": 0.4936000061035156, + "Acc.monitor": 0.38849998474121095, + "Acc.bulletin board": 0.2175, + "Acc.shower": 0.03789999961853027, + "Acc.radiator": 0.5811999893188476, + "Acc.glass": 0.05449999809265137, + "Acc.clock": 0.22739999771118163, + "Acc.flag": 0.2635000038146973 + } + }, + "129": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8131999999999999, + "mIoU": 0.4288, + "mAcc": 0.5355, + "IoU.wall": 0.755, + "IoU.building": 0.814800033569336, + "IoU.sky": 0.9375, + "IoU.floor": 0.7916999816894531, + "IoU.tree": 0.7344000244140625, + "IoU.ceiling": 0.8255000305175781, + "IoU.road": 0.8179000091552734, + "IoU.bed ": 0.8601999664306641, + "IoU.windowpane": 0.5961000061035157, + "IoU.grass": 0.6418000030517578, + "IoU.cabinet": 0.5888999938964844, + "IoU.sidewalk": 0.6238000106811523, + "IoU.person": 0.7633999633789063, + "IoU.earth": 0.3325, + "IoU.door": 0.42939998626708986, + "IoU.table": 0.5425, + "IoU.mountain": 0.5847999954223633, + "IoU.plant": 0.5072999954223633, + "IoU.curtain": 0.7094000244140625, + "IoU.chair": 0.4986999893188477, + "IoU.car": 0.7973999786376953, + "IoU.water": 0.5025, + "IoU.painting": 0.6579000091552735, + "IoU.sofa": 0.5943999862670899, + "IoU.shelf": 0.4443000030517578, + "IoU.house": 0.47259998321533203, + "IoU.sea": 0.5875, + "IoU.mirror": 0.643499984741211, + "IoU.rug": 0.6272999954223633, + "IoU.field": 0.295, + "IoU.armchair": 0.3647999954223633, + "IoU.seat": 0.5984000015258789, + "IoU.fence": 0.3234000015258789, + "IoU.desk": 0.40490001678466797, + "IoU.rock": 0.4386999893188477, + "IoU.wardrobe": 0.47580001831054686, + "IoU.lamp": 0.48900001525878906, + "IoU.bathtub": 0.7330000305175781, + "IoU.railing": 0.3118000030517578, + "IoU.cushion": 0.4972999954223633, + "IoU.base": 0.23180000305175782, + "IoU.box": 0.18780000686645507, + "IoU.column": 0.4215999984741211, + "IoU.signboard": 0.3295999908447266, + "IoU.chest of drawers": 0.3395999908447266, + "IoU.counter": 0.28350000381469725, + "IoU.sand": 0.3666999816894531, + "IoU.sink": 0.6595999908447265, + "IoU.skyscraper": 0.6331999969482421, + "IoU.fireplace": 0.6491000366210937, + "IoU.refrigerator": 0.7169000244140625, + "IoU.grandstand": 0.4336999893188477, + "IoU.path": 0.24870000839233397, + "IoU.stairs": 0.2539999961853027, + "IoU.runway": 0.6737000274658204, + "IoU.case": 0.5177000045776368, + "IoU.pool table": 0.9191000366210937, + "IoU.pillow": 0.5281999969482422, + "IoU.screen door": 
0.5652000045776367, + "IoU.stairway": 0.3240000152587891, + "IoU.river": 0.3097999954223633, + "IoU.bridge": 0.5691999816894531, + "IoU.bookcase": 0.3313999938964844, + "IoU.blind": 0.37599998474121094, + "IoU.coffee table": 0.5268999862670899, + "IoU.toilet": 0.7862000274658203, + "IoU.flower": 0.32560001373291014, + "IoU.book": 0.4127999877929687, + "IoU.hill": 0.055, + "IoU.bench": 0.40049999237060546, + "IoU.countertop": 0.5170999908447266, + "IoU.stove": 0.635099983215332, + "IoU.palm": 0.41310001373291017, + "IoU.kitchen island": 0.3259000015258789, + "IoU.computer": 0.5393000030517578, + "IoU.swivel chair": 0.3422999954223633, + "IoU.boat": 0.61, + "IoU.bar": 0.47189998626708984, + "IoU.arcade machine": 0.32599998474121095, + "IoU.hovel": 0.3527000045776367, + "IoU.bus": 0.7944000244140625, + "IoU.towel": 0.5084999847412109, + "IoU.light": 0.22360000610351563, + "IoU.truck": 0.1621999931335449, + "IoU.tower": 0.40900001525878904, + "IoU.chandelier": 0.5361000061035156, + "IoU.awning": 0.21579999923706056, + "IoU.streetlight": 0.10960000038146972, + "IoU.booth": 0.40330001831054685, + "IoU.television receiver": 0.6190999984741211, + "IoU.airplane": 0.5677000045776367, + "IoU.dirt track": 0.16639999389648438, + "IoU.apparel": 0.3345000076293945, + "IoU.pole": 0.18979999542236328, + "IoU.land": 0.057899999618530276, + "IoU.bannister": 0.030899999141693116, + "IoU.escalator": 0.22059999465942381, + "IoU.ottoman": 0.43599998474121093, + "IoU.bottle": 0.15069999694824218, + "IoU.buffet": 0.5175999832153321, + "IoU.poster": 0.24569999694824218, + "IoU.stage": 0.18479999542236328, + "IoU.van": 0.3007999992370605, + "IoU.ship": 0.8079000091552735, + "IoU.fountain": 0.20979999542236327, + "IoU.conveyer belt": 0.5954999923706055, + "IoU.canopy": 0.18790000915527344, + "IoU.washer": 0.6713999938964844, + "IoU.plaything": 0.2338999938964844, + "IoU.swimming pool": 0.5870000076293945, + "IoU.stool": 0.1934000015258789, + "IoU.barrel": 0.22739999771118163, + "IoU.basket": 0.13649999618530273, + "IoU.waterfall": 0.5122999954223633, + "IoU.tent": 0.9076000213623047, + "IoU.bag": 0.08149999618530274, + "IoU.minibike": 0.601500015258789, + "IoU.cradle": 0.7051999664306641, + "IoU.oven": 0.13329999923706054, + "IoU.ball": 0.45860000610351564, + "IoU.food": 0.4175, + "IoU.step": 0.08340000152587891, + "IoU.tank": 0.47380001068115235, + "IoU.trade name": 0.2093000030517578, + "IoU.microwave": 0.3081999969482422, + "IoU.pot": 0.3363999938964844, + "IoU.animal": 0.5086999893188476, + "IoU.bicycle": 0.49650001525878906, + "IoU.lake": 0.001599999964237213, + "IoU.dishwasher": 0.5043999862670898, + "IoU.screen": 0.6081999969482422, + "IoU.blanket": 0.10420000076293945, + "IoU.sculpture": 0.46189998626708983, + "IoU.hood": 0.4247999954223633, + "IoU.sconce": 0.23209999084472657, + "IoU.vase": 0.2347999954223633, + "IoU.traffic light": 0.16930000305175782, + "IoU.tray": 0.010099999904632569, + "IoU.ashcan": 0.2759000015258789, + "IoU.fan": 0.4025, + "IoU.pier": 0.2118000030517578, + "IoU.crt screen": 0.009100000262260438, + "IoU.plate": 0.34650001525878904, + "IoU.monitor": 0.03609999895095825, + "IoU.bulletin board": 0.32049999237060545, + "IoU.shower": 0.0007999999821186065, + "IoU.radiator": 0.49779998779296875, + "IoU.glass": 0.020799999237060548, + "IoU.clock": 0.17399999618530274, + "IoU.flag": 0.2775, + "Acc.wall": 0.8813999938964844, + "Acc.building": 0.915199966430664, + "Acc.sky": 0.9755000305175782, + "Acc.floor": 0.905199966430664, + "Acc.tree": 0.8630000305175781, + "Acc.ceiling": 
0.9254000091552734, + "Acc.road": 0.8933999633789063, + "Acc.bed ": 0.9480999755859375, + "Acc.windowpane": 0.7633999633789063, + "Acc.grass": 0.770199966430664, + "Acc.cabinet": 0.7120999908447265, + "Acc.sidewalk": 0.7805000305175781, + "Acc.person": 0.9063999938964844, + "Acc.earth": 0.472400016784668, + "Acc.door": 0.5863999938964843, + "Acc.table": 0.7027999877929687, + "Acc.mountain": 0.7344000244140625, + "Acc.plant": 0.6555999755859375, + "Acc.curtain": 0.8223000335693359, + "Acc.chair": 0.6515000152587891, + "Acc.car": 0.9126000213623047, + "Acc.water": 0.6545999908447265, + "Acc.painting": 0.8312999725341796, + "Acc.sofa": 0.7913999938964844, + "Acc.shelf": 0.6602999877929687, + "Acc.house": 0.6680000305175782, + "Acc.sea": 0.8930000305175781, + "Acc.mirror": 0.716500015258789, + "Acc.rug": 0.691500015258789, + "Acc.field": 0.5147000122070312, + "Acc.armchair": 0.5263000106811524, + "Acc.seat": 0.7659999847412109, + "Acc.fence": 0.45810001373291015, + "Acc.desk": 0.5843999862670899, + "Acc.rock": 0.6754000091552734, + "Acc.wardrobe": 0.580999984741211, + "Acc.lamp": 0.5950999832153321, + "Acc.bathtub": 0.7991000366210937, + "Acc.railing": 0.48400001525878905, + "Acc.cushion": 0.637400016784668, + "Acc.base": 0.4259000015258789, + "Acc.box": 0.2518000030517578, + "Acc.column": 0.5331999969482422, + "Acc.signboard": 0.43590000152587893, + "Acc.chest of drawers": 0.4961000061035156, + "Acc.counter": 0.39310001373291015, + "Acc.sand": 0.4911000061035156, + "Acc.sink": 0.7566000366210938, + "Acc.skyscraper": 0.7613999938964844, + "Acc.fireplace": 0.8308999633789063, + "Acc.refrigerator": 0.8336000061035156, + "Acc.grandstand": 0.7261000061035157, + "Acc.path": 0.34099998474121096, + "Acc.stairs": 0.31209999084472656, + "Acc.runway": 0.8734999847412109, + "Acc.case": 0.7363999938964844, + "Acc.pool table": 0.9587000274658203, + "Acc.pillow": 0.6213999938964844, + "Acc.screen door": 0.6233000183105468, + "Acc.stairway": 0.40290000915527346, + "Acc.river": 0.47209999084472654, + "Acc.bridge": 0.8361000061035156, + "Acc.bookcase": 0.505099983215332, + "Acc.blind": 0.4197999954223633, + "Acc.coffee table": 0.6786000061035157, + "Acc.toilet": 0.8743000030517578, + "Acc.flower": 0.43590000152587893, + "Acc.book": 0.5656999969482421, + "Acc.hill": 0.09369999885559083, + "Acc.bench": 0.469900016784668, + "Acc.countertop": 0.6408999633789062, + "Acc.stove": 0.7172000122070312, + "Acc.palm": 0.562599983215332, + "Acc.kitchen island": 0.5479000091552735, + "Acc.computer": 0.657300033569336, + "Acc.swivel chair": 0.4281999969482422, + "Acc.boat": 0.8180000305175781, + "Acc.bar": 0.5820999908447265, + "Acc.arcade machine": 0.35560001373291017, + "Acc.hovel": 0.40330001831054685, + "Acc.bus": 0.8855999755859375, + "Acc.towel": 0.6531999969482422, + "Acc.light": 0.23370000839233399, + "Acc.truck": 0.24, + "Acc.tower": 0.5590999984741211, + "Acc.chandelier": 0.6583000183105469, + "Acc.awning": 0.26110000610351564, + "Acc.streetlight": 0.11939999580383301, + "Acc.booth": 0.45470001220703127, + "Acc.television receiver": 0.6962000274658203, + "Acc.airplane": 0.6425, + "Acc.dirt track": 0.19780000686645507, + "Acc.apparel": 0.48970001220703124, + "Acc.pole": 0.25290000915527344, + "Acc.land": 0.0705999994277954, + "Acc.bannister": 0.037100000381469725, + "Acc.escalator": 0.2347999954223633, + "Acc.ottoman": 0.5729000091552734, + "Acc.bottle": 0.17479999542236327, + "Acc.buffet": 0.563499984741211, + "Acc.poster": 0.3883000183105469, + "Acc.stage": 0.32290000915527345, + "Acc.van": 0.3397000122070313, + 
"Acc.ship": 0.9225, + "Acc.fountain": 0.21379999160766602, + "Acc.conveyer belt": 0.8588999938964844, + "Acc.canopy": 0.21790000915527344, + "Acc.washer": 0.7019000244140625, + "Acc.plaything": 0.3525, + "Acc.swimming pool": 0.8269999694824218, + "Acc.stool": 0.21870000839233397, + "Acc.barrel": 0.6430999755859375, + "Acc.basket": 0.17090000152587892, + "Acc.waterfall": 0.5502999877929687, + "Acc.tent": 0.9905000305175782, + "Acc.bag": 0.08970000267028809, + "Acc.minibike": 0.6891000366210938, + "Acc.cradle": 0.8648999786376953, + "Acc.oven": 0.31510000228881835, + "Acc.ball": 0.5195999908447265, + "Acc.food": 0.49200000762939455, + "Acc.step": 0.11260000228881836, + "Acc.tank": 0.5129000091552735, + "Acc.trade name": 0.23200000762939454, + "Acc.microwave": 0.3472999954223633, + "Acc.pot": 0.365, + "Acc.animal": 0.5409999847412109, + "Acc.bicycle": 0.6619999694824219, + "Acc.lake": 0.001599999964237213, + "Acc.dishwasher": 0.5983000183105469, + "Acc.screen": 0.9048000335693359, + "Acc.blanket": 0.11020000457763672, + "Acc.sculpture": 0.6438999938964843, + "Acc.hood": 0.45439998626708983, + "Acc.sconce": 0.29079999923706057, + "Acc.vase": 0.295, + "Acc.traffic light": 0.20180000305175783, + "Acc.tray": 0.013700000047683715, + "Acc.ashcan": 0.3477999877929687, + "Acc.fan": 0.45169998168945313, + "Acc.pier": 0.46880001068115235, + "Acc.crt screen": 0.02369999885559082, + "Acc.plate": 0.40700000762939453, + "Acc.monitor": 0.04929999828338623, + "Acc.bulletin board": 0.3743000030517578, + "Acc.shower": 0.004600000083446503, + "Acc.radiator": 0.545099983215332, + "Acc.glass": 0.021300001144409178, + "Acc.clock": 0.18600000381469728, + "Acc.flag": 0.2978000068664551 + } + }, + "130": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8158, + "mIoU": 0.43450000000000005, + "mAcc": 0.536, + "IoU.wall": 0.7547000122070312, + "IoU.building": 0.8219000244140625, + "IoU.sky": 0.9372000122070312, + "IoU.floor": 0.7897000122070312, + "IoU.tree": 0.7306999969482422, + "IoU.ceiling": 0.8266000366210937, + "IoU.road": 0.8186000061035156, + "IoU.bed ": 0.8662000274658204, + "IoU.windowpane": 0.5958000183105469, + "IoU.grass": 0.6455000305175781, + "IoU.cabinet": 0.6047000122070313, + "IoU.sidewalk": 0.6224000167846679, + "IoU.person": 0.7645999908447265, + "IoU.earth": 0.35700000762939454, + "IoU.door": 0.43259998321533205, + "IoU.table": 0.530099983215332, + "IoU.mountain": 0.5702000045776368, + "IoU.plant": 0.5209999847412109, + "IoU.curtain": 0.7154000091552735, + "IoU.chair": 0.48200000762939454, + "IoU.car": 0.7909999847412109, + "IoU.water": 0.5209999847412109, + "IoU.painting": 0.6802999877929687, + "IoU.sofa": 0.6079999923706054, + "IoU.shelf": 0.43529998779296875, + "IoU.house": 0.49970001220703125, + "IoU.sea": 0.5956000137329102, + "IoU.mirror": 0.6447000122070312, + "IoU.rug": 0.5956000137329102, + "IoU.field": 0.29239999771118164, + "IoU.armchair": 0.36369998931884767, + "IoU.seat": 0.597599983215332, + "IoU.fence": 0.34959999084472654, + "IoU.desk": 0.4047999954223633, + "IoU.rock": 0.4134000015258789, + "IoU.wardrobe": 0.51, + "IoU.lamp": 0.5027999877929688, + "IoU.bathtub": 0.7455999755859375, + "IoU.railing": 0.3138999938964844, + "IoU.cushion": 0.514900016784668, + "IoU.base": 0.2325, + "IoU.box": 0.1972999954223633, + "IoU.column": 0.42029998779296873, + "IoU.signboard": 0.3315999984741211, + "IoU.chest of drawers": 0.3875, + "IoU.counter": 0.3096999931335449, + "IoU.sand": 0.3547999954223633, + 
"IoU.sink": 0.6404000091552734, + "IoU.skyscraper": 0.6072000122070312, + "IoU.fireplace": 0.7001999664306641, + "IoU.refrigerator": 0.7030999755859375, + "IoU.grandstand": 0.43759998321533206, + "IoU.path": 0.268799991607666, + "IoU.stairs": 0.25579999923706054, + "IoU.runway": 0.6658000183105469, + "IoU.case": 0.5359000015258789, + "IoU.pool table": 0.9231999969482422, + "IoU.pillow": 0.4906999969482422, + "IoU.screen door": 0.5993999862670898, + "IoU.stairway": 0.32479999542236326, + "IoU.river": 0.3313999938964844, + "IoU.bridge": 0.63, + "IoU.bookcase": 0.3725, + "IoU.blind": 0.349900016784668, + "IoU.coffee table": 0.5231999969482422, + "IoU.toilet": 0.7601000213623047, + "IoU.flower": 0.32290000915527345, + "IoU.book": 0.39939998626708983, + "IoU.hill": 0.055, + "IoU.bench": 0.3686000061035156, + "IoU.countertop": 0.539099998474121, + "IoU.stove": 0.601599998474121, + "IoU.palm": 0.4468999862670898, + "IoU.kitchen island": 0.29079999923706057, + "IoU.computer": 0.5725, + "IoU.swivel chair": 0.31959999084472657, + "IoU.boat": 0.7079000091552734, + "IoU.bar": 0.49700000762939456, + "IoU.arcade machine": 0.2752000045776367, + "IoU.hovel": 0.4284999847412109, + "IoU.bus": 0.8166999816894531, + "IoU.towel": 0.5133000183105468, + "IoU.light": 0.258700008392334, + "IoU.truck": 0.13100000381469726, + "IoU.tower": 0.3802000045776367, + "IoU.chandelier": 0.5404999923706054, + "IoU.awning": 0.22309999465942382, + "IoU.streetlight": 0.11979999542236328, + "IoU.booth": 0.3820000076293945, + "IoU.television receiver": 0.6313000106811524, + "IoU.airplane": 0.5068000030517578, + "IoU.dirt track": 0.24860000610351562, + "IoU.apparel": 0.3565999984741211, + "IoU.pole": 0.15989999771118163, + "IoU.land": 0.176200008392334, + "IoU.bannister": 0.041500000953674315, + "IoU.escalator": 0.29079999923706057, + "IoU.ottoman": 0.41220001220703123, + "IoU.bottle": 0.2621999931335449, + "IoU.buffet": 0.46650001525878904, + "IoU.poster": 0.20100000381469726, + "IoU.stage": 0.13649999618530273, + "IoU.van": 0.32860000610351564, + "IoU.ship": 0.5213999938964844, + "IoU.fountain": 0.16760000228881836, + "IoU.conveyer belt": 0.6773999786376953, + "IoU.canopy": 0.21520000457763672, + "IoU.washer": 0.6609999847412109, + "IoU.plaything": 0.24770000457763672, + "IoU.swimming pool": 0.6586000061035157, + "IoU.stool": 0.15800000190734864, + "IoU.barrel": 0.47720001220703123, + "IoU.basket": 0.15460000038146973, + "IoU.waterfall": 0.5183000183105468, + "IoU.tent": 0.9081999969482422, + "IoU.bag": 0.06920000076293946, + "IoU.minibike": 0.5684000015258789, + "IoU.cradle": 0.765199966430664, + "IoU.oven": 0.1706999969482422, + "IoU.ball": 0.37560001373291013, + "IoU.food": 0.4129999923706055, + "IoU.step": 0.1256999969482422, + "IoU.tank": 0.5072000122070313, + "IoU.trade name": 0.22459999084472657, + "IoU.microwave": 0.2904000091552734, + "IoU.pot": 0.3306999969482422, + "IoU.animal": 0.5086999893188476, + "IoU.bicycle": 0.507400016784668, + "IoU.lake": 0.003100000023841858, + "IoU.dishwasher": 0.4365999984741211, + "IoU.screen": 0.65, + "IoU.blanket": 0.059499998092651364, + "IoU.sculpture": 0.4468999862670898, + "IoU.hood": 0.4368000030517578, + "IoU.sconce": 0.2118000030517578, + "IoU.vase": 0.24479999542236328, + "IoU.traffic light": 0.19680000305175782, + "IoU.tray": 0.019800000190734864, + "IoU.ashcan": 0.288799991607666, + "IoU.fan": 0.4704000091552734, + "IoU.pier": 0.3320000076293945, + "IoU.crt screen": 0.0375, + "IoU.plate": 0.432400016784668, + "IoU.monitor": 0.016100000143051147, + "IoU.bulletin board": 
0.15619999885559083, + "IoU.shower": 0.003199999928474426, + "IoU.radiator": 0.5177999877929688, + "IoU.glass": 0.03700000047683716, + "IoU.clock": 0.17260000228881836, + "IoU.flag": 0.24629999160766602, + "Acc.wall": 0.8911000061035156, + "Acc.building": 0.9148000335693359, + "Acc.sky": 0.975, + "Acc.floor": 0.9127999877929688, + "Acc.tree": 0.8806999969482422, + "Acc.ceiling": 0.9291999816894532, + "Acc.road": 0.8956999969482422, + "Acc.bed ": 0.9445999908447266, + "Acc.windowpane": 0.7526999664306641, + "Acc.grass": 0.7837000274658203, + "Acc.cabinet": 0.7241999816894531, + "Acc.sidewalk": 0.7793000030517578, + "Acc.person": 0.9058000183105469, + "Acc.earth": 0.5018000030517578, + "Acc.door": 0.5663000106811523, + "Acc.table": 0.685, + "Acc.mountain": 0.7623999786376953, + "Acc.plant": 0.6486000061035156, + "Acc.curtain": 0.8112000274658203, + "Acc.chair": 0.6179000091552734, + "Acc.car": 0.8904000091552734, + "Acc.water": 0.67, + "Acc.painting": 0.8320999908447265, + "Acc.sofa": 0.8352999877929688, + "Acc.shelf": 0.6695999908447265, + "Acc.house": 0.6570999908447266, + "Acc.sea": 0.8933999633789063, + "Acc.mirror": 0.7122000122070312, + "Acc.rug": 0.6704000091552734, + "Acc.field": 0.5004000091552734, + "Acc.armchair": 0.5177000045776368, + "Acc.seat": 0.7625, + "Acc.fence": 0.47639999389648435, + "Acc.desk": 0.5663999938964843, + "Acc.rock": 0.5420999908447266, + "Acc.wardrobe": 0.6045000076293945, + "Acc.lamp": 0.615, + "Acc.bathtub": 0.8186000061035156, + "Acc.railing": 0.4722999954223633, + "Acc.cushion": 0.6656999969482422, + "Acc.base": 0.33860000610351565, + "Acc.box": 0.2514999961853027, + "Acc.column": 0.5345999908447265, + "Acc.signboard": 0.42759998321533205, + "Acc.chest of drawers": 0.504000015258789, + "Acc.counter": 0.4236000061035156, + "Acc.sand": 0.5188000106811523, + "Acc.sink": 0.7529000091552734, + "Acc.skyscraper": 0.7344000244140625, + "Acc.fireplace": 0.8351000213623047, + "Acc.refrigerator": 0.8448999786376953, + "Acc.grandstand": 0.7233999633789062, + "Acc.path": 0.35200000762939454, + "Acc.stairs": 0.3160000038146973, + "Acc.runway": 0.799000015258789, + "Acc.case": 0.7111000061035156, + "Acc.pool table": 0.9580999755859375, + "Acc.pillow": 0.5483000183105469, + "Acc.screen door": 0.7145999908447266, + "Acc.stairway": 0.42279998779296873, + "Acc.river": 0.5318999862670899, + "Acc.bridge": 0.7651000213623047, + "Acc.bookcase": 0.5683000183105469, + "Acc.blind": 0.395, + "Acc.coffee table": 0.6956999969482421, + "Acc.toilet": 0.8705999755859375, + "Acc.flower": 0.4475, + "Acc.book": 0.534000015258789, + "Acc.hill": 0.0734000015258789, + "Acc.bench": 0.4306999969482422, + "Acc.countertop": 0.6554000091552734, + "Acc.stove": 0.6934999847412109, + "Acc.palm": 0.5902000045776368, + "Acc.kitchen island": 0.5045999908447265, + "Acc.computer": 0.672300033569336, + "Acc.swivel chair": 0.3920999908447266, + "Acc.boat": 0.8320999908447265, + "Acc.bar": 0.6202000045776367, + "Acc.arcade machine": 0.37540000915527344, + "Acc.hovel": 0.47560001373291017, + "Acc.bus": 0.8830000305175781, + "Acc.towel": 0.6737000274658204, + "Acc.light": 0.27899999618530275, + "Acc.truck": 0.178700008392334, + "Acc.tower": 0.5266999816894531, + "Acc.chandelier": 0.6473999786376953, + "Acc.awning": 0.25299999237060544, + "Acc.streetlight": 0.1331999969482422, + "Acc.booth": 0.4247999954223633, + "Acc.television receiver": 0.7277999877929687, + "Acc.airplane": 0.6393000030517578, + "Acc.dirt track": 0.288700008392334, + "Acc.apparel": 0.49689998626708987, + "Acc.pole": 0.205, + "Acc.land": 
0.22809999465942382, + "Acc.bannister": 0.047699999809265134, + "Acc.escalator": 0.33310001373291015, + "Acc.ottoman": 0.522400016784668, + "Acc.bottle": 0.33279998779296877, + "Acc.buffet": 0.52, + "Acc.poster": 0.3934000015258789, + "Acc.stage": 0.2804000091552734, + "Acc.van": 0.37990001678466795, + "Acc.ship": 0.524000015258789, + "Acc.fountain": 0.1697999954223633, + "Acc.conveyer belt": 0.865, + "Acc.canopy": 0.23209999084472657, + "Acc.washer": 0.6768000030517578, + "Acc.plaything": 0.34580001831054685, + "Acc.swimming pool": 0.8069000244140625, + "Acc.stool": 0.17909999847412109, + "Acc.barrel": 0.6333000183105468, + "Acc.basket": 0.19180000305175782, + "Acc.waterfall": 0.5652999877929688, + "Acc.tent": 0.9831999969482422, + "Acc.bag": 0.07599999904632568, + "Acc.minibike": 0.695, + "Acc.cradle": 0.9612999725341796, + "Acc.oven": 0.4052000045776367, + "Acc.ball": 0.4672999954223633, + "Acc.food": 0.49080001831054687, + "Acc.step": 0.15109999656677245, + "Acc.tank": 0.5508000183105469, + "Acc.trade name": 0.2571999931335449, + "Acc.microwave": 0.3238999938964844, + "Acc.pot": 0.36459999084472655, + "Acc.animal": 0.5327000045776367, + "Acc.bicycle": 0.6791000366210938, + "Acc.lake": 0.003199999928474426, + "Acc.dishwasher": 0.5875, + "Acc.screen": 0.87, + "Acc.blanket": 0.06059999942779541, + "Acc.sculpture": 0.6011999893188477, + "Acc.hood": 0.5158000183105469, + "Acc.sconce": 0.26290000915527345, + "Acc.vase": 0.33470001220703127, + "Acc.traffic light": 0.26260000228881836, + "Acc.tray": 0.02490000009536743, + "Acc.ashcan": 0.3591999816894531, + "Acc.fan": 0.5554999923706054, + "Acc.pier": 0.46830001831054685, + "Acc.crt screen": 0.10069999694824219, + "Acc.plate": 0.5225999832153321, + "Acc.monitor": 0.024100000858306884, + "Acc.bulletin board": 0.17659999847412108, + "Acc.shower": 0.012000000476837159, + "Acc.radiator": 0.5611000061035156, + "Acc.glass": 0.03809999942779541, + "Acc.clock": 0.18139999389648437, + "Acc.flag": 0.26110000610351564 + } + }, + "131": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8128, + "mIoU": 0.42200000000000004, + "mAcc": 0.5151, + "IoU.wall": 0.7519000244140625, + "IoU.building": 0.8155999755859376, + "IoU.sky": 0.9341000366210938, + "IoU.floor": 0.7858000183105469, + "IoU.tree": 0.721500015258789, + "IoU.ceiling": 0.8198999786376953, + "IoU.road": 0.8125, + "IoU.bed ": 0.8655000305175782, + "IoU.windowpane": 0.5952999877929688, + "IoU.grass": 0.6365999984741211, + "IoU.cabinet": 0.5977000045776367, + "IoU.sidewalk": 0.6308000183105469, + "IoU.person": 0.7715000152587891, + "IoU.earth": 0.3668000030517578, + "IoU.door": 0.41709999084472654, + "IoU.table": 0.5265000152587891, + "IoU.mountain": 0.5727000045776367, + "IoU.plant": 0.5165999984741211, + "IoU.curtain": 0.7062999725341796, + "IoU.chair": 0.48020000457763673, + "IoU.car": 0.7905000305175781, + "IoU.water": 0.5377000045776367, + "IoU.painting": 0.6777999877929688, + "IoU.sofa": 0.5958000183105469, + "IoU.shelf": 0.44009998321533206, + "IoU.house": 0.49529998779296874, + "IoU.sea": 0.5706999969482421, + "IoU.mirror": 0.6311000061035156, + "IoU.rug": 0.6097000122070313, + "IoU.field": 0.27930000305175784, + "IoU.armchair": 0.32599998474121095, + "IoU.seat": 0.5920000076293945, + "IoU.fence": 0.283700008392334, + "IoU.desk": 0.39880001068115234, + "IoU.rock": 0.41470001220703123, + "IoU.wardrobe": 0.5113999938964844, + "IoU.lamp": 0.4997999954223633, + "IoU.bathtub": 0.7469999694824219, + 
"IoU.railing": 0.3090999984741211, + "IoU.cushion": 0.519000015258789, + "IoU.base": 0.19510000228881835, + "IoU.box": 0.20049999237060548, + "IoU.column": 0.41869998931884767, + "IoU.signboard": 0.3265000152587891, + "IoU.chest of drawers": 0.3452999877929688, + "IoU.counter": 0.2954000091552734, + "IoU.sand": 0.39299999237060546, + "IoU.sink": 0.645199966430664, + "IoU.skyscraper": 0.5961999893188477, + "IoU.fireplace": 0.7043000030517578, + "IoU.refrigerator": 0.7355999755859375, + "IoU.grandstand": 0.4293000030517578, + "IoU.path": 0.21540000915527344, + "IoU.stairs": 0.2034000015258789, + "IoU.runway": 0.6197000122070313, + "IoU.case": 0.4988999938964844, + "IoU.pool table": 0.9195999908447265, + "IoU.pillow": 0.4911999893188477, + "IoU.screen door": 0.6405999755859375, + "IoU.stairway": 0.29350000381469726, + "IoU.river": 0.21829999923706056, + "IoU.bridge": 0.6090999984741211, + "IoU.bookcase": 0.3438999938964844, + "IoU.blind": 0.31989999771118166, + "IoU.coffee table": 0.5225, + "IoU.toilet": 0.7566000366210938, + "IoU.flower": 0.3159000015258789, + "IoU.book": 0.3972999954223633, + "IoU.hill": 0.05119999885559082, + "IoU.bench": 0.3747999954223633, + "IoU.countertop": 0.5206000137329102, + "IoU.stove": 0.597599983215332, + "IoU.palm": 0.39240001678466796, + "IoU.kitchen island": 0.30370000839233396, + "IoU.computer": 0.6165999984741211, + "IoU.swivel chair": 0.30940000534057616, + "IoU.boat": 0.6716000366210938, + "IoU.bar": 0.4734999847412109, + "IoU.arcade machine": 0.29870000839233396, + "IoU.hovel": 0.2879999923706055, + "IoU.bus": 0.7929000091552735, + "IoU.towel": 0.5195000076293945, + "IoU.light": 0.20190000534057617, + "IoU.truck": 0.13300000190734862, + "IoU.tower": 0.22420000076293944, + "IoU.chandelier": 0.5620999908447266, + "IoU.awning": 0.19809999465942382, + "IoU.streetlight": 0.1422000026702881, + "IoU.booth": 0.3893000030517578, + "IoU.television receiver": 0.6311999893188477, + "IoU.airplane": 0.5565000152587891, + "IoU.dirt track": 0.09010000228881836, + "IoU.apparel": 0.3277000045776367, + "IoU.pole": 0.181200008392334, + "IoU.land": 0.07780000209808349, + "IoU.bannister": 0.04820000171661377, + "IoU.escalator": 0.16229999542236329, + "IoU.ottoman": 0.40330001831054685, + "IoU.bottle": 0.28530000686645507, + "IoU.buffet": 0.44709999084472657, + "IoU.poster": 0.18860000610351563, + "IoU.stage": 0.138100004196167, + "IoU.van": 0.3188999938964844, + "IoU.ship": 0.5970999908447265, + "IoU.fountain": 0.15460000038146973, + "IoU.conveyer belt": 0.6245999908447266, + "IoU.canopy": 0.22120000839233397, + "IoU.washer": 0.5927999877929687, + "IoU.plaything": 0.23690000534057618, + "IoU.swimming pool": 0.6402999877929687, + "IoU.stool": 0.1625, + "IoU.barrel": 0.5427000045776367, + "IoU.basket": 0.1475, + "IoU.waterfall": 0.5225999832153321, + "IoU.tent": 0.9016999816894531, + "IoU.bag": 0.08680000305175781, + "IoU.minibike": 0.2959000015258789, + "IoU.cradle": 0.7527999877929688, + "IoU.oven": 0.11880000114440918, + "IoU.ball": 0.3959000015258789, + "IoU.food": 0.3491999816894531, + "IoU.step": 0.12640000343322755, + "IoU.tank": 0.4584000015258789, + "IoU.trade name": 0.19450000762939454, + "IoU.microwave": 0.2982999992370605, + "IoU.pot": 0.32930000305175783, + "IoU.animal": 0.5191999816894531, + "IoU.bicycle": 0.4509000015258789, + "IoU.lake": 0.04860000133514404, + "IoU.dishwasher": 0.455099983215332, + "IoU.screen": 0.6668000030517578, + "IoU.blanket": 0.07869999885559081, + "IoU.sculpture": 0.4545999908447266, + "IoU.hood": 0.43349998474121093, + "IoU.sconce": 
0.1990999984741211, + "IoU.vase": 0.23940000534057618, + "IoU.traffic light": 0.21239999771118165, + "IoU.tray": 0.0025999999046325685, + "IoU.ashcan": 0.29139999389648436, + "IoU.fan": 0.4079000091552734, + "IoU.pier": 0.4140999984741211, + "IoU.crt screen": 0.0, + "IoU.plate": 0.37099998474121093, + "IoU.monitor": 0.020099999904632567, + "IoU.bulletin board": 0.14890000343322754, + "IoU.shower": 0.0010999999940395355, + "IoU.radiator": 0.5177000045776368, + "IoU.glass": 0.05510000228881836, + "IoU.clock": 0.18129999160766602, + "IoU.flag": 0.25760000228881835, + "Acc.wall": 0.8869999694824219, + "Acc.building": 0.9281999969482422, + "Acc.sky": 0.9795999908447266, + "Acc.floor": 0.9169999694824219, + "Acc.tree": 0.8411000061035157, + "Acc.ceiling": 0.9395999908447266, + "Acc.road": 0.8933000183105468, + "Acc.bed ": 0.944800033569336, + "Acc.windowpane": 0.7765000152587891, + "Acc.grass": 0.79, + "Acc.cabinet": 0.7358999633789063, + "Acc.sidewalk": 0.795199966430664, + "Acc.person": 0.885199966430664, + "Acc.earth": 0.5209999847412109, + "Acc.door": 0.5336000061035157, + "Acc.table": 0.7137999725341797, + "Acc.mountain": 0.7566000366210938, + "Acc.plant": 0.617599983215332, + "Acc.curtain": 0.8258999633789063, + "Acc.chair": 0.6, + "Acc.car": 0.8838999938964843, + "Acc.water": 0.6991999816894531, + "Acc.painting": 0.8216000366210937, + "Acc.sofa": 0.8268000030517578, + "Acc.shelf": 0.6469000244140625, + "Acc.house": 0.6402999877929687, + "Acc.sea": 0.8348999786376953, + "Acc.mirror": 0.6958000183105468, + "Acc.rug": 0.705999984741211, + "Acc.field": 0.4931000137329102, + "Acc.armchair": 0.4127999877929687, + "Acc.seat": 0.7873999786376953, + "Acc.fence": 0.37959999084472656, + "Acc.desk": 0.5566999816894531, + "Acc.rock": 0.5443999862670899, + "Acc.wardrobe": 0.6206000137329102, + "Acc.lamp": 0.585900001525879, + "Acc.bathtub": 0.7945999908447265, + "Acc.railing": 0.4577000045776367, + "Acc.cushion": 0.669000015258789, + "Acc.base": 0.25760000228881835, + "Acc.box": 0.25190000534057616, + "Acc.column": 0.5270000076293946, + "Acc.signboard": 0.43779998779296875, + "Acc.chest of drawers": 0.5020000076293946, + "Acc.counter": 0.4090999984741211, + "Acc.sand": 0.48639999389648436, + "Acc.sink": 0.7313999938964844, + "Acc.skyscraper": 0.6881999969482422, + "Acc.fireplace": 0.8048000335693359, + "Acc.refrigerator": 0.7966999816894531, + "Acc.grandstand": 0.7319999694824219, + "Acc.path": 0.27860000610351565, + "Acc.stairs": 0.24889999389648437, + "Acc.runway": 0.8072000122070313, + "Acc.case": 0.6815000152587891, + "Acc.pool table": 0.9586000061035156, + "Acc.pillow": 0.5577000045776367, + "Acc.screen door": 0.7055999755859375, + "Acc.stairway": 0.4102999877929687, + "Acc.river": 0.3370000076293945, + "Acc.bridge": 0.7616999816894531, + "Acc.bookcase": 0.5013000106811524, + "Acc.blind": 0.34650001525878904, + "Acc.coffee table": 0.6768000030517578, + "Acc.toilet": 0.8851000213623047, + "Acc.flower": 0.48650001525878905, + "Acc.book": 0.5643000030517578, + "Acc.hill": 0.08010000228881836, + "Acc.bench": 0.4215999984741211, + "Acc.countertop": 0.649000015258789, + "Acc.stove": 0.6616000366210938, + "Acc.palm": 0.48069999694824217, + "Acc.kitchen island": 0.552599983215332, + "Acc.computer": 0.7269000244140625, + "Acc.swivel chair": 0.36369998931884767, + "Acc.boat": 0.7781999969482422, + "Acc.bar": 0.6040000152587891, + "Acc.arcade machine": 0.36650001525878906, + "Acc.hovel": 0.3125, + "Acc.bus": 0.8847000122070312, + "Acc.towel": 0.6912000274658203, + "Acc.light": 0.21450000762939453, + 
"Acc.truck": 0.16309999465942382, + "Acc.tower": 0.2855999946594238, + "Acc.chandelier": 0.695, + "Acc.awning": 0.2181999969482422, + "Acc.streetlight": 0.1584000015258789, + "Acc.booth": 0.43020000457763674, + "Acc.television receiver": 0.6794999694824219, + "Acc.airplane": 0.6279999923706054, + "Acc.dirt track": 0.09289999961853028, + "Acc.apparel": 0.4588999938964844, + "Acc.pole": 0.2322999954223633, + "Acc.land": 0.10699999809265137, + "Acc.bannister": 0.058699998855590824, + "Acc.escalator": 0.17090000152587892, + "Acc.ottoman": 0.5068999862670899, + "Acc.bottle": 0.36470001220703124, + "Acc.buffet": 0.520099983215332, + "Acc.poster": 0.33169998168945314, + "Acc.stage": 0.19739999771118164, + "Acc.van": 0.35889999389648436, + "Acc.ship": 0.6231999969482422, + "Acc.fountain": 0.15689999580383301, + "Acc.conveyer belt": 0.879000015258789, + "Acc.canopy": 0.26790000915527346, + "Acc.washer": 0.6880999755859375, + "Acc.plaything": 0.34639999389648435, + "Acc.swimming pool": 0.7523999786376954, + "Acc.stool": 0.1815999984741211, + "Acc.barrel": 0.6275, + "Acc.basket": 0.18, + "Acc.waterfall": 0.5802000045776368, + "Acc.tent": 0.9745999908447266, + "Acc.bag": 0.09359999656677247, + "Acc.minibike": 0.3103000068664551, + "Acc.cradle": 0.9547000122070313, + "Acc.oven": 0.24719999313354493, + "Acc.ball": 0.504900016784668, + "Acc.food": 0.3920999908447266, + "Acc.step": 0.1413000011444092, + "Acc.tank": 0.5145999908447265, + "Acc.trade name": 0.206200008392334, + "Acc.microwave": 0.3325, + "Acc.pot": 0.3554999923706055, + "Acc.animal": 0.5413000106811523, + "Acc.bicycle": 0.6616999816894531, + "Acc.lake": 0.0603000020980835, + "Acc.dishwasher": 0.5890000152587891, + "Acc.screen": 0.8237999725341797, + "Acc.blanket": 0.08140000343322754, + "Acc.sculpture": 0.5399000167846679, + "Acc.hood": 0.46, + "Acc.sconce": 0.24149999618530274, + "Acc.vase": 0.327599983215332, + "Acc.traffic light": 0.2944000053405762, + "Acc.tray": 0.0033000001311302186, + "Acc.ashcan": 0.3672999954223633, + "Acc.fan": 0.45860000610351564, + "Acc.pier": 0.5486000061035157, + "Acc.crt screen": 0.0, + "Acc.plate": 0.41720001220703123, + "Acc.monitor": 0.030499999523162843, + "Acc.bulletin board": 0.18450000762939453, + "Acc.shower": 0.0036000001430511477, + "Acc.radiator": 0.572400016784668, + "Acc.glass": 0.05829999923706055, + "Acc.clock": 0.19209999084472656, + "Acc.flag": 0.2734000015258789 + } + }, + "132": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8125, + "mIoU": 0.4165, + "mAcc": 0.5115999999999999, + "IoU.wall": 0.7530000305175781, + "IoU.building": 0.8112999725341797, + "IoU.sky": 0.9362999725341797, + "IoU.floor": 0.7926000213623047, + "IoU.tree": 0.7244000244140625, + "IoU.ceiling": 0.8245999908447266, + "IoU.road": 0.8197000122070313, + "IoU.bed ": 0.8595999908447266, + "IoU.windowpane": 0.5977000045776367, + "IoU.grass": 0.6631999969482422, + "IoU.cabinet": 0.5890999984741211, + "IoU.sidewalk": 0.6279000091552734, + "IoU.person": 0.7676000213623047, + "IoU.earth": 0.35470001220703123, + "IoU.door": 0.4293000030517578, + "IoU.table": 0.5456999969482422, + "IoU.mountain": 0.5870000076293945, + "IoU.plant": 0.5109999847412109, + "IoU.curtain": 0.721500015258789, + "IoU.chair": 0.4931999969482422, + "IoU.car": 0.8045999908447266, + "IoU.water": 0.5106000137329102, + "IoU.painting": 0.6718000030517578, + "IoU.sofa": 0.5952999877929688, + "IoU.shelf": 0.4118000030517578, + "IoU.house": 0.41619998931884766, + 
"IoU.sea": 0.5443999862670899, + "IoU.mirror": 0.6254999923706055, + "IoU.rug": 0.614900016784668, + "IoU.field": 0.3, + "IoU.armchair": 0.3793999862670898, + "IoU.seat": 0.6004999923706055, + "IoU.fence": 0.3679999923706055, + "IoU.desk": 0.4281000137329102, + "IoU.rock": 0.3759000015258789, + "IoU.wardrobe": 0.46119998931884765, + "IoU.lamp": 0.510099983215332, + "IoU.bathtub": 0.7437999725341797, + "IoU.railing": 0.31090000152587893, + "IoU.cushion": 0.5045000076293945, + "IoU.base": 0.21940000534057616, + "IoU.box": 0.20329999923706055, + "IoU.column": 0.4383000183105469, + "IoU.signboard": 0.3135000038146973, + "IoU.chest of drawers": 0.3338999938964844, + "IoU.counter": 0.2825, + "IoU.sand": 0.38529998779296876, + "IoU.sink": 0.6124000167846679, + "IoU.skyscraper": 0.5229000091552735, + "IoU.fireplace": 0.6848000335693359, + "IoU.refrigerator": 0.7358000183105469, + "IoU.grandstand": 0.37759998321533206, + "IoU.path": 0.24639999389648437, + "IoU.stairs": 0.2695000076293945, + "IoU.runway": 0.6602999877929687, + "IoU.case": 0.5140999984741211, + "IoU.pool table": 0.908499984741211, + "IoU.pillow": 0.4752999877929687, + "IoU.screen door": 0.505, + "IoU.stairway": 0.2961000061035156, + "IoU.river": 0.18920000076293944, + "IoU.bridge": 0.5959000015258789, + "IoU.bookcase": 0.34130001068115234, + "IoU.blind": 0.2720999908447266, + "IoU.coffee table": 0.5683000183105469, + "IoU.toilet": 0.745, + "IoU.flower": 0.31489999771118166, + "IoU.book": 0.3686999893188477, + "IoU.hill": 0.060799999237060545, + "IoU.bench": 0.45610000610351564, + "IoU.countertop": 0.5658000183105468, + "IoU.stove": 0.6263999938964844, + "IoU.palm": 0.4259999847412109, + "IoU.kitchen island": 0.30290000915527343, + "IoU.computer": 0.5477000045776367, + "IoU.swivel chair": 0.4006999969482422, + "IoU.boat": 0.5829000091552734, + "IoU.bar": 0.48279998779296873, + "IoU.arcade machine": 0.33360000610351564, + "IoU.hovel": 0.18700000762939453, + "IoU.bus": 0.7762999725341797, + "IoU.towel": 0.5281999969482422, + "IoU.light": 0.141899995803833, + "IoU.truck": 0.05940000057220459, + "IoU.tower": 0.28739999771118163, + "IoU.chandelier": 0.576500015258789, + "IoU.awning": 0.21649999618530275, + "IoU.streetlight": 0.08109999656677246, + "IoU.booth": 0.43290000915527344, + "IoU.television receiver": 0.5831999969482422, + "IoU.airplane": 0.5536999893188477, + "IoU.dirt track": 0.23139999389648438, + "IoU.apparel": 0.3086000061035156, + "IoU.pole": 0.18860000610351563, + "IoU.land": 0.045, + "IoU.bannister": 0.05119999885559082, + "IoU.escalator": 0.3018000030517578, + "IoU.ottoman": 0.45810001373291015, + "IoU.bottle": 0.07449999809265137, + "IoU.buffet": 0.29850000381469727, + "IoU.poster": 0.2402000045776367, + "IoU.stage": 0.11579999923706055, + "IoU.van": 0.2930999946594238, + "IoU.ship": 0.6102999877929688, + "IoU.fountain": 0.03150000095367431, + "IoU.conveyer belt": 0.48709999084472655, + "IoU.canopy": 0.15710000038146973, + "IoU.washer": 0.6359999847412109, + "IoU.plaything": 0.2610000038146973, + "IoU.swimming pool": 0.6705000305175781, + "IoU.stool": 0.118100004196167, + "IoU.barrel": 0.38529998779296876, + "IoU.basket": 0.16239999771118163, + "IoU.waterfall": 0.5383000183105469, + "IoU.tent": 0.9266000366210938, + "IoU.bag": 0.06710000038146972, + "IoU.minibike": 0.5915999984741211, + "IoU.cradle": 0.7922000122070313, + "IoU.oven": 0.16600000381469726, + "IoU.ball": 0.3425, + "IoU.food": 0.5343000030517578, + "IoU.step": 0.14710000038146973, + "IoU.tank": 0.40099998474121096, + "IoU.trade name": 0.15729999542236328, + 
"IoU.microwave": 0.3188999938964844, + "IoU.pot": 0.2811000061035156, + "IoU.animal": 0.49630001068115237, + "IoU.bicycle": 0.4931000137329102, + "IoU.lake": 0.0, + "IoU.dishwasher": 0.3370000076293945, + "IoU.screen": 0.5881000137329102, + "IoU.blanket": 0.07969999790191651, + "IoU.sculpture": 0.46099998474121096, + "IoU.hood": 0.35770000457763673, + "IoU.sconce": 0.19149999618530272, + "IoU.vase": 0.25729999542236326, + "IoU.traffic light": 0.15850000381469725, + "IoU.tray": 0.01759999990463257, + "IoU.ashcan": 0.24450000762939453, + "IoU.fan": 0.40430000305175784, + "IoU.pier": 0.3345000076293945, + "IoU.crt screen": 0.02430000066757202, + "IoU.plate": 0.4272999954223633, + "IoU.monitor": 0.030899999141693116, + "IoU.bulletin board": 0.12699999809265136, + "IoU.shower": 0.003199999928474426, + "IoU.radiator": 0.4559000015258789, + "IoU.glass": 0.035, + "IoU.clock": 0.123100004196167, + "IoU.flag": 0.2593000030517578, + "Acc.wall": 0.8905000305175781, + "Acc.building": 0.9230999755859375, + "Acc.sky": 0.9762000274658204, + "Acc.floor": 0.9148000335693359, + "Acc.tree": 0.8663999938964844, + "Acc.ceiling": 0.9216999816894531, + "Acc.road": 0.8909999847412109, + "Acc.bed ": 0.9402999877929688, + "Acc.windowpane": 0.7681999969482421, + "Acc.grass": 0.8063999938964844, + "Acc.cabinet": 0.7169000244140625, + "Acc.sidewalk": 0.7991000366210937, + "Acc.person": 0.907300033569336, + "Acc.earth": 0.4854000091552734, + "Acc.door": 0.5652999877929688, + "Acc.table": 0.7030999755859375, + "Acc.mountain": 0.7609999847412109, + "Acc.plant": 0.6211999893188477, + "Acc.curtain": 0.8319999694824218, + "Acc.chair": 0.6313999938964844, + "Acc.car": 0.9012999725341797, + "Acc.water": 0.678499984741211, + "Acc.painting": 0.8358000183105468, + "Acc.sofa": 0.7838999938964843, + "Acc.shelf": 0.6352999877929687, + "Acc.house": 0.5622000122070312, + "Acc.sea": 0.8202999877929688, + "Acc.mirror": 0.6788999938964844, + "Acc.rug": 0.6716000366210938, + "Acc.field": 0.5288999938964843, + "Acc.armchair": 0.5811000061035156, + "Acc.seat": 0.7516000366210938, + "Acc.fence": 0.46689998626708984, + "Acc.desk": 0.5881999969482422, + "Acc.rock": 0.5984000015258789, + "Acc.wardrobe": 0.5572999954223633, + "Acc.lamp": 0.6416000366210938, + "Acc.bathtub": 0.8177999877929687, + "Acc.railing": 0.47779998779296873, + "Acc.cushion": 0.6604000091552734, + "Acc.base": 0.32799999237060545, + "Acc.box": 0.27600000381469725, + "Acc.column": 0.5295999908447265, + "Acc.signboard": 0.38880001068115233, + "Acc.chest of drawers": 0.475, + "Acc.counter": 0.3838999938964844, + "Acc.sand": 0.5458000183105469, + "Acc.sink": 0.7008000183105468, + "Acc.skyscraper": 0.5908000183105468, + "Acc.fireplace": 0.8676000213623047, + "Acc.refrigerator": 0.8055999755859375, + "Acc.grandstand": 0.6887000274658203, + "Acc.path": 0.30840000152587893, + "Acc.stairs": 0.33470001220703127, + "Acc.runway": 0.7829000091552735, + "Acc.case": 0.6455000305175781, + "Acc.pool table": 0.9501999664306641, + "Acc.pillow": 0.5263999938964844, + "Acc.screen door": 0.5495999908447265, + "Acc.stairway": 0.4084000015258789, + "Acc.river": 0.3225, + "Acc.bridge": 0.7666999816894531, + "Acc.bookcase": 0.5768000030517578, + "Acc.blind": 0.28329999923706056, + "Acc.coffee table": 0.727699966430664, + "Acc.toilet": 0.864800033569336, + "Acc.flower": 0.46919998168945315, + "Acc.book": 0.5225999832153321, + "Acc.hill": 0.08779999732971192, + "Acc.bench": 0.5138999938964843, + "Acc.countertop": 0.6966000366210937, + "Acc.stove": 0.7026000213623047, + "Acc.palm": 0.6145000076293945, + 
"Acc.kitchen island": 0.489900016784668, + "Acc.computer": 0.6765000152587891, + "Acc.swivel chair": 0.49349998474121093, + "Acc.boat": 0.6655000305175781, + "Acc.bar": 0.5811999893188476, + "Acc.arcade machine": 0.3858000183105469, + "Acc.hovel": 0.1946999931335449, + "Acc.bus": 0.8748999786376953, + "Acc.towel": 0.6691000366210937, + "Acc.light": 0.14649999618530274, + "Acc.truck": 0.07829999923706055, + "Acc.tower": 0.4006999969482422, + "Acc.chandelier": 0.6980999755859375, + "Acc.awning": 0.23350000381469727, + "Acc.streetlight": 0.08390000343322754, + "Acc.booth": 0.46490001678466797, + "Acc.television receiver": 0.6856999969482422, + "Acc.airplane": 0.6363000106811524, + "Acc.dirt track": 0.23790000915527343, + "Acc.apparel": 0.475, + "Acc.pole": 0.23020000457763673, + "Acc.land": 0.05409999847412109, + "Acc.bannister": 0.06, + "Acc.escalator": 0.33689998626708983, + "Acc.ottoman": 0.5743999862670899, + "Acc.bottle": 0.08210000038146972, + "Acc.buffet": 0.32919998168945314, + "Acc.poster": 0.505099983215332, + "Acc.stage": 0.23040000915527345, + "Acc.van": 0.3579000091552734, + "Acc.ship": 0.7152999877929688, + "Acc.fountain": 0.03180000066757202, + "Acc.conveyer belt": 0.6579000091552735, + "Acc.canopy": 0.18420000076293946, + "Acc.washer": 0.6591999816894532, + "Acc.plaything": 0.4420000076293945, + "Acc.swimming pool": 0.768499984741211, + "Acc.stool": 0.1256999969482422, + "Acc.barrel": 0.39779998779296877, + "Acc.basket": 0.18440000534057618, + "Acc.waterfall": 0.591500015258789, + "Acc.tent": 0.9870999908447265, + "Acc.bag": 0.07960000038146972, + "Acc.minibike": 0.6847000122070312, + "Acc.cradle": 0.9580999755859375, + "Acc.oven": 0.4370000076293945, + "Acc.ball": 0.46029998779296877, + "Acc.food": 0.6356000137329102, + "Acc.step": 0.1584000015258789, + "Acc.tank": 0.4122999954223633, + "Acc.trade name": 0.17010000228881836, + "Acc.microwave": 0.35209999084472654, + "Acc.pot": 0.31, + "Acc.animal": 0.5238999938964843, + "Acc.bicycle": 0.5806000137329101, + "Acc.lake": 0.0, + "Acc.dishwasher": 0.41310001373291017, + "Acc.screen": 0.7498000335693359, + "Acc.blanket": 0.08640000343322754, + "Acc.sculpture": 0.585900001525879, + "Acc.hood": 0.3870000076293945, + "Acc.sconce": 0.21920000076293944, + "Acc.vase": 0.35, + "Acc.traffic light": 0.1906999969482422, + "Acc.tray": 0.022200000286102296, + "Acc.ashcan": 0.32610000610351564, + "Acc.fan": 0.45419998168945314, + "Acc.pier": 0.414900016784668, + "Acc.crt screen": 0.06829999923706055, + "Acc.plate": 0.5243999862670898, + "Acc.monitor": 0.04570000171661377, + "Acc.bulletin board": 0.13140000343322755, + "Acc.shower": 0.02049999952316284, + "Acc.radiator": 0.47639999389648435, + "Acc.glass": 0.03609999895095825, + "Acc.clock": 0.1325, + "Acc.flag": 0.27670000076293944 + } + }, + "133": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8106, + "mIoU": 0.4086, + "mAcc": 0.5004, + "IoU.wall": 0.7520999908447266, + "IoU.building": 0.8059999847412109, + "IoU.sky": 0.9351000213623046, + "IoU.floor": 0.7862999725341797, + "IoU.tree": 0.7158999633789063, + "IoU.ceiling": 0.8234999847412109, + "IoU.road": 0.8051000213623047, + "IoU.bed ": 0.8593000030517578, + "IoU.windowpane": 0.5934000015258789, + "IoU.grass": 0.6518000030517578, + "IoU.cabinet": 0.5838999938964844, + "IoU.sidewalk": 0.6181000137329101, + "IoU.person": 0.7766000366210938, + "IoU.earth": 0.36650001525878906, + "IoU.door": 0.4254999923706055, + "IoU.table": 0.5372999954223633, + 
"IoU.mountain": 0.585999984741211, + "IoU.plant": 0.4986000061035156, + "IoU.curtain": 0.7151000213623047, + "IoU.chair": 0.4872999954223633, + "IoU.car": 0.7916000366210938, + "IoU.water": 0.5204000091552734, + "IoU.painting": 0.6576999664306641, + "IoU.sofa": 0.6122000122070312, + "IoU.shelf": 0.41450000762939454, + "IoU.house": 0.42060001373291017, + "IoU.sea": 0.5365999984741211, + "IoU.mirror": 0.6361999893188477, + "IoU.rug": 0.6234999847412109, + "IoU.field": 0.293799991607666, + "IoU.armchair": 0.3566999816894531, + "IoU.seat": 0.6058000183105469, + "IoU.fence": 0.3358000183105469, + "IoU.desk": 0.41779998779296873, + "IoU.rock": 0.3604999923706055, + "IoU.wardrobe": 0.47389999389648435, + "IoU.lamp": 0.48709999084472655, + "IoU.bathtub": 0.7494999694824219, + "IoU.railing": 0.308799991607666, + "IoU.cushion": 0.5077000045776368, + "IoU.base": 0.18850000381469725, + "IoU.box": 0.2036000061035156, + "IoU.column": 0.42209999084472655, + "IoU.signboard": 0.33, + "IoU.chest of drawers": 0.32619998931884764, + "IoU.counter": 0.27110000610351564, + "IoU.sand": 0.4020999908447266, + "IoU.sink": 0.620999984741211, + "IoU.skyscraper": 0.45619998931884764, + "IoU.fireplace": 0.6983999633789062, + "IoU.refrigerator": 0.7151999664306641, + "IoU.grandstand": 0.37220001220703125, + "IoU.path": 0.23920000076293946, + "IoU.stairs": 0.3032999992370605, + "IoU.runway": 0.6408999633789062, + "IoU.case": 0.48509998321533204, + "IoU.pool table": 0.895199966430664, + "IoU.pillow": 0.5104999923706055, + "IoU.screen door": 0.6666999816894531, + "IoU.stairway": 0.3447000122070312, + "IoU.river": 0.17219999313354492, + "IoU.bridge": 0.634900016784668, + "IoU.bookcase": 0.34810001373291016, + "IoU.blind": 0.2509000015258789, + "IoU.coffee table": 0.5359000015258789, + "IoU.toilet": 0.7829000091552735, + "IoU.flower": 0.342599983215332, + "IoU.book": 0.3788000106811523, + "IoU.hill": 0.05179999828338623, + "IoU.bench": 0.4015000152587891, + "IoU.countertop": 0.5690999984741211, + "IoU.stove": 0.6190999984741211, + "IoU.palm": 0.3413999938964844, + "IoU.kitchen island": 0.3265999984741211, + "IoU.computer": 0.5611999893188476, + "IoU.swivel chair": 0.39369998931884764, + "IoU.boat": 0.5518999862670898, + "IoU.bar": 0.4718000030517578, + "IoU.arcade machine": 0.33439998626708983, + "IoU.hovel": 0.1484000015258789, + "IoU.bus": 0.7352999877929688, + "IoU.towel": 0.5265000152587891, + "IoU.light": 0.24350000381469727, + "IoU.truck": 0.07530000209808349, + "IoU.tower": 0.17200000762939452, + "IoU.chandelier": 0.5329999923706055, + "IoU.awning": 0.23920000076293946, + "IoU.streetlight": 0.116899995803833, + "IoU.booth": 0.4433000183105469, + "IoU.television receiver": 0.592599983215332, + "IoU.airplane": 0.563499984741211, + "IoU.dirt track": 0.16170000076293944, + "IoU.apparel": 0.31260000228881835, + "IoU.pole": 0.1834000015258789, + "IoU.land": 0.027899999618530274, + "IoU.bannister": 0.036600000858306884, + "IoU.escalator": 0.1868000030517578, + "IoU.ottoman": 0.40130001068115234, + "IoU.bottle": 0.08510000228881837, + "IoU.buffet": 0.41639999389648436, + "IoU.poster": 0.21690000534057619, + "IoU.stage": 0.12930000305175782, + "IoU.van": 0.27030000686645506, + "IoU.ship": 0.4816999816894531, + "IoU.fountain": 0.024200000762939454, + "IoU.conveyer belt": 0.47330001831054686, + "IoU.canopy": 0.22489999771118163, + "IoU.washer": 0.5438000106811524, + "IoU.plaything": 0.20540000915527343, + "IoU.swimming pool": 0.6154999923706055, + "IoU.stool": 0.09529999732971191, + "IoU.barrel": 0.17549999237060546, + 
"IoU.basket": 0.173700008392334, + "IoU.waterfall": 0.5181999969482421, + "IoU.tent": 0.9141999816894532, + "IoU.bag": 0.08829999923706054, + "IoU.minibike": 0.3146999931335449, + "IoU.cradle": 0.7586000061035156, + "IoU.oven": 0.12289999961853028, + "IoU.ball": 0.4056999969482422, + "IoU.food": 0.48880001068115236, + "IoU.step": 0.1265999984741211, + "IoU.tank": 0.41720001220703123, + "IoU.trade name": 0.10779999732971192, + "IoU.microwave": 0.3202000045776367, + "IoU.pot": 0.29700000762939455, + "IoU.animal": 0.5097000122070312, + "IoU.bicycle": 0.445, + "IoU.lake": 9.999999776482581e-05, + "IoU.dishwasher": 0.40049999237060546, + "IoU.screen": 0.5872999954223633, + "IoU.blanket": 0.08050000190734863, + "IoU.sculpture": 0.5091999816894531, + "IoU.hood": 0.3118000030517578, + "IoU.sconce": 0.17450000762939452, + "IoU.vase": 0.2453000068664551, + "IoU.traffic light": 0.1759000015258789, + "IoU.tray": 0.020799999237060548, + "IoU.ashcan": 0.2705999946594238, + "IoU.fan": 0.35619998931884767, + "IoU.pier": 0.31209999084472656, + "IoU.crt screen": 0.0, + "IoU.plate": 0.41830001831054686, + "IoU.monitor": 0.053499999046325686, + "IoU.bulletin board": 0.12829999923706054, + "IoU.shower": 0.012300000190734864, + "IoU.radiator": 0.46700000762939453, + "IoU.glass": 0.04380000114440918, + "IoU.clock": 0.2084000015258789, + "IoU.flag": 0.25260000228881835, + "Acc.wall": 0.8829000091552734, + "Acc.building": 0.9326999664306641, + "Acc.sky": 0.977699966430664, + "Acc.floor": 0.9191999816894532, + "Acc.tree": 0.8416999816894531, + "Acc.ceiling": 0.9325, + "Acc.road": 0.8833999633789062, + "Acc.bed ": 0.9373999786376953, + "Acc.windowpane": 0.7845999908447265, + "Acc.grass": 0.8187000274658203, + "Acc.cabinet": 0.719800033569336, + "Acc.sidewalk": 0.8069999694824219, + "Acc.person": 0.8866000366210938, + "Acc.earth": 0.5084000015258789, + "Acc.door": 0.5465000152587891, + "Acc.table": 0.7322000122070312, + "Acc.mountain": 0.7622000122070313, + "Acc.plant": 0.5970999908447265, + "Acc.curtain": 0.8234999847412109, + "Acc.chair": 0.6184000015258789, + "Acc.car": 0.899000015258789, + "Acc.water": 0.6916999816894531, + "Acc.painting": 0.8295999908447266, + "Acc.sofa": 0.8261000061035156, + "Acc.shelf": 0.6365999984741211, + "Acc.house": 0.5349000167846679, + "Acc.sea": 0.8004000091552734, + "Acc.mirror": 0.6983999633789062, + "Acc.rug": 0.6994999694824219, + "Acc.field": 0.5015000152587891, + "Acc.armchair": 0.4577000045776367, + "Acc.seat": 0.7830999755859375, + "Acc.fence": 0.435, + "Acc.desk": 0.5915999984741211, + "Acc.rock": 0.5543999862670899, + "Acc.wardrobe": 0.6022000122070312, + "Acc.lamp": 0.5706999969482421, + "Acc.bathtub": 0.8011000061035156, + "Acc.railing": 0.46439998626708984, + "Acc.cushion": 0.6676999664306641, + "Acc.base": 0.2428000068664551, + "Acc.box": 0.275, + "Acc.column": 0.5279000091552735, + "Acc.signboard": 0.4477000045776367, + "Acc.chest of drawers": 0.4493000030517578, + "Acc.counter": 0.3781999969482422, + "Acc.sand": 0.4718000030517578, + "Acc.sink": 0.7030999755859375, + "Acc.skyscraper": 0.4936999893188477, + "Acc.fireplace": 0.8312000274658203, + "Acc.refrigerator": 0.7654000091552734, + "Acc.grandstand": 0.6744000244140625, + "Acc.path": 0.291200008392334, + "Acc.stairs": 0.3516999816894531, + "Acc.runway": 0.7658999633789062, + "Acc.case": 0.6315000152587891, + "Acc.pool table": 0.9523999786376953, + "Acc.pillow": 0.5840999984741211, + "Acc.screen door": 0.722699966430664, + "Acc.stairway": 0.4718000030517578, + "Acc.river": 0.2798999977111816, + "Acc.bridge": 
0.7605999755859375, + "Acc.bookcase": 0.5365999984741211, + "Acc.blind": 0.25829999923706054, + "Acc.coffee table": 0.6991000366210938, + "Acc.toilet": 0.8625, + "Acc.flower": 0.5338000106811523, + "Acc.book": 0.5668000030517578, + "Acc.hill": 0.081899995803833, + "Acc.bench": 0.43259998321533205, + "Acc.countertop": 0.7333000183105469, + "Acc.stove": 0.7219000244140625, + "Acc.palm": 0.4356999969482422, + "Acc.kitchen island": 0.6593000030517578, + "Acc.computer": 0.6759999847412109, + "Acc.swivel chair": 0.5125, + "Acc.boat": 0.6426000213623047, + "Acc.bar": 0.569900016784668, + "Acc.arcade machine": 0.3686000061035156, + "Acc.hovel": 0.15939999580383302, + "Acc.bus": 0.890199966430664, + "Acc.towel": 0.677300033569336, + "Acc.light": 0.2617000007629395, + "Acc.truck": 0.09770000457763672, + "Acc.tower": 0.21299999237060546, + "Acc.chandelier": 0.6406999969482422, + "Acc.awning": 0.2511000061035156, + "Acc.streetlight": 0.12539999961853027, + "Acc.booth": 0.46540000915527346, + "Acc.television receiver": 0.6636000061035157, + "Acc.airplane": 0.6302000045776367, + "Acc.dirt track": 0.1665999984741211, + "Acc.apparel": 0.45799999237060546, + "Acc.pole": 0.21760000228881837, + "Acc.land": 0.03619999885559082, + "Acc.bannister": 0.04400000095367432, + "Acc.escalator": 0.2134000015258789, + "Acc.ottoman": 0.5147999954223633, + "Acc.bottle": 0.09640000343322754, + "Acc.buffet": 0.4818000030517578, + "Acc.poster": 0.2972999954223633, + "Acc.stage": 0.1802000045776367, + "Acc.van": 0.2981999969482422, + "Acc.ship": 0.5697000122070313, + "Acc.fountain": 0.02430000066757202, + "Acc.conveyer belt": 0.6613999938964844, + "Acc.canopy": 0.2493000030517578, + "Acc.washer": 0.6405000305175781, + "Acc.plaything": 0.3763000106811523, + "Acc.swimming pool": 0.6708000183105469, + "Acc.stool": 0.09979999542236329, + "Acc.barrel": 0.2606999969482422, + "Acc.basket": 0.22459999084472657, + "Acc.waterfall": 0.5690000152587891, + "Acc.tent": 0.9801999664306641, + "Acc.bag": 0.10289999961853027, + "Acc.minibike": 0.32860000610351564, + "Acc.cradle": 0.9479000091552734, + "Acc.oven": 0.26290000915527345, + "Acc.ball": 0.5670000076293945, + "Acc.food": 0.5547999954223632, + "Acc.step": 0.1411999988555908, + "Acc.tank": 0.46669998168945315, + "Acc.trade name": 0.11090000152587891, + "Acc.microwave": 0.35060001373291017, + "Acc.pot": 0.32299999237060545, + "Acc.animal": 0.535099983215332, + "Acc.bicycle": 0.6031999969482422, + "Acc.lake": 9.999999776482581e-05, + "Acc.dishwasher": 0.5213999938964844, + "Acc.screen": 0.731500015258789, + "Acc.blanket": 0.08380000114440918, + "Acc.sculpture": 0.577599983215332, + "Acc.hood": 0.32360000610351564, + "Acc.sconce": 0.19030000686645507, + "Acc.vase": 0.34119998931884765, + "Acc.traffic light": 0.20790000915527343, + "Acc.tray": 0.027699999809265137, + "Acc.ashcan": 0.3893999862670898, + "Acc.fan": 0.40380001068115234, + "Acc.pier": 0.3959000015258789, + "Acc.crt screen": 0.0, + "Acc.plate": 0.5409000015258789, + "Acc.monitor": 0.08850000381469726, + "Acc.bulletin board": 0.1584000015258789, + "Acc.shower": 0.030999999046325683, + "Acc.radiator": 0.4929999923706055, + "Acc.glass": 0.045799999237060546, + "Acc.clock": 0.22190000534057616, + "Acc.flag": 0.2665999984741211 + } + }, + "134": { + "config": "configs/setr/setr_naive_512x512_160k_b16_ade20k_deit_3_s_stitch_l_224_plus_flops_sampling_lora_16.py", + "metric": { + "aAcc": 0.8125, + "mIoU": 0.42340000000000005, + "mAcc": 0.5177, + "IoU.wall": 0.7520999908447266, + "IoU.building": 0.8109999847412109, + "IoU.sky": 
0.9345999908447266, + "IoU.floor": 0.7887999725341797, + "IoU.tree": 0.7212999725341797, + "IoU.ceiling": 0.8248999786376953, + "IoU.road": 0.8130999755859375, + "IoU.bed ": 0.86, + "IoU.windowpane": 0.5931999969482422, + "IoU.grass": 0.6480000305175782, + "IoU.cabinet": 0.5779999923706055, + "IoU.sidewalk": 0.6138999938964844, + "IoU.person": 0.7737999725341796, + "IoU.earth": 0.3313999938964844, + "IoU.door": 0.45369998931884764, + "IoU.table": 0.5402000045776367, + "IoU.mountain": 0.5802000045776368, + "IoU.plant": 0.5077999877929688, + "IoU.curtain": 0.7168000030517578, + "IoU.chair": 0.49200000762939455, + "IoU.car": 0.788499984741211, + "IoU.water": 0.5166999816894531, + "IoU.painting": 0.6662999725341797, + "IoU.sofa": 0.6077000045776367, + "IoU.shelf": 0.41450000762939454, + "IoU.house": 0.4891999816894531, + "IoU.sea": 0.6220000076293946, + "IoU.mirror": 0.637400016784668, + "IoU.rug": 0.6002000045776367, + "IoU.field": 0.2909000015258789, + "IoU.armchair": 0.40970001220703123, + "IoU.seat": 0.5997000122070313, + "IoU.fence": 0.3763000106811523, + "IoU.desk": 0.4377000045776367, + "IoU.rock": 0.3890000152587891, + "IoU.wardrobe": 0.4854999923706055, + "IoU.lamp": 0.5056999969482422, + "IoU.bathtub": 0.7765000152587891, + "IoU.railing": 0.30540000915527343, + "IoU.cushion": 0.5111999893188477, + "IoU.base": 0.22049999237060547, + "IoU.box": 0.21170000076293946, + "IoU.column": 0.41319999694824217, + "IoU.signboard": 0.3208000183105469, + "IoU.chest of drawers": 0.34150001525878904, + "IoU.counter": 0.33360000610351564, + "IoU.sand": 0.3843999862670898, + "IoU.sink": 0.6206999969482422, + "IoU.skyscraper": 0.5266999816894531, + "IoU.fireplace": 0.74, + "IoU.refrigerator": 0.715, + "IoU.grandstand": 0.41830001831054686, + "IoU.path": 0.18899999618530272, + "IoU.stairs": 0.22920000076293945, + "IoU.runway": 0.7008000183105468, + "IoU.case": 0.4509000015258789, + "IoU.pool table": 0.8968000030517578, + "IoU.pillow": 0.5090999984741211, + "IoU.screen door": 0.5615999984741211, + "IoU.stairway": 0.27899999618530275, + "IoU.river": 0.2015999984741211, + "IoU.bridge": 0.6788999938964844, + "IoU.bookcase": 0.31879999160766603, + "IoU.blind": 0.18670000076293947, + "IoU.coffee table": 0.5263000106811524, + "IoU.toilet": 0.7930000305175782, + "IoU.flower": 0.3131999969482422, + "IoU.book": 0.4134000015258789, + "IoU.hill": 0.06940000057220459, + "IoU.bench": 0.38060001373291014, + "IoU.countertop": 0.5286999893188477, + "IoU.stove": 0.6531999969482422, + "IoU.palm": 0.4184999847412109, + "IoU.kitchen island": 0.29889999389648436, + "IoU.computer": 0.5963000106811523, + "IoU.swivel chair": 0.38619998931884764, + "IoU.boat": 0.6343000030517578, + "IoU.bar": 0.4520999908447266, + "IoU.arcade machine": 0.332599983215332, + "IoU.hovel": 0.28079999923706056, + "IoU.bus": 0.7138999938964844, + "IoU.towel": 0.5504999923706054, + "IoU.light": 0.22770000457763673, + "IoU.truck": 0.11319999694824219, + "IoU.tower": 0.16190000534057616, + "IoU.chandelier": 0.5556000137329101, + "IoU.awning": 0.24170000076293946, + "IoU.streetlight": 0.1075, + "IoU.booth": 0.35380001068115235, + "IoU.television receiver": 0.632400016784668, + "IoU.airplane": 0.5740999984741211, + "IoU.dirt track": 0.18469999313354493, + "IoU.apparel": 0.33049999237060546, + "IoU.pole": 0.19559999465942382, + "IoU.land": 0.0653000020980835, + "IoU.bannister": 0.10819999694824219, + "IoU.escalator": 0.20510000228881836, + "IoU.ottoman": 0.39880001068115234, + "IoU.bottle": 0.1290999984741211, + "IoU.buffet": 0.36189998626708986, + 
"IoU.poster": 0.2109000015258789, + "IoU.stage": 0.17040000915527342, + "IoU.van": 0.2535000038146973, + "IoU.ship": 0.7269000244140625, + "IoU.fountain": 0.019500000476837157, + "IoU.conveyer belt": 0.45380001068115233, + "IoU.canopy": 0.17319999694824217, + "IoU.washer": 0.653499984741211, + "IoU.plaything": 0.2427000045776367, + "IoU.swimming pool": 0.6309999847412109, + "IoU.stool": 0.1427999973297119, + "IoU.barrel": 0.48220001220703124, + "IoU.basket": 0.1853000068664551, + "IoU.waterfall": 0.6676000213623047, + "IoU.tent": 0.9401999664306641, + "IoU.bag": 0.11180000305175782, + "IoU.minibike": 0.475, + "IoU.cradle": 0.7361000061035157, + "IoU.oven": 0.21809999465942384, + "IoU.ball": 0.34169998168945315, + "IoU.food": 0.5297000122070312, + "IoU.step": 0.10770000457763672, + "IoU.tank": 0.37869998931884763, + "IoU.trade name": 0.14390000343322754, + "IoU.microwave": 0.29, + "IoU.pot": 0.30959999084472656, + "IoU.animal": 0.5775, + "IoU.bicycle": 0.44229999542236326, + "IoU.lake": 0.06320000171661377, + "IoU.dishwasher": 0.514000015258789, + "IoU.screen": 0.5988999938964844, + "IoU.blanket": 0.06989999771118165, + "IoU.sculpture": 0.5081000137329101, + "IoU.hood": 0.4515000152587891, + "IoU.sconce": 0.2246999931335449, + "IoU.vase": 0.275, + "IoU.traffic light": 0.1802000045776367, + "IoU.tray": 0.032899999618530275, + "IoU.ashcan": 0.26229999542236326, + "IoU.fan": 0.2570000076293945, + "IoU.pier": 0.33169998168945314, + "IoU.crt screen": 9.999999776482581e-05, + "IoU.plate": 0.3568000030517578, + "IoU.monitor": 0.018300000429153442, + "IoU.bulletin board": 0.1463000011444092, + "IoU.shower": 0.0025999999046325685, + "IoU.radiator": 0.547599983215332, + "IoU.glass": 0.0584000015258789, + "IoU.clock": 0.2181999969482422, + "IoU.flag": 0.27950000762939453, + "Acc.wall": 0.8784999847412109, + "Acc.building": 0.9394999694824219, + "Acc.sky": 0.9762999725341797, + "Acc.floor": 0.9180999755859375, + "Acc.tree": 0.8191999816894531, + "Acc.ceiling": 0.9333999633789063, + "Acc.road": 0.9051000213623047, + "Acc.bed ": 0.9388999938964844, + "Acc.windowpane": 0.779000015258789, + "Acc.grass": 0.8216999816894531, + "Acc.cabinet": 0.7094999694824219, + "Acc.sidewalk": 0.7697000122070312, + "Acc.person": 0.88, + "Acc.earth": 0.44310001373291014, + "Acc.door": 0.6316999816894531, + "Acc.table": 0.7223999786376953, + "Acc.mountain": 0.7727999877929688, + "Acc.plant": 0.6334999847412109, + "Acc.curtain": 0.8452999877929688, + "Acc.chair": 0.6347000122070312, + "Acc.car": 0.8768000030517578, + "Acc.water": 0.6956999969482421, + "Acc.painting": 0.8048000335693359, + "Acc.sofa": 0.7831999969482422, + "Acc.shelf": 0.620999984741211, + "Acc.house": 0.6006999969482422, + "Acc.sea": 0.903499984741211, + "Acc.mirror": 0.7004000091552735, + "Acc.rug": 0.6347999954223633, + "Acc.field": 0.49349998474121093, + "Acc.armchair": 0.5752000045776368, + "Acc.seat": 0.8161000061035156, + "Acc.fence": 0.5008000183105469, + "Acc.desk": 0.5918000030517578, + "Acc.rock": 0.6011999893188477, + "Acc.wardrobe": 0.6170999908447266, + "Acc.lamp": 0.5863999938964843, + "Acc.bathtub": 0.8418000030517578, + "Acc.railing": 0.47759998321533204, + "Acc.cushion": 0.65, + "Acc.base": 0.3004999923706055, + "Acc.box": 0.2669000053405762, + "Acc.column": 0.505099983215332, + "Acc.signboard": 0.3975, + "Acc.chest of drawers": 0.5586999893188477, + "Acc.counter": 0.47619998931884766, + "Acc.sand": 0.5084999847412109, + "Acc.sink": 0.6909999847412109, + "Acc.skyscraper": 0.6059000015258789, + "Acc.fireplace": 0.8547000122070313, + 
"Acc.refrigerator": 0.7970999908447266, + "Acc.grandstand": 0.6020000076293945, + "Acc.path": 0.22079999923706053, + "Acc.stairs": 0.2756999969482422, + "Acc.runway": 0.8987000274658203, + "Acc.case": 0.6737000274658204, + "Acc.pool table": 0.9613999938964843, + "Acc.pillow": 0.5870999908447265, + "Acc.screen door": 0.6469999694824219, + "Acc.stairway": 0.4234999847412109, + "Acc.river": 0.2904999923706055, + "Acc.bridge": 0.8151999664306641, + "Acc.bookcase": 0.43220001220703125, + "Acc.blind": 0.1884000015258789, + "Acc.coffee table": 0.6045999908447266, + "Acc.toilet": 0.8325, + "Acc.flower": 0.41569999694824217, + "Acc.book": 0.6427999877929688, + "Acc.hill": 0.1284000015258789, + "Acc.bench": 0.4056999969482422, + "Acc.countertop": 0.7147000122070313, + "Acc.stove": 0.7387999725341797, + "Acc.palm": 0.5397000122070312, + "Acc.kitchen island": 0.6290999984741211, + "Acc.computer": 0.7241000366210938, + "Acc.swivel chair": 0.5515999984741211, + "Acc.boat": 0.7238999938964844, + "Acc.bar": 0.5418999862670898, + "Acc.arcade machine": 0.3636000061035156, + "Acc.hovel": 0.30209999084472655, + "Acc.bus": 0.8991000366210937, + "Acc.towel": 0.6901999664306641, + "Acc.light": 0.23969999313354493, + "Acc.truck": 0.13789999961853028, + "Acc.tower": 0.1981999969482422, + "Acc.chandelier": 0.7020999908447265, + "Acc.awning": 0.26309999465942385, + "Acc.streetlight": 0.11470000267028808, + "Acc.booth": 0.36889999389648437, + "Acc.television receiver": 0.7163999938964843, + "Acc.airplane": 0.6441000366210937, + "Acc.dirt track": 0.23719999313354492, + "Acc.apparel": 0.43529998779296875, + "Acc.pole": 0.24149999618530274, + "Acc.land": 0.09100000381469726, + "Acc.bannister": 0.14039999961853028, + "Acc.escalator": 0.24079999923706055, + "Acc.ottoman": 0.4693000030517578, + "Acc.bottle": 0.1534000015258789, + "Acc.buffet": 0.40380001068115234, + "Acc.poster": 0.29270000457763673, + "Acc.stage": 0.23940000534057618, + "Acc.van": 0.27540000915527346, + "Acc.ship": 0.7819000244140625, + "Acc.fountain": 0.019600000381469727, + "Acc.conveyer belt": 0.7116000366210937, + "Acc.canopy": 0.2375, + "Acc.washer": 0.670199966430664, + "Acc.plaything": 0.359900016784668, + "Acc.swimming pool": 0.7194999694824219, + "Acc.stool": 0.15350000381469728, + "Acc.barrel": 0.567400016784668, + "Acc.basket": 0.22180000305175782, + "Acc.waterfall": 0.7880000305175782, + "Acc.tent": 0.9766999816894532, + "Acc.bag": 0.1438000011444092, + "Acc.minibike": 0.5204999923706055, + "Acc.cradle": 0.9672000122070312, + "Acc.oven": 0.484900016784668, + "Acc.ball": 0.3752000045776367, + "Acc.food": 0.6322000122070313, + "Acc.step": 0.11539999961853027, + "Acc.tank": 0.4336999893188477, + "Acc.trade name": 0.15119999885559082, + "Acc.microwave": 0.3184000015258789, + "Acc.pot": 0.3365999984741211, + "Acc.animal": 0.6143000030517578, + "Acc.bicycle": 0.6352999877929687, + "Acc.lake": 0.06840000152587891, + "Acc.dishwasher": 0.6086999893188476, + "Acc.screen": 0.8251999664306641, + "Acc.blanket": 0.07699999809265137, + "Acc.sculpture": 0.5552999877929687, + "Acc.hood": 0.46549999237060546, + "Acc.sconce": 0.24959999084472656, + "Acc.vase": 0.3584999847412109, + "Acc.traffic light": 0.21610000610351562, + "Acc.tray": 0.042600002288818356, + "Acc.ashcan": 0.3431999969482422, + "Acc.fan": 0.2784000015258789, + "Acc.pier": 0.42130001068115236, + "Acc.crt screen": 0.00039999999105930326, + "Acc.plate": 0.3990000152587891, + "Acc.monitor": 0.028299999237060548, + "Acc.bulletin board": 0.2181999969482422, + "Acc.shower": 0.0225, + "Acc.radiator": 
0.5970999908447265, + "Acc.glass": 0.06219999790191651, + "Acc.clock": 0.23010000228881836, + "Acc.flag": 0.28989999771118163 + } + } +} \ No newline at end of file diff --git a/setr_naive_512x512_160k_b16_ade20k_snnetv2_deit3_s_l_lora_16_iter_160000.pth b/setr_naive_512x512_160k_b16_ade20k_snnetv2_deit3_s_l_lora_16_iter_160000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ef7ef9233093da8651f1e3fe1d1b9235d7ee5c5 --- /dev/null +++ b/setr_naive_512x512_160k_b16_ade20k_snnetv2_deit3_s_l_lora_16_iter_160000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:910f88a21d09140ff69ea6ed8b7ad794c1920134ab6dd73e2bc26f629ae851d2 +size 1658953368 diff --git a/tools/analysis_tools/analyze_logs.py b/tools/analysis_tools/analyze_logs.py new file mode 100644 index 0000000000000000000000000000000000000000..7464d231621b17249ce69f358479bbba42757362 --- /dev/null +++ b/tools/analysis_tools/analyze_logs.py @@ -0,0 +1,130 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Modified from https://github.com/open- +mmlab/mmdetection/blob/master/tools/analysis_tools/analyze_logs.py.""" +import argparse +import json +from collections import defaultdict + +import matplotlib.pyplot as plt +import seaborn as sns + + +def plot_curve(log_dicts, args): + if args.backend is not None: + plt.switch_backend(args.backend) + sns.set_style(args.style) + # if legend is None, use {filename}_{key} as legend + legend = args.legend + if legend is None: + legend = [] + for json_log in args.json_logs: + for metric in args.keys: + legend.append(f'{json_log}_{metric}') + assert len(legend) == (len(args.json_logs) * len(args.keys)) + metrics = args.keys + + num_metrics = len(metrics) + for i, log_dict in enumerate(log_dicts): + epochs = list(log_dict.keys()) + for j, metric in enumerate(metrics): + print(f'plot curve of {args.json_logs[i]}, metric is {metric}') + plot_epochs = [] + plot_iters = [] + plot_values = [] + # In some log files exist lines of validation, + # `mode` list is used to only collect iter number + # of training line. 
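+ # (in this version there is no `mode` list: mIoU/mAcc/aAcc entries are + # plotted per step below, while every other metric is plotted per iteration)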
+ for epoch in epochs: + epoch_logs = log_dict[epoch] + if metric not in epoch_logs.keys(): + continue + if metric in ['mIoU', 'mAcc', 'aAcc']: + plot_epochs.append(epoch) + plot_values.append(epoch_logs[metric][0]) + else: + for idx in range(len(epoch_logs[metric])): + plot_iters.append(epoch_logs['step'][idx]) + plot_values.append(epoch_logs[metric][idx]) + ax = plt.gca() + label = legend[i * num_metrics + j] + if metric in ['mIoU', 'mAcc', 'aAcc']: + ax.set_xticks(plot_epochs) + plt.xlabel('step') + plt.plot(plot_epochs, plot_values, label=label, marker='o') + else: + plt.xlabel('iter') + plt.plot(plot_iters, plot_values, label=label, linewidth=0.5) + plt.legend() + if args.title is not None: + plt.title(args.title) + if args.out is None: + plt.show() + else: + print(f'save curve to: {args.out}') + plt.savefig(args.out) + plt.cla() + + +def parse_args(): + parser = argparse.ArgumentParser(description='Analyze Json Log') + parser.add_argument( + 'json_logs', + type=str, + nargs='+', + help='path of train log in json format') + parser.add_argument( + '--keys', + type=str, + nargs='+', + default=['mIoU'], + help='the metric that you want to plot') + parser.add_argument('--title', type=str, help='title of figure') + parser.add_argument( + '--legend', + type=str, + nargs='+', + default=None, + help='legend of each plot') + parser.add_argument( + '--backend', type=str, default=None, help='backend of plt') + parser.add_argument( + '--style', type=str, default='dark', help='style of plt') + parser.add_argument('--out', type=str, default=None) + args = parser.parse_args() + return args + + +def load_json_logs(json_logs): + # load and convert json_logs to log_dict, key is step, value is a sub dict + # keys of sub dict is different metrics + # value of sub dict is a list of corresponding values of all iterations + log_dicts = [dict() for _ in json_logs] + prev_step = 0 + for json_log, log_dict in zip(json_logs, log_dicts): + with open(json_log) as log_file: + for line in log_file: + log = json.loads(line.strip()) + # the final step in json file is 0. + if 'step' in log and log['step'] != 0: + step = log['step'] + prev_step = step + else: + step = prev_step + if step not in log_dict: + log_dict[step] = defaultdict(list) + for k, v in log.items(): + log_dict[step][k].append(v) + return log_dicts + + +def main(): + args = parse_args() + json_logs = args.json_logs + for json_log in json_logs: + assert json_log.endswith('.json') + log_dicts = load_json_logs(json_logs) + plot_curve(log_dicts, args) + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/benchmark.py b/tools/analysis_tools/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..afaeabac85fa642b03c006b8a920c0d95d4cb400 --- /dev/null +++ b/tools/analysis_tools/benchmark.py @@ -0,0 +1,121 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
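+"""Benchmark the pure inference speed (img / s) of a segmentor. + +A sketch of the assumed invocation (config and checkpoint paths are +placeholders): + +    python tools/analysis_tools/benchmark.py CONFIG CHECKPOINT --repeat-times 3 + +Each run times 200 batches, skips the first 5 iterations as warm-up, and dumps +the average fps and its variance to a json file in the work directory. +"""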
+import argparse +import os.path as osp +import time + +import numpy as np +import torch +from mmengine import Config +from mmengine.fileio import dump +from mmengine.model.utils import revert_sync_batchnorm +from mmengine.registry import init_default_scope +from mmengine.runner import Runner, load_checkpoint +from mmengine.utils import mkdir_or_exist + +from mmseg.registry import MODELS + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMSeg benchmark a model') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--log-interval', type=int, default=50, help='interval of logging') + parser.add_argument( + '--work-dir', + help=('if specified, the results will be dumped ' + 'into the directory as json')) + parser.add_argument('--repeat-times', type=int, default=1) + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cfg = Config.fromfile(args.config) + + init_default_scope(cfg.get('default_scope', 'mmseg')) + + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + if args.work_dir is not None: + mkdir_or_exist(osp.abspath(args.work_dir)) + json_file = osp.join(args.work_dir, f'fps_{timestamp}.json') + else: + # use config filename as default work_dir if cfg.work_dir is None + work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + mkdir_or_exist(osp.abspath(work_dir)) + json_file = osp.join(work_dir, f'fps_{timestamp}.json') + + repeat_times = args.repeat_times + # set cudnn_benchmark + torch.backends.cudnn.benchmark = False + cfg.model.pretrained = None + + benchmark_dict = dict(config=args.config, unit='img / s') + overall_fps_list = [] + cfg.test_dataloader.batch_size = 1 + for time_index in range(repeat_times): + print(f'Run {time_index + 1}:') + # build the dataloader + data_loader = Runner.build_dataloader(cfg.test_dataloader) + + # build the model and load checkpoint + cfg.model.train_cfg = None + model = MODELS.build(cfg.model) + + if 'checkpoint' in args and osp.exists(args.checkpoint): + load_checkpoint(model, args.checkpoint, map_location='cpu') + + if torch.cuda.is_available(): + model = model.cuda() + + model = revert_sync_batchnorm(model) + + model.eval() + + # the first several iterations may be very slow so skip them + num_warmup = 5 + pure_inf_time = 0 + total_iters = 200 + + # benchmark with 200 batches and take the average + for i, data in enumerate(data_loader): + data = model.data_preprocessor(data, True) + inputs = data['inputs'] + data_samples = data['data_samples'] + if torch.cuda.is_available(): + torch.cuda.synchronize() + start_time = time.perf_counter() + + with torch.no_grad(): + model(inputs, data_samples, mode='predict') + + if torch.cuda.is_available(): + torch.cuda.synchronize() + elapsed = time.perf_counter() - start_time + + if i >= num_warmup: + pure_inf_time += elapsed + if (i + 1) % args.log_interval == 0: + fps = (i + 1 - num_warmup) / pure_inf_time + print(f'Done image [{i + 1:<3}/ {total_iters}], ' + f'fps: {fps:.2f} img / s') + + if (i + 1) == total_iters: + fps = (i + 1 - num_warmup) / pure_inf_time + print(f'Overall fps: {fps:.2f} img / s\n') + benchmark_dict[f'overall_fps_{time_index + 1}'] = round(fps, 2) + overall_fps_list.append(fps) + break + benchmark_dict['average_fps'] = round(np.mean(overall_fps_list), 2) + benchmark_dict['fps_variance'] = round(np.var(overall_fps_list), 4) + print(f'Average fps of {repeat_times} evaluations: ' + 
f'{benchmark_dict["average_fps"]}') + print(f'The variance of {repeat_times} evaluations: ' + f'{benchmark_dict["fps_variance"]}') + dump(benchmark_dict, json_file, indent=4) + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/browse_dataset.py b/tools/analysis_tools/browse_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..925c14a8ab63b4e38950b6c6af58e37dba002a4c --- /dev/null +++ b/tools/analysis_tools/browse_dataset.py @@ -0,0 +1,77 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +from mmengine.config import Config, DictAction +from mmengine.utils import ProgressBar + +from mmseg.registry import DATASETS, VISUALIZERS +from mmseg.utils import register_all_modules + + +def parse_args(): + parser = argparse.ArgumentParser(description='Browse a dataset') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--output-dir', + default=None, + type=str, + help='directory to save the visualizations when no display interface ' + 'is available') + parser.add_argument('--not-show', default=False, action='store_true') + parser.add_argument( + '--show-interval', + type=float, + default=2, + help='interval (in seconds) between displaying two images') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # register all modules in mmseg into the registries + register_all_modules() + + dataset = DATASETS.build(cfg.train_dataloader.dataset) + visualizer = VISUALIZERS.build(cfg.visualizer) + visualizer.dataset_meta = dataset.metainfo + + progress_bar = ProgressBar(len(dataset)) + for item in dataset: + img = item['inputs'].permute(1, 2, 0).numpy() + img = img[..., [2, 1, 0]] # bgr to rgb + data_sample = item['data_samples'].numpy() + img_path = osp.basename(item['data_samples'].img_path) + + out_file = osp.join( + args.output_dir, + osp.basename(img_path)) if args.output_dir is not None else None + + visualizer.add_datasample( + name=osp.basename(img_path), + image=img, + data_sample=data_sample, + draw_gt=True, + draw_pred=False, + wait_time=args.show_interval, + out_file=out_file, + show=not args.not_show) + progress_bar.update() + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/confusion_matrix.py b/tools/analysis_tools/confusion_matrix.py new file mode 100644 index 0000000000000000000000000000000000000000..39756cdfdd2341e7e02f9de24077da880b6021c3 --- /dev/null +++ b/tools/analysis_tools/confusion_matrix.py @@ -0,0 +1,197 @@ +# Copyright (c) OpenMMLab. All rights reserved.
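+"""Plot a normalized confusion matrix from saved segmentation results. + +A sketch of the assumed workflow (paths are placeholders): dump the per-image +predictions with the test script first, then run + +    python tools/analysis_tools/confusion_matrix.py CONFIG PRED_DIR SAVE_DIR --show +"""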
+import argparse +import os + +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.ticker import MultipleLocator +from mmengine.config import Config, DictAction +from mmengine.registry import init_default_scope +from mmengine.utils import mkdir_or_exist, progressbar +from PIL import Image + +from mmseg.registry import DATASETS + +init_default_scope('mmseg') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Generate confusion matrix from segmentation results') + parser.add_argument('config', help='test config file path') + parser.add_argument( + 'prediction_path', help='prediction path where test folder result') + parser.add_argument( + 'save_dir', help='directory where confusion matrix will be saved') + parser.add_argument( + '--show', action='store_true', help='show confusion matrix') + parser.add_argument( + '--color-theme', + default='winter', + help='theme of the matrix color map') + parser.add_argument( + '--title', + default='Normalized Confusion Matrix', + help='title of the matrix color map') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def calculate_confusion_matrix(dataset, results): + """Calculate the confusion matrix. + + Args: + dataset (Dataset): Test or val dataset. + results (list[ndarray]): A list of segmentation results in each image. + """ + n = len(dataset.METAINFO['classes']) + confusion_matrix = np.zeros(shape=[n, n]) + assert len(dataset) == len(results) + ignore_index = dataset.ignore_index + reduce_zero_label = dataset.reduce_zero_label + prog_bar = progressbar.ProgressBar(len(results)) + for idx, per_img_res in enumerate(results): + res_segm = per_img_res + gt_segm = dataset[idx]['data_samples'] \ + .gt_sem_seg.data.squeeze().numpy().astype(np.uint8) + gt_segm, res_segm = gt_segm.flatten(), res_segm.flatten() + if reduce_zero_label: + gt_segm = gt_segm - 1 + to_ignore = gt_segm == ignore_index + + gt_segm, res_segm = gt_segm[~to_ignore], res_segm[~to_ignore] + inds = n * gt_segm + res_segm + mat = np.bincount(inds, minlength=n**2).reshape(n, n) + confusion_matrix += mat + prog_bar.update() + return confusion_matrix + + +def plot_confusion_matrix(confusion_matrix, + labels, + save_dir=None, + show=True, + title='Normalized Confusion Matrix', + color_theme='OrRd'): + """Draw confusion matrix with matplotlib. + + Args: + confusion_matrix (ndarray): The confusion matrix. + labels (list[str]): List of class names. + save_dir (str|optional): If set, save the confusion matrix plot to the + given path. Default: None. + show (bool): Whether to show the plot. Default: True. + title (str): Title of the plot. Default: `Normalized Confusion Matrix`. + color_theme (str): Theme of the matrix color map. Default: `winter`. 
+ """ + # normalize the confusion matrix + per_label_sums = confusion_matrix.sum(axis=1)[:, np.newaxis] + confusion_matrix = \ + confusion_matrix.astype(np.float32) / per_label_sums * 100 + + num_classes = len(labels) + fig, ax = plt.subplots( + figsize=(2 * num_classes, 2 * num_classes * 0.8), dpi=300) + cmap = plt.get_cmap(color_theme) + im = ax.imshow(confusion_matrix, cmap=cmap) + colorbar = plt.colorbar(mappable=im, ax=ax) + colorbar.ax.tick_params(labelsize=20)  # set the font size of the colorbar tick labels + + title_font = {'weight': 'bold', 'size': 20} + ax.set_title(title, fontdict=title_font) + label_font = {'size': 40} + plt.ylabel('Ground Truth Label', fontdict=label_font) + plt.xlabel('Prediction Label', fontdict=label_font) + + # draw locator + xmajor_locator = MultipleLocator(1) + xminor_locator = MultipleLocator(0.5) + ax.xaxis.set_major_locator(xmajor_locator) + ax.xaxis.set_minor_locator(xminor_locator) + ymajor_locator = MultipleLocator(1) + yminor_locator = MultipleLocator(0.5) + ax.yaxis.set_major_locator(ymajor_locator) + ax.yaxis.set_minor_locator(yminor_locator) + + # draw grid + ax.grid(True, which='minor', linestyle='-') + + # draw label + ax.set_xticks(np.arange(num_classes)) + ax.set_yticks(np.arange(num_classes)) + ax.set_xticklabels(labels, fontsize=20) + ax.set_yticklabels(labels, fontsize=20) + + ax.tick_params( + axis='x', bottom=False, top=True, labelbottom=False, labeltop=True) + plt.setp( + ax.get_xticklabels(), rotation=45, ha='left', rotation_mode='anchor') + + # draw confusion matrix value + for i in range(num_classes): + for j in range(num_classes): + ax.text( + j, + i, + '{}%'.format( + round(confusion_matrix[i, j], 2 + ) if not np.isnan(confusion_matrix[i, j]) else -1), + ha='center', + va='center', + color='k', + size=20) + + ax.set_ylim(len(confusion_matrix) - 0.5, -0.5)  # matplotlib>3.1.1 + + fig.tight_layout() + if save_dir is not None: + mkdir_or_exist(save_dir) + plt.savefig( + os.path.join(save_dir, 'confusion_matrix.png'), format='png') + if show: + plt.show() + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + results = [] + for img in sorted(os.listdir(args.prediction_path)): + img = os.path.join(args.prediction_path, img) + image = Image.open(img) + image = np.copy(image) + results.append(image) + + assert isinstance(results, list) + if not isinstance(results[0], np.ndarray): + raise TypeError('invalid type of prediction results') + + dataset = DATASETS.build(cfg.test_dataloader.dataset) + confusion_matrix = calculate_confusion_matrix(dataset, results) + plot_confusion_matrix( + confusion_matrix, + dataset.METAINFO['classes'], + save_dir=args.save_dir, + show=args.show, + title=args.title, + color_theme=args.color_theme) + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/get_flops.py b/tools/analysis_tools/get_flops.py new file mode 100644 index 0000000000000000000000000000000000000000..66b2d52fcd2cb0f19066cfa4dfbfe13bc1e682e2 --- /dev/null +++ b/tools/analysis_tools/get_flops.py @@ -0,0 +1,124 @@ +# Copyright (c) OpenMMLab. All rights reserved.
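+"""Report the FLOPs and parameter count of a segmentor. + +A sketch of the assumed invocation (the config path is a placeholder): + +    python tools/analysis_tools/get_flops.py CONFIG --shape 512 512 + +The statistics are computed on a randomly generated input of the given shape, +so they are only an estimate for that resolution. +"""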
+import argparse +import tempfile +from pathlib import Path + +import torch +from mmengine import Config, DictAction +from mmengine.logging import MMLogger +from mmengine.model import revert_sync_batchnorm +from mmengine.registry import init_default_scope + +from mmseg.models import BaseSegmentor +from mmseg.registry import MODELS +from mmseg.structures import SegDataSample + +try: + from mmengine.analysis import get_model_complexity_info + from mmengine.analysis.print_helper import _format_size +except ImportError: + raise ImportError('Please upgrade mmengine >= 0.6.0 to use this script.') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Get the FLOPs of a segmentor') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[2048, 1024], + help='input image size') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def inference(args: argparse.Namespace, logger: MMLogger) -> dict: + config_name = Path(args.config) + + if not config_name.exists(): + logger.error(f'Config file {config_name} does not exist') + + cfg: Config = Config.fromfile(config_name) + cfg.work_dir = tempfile.TemporaryDirectory().name + cfg.log_level = 'WARN' + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + init_default_scope(cfg.get('scope', 'mmseg')) + + if len(args.shape) == 1: + input_shape = (3, args.shape[0], args.shape[0]) + elif len(args.shape) == 2: + input_shape = (3, ) + tuple(args.shape) + else: + raise ValueError('invalid input shape') + result = {} + + model: BaseSegmentor = MODELS.build(cfg.model) + if hasattr(model, 'auxiliary_head'): + model.auxiliary_head = None + if torch.cuda.is_available(): + model.cuda() + model = revert_sync_batchnorm(model) + result['ori_shape'] = input_shape[-2:] + result['pad_shape'] = input_shape[-2:] + data_batch = { + 'inputs': [torch.rand(input_shape)], + 'data_samples': [SegDataSample(metainfo=result)] + } + data = model.data_preprocessor(data_batch) + model.eval() + if cfg.model.decode_head.type in ['MaskFormerHead', 'Mask2FormerHead']: + # TODO: Support MaskFormer and Mask2Former + raise NotImplementedError('MaskFormer and Mask2Former are not ' + 'supported yet.') + outputs = get_model_complexity_info( + model, + input_shape, + inputs=data['inputs'], + show_table=False, + show_arch=False) + result['flops'] = _format_size(outputs['flops']) + result['params'] = _format_size(outputs['params']) + result['compute_type'] = 'direct: randomly generate a picture' + return result + + +def main(): + + args = parse_args() + logger = MMLogger.get_instance(name='MMLogger') + + result = inference(args, logger) + split_line = '=' * 30 + ori_shape = result['ori_shape'] + pad_shape = result['pad_shape'] + flops = result['flops'] + params = result['params'] + compute_type = result['compute_type'] + + if pad_shape != ori_shape: + print(f'{split_line}\nUse size divisor set input shape ' + f'from {ori_shape} to {pad_shape}') + print(f'{split_line}\nCompute type: {compute_type}\n' + f'Input shape: {pad_shape}\nFlops: 
{flops}\n' + f'Params: {params}\n{split_line}') + print('!!!Please be cautious if you use the results in papers. ' + 'You may need to check if all ops are supported and verify ' + 'that the flops computation is correct.') + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/get_flops_snnet.py b/tools/analysis_tools/get_flops_snnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6e3e42453018a420e2b16b49389d044876a87600 --- /dev/null +++ b/tools/analysis_tools/get_flops_snnet.py @@ -0,0 +1,120 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import tempfile +from pathlib import Path + +import torch +from mmengine import Config, DictAction +from mmengine.logging import MMLogger +from mmengine.model import revert_sync_batchnorm +from mmengine.registry import init_default_scope + +from mmseg.models import BaseSegmentor +from mmseg.registry import MODELS +from mmseg.structures import SegDataSample +import os +import json +try: + from mmengine.analysis import get_model_complexity_info + from mmengine.analysis.print_helper import _format_size +except ImportError: + raise ImportError('Please upgrade mmengine >= 0.6.0 to use this script.') + +from fvcore.nn import FlopCountAnalysis +def parse_args(): + parser = argparse.ArgumentParser( + description='Get the FLOPs of a segmentor') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[512, 512], + help='input image size') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def inference(args: argparse.Namespace, logger: MMLogger) -> dict: + config_name = Path(args.config) + + if not config_name.exists(): + logger.error(f'Config file {config_name} does not exist') + + cfg: Config = Config.fromfile(config_name) + cfg.work_dir = tempfile.TemporaryDirectory().name + cfg.log_level = 'WARN' + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + init_default_scope(cfg.get('scope', 'mmseg')) + + if len(args.shape) == 1: + input_shape = (3, args.shape[0], args.shape[0]) + elif len(args.shape) == 2: + input_shape = (3, ) + tuple(args.shape) + else: + raise ValueError('invalid input shape') + result = {} + + model: BaseSegmentor = MODELS.build(cfg.model) + if hasattr(model, 'auxiliary_head'): + model.auxiliary_head = None + if torch.cuda.is_available(): + model.cuda() + model = revert_sync_batchnorm(model) + result['ori_shape'] = input_shape[-2:] + result['pad_shape'] = input_shape[-2:] + data_batch = { + 'inputs': [torch.rand(input_shape)], + 'data_samples': [SegDataSample(metainfo=result)] + } + data = model.data_preprocessor(data_batch) + model.eval() + if cfg.model.decode_head.type in ['MaskFormerHead', 'Mask2FormerHead']: + # TODO: Support MaskFormer and Mask2Former + raise NotImplementedError('MaskFormer and Mask2Former are not ' + 'supported yet.') + + if hasattr(model, 'module'): + all_cfgs = model.module.backbone.all_cfgs + else: + all_cfgs = model.backbone.all_cfgs + stitch_results = {} + + for cfg_id in all_cfgs: + if hasattr(model, 'module'): + model.module.backbone.reset_stitch_id(cfg_id) + else: + model.backbone.reset_stitch_id(cfg_id) + flops = FlopCountAnalysis(model, torch.randn([1]+list(input_shape)).cuda()).total() + stitch_results[cfg_id] = flops + + + save_dir = './model_flops' + if not os.path.exists(save_dir): + os.mkdir(save_dir) + config_name = args.config.split('/')[-1].split('.')[0] + with open(os.path.join(save_dir, f'snnet_flops_{config_name}.json'), 'w+') as f: + json.dump(stitch_results, f, indent=4) + +def main(): + + args = parse_args() + logger = MMLogger.get_instance(name='MMLogger') + + inference(args, logger) + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/visualization_cam.py b/tools/analysis_tools/visualization_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..00cdb3e04ab1f9000844ace781bc138f230d4630 --- /dev/null +++ b/tools/analysis_tools/visualization_cam.py @@ -0,0 +1,127 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Use the pytorch-grad-cam tool to visualize Class Activation Maps (CAM). + +requirement: pip install grad-cam +""" + +from argparse import ArgumentParser + +import numpy as np +import torch +import torch.nn.functional as F +from mmengine import Config +from mmengine.model import revert_sync_batchnorm +from PIL import Image +from pytorch_grad_cam import GradCAM +from pytorch_grad_cam.utils.image import preprocess_image, show_cam_on_image + +from mmseg.apis import inference_model, init_model, show_result_pyplot +from mmseg.utils import register_all_modules + + +class SemanticSegmentationTarget: + """wrap the model. + + requirement: pip install grad-cam + + Args: + category (int): Visualization class. + mask (ndarray): Mask of class. + size (tuple): Image size. 
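+ + Example (illustrative values; `mask` is a float ndarray of shape `size`): + + >>> target = SemanticSegmentationTarget(7, mask, (512, 683)) + >>> score = target(model_output)  # scalar that Grad-CAM backpropagates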
+ """ + + def __init__(self, category, mask, size): + self.category = category + self.mask = torch.from_numpy(mask) + self.size = size + if torch.cuda.is_available(): + self.mask = self.mask.cuda() + + def __call__(self, model_output): + model_output = torch.unsqueeze(model_output, dim=0) + model_output = F.interpolate( + model_output, size=self.size, mode='bilinear') + model_output = torch.squeeze(model_output, dim=0) + + return (model_output[self.category, :, :] * self.mask).sum() + + +def main(): + parser = ArgumentParser() + parser.add_argument('img', help='Image file') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument( + '--out-file', + default='prediction.png', + help='Path to output prediction file') + parser.add_argument( + '--cam-file', default='vis_cam.png', help='Path to output cam file') + parser.add_argument( + '--target-layers', + default='backbone.layer4[2]', + help='Target layers to visualize CAM') + parser.add_argument( + '--category-index', default='7', help='Category to visualize CAM') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + args = parser.parse_args() + + # build the model from a config file and a checkpoint file + register_all_modules() + model = init_model(args.config, args.checkpoint, device=args.device) + if args.device == 'cpu': + model = revert_sync_batchnorm(model) + + # test a single image + result = inference_model(model, args.img) + + # show the results + show_result_pyplot( + model, + args.img, + result, + draw_gt=False, + show=False if args.out_file is not None else True, + out_file=args.out_file) + + # result data conversion + prediction_data = result.pred_sem_seg.data + pre_np_data = prediction_data.cpu().numpy().squeeze(0) + + target_layers = args.target_layers + target_layers = [eval(f'model.{target_layers}')] + + category = int(args.category_index) + mask_float = np.float32(pre_np_data == category) + + # data processing + image = np.array(Image.open(args.img).convert('RGB')) + height, width = image.shape[0], image.shape[1] + rgb_img = np.float32(image) / 255 + config = Config.fromfile(args.config) + image_mean = config.data_preprocessor['mean'] + image_std = config.data_preprocessor['std'] + input_tensor = preprocess_image( + rgb_img, + mean=[x / 255 for x in image_mean], + std=[x / 255 for x in image_std]) + + # Grad CAM(Class Activation Maps) + # Can also be LayerCAM, XGradCAM, GradCAMPlusPlus, EigenCAM, EigenGradCAM + targets = [ + SemanticSegmentationTarget(category, mask_float, (height, width)) + ] + with GradCAM( + model=model, + target_layers=target_layers, + use_cuda=torch.cuda.is_available()) as cam: + grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :] + cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True) + + # save cam file + Image.fromarray(cam_image).save(args.cam_file) + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/chase_db1.py b/tools/dataset_converters/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..f4fefbd77435c5745d290269cd00f67fda604455 --- /dev/null +++ b/tools/dataset_converters/chase_db1.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
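+"""Convert the CHASE_DB1 dataset to the mmsegmentation folder layout. + +A sketch of the assumed invocation (the zip path is a placeholder): + +    python tools/dataset_converters/chase_db1.py /path/to/CHASEDB1.zip + +The first 60 sorted files form the training split and the rest the validation +split; annotation masks are binarised with the 128 threshold explained below. +"""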
+import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv +from mmengine.utils import mkdir_or_exist + +CHASE_DB1_LEN = 28 * 3 +TRAINING_LEN = 60 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert CHASE_DB1 dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='path of CHASEDB1.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + if args.out_dir is None: + out_dir = osp.join('data', 'CHASE_DB1') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + print('Extracting CHASEDB1.zip...') + zip_file = zipfile.ZipFile(dataset_path) + zip_file.extractall(tmp_dir) + + print('Generating training dataset...') + + assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \ + f'len(os.listdir(tmp_dir)) != {CHASE_DB1_LEN}' + + for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(img_name)[0] + '.png')) + else: + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(img_name)[0] + '.png')) + + for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(img_name)[0] + '.png')) + else: + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/cityscapes.py b/tools/dataset_converters/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0d6a80135d906db7330a736ccbcc908e0a6309c6 --- /dev/null +++ b/tools/dataset_converters/cityscapes.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import os.path as osp + +from cityscapesscripts.preparation.json2labelImg import json2labelImg +from mmengine.utils import (mkdir_or_exist, scandir, track_parallel_progress, + track_progress) + + +def convert_json_to_label(json_file): + label_file = json_file.replace('_polygons.json', '_labelTrainIds.png') + json2labelImg(json_file, label_file, 'trainIds') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert Cityscapes annotations to TrainIds') + parser.add_argument('cityscapes_path', help='cityscapes data path') + parser.add_argument('--gt-dir', default='gtFine', type=str) + parser.add_argument('-o', '--out-dir', help='output path') + parser.add_argument( + '--nproc', default=1, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cityscapes_path = args.cityscapes_path + out_dir = args.out_dir if args.out_dir else cityscapes_path + mkdir_or_exist(out_dir) + + gt_dir = osp.join(cityscapes_path, args.gt_dir) + + poly_files = [] + for poly in scandir(gt_dir, '_polygons.json', recursive=True): + poly_file = osp.join(gt_dir, poly) + poly_files.append(poly_file) + if args.nproc > 1: + track_parallel_progress(convert_json_to_label, poly_files, args.nproc) + else: + track_progress(convert_json_to_label, poly_files) + + split_names = ['train', 'val', 'test'] + + for split in split_names: + filenames = [] + for poly in scandir( + osp.join(gt_dir, split), '_polygons.json', recursive=True): + filenames.append(poly.replace('_gtFine_polygons.json', '')) + with open(osp.join(out_dir, f'{split}.txt'), 'w') as f: + f.writelines(f + '\n' for f in filenames) + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/coco_stuff10k.py b/tools/dataset_converters/coco_stuff10k.py new file mode 100644 index 0000000000000000000000000000000000000000..920127ee10fc09b76f8e2344ecdf3b7800d51802 --- /dev/null +++ b/tools/dataset_converters/coco_stuff10k.py @@ -0,0 +1,308 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import os.path as osp +import shutil +from functools import partial + +import numpy as np +from mmengine.utils import (mkdir_or_exist, track_parallel_progress, + track_progress) +from PIL import Image +from scipy.io import loadmat + +COCO_LEN = 10000 + +clsID_to_trID = { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 11: 11, + 13: 12, + 14: 13, + 15: 14, + 16: 15, + 17: 16, + 18: 17, + 19: 18, + 20: 19, + 21: 20, + 22: 21, + 23: 22, + 24: 23, + 25: 24, + 27: 25, + 28: 26, + 31: 27, + 32: 28, + 33: 29, + 34: 30, + 35: 31, + 36: 32, + 37: 33, + 38: 34, + 39: 35, + 40: 36, + 41: 37, + 42: 38, + 43: 39, + 44: 40, + 46: 41, + 47: 42, + 48: 43, + 49: 44, + 50: 45, + 51: 46, + 52: 47, + 53: 48, + 54: 49, + 55: 50, + 56: 51, + 57: 52, + 58: 53, + 59: 54, + 60: 55, + 61: 56, + 62: 57, + 63: 58, + 64: 59, + 65: 60, + 67: 61, + 70: 62, + 72: 63, + 73: 64, + 74: 65, + 75: 66, + 76: 67, + 77: 68, + 78: 69, + 79: 70, + 80: 71, + 81: 72, + 82: 73, + 84: 74, + 85: 75, + 86: 76, + 87: 77, + 88: 78, + 89: 79, + 90: 80, + 92: 81, + 93: 82, + 94: 83, + 95: 84, + 96: 85, + 97: 86, + 98: 87, + 99: 88, + 100: 89, + 101: 90, + 102: 91, + 103: 92, + 104: 93, + 105: 94, + 106: 95, + 107: 96, + 108: 97, + 109: 98, + 110: 99, + 111: 100, + 112: 101, + 113: 102, + 114: 103, + 115: 104, + 116: 105, + 117: 106, + 118: 107, + 119: 108, + 120: 109, + 121: 110, + 122: 111, + 123: 112, + 124: 113, + 125: 114, + 126: 115, + 127: 116, + 128: 117, + 129: 118, + 130: 119, + 131: 120, + 132: 121, + 133: 122, + 134: 123, + 135: 124, + 136: 125, + 137: 126, + 138: 127, + 139: 128, + 140: 129, + 141: 130, + 142: 131, + 143: 132, + 144: 133, + 145: 134, + 146: 135, + 147: 136, + 148: 137, + 149: 138, + 150: 139, + 151: 140, + 152: 141, + 153: 142, + 154: 143, + 155: 144, + 156: 145, + 157: 146, + 158: 147, + 159: 148, + 160: 149, + 161: 150, + 162: 151, + 163: 152, + 164: 153, + 165: 154, + 166: 155, + 167: 156, + 168: 157, + 169: 158, + 170: 159, + 171: 160, + 172: 161, + 173: 162, + 174: 163, + 175: 164, + 176: 165, + 177: 166, + 178: 167, + 179: 168, + 180: 169, + 181: 170, + 182: 171 +} + + +def convert_to_trainID(tuple_path, in_img_dir, in_ann_dir, out_img_dir, + out_mask_dir, is_train): + imgpath, maskpath = tuple_path + shutil.copyfile( + osp.join(in_img_dir, imgpath), + osp.join(out_img_dir, 'train2014', imgpath) if is_train else osp.join( + out_img_dir, 'test2014', imgpath)) + annotate = loadmat(osp.join(in_ann_dir, maskpath)) + mask = annotate['S'].astype(np.uint8) + mask_copy = mask.copy() + for clsID, trID in clsID_to_trID.items(): + mask_copy[mask == clsID] = trID + seg_filename = osp.join(out_mask_dir, 'train2014', + maskpath.split('.')[0] + + '_labelTrainIds.png') if is_train else osp.join( + out_mask_dir, 'test2014', + maskpath.split('.')[0] + '_labelTrainIds.png') + Image.fromarray(mask_copy).save(seg_filename, 'PNG') + + +def generate_coco_list(folder): + train_list = osp.join(folder, 'imageLists', 'train.txt') + test_list = osp.join(folder, 'imageLists', 'test.txt') + train_paths = [] + test_paths = [] + + with open(train_list) as f: + for filename in f: + basename = filename.strip() + imgpath = basename + '.jpg' + maskpath = basename + '.mat' + train_paths.append((imgpath, maskpath)) + + with open(test_list) as f: + for filename in f: + basename = filename.strip() + imgpath = basename + '.jpg' + maskpath = basename + '.mat' + test_paths.append((imgpath, maskpath)) + + return train_paths, test_paths + + +def parse_args(): + parser = 
argparse.ArgumentParser( + description=\ + 'Convert COCO Stuff 10k annotations to mmsegmentation format') # noqa + parser.add_argument('coco_path', help='coco stuff path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=16, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + coco_path = args.coco_path + nproc = args.nproc + + out_dir = args.out_dir or coco_path + out_img_dir = osp.join(out_dir, 'images') + out_mask_dir = osp.join(out_dir, 'annotations') + + mkdir_or_exist(osp.join(out_img_dir, 'train2014')) + mkdir_or_exist(osp.join(out_img_dir, 'test2014')) + mkdir_or_exist(osp.join(out_mask_dir, 'train2014')) + mkdir_or_exist(osp.join(out_mask_dir, 'test2014')) + + train_list, test_list = generate_coco_list(coco_path) + assert (len(train_list) + + len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format( + len(train_list), len(test_list)) + + if args.nproc > 1: + track_parallel_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=True), + train_list, + nproc=nproc) + track_parallel_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=False), + test_list, + nproc=nproc) + else: + track_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=True), train_list) + track_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=False), test_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/coco_stuff164k.py b/tools/dataset_converters/coco_stuff164k.py new file mode 100644 index 0000000000000000000000000000000000000000..a13114ab1e0c37675369b2e9ba065cbfb2dca1e7 --- /dev/null +++ b/tools/dataset_converters/coco_stuff164k.py @@ -0,0 +1,265 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
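+"""Convert COCO-Stuff 164k annotations to *_labelTrainIds.png masks. + +A sketch of the assumed invocation (the data root is a placeholder): + +    python tools/dataset_converters/coco_stuff164k.py data/coco_stuff164k --nproc 8 +"""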
+import argparse +import os.path as osp +import shutil +from functools import partial +from glob import glob + +import numpy as np +from mmengine.utils import (mkdir_or_exist, track_parallel_progress, + track_progress) +from PIL import Image + +COCO_LEN = 123287 + +clsID_to_trID = { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 12: 11, + 13: 12, + 14: 13, + 15: 14, + 16: 15, + 17: 16, + 18: 17, + 19: 18, + 20: 19, + 21: 20, + 22: 21, + 23: 22, + 24: 23, + 26: 24, + 27: 25, + 30: 26, + 31: 27, + 32: 28, + 33: 29, + 34: 30, + 35: 31, + 36: 32, + 37: 33, + 38: 34, + 39: 35, + 40: 36, + 41: 37, + 42: 38, + 43: 39, + 45: 40, + 46: 41, + 47: 42, + 48: 43, + 49: 44, + 50: 45, + 51: 46, + 52: 47, + 53: 48, + 54: 49, + 55: 50, + 56: 51, + 57: 52, + 58: 53, + 59: 54, + 60: 55, + 61: 56, + 62: 57, + 63: 58, + 64: 59, + 66: 60, + 69: 61, + 71: 62, + 72: 63, + 73: 64, + 74: 65, + 75: 66, + 76: 67, + 77: 68, + 78: 69, + 79: 70, + 80: 71, + 81: 72, + 83: 73, + 84: 74, + 85: 75, + 86: 76, + 87: 77, + 88: 78, + 89: 79, + 91: 80, + 92: 81, + 93: 82, + 94: 83, + 95: 84, + 96: 85, + 97: 86, + 98: 87, + 99: 88, + 100: 89, + 101: 90, + 102: 91, + 103: 92, + 104: 93, + 105: 94, + 106: 95, + 107: 96, + 108: 97, + 109: 98, + 110: 99, + 111: 100, + 112: 101, + 113: 102, + 114: 103, + 115: 104, + 116: 105, + 117: 106, + 118: 107, + 119: 108, + 120: 109, + 121: 110, + 122: 111, + 123: 112, + 124: 113, + 125: 114, + 126: 115, + 127: 116, + 128: 117, + 129: 118, + 130: 119, + 131: 120, + 132: 121, + 133: 122, + 134: 123, + 135: 124, + 136: 125, + 137: 126, + 138: 127, + 139: 128, + 140: 129, + 141: 130, + 142: 131, + 143: 132, + 144: 133, + 145: 134, + 146: 135, + 147: 136, + 148: 137, + 149: 138, + 150: 139, + 151: 140, + 152: 141, + 153: 142, + 154: 143, + 155: 144, + 156: 145, + 157: 146, + 158: 147, + 159: 148, + 160: 149, + 161: 150, + 162: 151, + 163: 152, + 164: 153, + 165: 154, + 166: 155, + 167: 156, + 168: 157, + 169: 158, + 170: 159, + 171: 160, + 172: 161, + 173: 162, + 174: 163, + 175: 164, + 176: 165, + 177: 166, + 178: 167, + 179: 168, + 180: 169, + 181: 170, + 255: 255 +} + + +def convert_to_trainID(maskpath, out_mask_dir, is_train): + mask = np.array(Image.open(maskpath)) + mask_copy = mask.copy() + for clsID, trID in clsID_to_trID.items(): + mask_copy[mask == clsID] = trID + seg_filename = osp.join( + out_mask_dir, 'train2017', + osp.basename(maskpath).split('.')[0] + + '_labelTrainIds.png') if is_train else osp.join( + out_mask_dir, 'val2017', + osp.basename(maskpath).split('.')[0] + '_labelTrainIds.png') + Image.fromarray(mask_copy).save(seg_filename, 'PNG') + + +def parse_args(): + parser = argparse.ArgumentParser( + description=\ + 'Convert COCO Stuff 164k annotations to mmsegmentation format') # noqa + parser.add_argument('coco_path', help='coco stuff path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=16, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + coco_path = args.coco_path + nproc = args.nproc + + out_dir = args.out_dir or coco_path + out_img_dir = osp.join(out_dir, 'images') + out_mask_dir = osp.join(out_dir, 'annotations') + + mkdir_or_exist(osp.join(out_mask_dir, 'train2017')) + mkdir_or_exist(osp.join(out_mask_dir, 'val2017')) + + if out_dir != coco_path: + shutil.copytree(osp.join(coco_path, 'images'), out_img_dir) + + train_list = glob(osp.join(coco_path, 'annotations', 'train2017', '*.png')) + train_list = 
[file for file in train_list if '_labelTrainIds' not in file] + test_list = glob(osp.join(coco_path, 'annotations', 'val2017', '*.png')) + test_list = [file for file in test_list if '_labelTrainIds' not in file] + assert (len(train_list) + + len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format( + len(train_list), len(test_list)) + + if args.nproc > 1: + track_parallel_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True), + train_list, + nproc=nproc) + track_parallel_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False), + test_list, + nproc=nproc) + else: + track_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True), + train_list) + track_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False), + test_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/drive.py b/tools/dataset_converters/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..076fd05a2029216e0f1a1494610181fdaa7fbef9 --- /dev/null +++ b/tools/dataset_converters/drive.py @@ -0,0 +1,114 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import cv2 +import mmcv +from mmengine.utils import mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert DRIVE dataset to mmsegmentation format') + parser.add_argument( + 'training_path', help='the training part of DRIVE dataset') + parser.add_argument( + 'testing_path', help='the testing part of DRIVE dataset') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + training_path = args.training_path + testing_path = args.testing_path + if args.out_dir is None: + out_dir = osp.join('data', 'DRIVE') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + print('Extracting training.zip...') + zip_file = zipfile.ZipFile(training_path) + zip_file.extractall(tmp_dir) + + print('Generating training dataset...') + now_dir = osp.join(tmp_dir, 'training', 'images') + for img_name in os.listdir(now_dir): + img = mmcv.imread(osp.join(now_dir, img_name)) + mmcv.imwrite( + img, + osp.join( + out_dir, 'images', 'training', + osp.splitext(img_name)[0].replace('_training', '') + + '.png')) + + now_dir = osp.join(tmp_dir, 'training', '1st_manual') + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(img_name)[0] + '.png')) + + print('Extracting test.zip...') + zip_file = zipfile.ZipFile(testing_path) + zip_file.extractall(tmp_dir) + + print('Generating validation dataset...') + now_dir = osp.join(tmp_dir, 'test', 'images') + for img_name in os.listdir(now_dir): + img = mmcv.imread(osp.join(now_dir, 
img_name)) + mmcv.imwrite( + img, + osp.join( + out_dir, 'images', 'validation', + osp.splitext(img_name)[0].replace('_test', '') + '.png')) + + now_dir = osp.join(tmp_dir, 'test', '1st_manual') + if osp.exists(now_dir): + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + now_dir = osp.join(tmp_dir, 'test', '2nd_manual') + if osp.exists(now_dir): + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/hrf.py b/tools/dataset_converters/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfd80c9ee42e3b5cba4a12a6c8b32ddbb2f1f11 --- /dev/null +++ b/tools/dataset_converters/hrf.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv +from mmengine.utils import mkdir_or_exist + +HRF_LEN = 15 +TRAINING_LEN = 5 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert HRF dataset to mmsegmentation format') + parser.add_argument('healthy_path', help='the path of healthy.zip') + parser.add_argument( + 'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip') + parser.add_argument('glaucoma_path', help='the path of glaucoma.zip') + parser.add_argument( + 'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip') + parser.add_argument( + 'diabetic_retinopathy_path', + help='the path of diabetic_retinopathy.zip') + parser.add_argument( + 'diabetic_retinopathy_manualsegm_path', + help='the path of diabetic_retinopathy_manualsegm.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + images_path = [ + args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path + ] + annotations_path = [ + args.healthy_manualsegm_path, args.glaucoma_manualsegm_path, + args.diabetic_retinopathy_manualsegm_path + ] + if args.out_dir is None: + out_dir = osp.join('data', 'HRF') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + print('Generating images...') + for now_path in images_path: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(now_path) + zip_file.extractall(tmp_dir) + + assert len(os.listdir(tmp_dir)) == HRF_LEN, \ + f'len(os.listdir(tmp_dir)) != {HRF_LEN}' 
+ + for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(filename)[0] + '.png')) + for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Generating annotations...') + for now_path in annotations_path: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(now_path) + zip_file.extractall(tmp_dir) + + assert len(os.listdir(tmp_dir)) == HRF_LEN, \ + f'len(os.listdir(tmp_dir)) != {HRF_LEN}' + + for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.png')) + for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/isaid.py b/tools/dataset_converters/isaid.py new file mode 100644 index 0000000000000000000000000000000000000000..1d5ccd9c776e9621c261e6d168bf6aa4f7b451f6 --- /dev/null +++ b/tools/dataset_converters/isaid.py @@ -0,0 +1,246 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
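+#
+# This converter slide-crops the huge iSAID tiles into fixed-size patches.
+# Crop windows start every (patch size - overlap) pixels, and a window that
+# would run past the image border is shifted back so that it ends exactly at
+# the border. For example, with patch_W=896 and overlap=384 on an image
+# 2000 pixels wide, the x starts are 0, 512, 1024 and 1536, and the last
+# window is shifted to cover [1104, 2000). RGB masks are mapped to grayscale
+# labels via the iSAID palette before cropping.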
+import argparse +import glob +import os +import os.path as osp +import shutil +import tempfile +import zipfile + +import mmcv +import numpy as np +from mmengine.utils import ProgressBar, mkdir_or_exist +from PIL import Image + +iSAID_palette = \ + { + 0: (0, 0, 0), + 1: (0, 0, 63), + 2: (0, 63, 63), + 3: (0, 63, 0), + 4: (0, 63, 127), + 5: (0, 63, 191), + 6: (0, 63, 255), + 7: (0, 127, 63), + 8: (0, 127, 127), + 9: (0, 0, 127), + 10: (0, 0, 191), + 11: (0, 0, 255), + 12: (0, 191, 127), + 13: (0, 127, 191), + 14: (0, 127, 255), + 15: (0, 100, 155) + } + +iSAID_invert_palette = {v: k for k, v in iSAID_palette.items()} + + +def iSAID_convert_from_color(arr_3d, palette=iSAID_invert_palette): + """RGB-color encoding to grayscale labels.""" + arr_2d = np.zeros((arr_3d.shape[0], arr_3d.shape[1]), dtype=np.uint8) + + for c, i in palette.items(): + m = np.all(arr_3d == np.array(c).reshape(1, 1, 3), axis=2) + arr_2d[m] = i + + return arr_2d + + +def slide_crop_image(src_path, out_dir, mode, patch_H, patch_W, overlap): + img = np.asarray(Image.open(src_path).convert('RGB')) + + img_H, img_W, _ = img.shape + + if img_H < patch_H and img_W > patch_W: + + img = mmcv.impad(img, shape=(patch_H, img_W), pad_val=0) + + img_H, img_W, _ = img.shape + + elif img_H > patch_H and img_W < patch_W: + + img = mmcv.impad(img, shape=(img_H, patch_W), pad_val=0) + + img_H, img_W, _ = img.shape + + elif img_H < patch_H and img_W < patch_W: + + img = mmcv.impad(img, shape=(patch_H, patch_W), pad_val=0) + + img_H, img_W, _ = img.shape + + for x in range(0, img_W, patch_W - overlap): + for y in range(0, img_H, patch_H - overlap): + x_str = x + x_end = x + patch_W + if x_end > img_W: + diff_x = x_end - img_W + x_str -= diff_x + x_end = img_W + y_str = y + y_end = y + patch_H + if y_end > img_H: + diff_y = y_end - img_H + y_str -= diff_y + y_end = img_H + + img_patch = img[y_str:y_end, x_str:x_end, :] + img_patch = Image.fromarray(img_patch.astype(np.uint8)) + image = osp.basename(src_path).split('.')[0] + '_' + str( + y_str) + '_' + str(y_end) + '_' + str(x_str) + '_' + str( + x_end) + '.png' + # print(image) + save_path_image = osp.join(out_dir, 'img_dir', mode, str(image)) + img_patch.save(save_path_image, format='BMP') + + +def slide_crop_label(src_path, out_dir, mode, patch_H, patch_W, overlap): + label = mmcv.imread(src_path, channel_order='rgb') + label = iSAID_convert_from_color(label) + img_H, img_W = label.shape + + if img_H < patch_H and img_W > patch_W: + + label = mmcv.impad(label, shape=(patch_H, img_W), pad_val=255) + + img_H = patch_H + + elif img_H > patch_H and img_W < patch_W: + + label = mmcv.impad(label, shape=(img_H, patch_W), pad_val=255) + + img_W = patch_W + + elif img_H < patch_H and img_W < patch_W: + + label = mmcv.impad(label, shape=(patch_H, patch_W), pad_val=255) + + img_H = patch_H + img_W = patch_W + + for x in range(0, img_W, patch_W - overlap): + for y in range(0, img_H, patch_H - overlap): + x_str = x + x_end = x + patch_W + if x_end > img_W: + diff_x = x_end - img_W + x_str -= diff_x + x_end = img_W + y_str = y + y_end = y + patch_H + if y_end > img_H: + diff_y = y_end - img_H + y_str -= diff_y + y_end = img_H + + lab_patch = label[y_str:y_end, x_str:x_end] + lab_patch = Image.fromarray(lab_patch.astype(np.uint8), mode='P') + + image = osp.basename(src_path).split('.')[0].split( + '_')[0] + '_' + str(y_str) + '_' + str(y_end) + '_' + str( + x_str) + '_' + str(x_end) + '_instance_color_RGB' + '.png' + lab_patch.save(osp.join(out_dir, 'ann_dir', mode, str(image))) + + +def parse_args(): + 
parser = argparse.ArgumentParser( + description='Convert iSAID dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='iSAID folder path') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + + parser.add_argument( + '--patch_width', + default=896, + type=int, + help='Width of the cropped image patch') + parser.add_argument( + '--patch_height', + default=896, + type=int, + help='Height of the cropped image patch') + parser.add_argument( + '--overlap_area', default=384, type=int, help='Overlap area') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + # image patch width and height + patch_H, patch_W = args.patch_width, args.patch_height + + overlap = args.overlap_area # overlap area + + if args.out_dir is None: + out_dir = osp.join('data', 'iSAID') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test')) + + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'test')) + + assert os.path.exists(os.path.join(dataset_path, 'train')), \ + f'train is not in {dataset_path}' + assert os.path.exists(os.path.join(dataset_path, 'val')), \ + f'val is not in {dataset_path}' + assert os.path.exists(os.path.join(dataset_path, 'test')), \ + f'test is not in {dataset_path}' + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for dataset_mode in ['train', 'val', 'test']: + + # for dataset_mode in [ 'test']: + print(f'Extracting {dataset_mode}ing.zip...') + img_zipp_list = glob.glob( + os.path.join(dataset_path, dataset_mode, 'images', '*.zip')) + print('Find the data', img_zipp_list) + for img_zipp in img_zipp_list: + zip_file = zipfile.ZipFile(img_zipp) + zip_file.extractall(os.path.join(tmp_dir, dataset_mode, 'img')) + src_path_list = glob.glob( + os.path.join(tmp_dir, dataset_mode, 'img', 'images', '*.png')) + + src_prog_bar = ProgressBar(len(src_path_list)) + for i, img_path in enumerate(src_path_list): + if dataset_mode != 'test': + slide_crop_image(img_path, out_dir, dataset_mode, patch_H, + patch_W, overlap) + + else: + shutil.move(img_path, + os.path.join(out_dir, 'img_dir', dataset_mode)) + src_prog_bar.update() + + if dataset_mode != 'test': + label_zipp_list = glob.glob( + os.path.join(dataset_path, dataset_mode, 'Semantic_masks', + '*.zip')) + for label_zipp in label_zipp_list: + zip_file = zipfile.ZipFile(label_zipp) + zip_file.extractall( + os.path.join(tmp_dir, dataset_mode, 'lab')) + + lab_path_list = glob.glob( + os.path.join(tmp_dir, dataset_mode, 'lab', 'images', + '*.png')) + lab_prog_bar = ProgressBar(len(lab_path_list)) + for i, lab_path in enumerate(lab_path_list): + slide_crop_label(lab_path, out_dir, dataset_mode, patch_H, + patch_W, overlap) + lab_prog_bar.update() + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/levircd.py b/tools/dataset_converters/levircd.py new file mode 100644 index 0000000000000000000000000000000000000000..8717f3e856ba3f171b511f34d0217e1fda87ccb6 --- /dev/null +++ b/tools/dataset_converters/levircd.py @@ -0,0 +1,99 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
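+#
+# LEVIR-CD provides 1024x1024 image pairs. Every PNG found recursively under
+# --dataset_path is clipped into clip_size x clip_size patches laid out on a
+# clip_size grid (with the defaults of 256 this yields 16 non-overlapping
+# tiles per image). Files whose path contains 'label' are treated as
+# annotations: pixel value 255 (change) is remapped to 1 and only the first
+# channel is kept.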
+import argparse
+import glob
+import math
+import os
+import os.path as osp
+
+import mmcv
+import numpy as np
+from mmengine.utils import ProgressBar
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert levir-cd dataset to mmsegmentation format')
+    parser.add_argument('--dataset_path', help='LEVIR-CD folder path')
+    parser.add_argument('-o', '--out_dir', help='output path')
+    parser.add_argument(
+        '--clip_size',
+        type=int,
+        help='clipped size of image after preparation',
+        default=256)
+    parser.add_argument(
+        '--stride_size',
+        type=int,
+        help='stride of clipping original images',
+        default=256)
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    input_folder = args.dataset_path
+    png_files = glob.glob(
+        os.path.join(input_folder, '**/*.png'), recursive=True)
+    output_folder = args.out_dir
+    prog_bar = ProgressBar(len(png_files))
+    for png_file in png_files:
+        new_path = os.path.join(
+            output_folder,
+            os.path.relpath(os.path.dirname(png_file), input_folder))
+        os.makedirs(os.path.dirname(new_path), exist_ok=True)
+        label = False
+        if 'label' in png_file:
+            label = True
+        clip_big_image(png_file, new_path, args, label)
+        prog_bar.update()
+
+
+def clip_big_image(image_path, clip_save_dir, args, to_label=False):
+    image = mmcv.imread(image_path)
+
+    h, w, c = image.shape
+    clip_size = args.clip_size
+    stride_size = args.stride_size
+
+    num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil(
+        (h - clip_size) /
+        stride_size) * stride_size + clip_size >= h else math.ceil(
+            (h - clip_size) / stride_size) + 1
+    num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil(
+        (w - clip_size) /
+        stride_size) * stride_size + clip_size >= w else math.ceil(
+            (w - clip_size) / stride_size) + 1
+
+    x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
+    xmin = x * clip_size
+    ymin = y * clip_size
+
+    xmin = xmin.ravel()
+    ymin = ymin.ravel()
+    xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size,
+                           np.zeros_like(xmin))
+    ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size,
+                           np.zeros_like(ymin))
+    boxes = np.stack([
+        xmin + xmin_offset, ymin + ymin_offset,
+        np.minimum(xmin + clip_size, w),
+        np.minimum(ymin + clip_size, h)
+    ],
+                     axis=1)
+
+    if to_label:
+        image[image == 255] = 1
+        image = image[:, :, 0]
+    for box in boxes:
+        start_x, start_y, end_x, end_y = box
+        clipped_image = image[start_y:end_y, start_x:end_x] \
+            if to_label else image[start_y:end_y, start_x:end_x, :]
+        idx = osp.basename(image_path).split('.')[0]
+        mmcv.imwrite(
+            clipped_image.astype(np.uint8),
+            osp.join(clip_save_dir,
+                     f'{idx}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/dataset_converters/loveda.py b/tools/dataset_converters/loveda.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b0ef4bb8bbd07f60dfc0397e9659f0200b96f5d
--- /dev/null
+++ b/tools/dataset_converters/loveda.py
@@ -0,0 +1,73 @@
+# Copyright (c) OpenMMLab. All rights reserved.
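+#
+# LoveDA ships as Train.zip, Val.zip and Test.zip, each holding Rural and
+# Urban scenes. Each archive is unzipped, its images_png files are moved
+# into img_dir/{train,val,test} and its masks_png files into
+# ann_dir/{train,val}. The test split has no public masks, so nothing is
+# produced for ann_dir/test.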
+import argparse +import os +import os.path as osp +import shutil +import tempfile +import zipfile + +from mmengine.utils import mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert LoveDA dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='LoveDA folder path') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + if args.out_dir is None: + out_dir = osp.join('data', 'loveDA') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'img_dir')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) + + assert 'Train.zip' in os.listdir(dataset_path), \ + f'Train.zip is not in {dataset_path}' + assert 'Val.zip' in os.listdir(dataset_path), \ + f'Val.zip is not in {dataset_path}' + assert 'Test.zip' in os.listdir(dataset_path), \ + f'Test.zip is not in {dataset_path}' + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for dataset in ['Train', 'Val', 'Test']: + zip_file = zipfile.ZipFile( + os.path.join(dataset_path, dataset + '.zip')) + zip_file.extractall(tmp_dir) + data_type = dataset.lower() + for location in ['Rural', 'Urban']: + for image_type in ['images_png', 'masks_png']: + if image_type == 'images_png': + dst = osp.join(out_dir, 'img_dir', data_type) + else: + dst = osp.join(out_dir, 'ann_dir', data_type) + if dataset == 'Test' and image_type == 'masks_png': + continue + else: + src_dir = osp.join(tmp_dir, dataset, location, + image_type) + src_lst = os.listdir(src_dir) + for file in src_lst: + shutil.move(osp.join(src_dir, file), dst) + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/nyu.py b/tools/dataset_converters/nyu.py new file mode 100644 index 0000000000000000000000000000000000000000..49e09e7af6844b709e681f6d9f4df14ed547a00c --- /dev/null +++ b/tools/dataset_converters/nyu.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +import shutil +import tempfile +import zipfile + +from mmengine.utils import mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert NYU Depth dataset to mmsegmentation format') + parser.add_argument('raw_data', help='the path of raw data') + parser.add_argument( + '-o', '--out_dir', help='output path', default='./data/nyu') + args = parser.parse_args() + return args + + +def reorganize(raw_data_dir: str, out_dir: str): + """Reorganize NYU Depth dataset files into the required directory + structure. + + Args: + raw_data_dir (str): Path to the raw data directory. + out_dir (str): Output directory for the organized dataset. + """ + + def move_data(data_list, dst_prefix, fname_func): + """Move data files from source to destination directory. + + Args: + data_list (list): List of data file paths. + dst_prefix (str): Prefix to be added to destination paths. 
+ fname_func (callable): Function to process file names + """ + for data_item in data_list: + data_item = data_item.strip().strip('/') + new_item = fname_func(data_item) + shutil.move( + osp.join(raw_data_dir, data_item), + osp.join(out_dir, dst_prefix, new_item)) + + def process_phase(phase): + """Process a dataset phase (e.g., 'train' or 'test').""" + with open(osp.join(raw_data_dir, f'nyu_{phase}.txt')) as f: + data = filter(lambda x: len(x.strip()) > 0, f.readlines()) + data = map(lambda x: x.split()[:2], data) + images, annos = zip(*data) + + move_data(images, f'images/{phase}', + lambda x: x.replace('/rgb', '')) + move_data(annos, f'annotations/{phase}', + lambda x: x.replace('/sync_depth', '')) + + process_phase('train') + process_phase('test') + + +def main(): + args = parse_args() + + print('Making directories...') + mkdir_or_exist(args.out_dir) + for subdir in [ + 'images/train', 'images/test', 'annotations/train', + 'annotations/test' + ]: + mkdir_or_exist(osp.join(args.out_dir, subdir)) + + print('Generating images and annotations...') + + if args.raw_data.endswith('.zip'): + with tempfile.TemporaryDirectory() as tmp_dir: + zip_file = zipfile.ZipFile(args.raw_data) + zip_file.extractall(tmp_dir) + reorganize(osp.join(tmp_dir, 'nyu'), args.out_dir) + else: + assert osp.isdir( + args.raw_data + ), 'the argument --raw-data should be either a zip file or directory.' + reorganize(args.raw_data, args.out_dir) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/pascal_context.py b/tools/dataset_converters/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..a92d1dc6411137b92fe67fbde0fc554060194085 --- /dev/null +++ b/tools/dataset_converters/pascal_context.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
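+#
+# Requires the `detail` package (the PASCAL in Detail API). _mapping below
+# lists the 60 raw PASCAL Context category ids that are kept (59 classes
+# plus background); np.digitize finds each raw id's position in the sorted
+# _mapping and _key converts that position to a contiguous train id in
+# [0, 59]. For example, raw id 2 sits at position 1 of the sorted mapping,
+# so every pixel labelled 2 becomes train id 1.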
+import argparse +import os.path as osp +from functools import partial + +import numpy as np +from detail import Detail +from mmengine.utils import mkdir_or_exist, track_progress +from PIL import Image + +_mapping = np.sort( + np.array([ + 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284, + 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59, + 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355, + 85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115 + ])) +_key = np.array(range(len(_mapping))).astype('uint8') + + +def generate_labels(img_id, detail, out_dir): + + def _class_to_index(mask, _mapping, _key): + # assert the values + values = np.unique(mask) + for i in range(len(values)): + assert (values[i] in _mapping) + index = np.digitize(mask.ravel(), _mapping, right=True) + return _key[index].reshape(mask.shape) + + mask = Image.fromarray( + _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key)) + filename = img_id['file_name'] + mask.save(osp.join(out_dir, filename.replace('jpg', 'png'))) + return osp.splitext(osp.basename(filename))[0] + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert PASCAL VOC annotations to mmsegmentation format') + parser.add_argument('devkit_path', help='pascal voc devkit path') + parser.add_argument('json_path', help='annoation json filepath') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + devkit_path = args.devkit_path + if args.out_dir is None: + out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext') + else: + out_dir = args.out_dir + json_path = args.json_path + mkdir_or_exist(out_dir) + img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages') + + train_detail = Detail(json_path, img_dir, 'train') + train_ids = train_detail.getImgs() + + val_detail = Detail(json_path, img_dir, 'val') + val_ids = val_detail.getImgs() + + mkdir_or_exist( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext')) + + train_list = track_progress( + partial(generate_labels, detail=train_detail, out_dir=out_dir), + train_ids) + with open( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', + 'train.txt'), 'w') as f: + f.writelines(line + '\n' for line in sorted(train_list)) + + val_list = track_progress( + partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids) + with open( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', + 'val.txt'), 'w') as f: + f.writelines(line + '\n' for line in sorted(val_list)) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/potsdam.py b/tools/dataset_converters/potsdam.py new file mode 100644 index 0000000000000000000000000000000000000000..f3c713ee2a08d2f6eaf68fb225899504b8f4e829 --- /dev/null +++ b/tools/dataset_converters/potsdam.py @@ -0,0 +1,158 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
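+#
+# Potsdam annotations are RGB color maps. clip_big_image() turns them into
+# class indices by dotting each pixel with [2, 3, 4], which maps each of the
+# seven palette colors to a unique scalar, e.g.
+#
+#   (255, 255, 255) -> 255*2 + 255*3 + 255*4 = 2295 -> class 1
+#   (0, 0, 255)     ->                 255*4 = 1020 -> class 6
+#
+# so one vectorized comparison per class recovers the label map before the
+# tiles are clipped into patches.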
+import argparse +import glob +import math +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv +import numpy as np +from mmengine.utils import ProgressBar, mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert potsdam dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='potsdam folder path') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--clip_size', + type=int, + help='clipped size of image after preparation', + default=512) + parser.add_argument( + '--stride_size', + type=int, + help='stride of clipping original images', + default=256) + args = parser.parse_args() + return args + + +def clip_big_image(image_path, clip_save_dir, args, to_label=False): + # Original image of Potsdam dataset is very large, thus pre-processing + # of them is adopted. Given fixed clip size and stride size to generate + # clipped image, the intersection of width and height is determined. + # For example, given one 5120 x 5120 original image, the clip size is + # 512 and stride size is 256, thus it would generate 20x20 = 400 images + # whose size are all 512x512. + image = mmcv.imread(image_path) + + h, w, c = image.shape + clip_size = args.clip_size + stride_size = args.stride_size + + num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil( + (h - clip_size) / + stride_size) * stride_size + clip_size >= h else math.ceil( + (h - clip_size) / stride_size) + 1 + num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil( + (w - clip_size) / + stride_size) * stride_size + clip_size >= w else math.ceil( + (w - clip_size) / stride_size) + 1 + + x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1)) + xmin = x * clip_size + ymin = y * clip_size + + xmin = xmin.ravel() + ymin = ymin.ravel() + xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size, + np.zeros_like(xmin)) + ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size, + np.zeros_like(ymin)) + boxes = np.stack([ + xmin + xmin_offset, ymin + ymin_offset, + np.minimum(xmin + clip_size, w), + np.minimum(ymin + clip_size, h) + ], + axis=1) + + if to_label: + color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0], + [255, 255, 0], [0, 255, 0], [0, 255, 255], + [0, 0, 255]]) + flatten_v = np.matmul( + image.reshape(-1, c), + np.array([2, 3, 4]).reshape(3, 1)) + out = np.zeros_like(flatten_v) + for idx, class_color in enumerate(color_map): + value_idx = np.matmul(class_color, + np.array([2, 3, 4]).reshape(3, 1)) + out[flatten_v == value_idx] = idx + image = out.reshape(h, w) + + for box in boxes: + start_x, start_y, end_x, end_y = box + clipped_image = image[start_y:end_y, + start_x:end_x] if to_label else image[ + start_y:end_y, start_x:end_x, :] + idx_i, idx_j = osp.basename(image_path).split('_')[2:4] + mmcv.imwrite( + clipped_image.astype(np.uint8), + osp.join( + clip_save_dir, + f'{idx_i}_{idx_j}_{start_x}_{start_y}_{end_x}_{end_y}.png')) + + +def main(): + args = parse_args() + splits = { + 'train': [ + '2_10', '2_11', '2_12', '3_10', '3_11', '3_12', '4_10', '4_11', + '4_12', '5_10', '5_11', '5_12', '6_10', '6_11', '6_12', '6_7', + '6_8', '6_9', '7_10', '7_11', '7_12', '7_7', '7_8', '7_9' + ], + 'val': [ + '5_15', '6_15', '6_13', '3_13', '4_14', '6_14', '5_14', '2_13', + '4_15', '2_14', '5_13', '4_13', '3_14', '7_13' + ] + } + + dataset_path = args.dataset_path + if args.out_dir is None: + 
out_dir = osp.join('data', 'potsdam')
+    else:
+        out_dir = args.out_dir
+
+    print('Making directories...')
+    mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
+    mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
+    mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
+    mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
+
+    zipp_list = glob.glob(os.path.join(dataset_path, '*.zip'))
+    print('Find the data', zipp_list)
+
+    for zipp in zipp_list:
+        with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
+            zip_file = zipfile.ZipFile(zipp)
+            zip_file.extractall(tmp_dir)
+            src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
+            if not len(src_path_list):
+                sub_tmp_dir = os.path.join(tmp_dir, os.listdir(tmp_dir)[0])
+                src_path_list = glob.glob(os.path.join(sub_tmp_dir, '*.tif'))
+
+            prog_bar = ProgressBar(len(src_path_list))
+            for i, src_path in enumerate(src_path_list):
+                idx_i, idx_j = osp.basename(src_path).split('_')[2:4]
+                data_type = 'train' if f'{idx_i}_{idx_j}' in splits[
+                    'train'] else 'val'
+                if 'label' in src_path:
+                    dst_dir = osp.join(out_dir, 'ann_dir', data_type)
+                    clip_big_image(src_path, dst_dir, args, to_label=True)
+                else:
+                    dst_dir = osp.join(out_dir, 'img_dir', data_type)
+                    clip_big_image(src_path, dst_dir, args, to_label=False)
+                prog_bar.update()
+
+    print('Removing the temporary files...')
+
+    print('Done!')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/dataset_converters/refuge.py b/tools/dataset_converters/refuge.py
new file mode 100644
index 0000000000000000000000000000000000000000..1186866ab3fd58c4d72e5f573938053a8d7c80b2
--- /dev/null
+++ b/tools/dataset_converters/refuge.py
@@ -0,0 +1,110 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+import os.path as osp
+import tempfile
+import zipfile
+
+import mmcv
+import numpy as np
+from mmengine.utils import mkdir_or_exist
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert REFUGE dataset to mmsegmentation format')
+    parser.add_argument('--raw_data_root', help='the root path of raw data')
+
+    parser.add_argument('--tmp_dir', help='path of the temporary directory')
+    parser.add_argument('-o', '--out_dir', help='output path')
+    args = parser.parse_args()
+    return args
+
+
+def extract_img(root: str,
+                cur_dir: str,
+                out_dir: str,
+                mode: str = 'train',
+                file_type: str = 'img') -> None:
+    """Extract images or annotations from one REFUGE zip file.
+
+    Args:
+        root (str): Root directory where the zip file is extracted.
+        cur_dir (str): Path of the zip file to extract.
+        out_dir (str): Root directory where the converted data is saved.
+        mode (str, optional): Dataset split. Defaults to 'train'.
+        file_type (str, optional): 'images' or 'annotations'.
+            Defaults to 'img'.
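+
+        Note: annotation pixel values are remapped (0 -> 1, 128 -> 2,
+        255 -> 0) so that the background becomes label 0.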
+ """ + zip_file = zipfile.ZipFile(cur_dir) + zip_file.extractall(root) + for cur_dir, dirs, files in os.walk(root): + # filter child dirs and directories with "Illustration" and "MACOSX" + if len(dirs) == 0 and \ + cur_dir.split('\\')[-1].find('Illustration') == -1 and \ + cur_dir.find('MACOSX') == -1: + + file_names = [ + file for file in files + if file.endswith('.jpg') or file.endswith('.bmp') + ] + for filename in sorted(file_names): + img = mmcv.imread(osp.join(cur_dir, filename)) + + if file_type == 'annotations': + img = img[:, :, 0] + img[np.where(img == 0)] = 1 + img[np.where(img == 128)] = 2 + img[np.where(img == 255)] = 0 + mmcv.imwrite( + img, + osp.join(out_dir, file_type, mode, + osp.splitext(filename)[0] + '.png')) + + +def main(): + args = parse_args() + + raw_data_root = args.raw_data_root + if args.out_dir is None: + out_dir = osp.join('./data', 'REFUGE') + + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'images', 'test')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'test')) + + print('Generating images and annotations...') + # process data from the child dir on the first rank + cur_dir, dirs, files = list(os.walk(raw_data_root))[0] + print('====================') + + files = list(filter(lambda x: x.endswith('.zip'), files)) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for file in files: + # search data folders for training,validation,test + mode = list( + filter(lambda x: file.lower().find(x) != -1, + ['training', 'test', 'validation']))[0] + file_root = osp.join(tmp_dir, file[:-4]) + file_type = 'images' if file.find('Anno') == -1 and file.find( + 'GT') == -1 else 'annotations' + extract_img(file_root, osp.join(cur_dir, file), out_dir, mode, + file_type) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/stare.py b/tools/dataset_converters/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..4a23ba4dd8a4744bca9d1a506c79131c0e42c73d --- /dev/null +++ b/tools/dataset_converters/stare.py @@ -0,0 +1,167 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import gzip +import os +import os.path as osp +import tarfile +import tempfile + +import mmcv +from mmengine.utils import mkdir_or_exist + +STARE_LEN = 20 +TRAINING_LEN = 10 + + +def un_gz(src, dst): + g_file = gzip.GzipFile(src) + with open(dst, 'wb+') as f: + f.write(g_file.read()) + g_file.close() + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert STARE dataset to mmsegmentation format') + parser.add_argument('image_path', help='the path of stare-images.tar') + parser.add_argument('labels_ah', help='the path of labels-ah.tar') + parser.add_argument('labels_vk', help='the path of labels-vk.tar') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + image_path = args.image_path + labels_ah = args.labels_ah + labels_vk = args.labels_vk + if args.out_dir is None: + out_dir = osp.join('data', 'STARE') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting stare-images.tar...') + with tarfile.open(image_path) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + f'len(os.listdir(now_dir)) != {STARE_LEN}' + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(filename)[0] + '.png')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Removing the temporary files...') + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting labels-ah.tar...') + with tarfile.open(labels_ah) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + f'len(os.listdir(now_dir)) != {STARE_LEN}' + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a threshold + # to convert the nonstandard annotation imgs. 
The value divided by + # 128 equivalent to '1 if value >= 128 else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.png')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Removing the temporary files...') + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting labels-vk.tar...') + with tarfile.open(labels_vk) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + f'len(os.listdir(now_dir)) != {STARE_LEN}' + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.png')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/synapse.py b/tools/dataset_converters/synapse.py new file mode 100644 index 0000000000000000000000000000000000000000..42dac6b7eff94107b8b3a59984622cb1fd2e7599 --- /dev/null +++ b/tools/dataset_converters/synapse.py @@ -0,0 +1,155 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +import nibabel as nib +import numpy as np +from mmengine.utils import mkdir_or_exist +from PIL import Image + + +def read_files_from_txt(txt_path): + with open(txt_path) as f: + files = f.readlines() + files = [file.strip() for file in files] + return files + + +def read_nii_file(nii_path): + img = nib.load(nii_path).get_fdata() + return img + + +def split_3d_image(img): + c, _, _ = img.shape + res = [] + for i in range(c): + res.append(img[i, :, :]) + return res + + +def label_mapping(label): + """Label mapping from TransUNet paper setting. It only has 9 classes, which + are 'background', 'aorta', 'gallbladder', 'left_kidney', 'right_kidney', + 'liver', 'pancreas', 'spleen', 'stomach', respectively. Other foreground + classes in original dataset are all set to background. 
+
+    More details could be found here: https://arxiv.org/abs/2102.04306
+    """
+    mapped_label = np.zeros_like(label)
+    mapped_label[label == 8] = 1
+    mapped_label[label == 4] = 2
+    mapped_label[label == 3] = 3
+    mapped_label[label == 2] = 4
+    mapped_label[label == 6] = 5
+    mapped_label[label == 11] = 6
+    mapped_label[label == 1] = 7
+    mapped_label[label == 7] = 8
+    return mapped_label
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert synapse dataset to mmsegmentation format')
+    parser.add_argument(
+        '--dataset-path', type=str, help='synapse dataset path.')
+    parser.add_argument(
+        '--save-path',
+        default='data/synapse',
+        type=str,
+        help='save path of the dataset.')
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    dataset_path = args.dataset_path
+    save_path = args.save_path
+
+    if not osp.exists(dataset_path):
+        raise ValueError('The dataset path does not exist. '
+                         'Please enter a correct dataset path.')
+    if not osp.exists(osp.join(dataset_path, 'img')) \
+            or not osp.exists(osp.join(dataset_path, 'label')):
+        raise FileNotFoundError('The dataset structure is incorrect. '
+                                'Please check your dataset.')
+
+    train_id = read_files_from_txt(osp.join(dataset_path, 'train.txt'))
+    train_id = [idx[3:7] for idx in train_id]
+
+    test_id = read_files_from_txt(osp.join(dataset_path, 'val.txt'))
+    test_id = [idx[3:7] for idx in test_id]
+
+    mkdir_or_exist(osp.join(save_path, 'img_dir/train'))
+    mkdir_or_exist(osp.join(save_path, 'img_dir/val'))
+    mkdir_or_exist(osp.join(save_path, 'ann_dir/train'))
+    mkdir_or_exist(osp.join(save_path, 'ann_dir/val'))
+
+    # It follows data preparation pipeline from here:
+    # https://github.com/Beckschen/TransUNet/tree/main/datasets
+    for i, idx in enumerate(train_id):
+        img_3d = read_nii_file(
+            osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz'))
+        label_3d = read_nii_file(
+            osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz'))
+
+        img_3d = np.clip(img_3d, -125, 275)
+        img_3d = (img_3d + 125) / 400
+        img_3d *= 255
+        img_3d = np.transpose(img_3d, [2, 0, 1])
+        img_3d = np.flip(img_3d, 2)
+
+        label_3d = np.transpose(label_3d, [2, 0, 1])
+        label_3d = np.flip(label_3d, 2)
+        label_3d = label_mapping(label_3d)
+
+        for c in range(img_3d.shape[0]):
+            img = img_3d[c]
+            label = label_3d[c]
+
+            img = Image.fromarray(img).convert('RGB')
+            label = Image.fromarray(label).convert('L')
+            img.save(
+                osp.join(
+                    save_path, 'img_dir/train', 'case' + idx.zfill(4) +
+                    '_slice' + str(c).zfill(3) + '.jpg'))
+            label.save(
+                osp.join(
+                    save_path, 'ann_dir/train', 'case' + idx.zfill(4) +
+                    '_slice' + str(c).zfill(3) + '.png'))
+
+    for i, idx in enumerate(test_id):
+        img_3d = read_nii_file(
+            osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz'))
+        label_3d = read_nii_file(
+            osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz'))
+
+        img_3d = np.clip(img_3d, -125, 275)
+        img_3d = (img_3d + 125) / 400
+        img_3d *= 255
+        img_3d = np.transpose(img_3d, [2, 0, 1])
+        img_3d = np.flip(img_3d, 2)
+
+        label_3d = np.transpose(label_3d, [2, 0, 1])
+        label_3d = np.flip(label_3d, 2)
+        label_3d = label_mapping(label_3d)
+
+        for c in range(img_3d.shape[0]):
+            img = img_3d[c]
+            label = label_3d[c]
+
+            img = Image.fromarray(img).convert('RGB')
+            label = Image.fromarray(label).convert('L')
+            img.save(
+                osp.join(
+                    save_path, 'img_dir/val', 'case' + idx.zfill(4) +
+                    '_slice' + str(c).zfill(3) + '.jpg'))
+            label.save(
+                osp.join(
+                    save_path, 'ann_dir/val', 'case' + idx.zfill(4) +
+                    '_slice' + str(c).zfill(3) + '.png'))
+
+
+if __name__
== '__main__': + main() diff --git a/tools/dataset_converters/vaihingen.py b/tools/dataset_converters/vaihingen.py new file mode 100644 index 0000000000000000000000000000000000000000..db980144eb491846a844b0a374bb7a01d5509265 --- /dev/null +++ b/tools/dataset_converters/vaihingen.py @@ -0,0 +1,156 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import glob +import math +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv +import numpy as np +from mmengine.utils import ProgressBar, mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert vaihingen dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='vaihingen folder path') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--clip_size', + type=int, + help='clipped size of image after preparation', + default=512) + parser.add_argument( + '--stride_size', + type=int, + help='stride of clipping original images', + default=256) + args = parser.parse_args() + return args + + +def clip_big_image(image_path, clip_save_dir, to_label=False): + # Original image of Vaihingen dataset is very large, thus pre-processing + # of them is adopted. Given fixed clip size and stride size to generate + # clipped image, the intersection of width and height is determined. + # For example, given one 5120 x 5120 original image, the clip size is + # 512 and stride size is 256, thus it would generate 20x20 = 400 images + # whose size are all 512x512. + image = mmcv.imread(image_path) + + h, w, c = image.shape + cs = args.clip_size + ss = args.stride_size + + num_rows = math.ceil((h - cs) / ss) if math.ceil( + (h - cs) / ss) * ss + cs >= h else math.ceil((h - cs) / ss) + 1 + num_cols = math.ceil((w - cs) / ss) if math.ceil( + (w - cs) / ss) * ss + cs >= w else math.ceil((w - cs) / ss) + 1 + + x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1)) + xmin = x * cs + ymin = y * cs + + xmin = xmin.ravel() + ymin = ymin.ravel() + xmin_offset = np.where(xmin + cs > w, w - xmin - cs, np.zeros_like(xmin)) + ymin_offset = np.where(ymin + cs > h, h - ymin - cs, np.zeros_like(ymin)) + boxes = np.stack([ + xmin + xmin_offset, ymin + ymin_offset, + np.minimum(xmin + cs, w), + np.minimum(ymin + cs, h) + ], + axis=1) + + if to_label: + color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0], + [255, 255, 0], [0, 255, 0], [0, 255, 255], + [0, 0, 255]]) + flatten_v = np.matmul( + image.reshape(-1, c), + np.array([2, 3, 4]).reshape(3, 1)) + out = np.zeros_like(flatten_v) + for idx, class_color in enumerate(color_map): + value_idx = np.matmul(class_color, + np.array([2, 3, 4]).reshape(3, 1)) + out[flatten_v == value_idx] = idx + image = out.reshape(h, w) + + for box in boxes: + start_x, start_y, end_x, end_y = box + clipped_image = image[start_y:end_y, + start_x:end_x] if to_label else image[ + start_y:end_y, start_x:end_x, :] + area_idx = osp.basename(image_path).split('_')[3].strip('.tif') + mmcv.imwrite( + clipped_image.astype(np.uint8), + osp.join(clip_save_dir, + f'{area_idx}_{start_x}_{start_y}_{end_x}_{end_y}.png')) + + +def main(): + splits = { + 'train': [ + 'area1', 'area11', 'area13', 'area15', 'area17', 'area21', + 'area23', 'area26', 'area28', 'area3', 'area30', 'area32', + 'area34', 'area37', 'area5', 'area7' + ], + 'val': [ + 'area6', 'area24', 'area35', 'area16', 'area14', 'area22', + 'area10', 'area4', 'area2', 'area20', 
'area8', 'area31', 'area33', + 'area27', 'area38', 'area12', 'area29' + ], + } + + dataset_path = args.dataset_path + if args.out_dir is None: + out_dir = osp.join('data', 'vaihingen') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) + + zipp_list = glob.glob(os.path.join(dataset_path, '*.zip')) + print('Find the data', zipp_list) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for zipp in zipp_list: + zip_file = zipfile.ZipFile(zipp) + zip_file.extractall(tmp_dir) + src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif')) + if 'ISPRS_semantic_labeling_Vaihingen' in zipp: + src_path_list = glob.glob( + os.path.join(os.path.join(tmp_dir, 'top'), '*.tif')) + if 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE' in zipp: # noqa + src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif')) + # delete unused area9 ground truth + for area_ann in src_path_list: + if 'area9' in area_ann: + src_path_list.remove(area_ann) + prog_bar = ProgressBar(len(src_path_list)) + for i, src_path in enumerate(src_path_list): + area_idx = osp.basename(src_path).split('_')[3].strip('.tif') + data_type = 'train' if area_idx in splits['train'] else 'val' + if 'noBoundary' in src_path: + dst_dir = osp.join(out_dir, 'ann_dir', data_type) + clip_big_image(src_path, dst_dir, to_label=True) + else: + dst_dir = osp.join(out_dir, 'img_dir', data_type) + clip_big_image(src_path, dst_dir, to_label=False) + prog_bar.update() + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + args = parse_args() + main() diff --git a/tools/dataset_converters/voc_aug.py b/tools/dataset_converters/voc_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..a536f4290d06e4a6c3c9fa8dbadfda847fec583b --- /dev/null +++ b/tools/dataset_converters/voc_aug.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
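+#
+# Converts the SBD ('VOC aug') .mat annotations to PNG and writes the
+# augmented split files: trainaug.txt is (SBD train+val plus VOC2012 train)
+# minus VOC2012 val and must contain AUG_LEN = 10582 names; aug.txt keeps
+# only the SBD names that appear in neither VOC2012 train nor val.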
+import argparse +import os.path as osp +from functools import partial + +import numpy as np +from mmengine.utils import mkdir_or_exist, scandir, track_parallel_progress +from PIL import Image +from scipy.io import loadmat + +AUG_LEN = 10582 + + +def convert_mat(mat_file, in_dir, out_dir): + data = loadmat(osp.join(in_dir, mat_file)) + mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8) + seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png')) + Image.fromarray(mask).save(seg_filename, 'PNG') + + +def generate_aug_list(merged_list, excluded_list): + return list(set(merged_list) - set(excluded_list)) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert PASCAL VOC annotations to mmsegmentation format') + parser.add_argument('devkit_path', help='pascal voc devkit path') + parser.add_argument('aug_path', help='pascal voc aug path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=1, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + devkit_path = args.devkit_path + aug_path = args.aug_path + nproc = args.nproc + if args.out_dir is None: + out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug') + else: + out_dir = args.out_dir + mkdir_or_exist(out_dir) + in_dir = osp.join(aug_path, 'dataset', 'cls') + + track_parallel_progress( + partial(convert_mat, in_dir=in_dir, out_dir=out_dir), + list(scandir(in_dir, suffix='.mat')), + nproc=nproc) + + full_aug_list = [] + with open(osp.join(aug_path, 'dataset', 'train.txt')) as f: + full_aug_list += [line.strip() for line in f] + with open(osp.join(aug_path, 'dataset', 'val.txt')) as f: + full_aug_list += [line.strip() for line in f] + + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'train.txt')) as f: + ori_train_list = [line.strip() for line in f] + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'val.txt')) as f: + val_list = [line.strip() for line in f] + + aug_train_list = generate_aug_list(ori_train_list + full_aug_list, + val_list) + assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format( + AUG_LEN) + + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'trainaug.txt'), 'w') as f: + f.writelines(line + '\n' for line in aug_train_list) + + aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list) + assert len(aug_list) == AUG_LEN - len( + ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN - + len(ori_train_list)) + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'), + 'w') as f: + f.writelines(line + '\n' for line in aug_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/deployment/pytorch2torchscript.py b/tools/deployment/pytorch2torchscript.py new file mode 100644 index 0000000000000000000000000000000000000000..e69e705bb13ff3cca233534c34fcdaaeda02825b --- /dev/null +++ b/tools/deployment/pytorch2torchscript.py @@ -0,0 +1,185 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
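+#
+# Exports a segmentor to TorchScript: SyncBN layers are converted to plain
+# BatchNorm2d, forward() is swapped for forward_dummy() and the model is
+# traced with torch.jit.trace. A typical invocation (paths illustrative):
+#
+#   python tools/deployment/pytorch2torchscript.py CONFIG.py \
+#       --checkpoint CKPT.pth --output-file model.pt --shape 512 512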
+import argparse + +import numpy as np +import torch +import torch._C +import torch.serialization +from mmengine import Config +from mmengine.runner import load_checkpoint +from torch import nn + +from mmseg.models import build_segmentor + +torch.manual_seed(3) + + +def digit_version(version_str): + digit_version = [] + for x in version_str.split('.'): + if x.isdigit(): + digit_version.append(int(x)) + elif x.find('rc') != -1: + patch_version = x.split('rc') + digit_version.append(int(patch_version[0]) - 1) + digit_version.append(int(patch_version[1])) + return digit_version + + +def check_torch_version(): + torch_minimum_version = '1.8.0' + torch_version = digit_version(torch.__version__) + + assert (torch_version >= digit_version(torch_minimum_version)), \ + f'Torch=={torch.__version__} is not support for converting to ' \ + f'torchscript. Please install pytorch>={torch_minimum_version}.' + + +def _convert_batchnorm(module): + module_output = module + if isinstance(module, torch.nn.SyncBatchNorm): + module_output = torch.nn.BatchNorm2d(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) + if module.affine: + module_output.weight.data = module.weight.data.clone().detach() + module_output.bias.data = module.bias.data.clone().detach() + # keep requires_grad unchanged + module_output.weight.requires_grad = module.weight.requires_grad + module_output.bias.requires_grad = module.bias.requires_grad + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + for name, child in module.named_children(): + module_output.add_module(name, _convert_batchnorm(child)) + del module + return module_output + + +def _demo_mm_inputs(input_shape, num_classes): + """Create a superset of inputs needed to run test or train batches. + + Args: + input_shape (tuple): + input batch dimensions + num_classes (int): + number of semantic classes + """ + (N, C, H, W) = input_shape + rng = np.random.RandomState(0) + imgs = rng.rand(*input_shape) + segs = rng.randint( + low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8) + img_metas = [{ + 'img_shape': (H, W, C), + 'ori_shape': (H, W, C), + 'pad_shape': (H, W, C), + 'filename': '.png', + 'scale_factor': 1.0, + 'flip': False, + } for _ in range(N)] + mm_inputs = { + 'imgs': torch.FloatTensor(imgs).requires_grad_(True), + 'img_metas': img_metas, + 'gt_semantic_seg': torch.LongTensor(segs) + } + return mm_inputs + + +def pytorch2libtorch(model, + input_shape, + show=False, + output_file='tmp.pt', + verify=False): + """Export Pytorch model to TorchScript model and verify the outputs are + same between Pytorch and TorchScript. + + Args: + model (nn.Module): Pytorch model we want to export. + input_shape (tuple): Use this input shape to construct + the corresponding dummy input and execute the model. + show (bool): Whether print the computation graph. Default: False. + output_file (string): The path to where we store the + output TorchScript model. Default: `tmp.pt`. + verify (bool): Whether compare the outputs between + Pytorch and TorchScript. Default: False. 
+ """ + if isinstance(model.decode_head, nn.ModuleList): + num_classes = model.decode_head[-1].num_classes + else: + num_classes = model.decode_head.num_classes + + mm_inputs = _demo_mm_inputs(input_shape, num_classes) + + imgs = mm_inputs.pop('imgs') + + # replace the original forword with forward_dummy + model.forward = model.forward_dummy + model.eval() + traced_model = torch.jit.trace( + model, + example_inputs=imgs, + check_trace=verify, + ) + + if show: + print(traced_model.graph) + + traced_model.save(output_file) + print(f'Successfully exported TorchScript model: {output_file}') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert MMSeg to TorchScript') + parser.add_argument('config', help='test config file path') + parser.add_argument('--checkpoint', help='checkpoint file', default=None) + parser.add_argument( + '--show', action='store_true', help='show TorchScript graph') + parser.add_argument( + '--verify', action='store_true', help='verify the TorchScript model') + parser.add_argument('--output-file', type=str, default='tmp.pt') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[512, 512], + help='input image size (height, width)') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + check_torch_version() + + if len(args.shape) == 1: + input_shape = (1, 3, args.shape[0], args.shape[0]) + elif len(args.shape) == 2: + input_shape = ( + 1, + 3, + ) + tuple(args.shape) + else: + raise ValueError('invalid input shape') + + cfg = Config.fromfile(args.config) + cfg.model.pretrained = None + + # build the model and load checkpoint + cfg.model.train_cfg = None + segmentor = build_segmentor( + cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg')) + # convert SyncBN to BN + segmentor = _convert_batchnorm(segmentor) + + if args.checkpoint: + load_checkpoint(segmentor, args.checkpoint, map_location='cpu') + + # convert the PyTorch model to LibTorch model + pytorch2libtorch( + segmentor, + input_shape, + show=args.show, + output_file=args.output_file, + verify=args.verify) diff --git a/tools/dist_test.sh b/tools/dist_test.sh new file mode 100755 index 0000000000000000000000000000000000000000..89711fd5c02cfc1f0386e5354506d4b74ecac251 --- /dev/null +++ b/tools/dist_test.sh @@ -0,0 +1,20 @@ +CONFIG=$1 +CHECKPOINT=$2 +GPUS=$3 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} +PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/test.py \ + $CONFIG \ + $CHECKPOINT \ + --launcher pytorch \ + ${@:4} diff --git a/tools/dist_train.sh b/tools/dist_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..a857df78788edb8841b6f67d74dd0e6cfb77d8ab --- /dev/null +++ b/tools/dist_train.sh @@ -0,0 +1,17 @@ +CONFIG=$1 +GPUS=$2 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} +PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/train.py \ + $CONFIG \ + --launcher pytorch ${@:3} diff --git a/tools/misc/browse_dataset.py b/tools/misc/browse_dataset.py new file mode 100644 index 
0000000000000000000000000000000000000000..7863eb74f2cab53d025afad347f7886a5ce29919 --- /dev/null +++ b/tools/misc/browse_dataset.py @@ -0,0 +1,73 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +from mmengine import Config, DictAction +from mmengine.registry import init_default_scope +from mmengine.utils import ProgressBar + +from mmseg.registry import DATASETS, VISUALIZERS + + +def parse_args(): + parser = argparse.ArgumentParser(description='Browse a dataset') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--output-dir', + default=None, + type=str, + help='If there is no display interface, you can save it') + parser.add_argument('--not-show', default=False, action='store_true') + parser.add_argument( + '--show-interval', + type=float, + default=2, + help='the interval of show (s)') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # register all modules in mmseg into the registries + init_default_scope('mmseg') + + dataset = DATASETS.build(cfg.train_dataloader.dataset) + cfg.visualizer['save_dir'] = args.output_dir + visualizer = VISUALIZERS.build(cfg.visualizer) + visualizer.dataset_meta = dataset.METAINFO + + progress_bar = ProgressBar(len(dataset)) + for item in dataset: + img = item['inputs'].permute(1, 2, 0).numpy() + data_sample = item['data_samples'].numpy() + img_path = osp.basename(item['data_samples'].img_path) + + img = img[..., [2, 1, 0]] # bgr to rgb + + visualizer.add_datasample( + osp.basename(img_path), + img, + data_sample, + show=not args.not_show, + wait_time=args.show_interval) + + progress_bar.update() + + +if __name__ == '__main__': + main() diff --git a/tools/misc/print_config.py b/tools/misc/print_config.py new file mode 100644 index 0000000000000000000000000000000000000000..2a1c024a6a44157a0b0d4d6213d18d67f57a33c5 --- /dev/null +++ b/tools/misc/print_config.py @@ -0,0 +1,69 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import warnings + +from mmengine import Config, DictAction + +from mmseg.apis import init_model + + +def parse_args(): + parser = argparse.ArgumentParser(description='Print the whole config') + parser.add_argument('config', help='config file path') + parser.add_argument( + '--graph', action='store_true', help='print the models graph') + parser.add_argument( + '--options', + nargs='+', + action=DictAction, + help="--options is deprecated in favor of --cfg_options' and it will " + 'not be supported in version v0.22.0. Override some settings in the ' + 'used config, the key-value pair in xxx=yyy format will be merged ' + 'into config file. If the value to be overwritten is a list, it ' + 'should be like key="[a,b]" or key=a,b It also allows nested ' + 'list/tuple values, e.g. 
key="[(a,b),(c,d)]" Note that the quotation ' + 'marks are necessary and that no white space is allowed.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + + if args.options and args.cfg_options: + raise ValueError( + '--options and --cfg-options cannot be both ' + 'specified, --options is deprecated in favor of --cfg-options. ' + '--options will not be supported in version v0.22.0.') + if args.options: + warnings.warn('--options is deprecated in favor of --cfg-options, ' + '--options will not be supported in version v0.22.0.') + args.cfg_options = args.options + + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + print(f'Config:\n{cfg.pretty_text}') + # dump config + cfg.dump('example.py') + # dump models graph + if args.graph: + model = init_model(args.config, device='cpu') + print(f'Model graph:\n{str(model)}') + with open('example-graph.txt', 'w') as f: + f.writelines(str(model)) + + +if __name__ == '__main__': + main() diff --git a/tools/misc/publish_model.py b/tools/misc/publish_model.py new file mode 100644 index 0000000000000000000000000000000000000000..e035ad90e85e0e03d8304c1d5b524c5ac322c644 --- /dev/null +++ b/tools/misc/publish_model.py @@ -0,0 +1,50 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import subprocess +from hashlib import sha256 + +import torch + +BLOCK_SIZE = 128 * 1024 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Process a checkpoint to be published') + parser.add_argument('in_file', help='input checkpoint filename') + parser.add_argument('out_file', help='output checkpoint filename') + args = parser.parse_args() + return args + + +def sha256sum(filename: str) -> str: + """Compute SHA256 message digest from a file.""" + hash_func = sha256() + byte_array = bytearray(BLOCK_SIZE) + memory_view = memoryview(byte_array) + with open(filename, 'rb', buffering=0) as file: + for block in iter(lambda: file.readinto(memory_view), 0): + hash_func.update(memory_view[:block]) + return hash_func.hexdigest() + + +def process_checkpoint(in_file, out_file): + checkpoint = torch.load(in_file, map_location='cpu') + # remove optimizer for smaller file size + if 'optimizer' in checkpoint: + del checkpoint['optimizer'] + # if it is necessary to remove some sensitive data in checkpoint['meta'], + # add the code here. + torch.save(checkpoint, out_file) + sha = sha256sum(in_file) + final_file = out_file.rstrip('.pth') + f'-{sha[:8]}.pth' + subprocess.Popen(['mv', out_file, final_file]) + + +def main(): + args = parse_args() + process_checkpoint(args.in_file, args.out_file) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/beit2mmseg.py b/tools/model_converters/beit2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..20f8f0f4509f93291782ca152bf04ab019b0e0ff --- /dev/null +++ b/tools/model_converters/beit2mmseg.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_beit(ckpt): + new_ckpt = OrderedDict() + + for k, v in ckpt.items(): + if k.startswith('patch_embed'): + new_key = k.replace('patch_embed.proj', 'patch_embed.projection') + new_ckpt[new_key] = v + elif k.startswith('blocks'): + new_key = k.replace('blocks', 'layers') + if 'norm' in new_key: + new_key = new_key.replace('norm', 'ln') + elif 'mlp.fc1' in new_key: + new_key = new_key.replace('mlp.fc1', 'ffn.layers.0.0') + elif 'mlp.fc2' in new_key: + new_key = new_key.replace('mlp.fc2', 'ffn.layers.1') + new_ckpt[new_key] = v + else: + new_key = k + new_ckpt[new_key] = v + + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained beit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_beit(state_dict) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/clip2mmseg.py b/tools/model_converters/clip2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..9a97e4b04ab45740ee37149d30a85b67245868f5 --- /dev/null +++ b/tools/model_converters/clip2mmseg.py @@ -0,0 +1,163 @@ +# Copyright (c) OpenMMLab. All rights reserved.
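# [Editor's note: illustration only, not part of the diff] convert_key_name
# below splits CLIP's visual transformer at `visual_split` (9 for ViT-B/16,
# 18 for ViT-L/14): blocks before the split become the image encoder, the
# rest become the mask-recovery head. With visual_split = 9, for example:
#   visual.transformer.resblocks.3.ln_1.weight
#       -> image_encoder.layers.3.ln1.weight
#   visual.transformer.resblocks.10.ln_1.weight
#       -> decode_head.rec_with_attnbias.layers.1.norms.0.weight  # 10 - 9 = 1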
+import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_vitlayer(paras): + new_para_name = '' + if paras[0] == 'ln_1': + new_para_name = '.'.join(['ln1'] + paras[1:]) + elif paras[0] == 'attn': + new_para_name = '.'.join(['attn.attn'] + paras[1:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['ln2'] + paras[1:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffn.layers.0.0'] + paras[-1:]) + else: + new_para_name = '.'.join(['ffn.layers.1'] + paras[-1:]) + else: + print(f'Wrong for {paras}') + return new_para_name + + +def convert_translayer(paras): + new_para_name = '' + if paras[0] == 'attn': + new_para_name = '.'.join(['attentions.0.attn'] + paras[1:]) + elif paras[0] == 'ln_1': + new_para_name = '.'.join(['norms.0'] + paras[1:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['norms.1'] + paras[1:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffns.0.layers.0.0'] + paras[2:]) + elif paras[1] == 'c_proj': + new_para_name = '.'.join(['ffns.0.layers.1'] + paras[2:]) + else: + print(f'Wrong for {paras}') + else: + print(f'Wrong for {paras}') + return new_para_name + + +def convert_key_name(ckpt, visual_split): + new_ckpt = OrderedDict() + for k, v in ckpt.items(): + key_list = k.split('.') + if key_list[0] == 'visual': + new_transform_name = 'image_encoder' + if key_list[1] == 'class_embedding': + new_name = '.'.join([new_transform_name, 'cls_token']) + elif key_list[1] == 'positional_embedding': + new_name = '.'.join([new_transform_name, 'pos_embed']) + elif key_list[1] == 'conv1': + new_name = '.'.join([ + new_transform_name, 'patch_embed.projection', key_list[2] + ]) + elif key_list[1] == 'ln_pre': + new_name = '.'.join( + [new_transform_name, key_list[1], key_list[2]]) + elif key_list[1] == 'transformer': + new_layer_name = 'layers' + layer_index = key_list[3] + paras = key_list[4:] + if int(layer_index) < visual_split: + new_para_name = convert_vitlayer(paras) + new_name = '.'.join([ + new_transform_name, new_layer_name, layer_index, + new_para_name + ]) + else: + new_para_name = convert_translayer(paras) + new_transform_name = 'decode_head.rec_with_attnbias' + new_layer_name = 'layers' + layer_index = str(int(layer_index) - visual_split) + new_name = '.'.join([ + new_transform_name, new_layer_name, layer_index, + new_para_name + ]) + elif key_list[1] == 'proj': + new_name = 'decode_head.rec_with_attnbias.proj.weight' + elif key_list[1] == 'ln_post': + new_name = k.replace('visual', 'decode_head.rec_with_attnbias') + else: + print(f'pop parameter: {k}') + continue + else: + text_encoder_name = 'text_encoder' + if key_list[0] == 'transformer': + layer_name = 'transformer' + layer_index = key_list[2] + paras = key_list[3:] + new_para_name = convert_translayer(paras) + new_name = '.'.join([ + text_encoder_name, layer_name, layer_index, new_para_name + ]) + elif key_list[0] in [ + 'positional_embedding', 'text_projection', 'bg_embed', + 'attn_mask', 'logit_scale', 'token_embedding', 'ln_final' + ]: + new_name = 'text_encoder.' 
+ k + else: + print(f'pop parameter: {k}') + continue + new_ckpt[new_name] = v + + return new_ckpt + + +def convert_tensor(ckpt): + cls_token = ckpt['image_encoder.cls_token'] + new_cls_token = cls_token.unsqueeze(0).unsqueeze(0) + ckpt['image_encoder.cls_token'] = new_cls_token + pos_embed = ckpt['image_encoder.pos_embed'] + new_pos_embed = pos_embed.unsqueeze(0) + ckpt['image_encoder.pos_embed'] = new_pos_embed + proj_weight = ckpt['decode_head.rec_with_attnbias.proj.weight'] + new_proj_weight = proj_weight.transpose(1, 0) + ckpt['decode_head.rec_with_attnbias.proj.weight'] = new_proj_weight + return ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + if any([s in args.src for s in ['B-16', 'b16', 'base_patch16']]): + visual_split = 9 + elif any([s in args.src for s in ['L-14', 'l14', 'large_patch14']]): + visual_split = 18 + else: + print('Make sure the clip model is ViT-B/16 or ViT-L/14!') + visual_split = -1 + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if isinstance(checkpoint, torch.jit.RecursiveScriptModule): + state_dict = checkpoint.state_dict() + else: + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + # deit checkpoint + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_key_name(state_dict, visual_split) + weight = convert_tensor(weight) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/mit2mmseg.py b/tools/model_converters/mit2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..f10cbbf9d40d3656be0d447460c12fc83771c14c --- /dev/null +++ b/tools/model_converters/mit2mmseg.py @@ -0,0 +1,82 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_mit(ckpt): + new_ckpt = OrderedDict() + # Process the concat between q linear weights and kv linear weights + for k, v in ckpt.items(): + if k.startswith('head'): + continue + # patch embedding conversion + elif k.startswith('patch_embed'): + stage_i = int(k.split('.')[0].replace('patch_embed', '')) + new_k = k.replace(f'patch_embed{stage_i}', f'layers.{stage_i-1}.0') + new_v = v + if 'proj.' in new_k: + new_k = new_k.replace('proj.', 'projection.') + # transformer encoder layer conversion + elif k.startswith('block'): + stage_i = int(k.split('.')[0].replace('block', '')) + new_k = k.replace(f'block{stage_i}', f'layers.{stage_i-1}.1') + new_v = v + if 'attn.q.' in new_k: + sub_item_k = k.replace('q.', 'kv.') + new_k = new_k.replace('q.', 'attn.in_proj_') + new_v = torch.cat([v, ckpt[sub_item_k]], dim=0) + elif 'attn.kv.' in new_k: + continue + elif 'attn.proj.' in new_k: + new_k = new_k.replace('proj.', 'attn.out_proj.') + elif 'attn.sr.' in new_k: + new_k = new_k.replace('sr.', 'sr.') + elif 'mlp.' 
in new_k: + string = f'{new_k}-' + new_k = new_k.replace('mlp.', 'ffn.layers.') + if 'fc1.weight' in new_k or 'fc2.weight' in new_k: + new_v = v.reshape((*v.shape, 1, 1)) + new_k = new_k.replace('fc1.', '0.') + new_k = new_k.replace('dwconv.dwconv.', '1.') + new_k = new_k.replace('fc2.', '4.') + string += f'{new_k} {v.shape}-{new_v.shape}' + # norm layer conversion + elif k.startswith('norm'): + stage_i = int(k.split('.')[0].replace('norm', '')) + new_k = k.replace(f'norm{stage_i}', f'layers.{stage_i-1}.2') + new_v = v + else: + new_k = k + new_v = v + new_ckpt[new_k] = new_v + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained segformer to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_mit(state_dict) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/san2mmseg.py b/tools/model_converters/san2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..301a46608e0f14df17138922ae3a747aee105372 --- /dev/null +++ b/tools/model_converters/san2mmseg.py @@ -0,0 +1,220 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_key_name(ckpt): + new_ckpt = OrderedDict() + + for k, v in ckpt.items(): + key_list = k.split('.') + if key_list[0] == 'clip_visual_extractor': + new_transform_name = 'image_encoder' + if key_list[1] == 'class_embedding': + new_name = '.'.join([new_transform_name, 'cls_token']) + elif key_list[1] == 'positional_embedding': + new_name = '.'.join([new_transform_name, 'pos_embed']) + elif key_list[1] == 'conv1': + new_name = '.'.join([ + new_transform_name, 'patch_embed.projection', key_list[2] + ]) + elif key_list[1] == 'ln_pre': + new_name = '.'.join( + [new_transform_name, key_list[1], key_list[2]]) + elif key_list[1] == 'resblocks': + new_layer_name = 'layers' + layer_index = key_list[2] + paras = key_list[3:] + if paras[0] == 'ln_1': + new_para_name = '.'.join(['ln1'] + key_list[4:]) + elif paras[0] == 'attn': + new_para_name = '.'.join(['attn.attn'] + key_list[4:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['ln2'] + key_list[4:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffn.layers.0.0'] + + key_list[-1:]) + else: + new_para_name = '.'.join(['ffn.layers.1'] + + key_list[-1:]) + new_name = '.'.join([ + new_transform_name, new_layer_name, layer_index, + new_para_name + ]) + elif key_list[0] == 'side_adapter_network': + decode_head_name = 'decode_head' + module_name = 'side_adapter_network' + if key_list[1] == 'vit_model': + if key_list[2] == 'blocks': + layer_name = 'encode_layers' + layer_index = key_list[3] + paras = key_list[4:] + if paras[0] == 'norm1': + new_para_name = '.'.join(['ln1'] + key_list[5:]) + elif paras[0] == 'attn': + new_para_name = '.'.join(key_list[4:]) + new_para_name = new_para_name.replace( + 'attn.qkv.', 
'attn.attn.in_proj_') + new_para_name = new_para_name.replace( + 'attn.proj', 'attn.attn.out_proj') + elif paras[0] == 'norm2': + new_para_name = '.'.join(['ln2'] + key_list[5:]) + elif paras[0] == 'mlp': + new_para_name = '.'.join(['ffn'] + key_list[5:]) + new_para_name = new_para_name.replace( + 'fc1', 'layers.0.0') + new_para_name = new_para_name.replace( + 'fc2', 'layers.1') + else: + print(f'Wrong for {k}') + new_name = '.'.join([ + decode_head_name, module_name, layer_name, layer_index, + new_para_name + ]) + elif key_list[2] == 'pos_embed': + new_name = '.'.join( + [decode_head_name, module_name, 'pos_embed']) + elif key_list[2] == 'patch_embed': + new_name = '.'.join([ + decode_head_name, module_name, 'patch_embed', + 'projection', key_list[4] + ]) + else: + print(f'Wrong for {k}') + elif key_list[1] == 'query_embed' or key_list[ + 1] == 'query_pos_embed': + new_name = '.'.join( + [decode_head_name, module_name, key_list[1]]) + elif key_list[1] == 'fusion_layers': + layer_name = 'conv_clips' + layer_index = key_list[2][-1] + paras = '.'.join(key_list[3:]) + new_para_name = paras.replace('input_proj.0', '0') + new_para_name = new_para_name.replace('input_proj.1', '1.conv') + new_name = '.'.join([ + decode_head_name, module_name, layer_name, layer_index, + new_para_name + ]) + elif key_list[1] == 'mask_decoder': + new_name = 'decode_head.' + k + else: + print(f'Wrong for {k}') + elif key_list[0] == 'clip_rec_head': + module_name = 'rec_with_attnbias' + if key_list[1] == 'proj': + new_name = '.'.join( + [decode_head_name, module_name, 'proj.weight']) + elif key_list[1] == 'ln_post': + new_name = '.'.join( + [decode_head_name, module_name, 'ln_post', key_list[2]]) + elif key_list[1] == 'resblocks': + new_layer_name = 'layers' + layer_index = key_list[2] + paras = key_list[3:] + if paras[0] == 'ln_1': + new_para_name = '.'.join(['norms.0'] + paras[1:]) + elif paras[0] == 'attn': + new_para_name = '.'.join(['attentions.0.attn'] + paras[1:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['norms.1'] + paras[1:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffns.0.layers.0.0'] + + paras[2:]) + elif paras[1] == 'c_proj': + new_para_name = '.'.join(['ffns.0.layers.1'] + + paras[2:]) + else: + print(f'Wrong for {k}') + new_name = '.'.join([ + decode_head_name, module_name, new_layer_name, layer_index, + new_para_name + ]) + else: + print(f'Wrong for {k}') + elif key_list[0] == 'ov_classifier': + text_encoder_name = 'text_encoder' + if key_list[1] == 'transformer': + layer_name = 'transformer' + layer_index = key_list[3] + paras = key_list[4:] + if paras[0] == 'attn': + new_para_name = '.'.join(['attentions.0.attn'] + paras[1:]) + elif paras[0] == 'ln_1': + new_para_name = '.'.join(['norms.0'] + paras[1:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['norms.1'] + paras[1:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffns.0.layers.0.0'] + + paras[2:]) + elif paras[1] == 'c_proj': + new_para_name = '.'.join(['ffns.0.layers.1'] + + paras[2:]) + else: + print(f'Wrong for {k}') + else: + print(f'Wrong for {k}') + new_name = '.'.join([ + text_encoder_name, layer_name, layer_index, new_para_name + ]) + elif key_list[1] in [ + 'positional_embedding', 'text_projection', 'bg_embed', + 'attn_mask', 'logit_scale', 'token_embedding', 'ln_final' + ]: + new_name = k.replace('ov_classifier', 'text_encoder') + else: + print(f'Wrong for {k}') + elif key_list[0] == 'criterion': + new_name = k + else: + print(f'Wrong for 
{k}') + new_ckpt[new_name] = v + return new_ckpt + + +def convert_tensor(ckpt): + cls_token = ckpt['image_encoder.cls_token'] + new_cls_token = cls_token.unsqueeze(0).unsqueeze(0) + ckpt['image_encoder.cls_token'] = new_cls_token + pos_embed = ckpt['image_encoder.pos_embed'] + new_pos_embed = pos_embed.unsqueeze(0) + ckpt['image_encoder.pos_embed'] = new_pos_embed + proj_weight = ckpt['decode_head.rec_with_attnbias.proj.weight'] + new_proj_weight = proj_weight.transpose(1, 0) + ckpt['decode_head.rec_with_attnbias.proj.weight'] = new_proj_weight + return ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + # deit checkpoint + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_key_name(state_dict) + weight = convert_tensor(weight) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/stdc2mmseg.py b/tools/model_converters/stdc2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..6ea3b8342f546692f50a8e3c0b740f881058229c --- /dev/null +++ b/tools/model_converters/stdc2mmseg.py @@ -0,0 +1,71 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_stdc(ckpt, stdc_type): + new_state_dict = {} + if stdc_type == 'STDC1': + stage_lst = ['0', '1', '2.0', '2.1', '3.0', '3.1', '4.0', '4.1'] + else: + stage_lst = [ + '0', '1', '2.0', '2.1', '2.2', '2.3', '3.0', '3.1', '3.2', '3.3', + '3.4', '4.0', '4.1', '4.2' + ] + for k, v in ckpt.items(): + ori_k = k + flag = False + if 'cp.' in k: + k = k.replace('cp.', '') + if 'features.' in k: + num_layer = int(k.split('.')[1]) + feature_key_lst = 'features.' + str(num_layer) + '.' + stages_key_lst = 'stages.' + stage_lst[num_layer] + '.' + k = k.replace(feature_key_lst, stages_key_lst) + flag = True + if 'conv_list' in k: + k = k.replace('conv_list', 'layers') + flag = True + if 'avd_layer.' in k: + if 'avd_layer.0' in k: + k = k.replace('avd_layer.0', 'downsample.conv') + elif 'avd_layer.1' in k: + k = k.replace('avd_layer.1', 'downsample.bn') + flag = True + if flag: + new_state_dict[k] = ckpt[ori_k] + + return new_state_dict + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained STDC1/2 to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + parser.add_argument('type', help='model type: STDC1 or STDC2') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + + assert args.type in ['STDC1', + 'STDC2'], 'STD type should be STDC1 or STDC2!' 
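# [Editor's note: illustration only, not part of the diff] convert_stdc above
# maps the official flat `features.N` indices onto MMSeg's nested stage layout
# through stage_lst, e.g. for STDC1:
#   cp.features.0.conv.weight             -> stages.0.conv.weight
#   cp.features.2.conv_list.0.conv.weight -> stages.2.0.layers.0.conv.weight
# Keys that match none of the handled patterns (flag stays False) are dropped.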
+ weight = convert_stdc(state_dict, args.type) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/swin2mmseg.py b/tools/model_converters/swin2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..d434f9465bbdad6bebc7d5962e8bfaf63c7c9e72 --- /dev/null +++ b/tools/model_converters/swin2mmseg.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_swin(ckpt): + new_ckpt = OrderedDict() + + def correct_unfold_reduction_order(x): + out_channel, in_channel = x.shape + x = x.reshape(out_channel, 4, in_channel // 4) + x = x[:, [0, 2, 1, 3], :].transpose(1, + 2).reshape(out_channel, in_channel) + return x + + def correct_unfold_norm_order(x): + in_channel = x.shape[0] + x = x.reshape(4, in_channel // 4) + x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(in_channel) + return x + + for k, v in ckpt.items(): + if k.startswith('head'): + continue + elif k.startswith('layers'): + new_v = v + if 'attn.' in k: + new_k = k.replace('attn.', 'attn.w_msa.') + elif 'mlp.' in k: + if 'mlp.fc1.' in k: + new_k = k.replace('mlp.fc1.', 'ffn.layers.0.0.') + elif 'mlp.fc2.' in k: + new_k = k.replace('mlp.fc2.', 'ffn.layers.1.') + else: + new_k = k.replace('mlp.', 'ffn.') + elif 'downsample' in k: + new_k = k + if 'reduction.' in k: + new_v = correct_unfold_reduction_order(v) + elif 'norm.' in k: + new_v = correct_unfold_norm_order(v) + else: + new_k = k + new_k = new_k.replace('layers', 'stages', 1) + elif k.startswith('patch_embed'): + new_v = v + if 'proj' in k: + new_k = k.replace('proj', 'projection') + else: + new_k = k + else: + new_v = v + new_k = k + + new_ckpt[new_k] = new_v + + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained swin models to' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_swin(state_dict) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/twins2mmseg.py b/tools/model_converters/twins2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..647d41784aa07468be4b3f2e183064ad55266ad1 --- /dev/null +++ b/tools/model_converters/twins2mmseg.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_twins(args, ckpt): + + new_ckpt = OrderedDict() + + for k, v in list(ckpt.items()): + new_v = v + if k.startswith('head'): + continue + elif k.startswith('patch_embeds'): + if 'proj.' in k: + new_k = k.replace('proj.', 'projection.') + else: + new_k = k + elif k.startswith('blocks'): + # Union + if 'attn.q.' 
in k: + new_k = k.replace('q.', 'attn.in_proj_') + new_v = torch.cat([v, ckpt[k.replace('attn.q.', 'attn.kv.')]], + dim=0) + elif 'mlp.fc1' in k: + new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') + elif 'mlp.fc2' in k: + new_k = k.replace('mlp.fc2', 'ffn.layers.1') + # Only pcpvt + elif args.model == 'pcpvt': + if 'attn.proj.' in k: + new_k = k.replace('proj.', 'attn.out_proj.') + else: + new_k = k + + # Only svt + else: + if 'attn.proj.' in k: + k_lst = k.split('.') + if int(k_lst[2]) % 2 == 1: + new_k = k.replace('proj.', 'attn.out_proj.') + else: + new_k = k + else: + new_k = k + new_k = new_k.replace('blocks.', 'layers.') + elif k.startswith('pos_block'): + new_k = k.replace('pos_block', 'position_encodings') + if 'proj.0.' in new_k: + new_k = new_k.replace('proj.0.', 'proj.') + else: + new_k = k + if 'attn.kv.' not in k: + new_ckpt[new_k] = new_v + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + parser.add_argument('model', help='model: pcpvt or svt') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + weight = convert_twins(args, state_dict) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/vit2mmseg.py b/tools/model_converters/vit2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..1d1f8a427e232290c6dcf490e33f777275dd238a --- /dev/null +++ b/tools/model_converters/vit2mmseg.py @@ -0,0 +1,70 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_vit(ckpt): + + new_ckpt = OrderedDict() + + for k, v in ckpt.items(): + if k.startswith('head'): + continue + if k.startswith('norm'): + new_k = k.replace('norm.', 'ln1.') + elif k.startswith('patch_embed'): + if 'proj' in k: + new_k = k.replace('proj', 'projection') + else: + new_k = k + elif k.startswith('blocks'): + if 'norm' in k: + new_k = k.replace('norm', 'ln') + elif 'mlp.fc1' in k: + new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') + elif 'mlp.fc2' in k: + new_k = k.replace('mlp.fc2', 'ffn.layers.1') + elif 'attn.qkv' in k: + new_k = k.replace('attn.qkv.', 'attn.attn.in_proj_') + elif 'attn.proj' in k: + new_k = k.replace('attn.proj', 'attn.attn.out_proj') + else: + new_k = k + new_k = new_k.replace('blocks.', 'layers.') + else: + new_k = k + new_ckpt[new_k] = v + + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. 
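# [Editor's note: illustration only, not part of the diff] convert_vit above
# folds timm's fused qkv projection into MMSeg's MultiheadAttention wrapper:
#   blocks.0.attn.qkv.weight -> layers.0.attn.attn.in_proj_weight
#   blocks.0.attn.proj.bias  -> layers.0.attn.attn.out_proj.bias
#   norm.weight              -> ln1.weight  (the final LayerNorm)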
+ parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + # deit checkpoint + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_vit(state_dict) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/vitjax2mmseg.py b/tools/model_converters/vitjax2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..81bc2ea020e32d086fc4ce2153cc2bf51edd4d48 --- /dev/null +++ b/tools/model_converters/vitjax2mmseg.py @@ -0,0 +1,123 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +import mmengine +import numpy as np +import torch + + +def vit_jax_to_torch(jax_weights, num_layer=12): + torch_weights = dict() + + # patch embedding + conv_filters = jax_weights['embedding/kernel'] + conv_filters = conv_filters.permute(3, 2, 0, 1) + torch_weights['patch_embed.projection.weight'] = conv_filters + torch_weights['patch_embed.projection.bias'] = jax_weights[ + 'embedding/bias'] + + # pos embedding + torch_weights['pos_embed'] = jax_weights[ + 'Transformer/posembed_input/pos_embedding'] + + # cls token + torch_weights['cls_token'] = jax_weights['cls'] + + # head + torch_weights['ln1.weight'] = jax_weights['Transformer/encoder_norm/scale'] + torch_weights['ln1.bias'] = jax_weights['Transformer/encoder_norm/bias'] + + # transformer blocks + for i in range(num_layer): + jax_block = f'Transformer/encoderblock_{i}' + torch_block = f'layers.{i}' + + # attention norm + torch_weights[f'{torch_block}.ln1.weight'] = jax_weights[ + f'{jax_block}/LayerNorm_0/scale'] + torch_weights[f'{torch_block}.ln1.bias'] = jax_weights[ + f'{jax_block}/LayerNorm_0/bias'] + + # attention + query_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/query/kernel'] + query_bias = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/query/bias'] + key_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/key/kernel'] + key_bias = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/key/bias'] + value_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/value/kernel'] + value_bias = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/value/bias'] + + qkv_weight = torch.from_numpy( + np.stack((query_weight, key_weight, value_weight), 1)) + qkv_weight = torch.flatten(qkv_weight, start_dim=1) + qkv_bias = torch.from_numpy( + np.stack((query_bias, key_bias, value_bias), 0)) + qkv_bias = torch.flatten(qkv_bias, start_dim=0) + + torch_weights[f'{torch_block}.attn.attn.in_proj_weight'] = qkv_weight + torch_weights[f'{torch_block}.attn.attn.in_proj_bias'] = qkv_bias + to_out_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/out/kernel'] + to_out_weight = torch.flatten(to_out_weight, start_dim=0, end_dim=1) + torch_weights[ + f'{torch_block}.attn.attn.out_proj.weight'] = to_out_weight + torch_weights[f'{torch_block}.attn.attn.out_proj.bias'] = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/out/bias'] + + # mlp norm + torch_weights[f'{torch_block}.ln2.weight'] = jax_weights[ + f'{jax_block}/LayerNorm_2/scale'] + torch_weights[f'{torch_block}.ln2.bias'] = jax_weights[ + f'{jax_block}/LayerNorm_2/bias'] + + # mlp + 
torch_weights[f'{torch_block}.ffn.layers.0.0.weight'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_0/kernel'] + torch_weights[f'{torch_block}.ffn.layers.0.0.bias'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_0/bias'] + torch_weights[f'{torch_block}.ffn.layers.1.weight'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_1/kernel'] + torch_weights[f'{torch_block}.ffn.layers.1.bias'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_1/bias'] + + # transpose weights + for k, v in torch_weights.items(): + if 'weight' in k and 'patch_embed' not in k and 'ln' not in k: + v = v.permute(1, 0) + torch_weights[k] = v + + return torch_weights + + +def main(): + # stole refactoring code from Robin Strudel, thanks + parser = argparse.ArgumentParser( + description='Convert keys from jax official pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + jax_weights = np.load(args.src) + jax_weights_tensor = {} + for key in jax_weights.files: + value = torch.from_numpy(jax_weights[key]) + jax_weights_tensor[key] = value + if 'L_16-i21k' in args.src: + num_layer = 24 + else: + num_layer = 12 + torch_weights = vit_jax_to_torch(jax_weights_tensor, num_layer) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(torch_weights, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/slurm_test.sh b/tools/slurm_test.sh new file mode 100755 index 0000000000000000000000000000000000000000..4e6f7bf4e33267f269cf0f455924cb70166ccd4b --- /dev/null +++ b/tools/slurm_test.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -x + +PARTITION=$1 +JOB_NAME=$2 +CONFIG=$3 +CHECKPOINT=$4 +GPUS=${GPUS:-4} +GPUS_PER_NODE=${GPUS_PER_NODE:-4} +CPUS_PER_TASK=${CPUS_PER_TASK:-5} +PY_ARGS=${@:5} +SRUN_ARGS=${SRUN_ARGS:-""} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +srun -p ${PARTITION} \ + --job-name=${JOB_NAME} \ + --gres=gpu:${GPUS_PER_NODE} \ + --ntasks=${GPUS} \ + --ntasks-per-node=${GPUS_PER_NODE} \ + --cpus-per-task=${CPUS_PER_TASK} \ + --kill-on-bad-exit=1 \ + ${SRUN_ARGS} \ + python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} diff --git a/tools/slurm_train.sh b/tools/slurm_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..ab232105f0309c720ed81a522eca14b6fbd64afd --- /dev/null +++ b/tools/slurm_train.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +set -x + +PARTITION=$1 +JOB_NAME=$2 +CONFIG=$3 +GPUS=${GPUS:-4} +GPUS_PER_NODE=${GPUS_PER_NODE:-4} +CPUS_PER_TASK=${CPUS_PER_TASK:-5} +SRUN_ARGS=${SRUN_ARGS:-""} +PY_ARGS=${@:4} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +srun -p ${PARTITION} \ + --job-name=${JOB_NAME} \ + --gres=gpu:${GPUS_PER_NODE} \ + --ntasks=${GPUS} \ + --ntasks-per-node=${GPUS_PER_NODE} \ + --cpus-per-task=${CPUS_PER_TASK} \ + --kill-on-bad-exit=1 \ + ${SRUN_ARGS} \ + python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} diff --git a/tools/test.py b/tools/test.py new file mode 100644 index 0000000000000000000000000000000000000000..0d7f39b3a8b5f94d33ce8529755013451184d5ed --- /dev/null +++ b/tools/test.py @@ -0,0 +1,123 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
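# [Editor's note: illustration only, not part of the diff] On vit_jax_to_torch
# above: Flax Dense kernels are stored as (in_features, out_features), while
# torch.nn.Linear stores (out_features, in_features), hence the final
# permute(1, 0) sweep over every 'weight' tensor outside patch_embed/ln keys;
# e.g. a (768, 3072) MlpBlock Dense_0 kernel becomes a (3072, 768)
# ffn.layers.0.0.weight.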
+import argparse +import os +import os.path as osp + +from mmengine.config import Config, DictAction +from mmengine.runner import Runner + + +# TODO: support fuse_conv_bn, visualization, and format_only +def parse_args(): + parser = argparse.ArgumentParser( + description='MMSeg test (and eval) a model') + parser.add_argument('config', help='train config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--work-dir', + help=('if specified, the evaluation metric results will be dumped' + 'into the directory as json')) + parser.add_argument( + '--out', + type=str, + help='The directory to save output prediction for offline evaluation') + parser.add_argument( + '--show', action='store_true', help='show prediction results') + parser.add_argument( + '--show-dir', + help='directory where painted images will be saved. ' + 'If specified, it will be automatically saved ' + 'to the work_dir/timestamp/show_dir') + parser.add_argument( + '--wait-time', type=float, default=2, help='the interval of show (s)') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument( + '--tta', action='store_true', help='Test time augmentation') + # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` + # will pass the `--local-rank` parameter to `tools/train.py` instead + # of `--local_rank`. + parser.add_argument('--local_rank', '--local-rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + return args + + +def trigger_visualization_hook(cfg, args): + default_hooks = cfg.default_hooks + if 'visualization' in default_hooks: + visualization_hook = default_hooks['visualization'] + # Turn on visualization + visualization_hook['draw'] = True + if args.show: + visualization_hook['show'] = True + visualization_hook['wait_time'] = args.wait_time + if args.show_dir: + visualizer = cfg.visualizer + visualizer['save_dir'] = args.show_dir + else: + raise RuntimeError( + 'VisualizationHook must be included in default_hooks.' 
+ 'refer to usage ' + '"visualization=dict(type=\'VisualizationHook\')"') + + return cfg + + +def main(): + args = parse_args() + + # load config + cfg = Config.fromfile(args.config) + cfg.launcher = args.launcher + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # work_dir is determined in this priority: CLI > segment in file > filename + if args.work_dir is not None: + # update configs according to CLI args if args.work_dir is not None + cfg.work_dir = args.work_dir + elif cfg.get('work_dir', None) is None: + # use config filename as default work_dir if cfg.work_dir is None + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + + cfg.load_from = args.checkpoint + + if args.show or args.show_dir: + cfg = trigger_visualization_hook(cfg, args) + + if args.tta: + cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline + cfg.tta_model.module = cfg.model + cfg.model = cfg.tta_model + + # add output_dir in metric + if args.out is not None: + cfg.test_evaluator['output_dir'] = args.out + cfg.test_evaluator['keep_results'] = True + + # build the runner from config + runner = Runner.from_cfg(cfg) + + # start testing + runner.test() + + +if __name__ == '__main__': + main() diff --git a/tools/torchserve/mmseg2torchserve.py b/tools/torchserve/mmseg2torchserve.py new file mode 100644 index 0000000000000000000000000000000000000000..23f99638e799fd0b37a6737cc833dd7d24f611f8 --- /dev/null +++ b/tools/torchserve/mmseg2torchserve.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from argparse import ArgumentParser, Namespace +from pathlib import Path +from tempfile import TemporaryDirectory + +from mmengine import Config +from mmengine.utils import mkdir_or_exist + +try: + from model_archiver.model_packaging import package_model + from model_archiver.model_packaging_utils import ModelExportUtils +except ImportError: + package_model = None + + +def mmseg2torchserve( + config_file: str, + checkpoint_file: str, + output_folder: str, + model_name: str, + model_version: str = '1.0', + force: bool = False, +): + """Converts mmsegmentation model (config + checkpoint) to TorchServe + `.mar`. + + Args: + config_file: + In MMSegmentation config format. + The contents vary for each task repository. + checkpoint_file: + In MMSegmentation checkpoint format. + The contents vary for each task repository. + output_folder: + Folder where `{model_name}.mar` will be created. + The file created will be in TorchServe archive format. + model_name: + If not None, used for naming the `{model_name}.mar` file + that will be created under `output_folder`. + If None, `{Path(checkpoint_file).stem}` will be used. + model_version: + Model's version. + force: + If True, if there is an existing `{model_name}.mar` + file under `output_folder` it will be overwritten. 
+ """ + mkdir_or_exist(output_folder) + + config = Config.fromfile(config_file) + + with TemporaryDirectory() as tmpdir: + config.dump(f'{tmpdir}/config.py') + + args = Namespace( + **{ + 'model_file': f'{tmpdir}/config.py', + 'serialized_file': checkpoint_file, + 'handler': f'{Path(__file__).parent}/mmseg_handler.py', + 'model_name': model_name or Path(checkpoint_file).stem, + 'version': model_version, + 'export_path': output_folder, + 'force': force, + 'requirements_file': None, + 'extra_files': None, + 'runtime': 'python', + 'archive_format': 'default' + }) + manifest = ModelExportUtils.generate_manifest_json(args) + package_model(args, manifest) + + +def parse_args(): + parser = ArgumentParser( + description='Convert mmseg models to TorchServe `.mar` format.') + parser.add_argument('config', type=str, help='config file path') + parser.add_argument('checkpoint', type=str, help='checkpoint file path') + parser.add_argument( + '--output-folder', + type=str, + required=True, + help='Folder where `{model_name}.mar` will be created.') + parser.add_argument( + '--model-name', + type=str, + default=None, + help='If not None, used for naming the `{model_name}.mar`' + 'file that will be created under `output_folder`.' + 'If None, `{Path(checkpoint_file).stem}` will be used.') + parser.add_argument( + '--model-version', + type=str, + default='1.0', + help='Number used for versioning.') + parser.add_argument( + '-f', + '--force', + action='store_true', + help='overwrite the existing `{model_name}.mar`') + args = parser.parse_args() + + return args + + +if __name__ == '__main__': + args = parse_args() + + if package_model is None: + raise ImportError('`torch-model-archiver` is required.' + 'Try: pip install torch-model-archiver') + + mmseg2torchserve(args.config, args.checkpoint, args.output_folder, + args.model_name, args.model_version, args.force) diff --git a/tools/torchserve/mmseg_handler.py b/tools/torchserve/mmseg_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..dbe5ded8482c1113a6adb45a22b650af71f6294e --- /dev/null +++ b/tools/torchserve/mmseg_handler.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import base64 +import os + +import cv2 +import mmcv +import torch +from mmengine.model.utils import revert_sync_batchnorm +from ts.torch_handler.base_handler import BaseHandler + +from mmseg.apis import inference_model, init_model + + +class MMsegHandler(BaseHandler): + + def initialize(self, context): + properties = context.system_properties + self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' + self.device = torch.device(self.map_location + ':' + + str(properties.get('gpu_id')) if torch.cuda. 
+ is_available() else self.map_location) + self.manifest = context.manifest + + model_dir = properties.get('model_dir') + serialized_file = self.manifest['model']['serializedFile'] + checkpoint = os.path.join(model_dir, serialized_file) + self.config_file = os.path.join(model_dir, 'config.py') + + self.model = init_model(self.config_file, checkpoint, self.device) + self.model = revert_sync_batchnorm(self.model) + self.initialized = True + + def preprocess(self, data): + images = [] + + for row in data: + image = row.get('data') or row.get('body') + if isinstance(image, str): + image = base64.b64decode(image) + image = mmcv.imfrombytes(image) + images.append(image) + + return images + + def inference(self, data, *args, **kwargs): + results = [inference_model(self.model, img) for img in data] + return results + + def postprocess(self, data): + output = [] + + for image_result in data: + _, buffer = cv2.imencode('.png', image_result[0].astype('uint8')) + content = buffer.tobytes() + output.append(content) + return output diff --git a/tools/torchserve/test_torchserve.py b/tools/torchserve/test_torchserve.py new file mode 100644 index 0000000000000000000000000000000000000000..b015b6658556e5045af2daf5d998de0de61e1f6b --- /dev/null +++ b/tools/torchserve/test_torchserve.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from argparse import ArgumentParser +from io import BytesIO + +import matplotlib.pyplot as plt +import mmcv +import requests + +from mmseg.apis import inference_model, init_model + + +def parse_args(): + parser = ArgumentParser( + description='Compare result of torchserve and pytorch,' + 'and visualize them.') + parser.add_argument('img', help='Image file') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument('model_name', help='The model name in the server') + parser.add_argument( + '--inference-addr', + default='127.0.0.1:8080', + help='Address and port of the inference server') + parser.add_argument( + '--result-image', + type=str, + default=None, + help='save server output in result-image') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + + args = parser.parse_args() + return args + + +def main(args): + url = 'http://' + args.inference_addr + '/predictions/' + args.model_name + with open(args.img, 'rb') as image: + tmp_res = requests.post(url, image) + content = tmp_res.content + if args.result_image: + with open(args.result_image, 'wb') as out_image: + out_image.write(content) + plt.imshow(mmcv.imread(args.result_image, 'grayscale')) + plt.show() + else: + plt.imshow(plt.imread(BytesIO(content))) + plt.show() + model = init_model(args.config, args.checkpoint, args.device) + image = mmcv.imread(args.img) + result = inference_model(model, image) + plt.imshow(result[0]) + plt.show() + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/tools/train.py b/tools/train.py new file mode 100644 index 0000000000000000000000000000000000000000..8ed078cadbb4cbbc7226b154968b5340b6df6204 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,143 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
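# [Editor's sketch: illustration only, not part of the diff] Exercising the
# TorchServe tools above once a .mar has been archived and served; the model
# name "my_segmentor" and demo.png are hypothetical:
#   import requests
#   with open('demo.png', 'rb') as f:
#       resp = requests.post(
#           'http://127.0.0.1:8080/predictions/my_segmentor', f)
#   # resp.content is a PNG-encoded mask, per MMsegHandler.postprocess above.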
+import argparse +import logging +import os +import os.path as osp + +from mmengine.config import Config, DictAction +from mmengine.logging import print_log +from mmengine.runner import Runner + +from mmseg.registry import RUNNERS + +import torch +import json +import numpy as np + +def initialize_model_stitching_layer(model, data_loader): + # feed a small calibration batch through the backbone to initialize the + # stitching-layer weights of SN-Netv2 + dataiter = iter(data_loader) + images = [] + total_samples = 50 + batch_size = data_loader.batch_size + num_iter = total_samples // batch_size + for i in range(num_iter): + item = next(dataiter) + images.append(item['img'].data[0]) + images = torch.cat(images, dim=0) + samples = images.to(model.device, non_blocking=True) + model.backbone.initialize_stitching_weights(samples) + +def group_subnets_by_flops(data, flops_step=10): + # group config ids by GFLOPs: a new group starts once the gap to the + # first config of the current group exceeds `flops_step` GFLOPs + sorted_data = {k: v for k, v in sorted(data.items(), key=lambda item: item[1])} + candidate_idx = [] + grouped_cands = [] + last_flops = 0 + for cfg_id, flops in sorted_data.items(): + flops = flops // 1e9 + if abs(last_flops - flops) > flops_step: + if len(candidate_idx) > 0: + grouped_cands.append(candidate_idx) + candidate_idx = [int(cfg_id)] + last_flops = flops + else: + candidate_idx.append(int(cfg_id)) + + if len(candidate_idx) > 0: + grouped_cands.append(candidate_idx) + + return grouped_cands + +def parse_args(): + parser = argparse.ArgumentParser(description='Train a segmentor') + parser.add_argument('config', help='train config file path') + parser.add_argument('--work-dir', help='the dir to save logs and models') + parser.add_argument( + '--resume', + action='store_true', + default=False, + help='resume from the latest checkpoint in the work_dir automatically') + parser.add_argument( + '--amp', + action='store_true', + default=False, + help='enable automatic-mixed-precision training') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` + # will pass the `--local-rank` parameter to `tools/train.py` instead + # of `--local_rank`.
+ parser.add_argument('--local_rank', '--local-rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + return args + + +def main(): + args = parse_args() + + # load config + cfg = Config.fromfile(args.config) + cfg.launcher = args.launcher + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # work_dir is determined in this priority: CLI > segment in file > filename + if args.work_dir is not None: + # update configs according to CLI args if args.work_dir is not None + cfg.work_dir = args.work_dir + elif cfg.get('work_dir', None) is None: + # use config filename as default work_dir if cfg.work_dir is None + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + + # enable automatic-mixed-precision training + if args.amp is True: + optim_wrapper = cfg.optim_wrapper.type + if optim_wrapper == 'AmpOptimWrapper': + print_log( + 'AMP training is already enabled in your config.', + logger='current', + level=logging.WARNING) + else: + assert optim_wrapper == 'OptimWrapper', ( + '`--amp` is only supported when the optimizer wrapper type is ' + f'`OptimWrapper` but got {optim_wrapper}.') + cfg.optim_wrapper.type = 'AmpOptimWrapper' + cfg.optim_wrapper.loss_scale = 'dynamic' + + # resume training + cfg.resume = args.resume + + # build the runner from config + if 'runner_type' not in cfg: + # build the default runner + runner = Runner.from_cfg(cfg) + else: + # build customized runner from the registry + # if 'runner_type' is set in the cfg + runner = RUNNERS.build(cfg) + + # start training + runner.train() + + +if __name__ == '__main__': + main()
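# [Editor's sketch: illustration only, not part of the diff] What
# group_subnets_by_flops in tools/train.py above returns for hypothetical
# entries (values are raw FLOPs; the helper floor-divides by 1e9 before
# comparing against flops_step):
#   data = {'0': 30e9, '1': 33e9, '2': 52e9, '3': 55e9}
#   group_subnets_by_flops(data, flops_step=10)  # -> [[0, 1], [2, 3]]
# A new group opens once the gap to the first config of the current group
# exceeds 10 GFLOPs.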