AlexShmak committed on
Commit
517fc39
·
verified ·
1 Parent(s): 684f5a0

Add models implementation and training outputs

Browse files
Files changed (28) hide show
  1. faster_rcnn_R_101_FPN_3x/eval.py +44 -0
  2. faster_rcnn_R_101_FPN_3x/evaluation.txt +33 -0
  3. faster_rcnn_R_101_FPN_3x/test.py +29 -0
  4. faster_rcnn_R_101_FPN_3x/train.py +30 -0
  5. faster_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726006373.Legion.31775.0 +3 -0
  6. faster_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726406883.Legion.19405.0 +3 -0
  7. faster_rcnn_R_101_FPN_3x/training_output/training_output/last_checkpoint +1 -0
  8. faster_rcnn_R_101_FPN_3x/training_output/training_output/metrics.json +0 -0
  9. faster_rcnn_R_101_FPN_3x/training_output/training_output/model_0004999.pth +3 -0
  10. faster_rcnn_R_101_FPN_3x/training_output/training_output/model_final.pth +3 -0
  11. faster_rcnn_R_101_FPN_3x/utils.py +114 -0
  12. mask_rcnn_R_101_FPN_3x/eval.py +44 -0
  13. mask_rcnn_R_101_FPN_3x/evaluation.txt +62 -0
  14. mask_rcnn_R_101_FPN_3x/test.py +29 -0
  15. mask_rcnn_R_101_FPN_3x/train.py +30 -0
  16. mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1725897998.Legion.47938.0 +3 -0
  17. mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726172796.Legion.3380.0 +3 -0
  18. mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726421934.Legion.80102.0 +3 -0
  19. mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726422787.Legion.1493.0 +3 -0
  20. mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726423456.Legion.4616.0 +3 -0
  21. mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726423883.Legion.6492.0 +3 -0
  22. mask_rcnn_R_101_FPN_3x/training_output/training_output/last_checkpoint +1 -0
  23. mask_rcnn_R_101_FPN_3x/training_output/training_output/metrics.json +0 -0
  24. mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0004999.pth +3 -0
  25. mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0009999.pth +3 -0
  26. mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0014999.pth +3 -0
  27. mask_rcnn_R_101_FPN_3x/training_output/training_output/model_final.pth +3 -0
  28. mask_rcnn_R_101_FPN_3x/utils.py +117 -0
faster_rcnn_R_101_FPN_3x/eval.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Run COCO-style evaluation of the trained Faster R-CNN R101-FPN model on PubLayNet."""

from detectron2.data import DatasetCatalog, DatasetMapper
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from utils import (
    build_config,
    register_publaynet_datasets,
)

# Make the PubLayNet splits known to detectron2's catalogs.
train_name, test_name = register_publaynet_datasets()

# Model parameters (the solver values are unused at eval time, but
# build_config's signature requires them).
config_name = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/faster_rcnn_R_101_FPN_3x/training_output"
score_threshold = 0.7
lr = 0.001
iterations = 500
roi_batch_size = 128

# Assemble the detectron2 config pointing at the trained weights.
cfg = build_config(
    config_name,
    train_name,
    test_name,
    output_dir,
    score_threshold,
    lr,
    iterations,
    roi_batch_size,
)

# Wrap the trained model in a predictor.
predictor = DefaultPredictor(cfg)

# Build a test-time loader over the test split and run COCO evaluation.
test_loader = build_detection_test_loader(
    dataset=DatasetCatalog.get(test_name),
    mapper=DatasetMapper(cfg, is_train=False),
)
inference_on_dataset(
    model=predictor.model,
    data_loader=test_loader,
    evaluator=COCOEvaluator(test_name),
)
faster_rcnn_R_101_FPN_3x/evaluation.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [09/20 16:54:58 d2.evaluation.evaluator]: Total inference time: 0:19:19.760338 (0.103182 s / iter per device, on 1 devices)
2
+ [09/20 16:54:58 d2.evaluation.evaluator]: Total inference pure compute time: 0:15:54 (0.084938 s / iter per device, on 1 devices)
3
+ [09/20 16:54:58 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
4
+ [09/20 16:54:58 d2.evaluation.coco_evaluation]: Evaluating predictions with unofficial COCO API...
5
+ Loading and preparing results...
6
+ DONE (t=0.40s)
7
+ creating index...
8
+ index created!
9
+ [09/20 16:54:59 d2.evaluation.fast_eval_api]: Evaluate annotation type *bbox*
10
+ [09/20 16:55:01 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 2.51 seconds.
11
+ [09/20 16:55:01 d2.evaluation.fast_eval_api]: Accumulating evaluation results...
12
+ [09/20 16:55:02 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.39 seconds.
13
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.843
14
+ Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.940
15
+ Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.912
16
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.251
17
+ Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.602
18
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.894
19
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.503
20
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.871
21
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.880
22
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.270
23
+ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.650
24
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.927
25
+ [09/20 16:55:02 d2.evaluation.coco_evaluation]: Evaluation results for bbox:
26
+ | AP | AP50 | AP75 | APs | APm | APl |
27
+ |:------:|:------:|:------:|:------:|:------:|:------:|
28
+ | 84.295 | 94.048 | 91.199 | 25.061 | 60.217 | 89.431 |
29
+ [09/20 16:55:02 d2.evaluation.coco_evaluation]: Per-category bbox AP:
30
+ | category | AP | category | AP | category | AP |
31
+ |:-----------|:-------|:-----------|:-------|:-----------|:-------|
32
+ | text | 89.249 | title | 76.824 | list | 77.612 |
33
+ | table | 92.250 | figure | 85.539 | | |
faster_rcnn_R_101_FPN_3x/test.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Visual smoke test: run the trained Faster R-CNN model on a few sample pages."""

from detectron2.engine import DefaultPredictor
from utils import build_config, register_publaynet_datasets, visual_test

# Make the PubLayNet splits known to detectron2's catalogs.
train_name, test_name = register_publaynet_datasets()

# Model parameters (the solver values are unused for prediction, but
# build_config's signature requires them).
config_name = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/faster_rcnn_R_101_FPN_3x/training_output"
score_threshold = 0.7
lr = 0.001
iterations = 50
roi_batch_size = 128

# Assemble the detectron2 config pointing at the trained weights.
cfg = build_config(
    config_name,
    train_name,
    test_name,
    output_dir,
    score_threshold,
    lr,
    iterations,
    roi_batch_size,
)

# Predict on a fixed set of sample images and write visualizations to disk.
visual_test(cfg, DefaultPredictor(cfg))
faster_rcnn_R_101_FPN_3x/train.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Fine-tune Faster R-CNN R101-FPN on PubLayNet using detectron2's DefaultTrainer."""

from detectron2.engine import DefaultTrainer
from utils import build_config, register_publaynet_datasets

# Make the PubLayNet splits known to detectron2's catalogs.
train_name, test_name = register_publaynet_datasets()

# Model / solver parameters for this run.
config_name = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/faster_rcnn_R_101_FPN_3x/training_output"
score_threshold = 0.7
lr = 0.00001
iterations = 8000
roi_batch_size = 128

# Assemble the detectron2 config.
cfg = build_config(
    config_name,
    train_name,
    test_name,
    output_dir,
    score_threshold,
    lr,
    iterations,
    roi_batch_size,
)

# resume=False: start a fresh run from cfg.MODEL.WEIGHTS instead of
# resuming iteration state from a previous checkpoint.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
faster_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726006373.Legion.31775.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31f639edc57729dc50b05d14ce2101eda3bfb966037a64542bbef8d394cd7f55
3
+ size 91186
faster_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726406883.Legion.19405.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3b2bd51b5d8529ecff5fb5cf2999aa052a1c4958972c0f3728528484813dc02
3
+ size 364786
faster_rcnn_R_101_FPN_3x/training_output/training_output/last_checkpoint ADDED
@@ -0,0 +1 @@
 
 
1
+ model_final.pth
faster_rcnn_R_101_FPN_3x/training_output/training_output/metrics.json ADDED
The diff for this file is too large to render. See raw diff
 
faster_rcnn_R_101_FPN_3x/training_output/training_output/model_0004999.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5854879a3f88d2ccc9ac871a4dee4a79ad3d0d7e73dae646d031ec1d2eb4901e
3
+ size 482234736
faster_rcnn_R_101_FPN_3x/training_output/training_output/model_final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e58306b214ec6c7fef0e5daa14eac84a546d2313322c2dcd4946a6338f02589
3
+ size 482234736
faster_rcnn_R_101_FPN_3x/utils.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from detectron2.utils.visualizer import Visualizer
3
+ import cv2
4
+ import numpy
5
+ from PIL.Image import Image
6
+ from PIL import Image as image_main
7
+ from detectron2.utils.logger import setup_logger
8
+
9
+ setup_logger()
10
+ from detectron2 import model_zoo
11
+ from detectron2.config import get_cfg, CfgNode
12
+ from detectron2.data import MetadataCatalog
13
+ from detectron2.data.datasets.register_coco import register_coco_instances
14
+
15
+
16
def open_image_pil(image_path: str) -> Image:
    """Load the file at *image_path* and return it as a PIL image."""
    return image_main.open(image_path)
18
+
19
+
20
def convert_pil_to_cv(pil_image: Image):
    """Convert a PIL image into an OpenCV-style BGR numpy array.

    Non-RGB images (e.g. grayscale or palette mode) are converted to RGB first.
    """
    rgb = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")
    return cv2.cvtColor(numpy.array(rgb), cv2.COLOR_RGB2BGR)
24
+
25
+
26
def register_publaynet_datasets() -> "tuple[str, str]":
    """Register the PubLayNet train/val splits as COCO-format datasets.

    Returns:
        The pair ``(train_dataset_name, test_dataset_name)`` under which the
        splits were registered in detectron2's catalogs.
    """
    dataset_train_name = "publaynet_dataset_train"
    dataset_test_name = "publaynet_dataset_test"
    class_labels = ["text", "title", "list", "table", "figure"]
    register_coco_instances(
        dataset_train_name,
        {},
        "/home/alex/Datasets/PubLayNet/publaynet/train.json",
        "/home/alex/Datasets/PubLayNet/publaynet/train",
    )
    register_coco_instances(
        dataset_test_name,
        {},
        "/home/alex/Datasets/PubLayNet/publaynet/val.json",
        "/home/alex/Datasets/PubLayNet/publaynet/val",
    )

    # Sanity check: printing the metadata confirms registration succeeded.
    print(MetadataCatalog.get(dataset_train_name))
    print(MetadataCatalog.get(dataset_test_name))

    # Attach human-readable class labels (used by the Visualizer overlays).
    MetadataCatalog.get(dataset_train_name).thing_classes = class_labels
    MetadataCatalog.get(dataset_test_name).thing_classes = class_labels

    return dataset_train_name, dataset_test_name
54
+
55
+
56
def build_config(
    model_zoo_config_name: str,
    dataset_train_name: str,
    dataset_test_name: str,
    trained_model_output_dir: str,
    prediction_score_threshold: float,
    base_lr: float,
    max_iter: int,
    batch_size: int,
) -> CfgNode:
    """Assemble the detectron2 config for PubLayNet training and inference.

    Starts from the given model-zoo config, points it at the registered
    datasets, and — when a previously trained checkpoint already exists in
    the output directory — uses that checkpoint as the weight source.
    """
    weights_path = trained_model_output_dir + "/model_final.pth"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_zoo_config_name))

    # Datasets, output location, and data loading.
    cfg.DATASETS.TRAIN = (dataset_train_name,)
    cfg.DATASETS.TEST = (dataset_test_name,)
    cfg.OUTPUT_DIR = trained_model_output_dir
    cfg.DATALOADER.NUM_WORKERS = 8

    # Prefer locally trained weights when a finished checkpoint is present.
    if os.path.exists(weights_path):
        cfg.MODEL.WEIGHTS = weights_path

    # Solver and ROI-head settings.
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = prediction_score_threshold
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = base_lr
    cfg.SOLVER.MAX_ITER = max_iter
    cfg.SOLVER.STEPS = []  # no LR decay steps: constant learning rate
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5
    cfg.TEST.DETECTIONS_PER_IMAGE = 100

    # Training-time multi-scale shortest-edge sizes; earlier experiments
    # kept for reference:
    # cfg.INPUT.MIN_SIZE_TRAIN = (640, 672, 704, 736, 768, 800)
    # cfg.INPUT.MIN_SIZE_TRAIN = (600, 632, 664, 696, 728, 760)
    cfg.INPUT.MIN_SIZE_TRAIN = (580, 612, 644, 676, 708, 740)
    return cfg
88
+
89
+
90
def visualize_outputs(cfg, image_cv, outputs, name):
    """Draw predicted instances onto a BGR image and save the result as *name*."""
    metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    # Visualizer expects RGB, so flip channel order going in and coming out.
    drawer = Visualizer(image_cv[:, :, ::-1], metadata, scale=1.2)
    rendered = drawer.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imwrite(name, rendered.get_image()[:, :, ::-1])
97
+
98
+
99
def visual_test(cfg: CfgNode, predictor: "DefaultPredictor"):
    """Run *predictor* on a fixed set of PubLayNet pages and save visualizations.

    Each page is written to the working directory as ``image_<i>.jpg`` with
    the predicted instances drawn on top.

    Note: the annotation is quoted because this module does not import
    DefaultPredictor; an unquoted name would raise NameError at import time.
    """
    # Hand-picked samples from both the train and val splits.
    image_paths = [
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC1500815_00002.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC3162874_00002.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC4203354_00000.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC1247188_00003.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC2829689_00004.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC4520132_00000.jpg",
    ]

    for i, image_path in enumerate(image_paths):
        print("Testing on " + image_path)
        image_cv = convert_pil_to_cv(open_image_pil(image_path))
        outputs = predictor(image_cv)
        visualize_outputs(cfg, image_cv, outputs, f"image_{i}.jpg")
mask_rcnn_R_101_FPN_3x/eval.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Run COCO-style evaluation of the trained Mask R-CNN R101-FPN model on PubLayNet."""

from detectron2.data import DatasetCatalog, DatasetMapper
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from utils import (
    build_config,
    register_publaynet_datasets,
)

# Make the PubLayNet splits known to detectron2's catalogs.
train_name, test_name = register_publaynet_datasets()

# Model parameters (the solver values are unused at eval time, but
# build_config's signature requires them).
config_name = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"
output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/mask_rcnn_R_101_FPN_3x/training_output"
score_threshold = 0.7
lr = 0.0001
iterations = 1000
roi_batch_size = 128

# Assemble the detectron2 config pointing at the trained weights.
cfg = build_config(
    config_name,
    train_name,
    test_name,
    output_dir,
    score_threshold,
    lr,
    iterations,
    roi_batch_size,
)

# Wrap the trained model in a predictor.
predictor = DefaultPredictor(cfg)

# Build a test-time loader over the test split and run COCO evaluation.
test_loader = build_detection_test_loader(
    dataset=DatasetCatalog.get(test_name),
    mapper=DatasetMapper(cfg, is_train=False),
)
inference_on_dataset(
    model=predictor.model,
    data_loader=test_loader,
    evaluator=COCOEvaluator(test_name),
)
mask_rcnn_R_101_FPN_3x/evaluation.txt ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [09/26 01:03:24 d2.evaluation.evaluator]: Total inference time: 0:20:57.157306 (0.111847 s / iter per device, on 1 devices)
2
+ [09/26 01:03:24 d2.evaluation.evaluator]: Total inference pure compute time: 0:16:27 (0.087851 s / iter per device, on 1 devices)
3
+ [09/26 01:03:24 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
4
+ [09/26 01:03:24 d2.evaluation.coco_evaluation]: Evaluating predictions with unofficial COCO API...
5
+ Loading and preparing results...
6
+ DONE (t=0.06s)
7
+ creating index...
8
+ index created!
9
+ [09/26 01:03:24 d2.evaluation.fast_eval_api]: Evaluate annotation type *bbox*
10
+ [09/26 01:03:27 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 2.66 seconds.
11
+ [09/26 01:03:27 d2.evaluation.fast_eval_api]: Accumulating evaluation results...
12
+ [09/26 01:03:27 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.41 seconds.
13
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.867
14
+ Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.948
15
+ Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.923
16
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.290
17
+ Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.615
18
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.916
19
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.514
20
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.889
21
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.898
22
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.313
23
+ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.658
24
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.942
25
+ [09/26 01:03:27 d2.evaluation.coco_evaluation]: Evaluation results for bbox:
26
+ | AP | AP50 | AP75 | APs | APm | APl |
27
+ |:------:|:------:|:------:|:------:|:------:|:------:|
28
+ | 86.690 | 94.839 | 92.308 | 29.010 | 61.459 | 91.559 |
29
+ [09/26 01:03:27 d2.evaluation.coco_evaluation]: Per-category bbox AP:
30
+ | category | AP | category | AP | category | AP |
31
+ |:-----------|:-------|:-----------|:-------|:-----------|:-------|
32
+ | text | 89.822 | title | 79.101 | list | 80.716 |
33
+ | table | 94.215 | figure | 89.594 | | |
34
+ Loading and preparing results...
35
+ DONE (t=1.41s)
36
+ creating index...
37
+ index created!
38
+ [09/26 01:03:30 d2.evaluation.fast_eval_api]: Evaluate annotation type *segm*
39
+ [09/26 01:03:39 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 8.26 seconds.
40
+ [09/26 01:03:39 d2.evaluation.fast_eval_api]: Accumulating evaluation results...
41
+ [09/26 01:03:39 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.39 seconds.
42
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.821
43
+ Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.947
44
+ Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.898
45
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.280
46
+ Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.569
47
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.862
48
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.489
49
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.854
50
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.862
51
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.311
52
+ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.620
53
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.898
54
+ [09/26 01:03:39 d2.evaluation.coco_evaluation]: Evaluation results for segm:
55
+ | AP | AP50 | AP75 | APs | APm | APl |
56
+ |:------:|:------:|:------:|:------:|:------:|:------:|
57
+ | 82.105 | 94.654 | 89.840 | 28.016 | 56.863 | 86.208 |
58
+ [09/26 01:03:39 d2.evaluation.coco_evaluation]: Per-category segm AP:
59
+ | category | AP | category | AP | category | AP |
60
+ |:-----------|:-------|:-----------|:-------|:-----------|:-------|
61
+ | text | 88.786 | title | 76.630 | list | 62.243 |
62
+ | table | 93.647 | figure | 89.217 | | |
mask_rcnn_R_101_FPN_3x/test.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Visual smoke test: run the trained Mask R-CNN model on a few sample pages."""

from detectron2.engine import DefaultPredictor
from utils import build_config, register_publaynet_datasets, visual_test

# Make the PubLayNet splits known to detectron2's catalogs.
train_name, test_name = register_publaynet_datasets()

# Model parameters. NOTE(review): the zero solver values look like
# placeholders — harmless for inference-only use, but this cfg must not be
# reused for training; confirm that was the intent.
config_name = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"
output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/mask_rcnn_R_101_FPN_3x/training_output"
score_threshold = 0.7
lr = 0
iterations = 0
roi_batch_size = 0

# Assemble the detectron2 config pointing at the trained weights.
cfg = build_config(
    config_name,
    train_name,
    test_name,
    output_dir,
    score_threshold,
    lr,
    iterations,
    roi_batch_size,
)

# Predict on a fixed set of sample images and write visualizations to disk.
visual_test(cfg, DefaultPredictor(cfg))
mask_rcnn_R_101_FPN_3x/train.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Fine-tune Mask R-CNN R101-FPN on PubLayNet using detectron2's DefaultTrainer."""

from detectron2.engine import DefaultTrainer
from utils import build_config, register_publaynet_datasets

# Make the PubLayNet splits known to detectron2's catalogs.
train_name, test_name = register_publaynet_datasets()

# Model / solver parameters for this run.
config_name = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"
output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/mask_rcnn_R_101_FPN_3x/training_output"
score_threshold = 0.7
lr = 0.00001
iterations = 15000
roi_batch_size = 128

# Assemble the detectron2 config.
cfg = build_config(
    config_name,
    train_name,
    test_name,
    output_dir,
    score_threshold,
    lr,
    iterations,
    roi_batch_size,
)

# resume=False: start a fresh run from cfg.MODEL.WEIGHTS instead of
# resuming iteration state from a previous checkpoint.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1725897998.Legion.47938.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13688b1d5c48925ef86b9abfbc8caddf0b77929a4e635754d1f6be4154023d5
3
+ size 57112
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726172796.Legion.3380.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:260f4db66bc80b5c0a0cf0bc60d3e1837bcee0d0653c76f2f485c578f3741d29
3
+ size 171412
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726421934.Legion.80102.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ddd5e4f8e0de29661376bbe5124c4e40cca3e6158be0c2dc75a5af118deb8a6
3
+ size 20536
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726422787.Legion.1493.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0db25f7e76307a7515fd271c972f11a6a8fe8bd9e56232e0811c835850f38cb0
3
+ size 20536
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726423456.Legion.4616.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32f5311e5c29787029c04da765c5bf0e735c78cd6d0609116a07f9f15ae38c88
3
+ size 17107
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726423883.Legion.6492.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92134b9c0422d1a7ad420c392d6c1659b867e8e0418011d3116e456495f6da6f
3
+ size 857212
mask_rcnn_R_101_FPN_3x/training_output/training_output/last_checkpoint ADDED
@@ -0,0 +1 @@
 
 
1
+ model_final.pth
mask_rcnn_R_101_FPN_3x/training_output/training_output/metrics.json ADDED
The diff for this file is too large to render. See raw diff
 
mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0004999.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0951119dc4443fd4b3fa7b8a963251e6fd043bf1816ee403da5a649f71881b
3
+ size 503235392
mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0009999.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84659e4c6e5844940acb6909032f7aee1947818f9d1e2098ceabaa15cc3c579
3
+ size 503235392
mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0014999.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df3036f36789631a59f532fe615f13d4ee1ba13aafd420f1c1cef8536d5cb1fc
3
+ size 503235392
mask_rcnn_R_101_FPN_3x/training_output/training_output/model_final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df3036f36789631a59f532fe615f13d4ee1ba13aafd420f1c1cef8536d5cb1fc
3
+ size 503235392
mask_rcnn_R_101_FPN_3x/utils.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+ from detectron2.utils.visualizer import Visualizer
4
+ import cv2
5
+ import numpy
6
+ from PIL.Image import Image
7
+ from PIL import Image as image_main
8
+ import detectron2
9
+ from detectron2.utils.logger import setup_logger
10
+
11
+ setup_logger()
12
+ from detectron2 import model_zoo
13
+ from detectron2.config import get_cfg, CfgNode
14
+ from detectron2.engine import DefaultTrainer, DefaultPredictor
15
+ from detectron2.data import MetadataCatalog
16
+ from detectron2.data.datasets.register_coco import register_coco_instances
17
+
18
+
19
def open_image_pil(image_path: str) -> Image:
    """Load the file at *image_path* as a PIL image."""
    return image_main.open(image_path)
21
+
22
+
23
def convert_pil_to_cv(pil_image: Image):
    """Convert a PIL image into an OpenCV-style BGR numpy array.

    Non-RGB images (e.g. grayscale or palette mode) are converted to RGB first.
    """
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")
    rgb_array = numpy.array(pil_image)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
27
+
28
+
29
def register_publaynet_datasets() -> "tuple[str, str]":
    """Register the PubLayNet train/val splits as COCO-format datasets.

    Returns:
        The pair ``(train_dataset_name, test_dataset_name)`` under which the
        splits were registered in detectron2's catalogs.
    """
    dataset_train_name = "publaynet_dataset_train"
    dataset_test_name = "publaynet_dataset_test"
    class_labels = ["text", "title", "list", "table", "figure"]
    register_coco_instances(
        dataset_train_name,
        {},
        "/home/alex/Datasets/PubLayNet/publaynet/train.json",
        "/home/alex/Datasets/PubLayNet/publaynet/train",
    )
    register_coco_instances(
        dataset_test_name,
        {},
        "/home/alex/Datasets/PubLayNet/publaynet/val.json",
        "/home/alex/Datasets/PubLayNet/publaynet/val",
    )

    # Sanity check: printing the metadata confirms registration succeeded.
    print(MetadataCatalog.get(dataset_train_name))
    print(MetadataCatalog.get(dataset_test_name))

    # Attach human-readable class labels (used by the Visualizer overlays).
    MetadataCatalog.get(dataset_train_name).thing_classes = class_labels
    MetadataCatalog.get(dataset_test_name).thing_classes = class_labels

    return dataset_train_name, dataset_test_name
57
+
58
+
59
def build_config(
    model_zoo_config_name: str,
    dataset_train_name: str,
    dataset_test_name: str,
    trained_model_output_dir: str,
    prediction_score_threshold: float,
    base_lr: float,
    max_iter: int,
    batch_size: int,
) -> CfgNode:
    """Assemble the detectron2 config for PubLayNet training and inference.

    Starts from the given model-zoo config, points it at the registered
    datasets, and — when a previously trained checkpoint already exists in
    the output directory — uses that checkpoint as the weight source.
    """
    weights_path = trained_model_output_dir + "/model_final.pth"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_zoo_config_name))

    # Datasets, output location, and data loading.
    cfg.DATASETS.TRAIN = (dataset_train_name,)
    cfg.DATASETS.TEST = (dataset_test_name,)
    cfg.OUTPUT_DIR = trained_model_output_dir
    cfg.DATALOADER.NUM_WORKERS = 8

    # Prefer locally trained weights when a finished checkpoint is present.
    if os.path.exists(weights_path):
        cfg.MODEL.WEIGHTS = weights_path

    # Solver and ROI-head settings.
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = prediction_score_threshold
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = base_lr
    cfg.SOLVER.MAX_ITER = max_iter
    cfg.SOLVER.STEPS = []  # no LR decay steps: constant learning rate
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5
    cfg.TEST.DETECTIONS_PER_IMAGE = 100

    # Training-time multi-scale shortest-edge sizes; earlier experiments
    # kept for reference:
    # cfg.INPUT.MIN_SIZE_TRAIN = (640, 672, 704, 736, 768, 800)
    # cfg.INPUT.MIN_SIZE_TRAIN = (600, 632, 664, 696, 728, 760)
    cfg.INPUT.MIN_SIZE_TRAIN = (580, 612, 644, 676, 708, 740)
    return cfg
91
+
92
+
93
def visualize_outputs(cfg, image_cv, outputs, name):
    """Draw predicted instances onto a BGR image and save the result as *name*."""
    metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    # Visualizer expects RGB, so flip channel order going in and coming out.
    drawer = Visualizer(image_cv[:, :, ::-1], metadata, scale=1.2)
    rendered = drawer.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imwrite(name, rendered.get_image()[:, :, ::-1])
100
+
101
+
102
def visual_test(cfg: CfgNode, predictor: DefaultPredictor):
    """Run *predictor* on a fixed set of PubLayNet pages and save visualizations.

    Each page is written to the working directory as ``image_<i>.jpg`` with
    the predicted instances drawn on top.
    """
    # Hand-picked samples from both the train and val splits.
    image_paths = [
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC1500815_00002.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC3162874_00002.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC4203354_00000.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC1247188_00003.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC2829689_00004.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC4520132_00000.jpg",
    ]

    for index, path in enumerate(image_paths):
        print("Testing on " + path)
        frame = convert_pil_to_cv(open_image_pil(path))
        predictions = predictor(frame)
        visualize_outputs(cfg, frame, predictions, f"image_{index}.jpg")