Add models implementation and training outputs
Browse files- faster_rcnn_R_101_FPN_3x/eval.py +44 -0
- faster_rcnn_R_101_FPN_3x/evaluation.txt +33 -0
- faster_rcnn_R_101_FPN_3x/test.py +29 -0
- faster_rcnn_R_101_FPN_3x/train.py +30 -0
- faster_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726006373.Legion.31775.0 +3 -0
- faster_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726406883.Legion.19405.0 +3 -0
- faster_rcnn_R_101_FPN_3x/training_output/training_output/last_checkpoint +1 -0
- faster_rcnn_R_101_FPN_3x/training_output/training_output/metrics.json +0 -0
- faster_rcnn_R_101_FPN_3x/training_output/training_output/model_0004999.pth +3 -0
- faster_rcnn_R_101_FPN_3x/training_output/training_output/model_final.pth +3 -0
- faster_rcnn_R_101_FPN_3x/utils.py +114 -0
- mask_rcnn_R_101_FPN_3x/eval.py +44 -0
- mask_rcnn_R_101_FPN_3x/evaluation.txt +62 -0
- mask_rcnn_R_101_FPN_3x/test.py +29 -0
- mask_rcnn_R_101_FPN_3x/train.py +30 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1725897998.Legion.47938.0 +3 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726172796.Legion.3380.0 +3 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726421934.Legion.80102.0 +3 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726422787.Legion.1493.0 +3 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726423456.Legion.4616.0 +3 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726423883.Legion.6492.0 +3 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/last_checkpoint +1 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/metrics.json +0 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0004999.pth +3 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0009999.pth +3 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0014999.pth +3 -0
- mask_rcnn_R_101_FPN_3x/training_output/training_output/model_final.pth +3 -0
- mask_rcnn_R_101_FPN_3x/utils.py +117 -0
faster_rcnn_R_101_FPN_3x/eval.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from detectron2.data import DatasetCatalog, DatasetMapper
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from utils import (
    build_config,
    register_publaynet_datasets,
)

# Register the PubLayNet splits so detectron2 can resolve them by name.
dataset_train_name, dataset_test_name = register_publaynet_datasets()

# Model parameters
model_zoo_config_name = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
trained_model_output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/faster_rcnn_R_101_FPN_3x/training_output"
prediction_score_threshold = 0.7
base_lr = 0.001
max_iter = 500
batch_size = 128

# Detectron config -- keyword arguments make the parameter mapping explicit.
cfg = build_config(
    model_zoo_config_name=model_zoo_config_name,
    dataset_train_name=dataset_train_name,
    dataset_test_name=dataset_test_name,
    trained_model_output_dir=trained_model_output_dir,
    prediction_score_threshold=prediction_score_threshold,
    base_lr=base_lr,
    max_iter=max_iter,
    batch_size=batch_size,
)

# Predictor wrapping the fine-tuned weights (if present in the output dir).
predictor = DefaultPredictor(cfg)

# Run COCO-style evaluation over the whole test split.
dataset = DatasetCatalog.get(dataset_test_name)
dataset_mapper = DatasetMapper(cfg, is_train=False)
data_loader = build_detection_test_loader(dataset=dataset, mapper=dataset_mapper)
inference_on_dataset(
    model=predictor.model,
    data_loader=data_loader,
    evaluator=COCOEvaluator(dataset_test_name),
)
|
faster_rcnn_R_101_FPN_3x/evaluation.txt
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[09/20 16:54:58 d2.evaluation.evaluator]: Total inference time: 0:19:19.760338 (0.103182 s / iter per device, on 1 devices)
|
2 |
+
[09/20 16:54:58 d2.evaluation.evaluator]: Total inference pure compute time: 0:15:54 (0.084938 s / iter per device, on 1 devices)
|
3 |
+
[09/20 16:54:58 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
|
4 |
+
[09/20 16:54:58 d2.evaluation.coco_evaluation]: Evaluating predictions with unofficial COCO API...
|
5 |
+
Loading and preparing results...
|
6 |
+
DONE (t=0.40s)
|
7 |
+
creating index...
|
8 |
+
index created!
|
9 |
+
[09/20 16:54:59 d2.evaluation.fast_eval_api]: Evaluate annotation type *bbox*
|
10 |
+
[09/20 16:55:01 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 2.51 seconds.
|
11 |
+
[09/20 16:55:01 d2.evaluation.fast_eval_api]: Accumulating evaluation results...
|
12 |
+
[09/20 16:55:02 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.39 seconds.
|
13 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.843
|
14 |
+
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.940
|
15 |
+
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.912
|
16 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.251
|
17 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.602
|
18 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.894
|
19 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.503
|
20 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.871
|
21 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.880
|
22 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.270
|
23 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.650
|
24 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.927
|
25 |
+
[09/20 16:55:02 d2.evaluation.coco_evaluation]: Evaluation results for bbox:
|
26 |
+
| AP | AP50 | AP75 | APs | APm | APl |
|
27 |
+
|:------:|:------:|:------:|:------:|:------:|:------:|
|
28 |
+
| 84.295 | 94.048 | 91.199 | 25.061 | 60.217 | 89.431 |
|
29 |
+
[09/20 16:55:02 d2.evaluation.coco_evaluation]: Per-category bbox AP:
|
30 |
+
| category | AP | category | AP | category | AP |
|
31 |
+
|:-----------|:-------|:-----------|:-------|:-----------|:-------|
|
32 |
+
| text | 89.249 | title | 76.824 | list | 77.612 |
|
33 |
+
| table | 92.250 | figure | 85.539 | | |
|
faster_rcnn_R_101_FPN_3x/test.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from detectron2.engine import DefaultPredictor
from utils import build_config, register_publaynet_datasets, visual_test

# Register the PubLayNet splits so detectron2 can resolve them by name.
dataset_train_name, dataset_test_name = register_publaynet_datasets()

# Model parameters
model_zoo_config_name = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
trained_model_output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/faster_rcnn_R_101_FPN_3x/training_output"
prediction_score_threshold = 0.7
base_lr = 0.001
max_iter = 50
batch_size = 128

# Detectron config -- keyword arguments make the parameter mapping explicit.
cfg = build_config(
    model_zoo_config_name=model_zoo_config_name,
    dataset_train_name=dataset_train_name,
    dataset_test_name=dataset_test_name,
    trained_model_output_dir=trained_model_output_dir,
    prediction_score_threshold=prediction_score_threshold,
    base_lr=base_lr,
    max_iter=max_iter,
    batch_size=batch_size,
)

# Run the predictor over a handful of sample pages and save visualizations.
predictor = DefaultPredictor(cfg)
visual_test(cfg, predictor)
|
faster_rcnn_R_101_FPN_3x/train.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from detectron2.engine import DefaultTrainer
from utils import build_config, register_publaynet_datasets

# Register the PubLayNet splits so detectron2 can resolve them by name.
dataset_train_name, dataset_test_name = register_publaynet_datasets()

# Model parameters
model_zoo_config_name = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
trained_model_output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/faster_rcnn_R_101_FPN_3x/training_output"
prediction_score_threshold = 0.7
base_lr = 0.00001
max_iter = 8000
batch_size = 128

# Detectron config -- keyword arguments make the parameter mapping explicit.
cfg = build_config(
    model_zoo_config_name=model_zoo_config_name,
    dataset_train_name=dataset_train_name,
    dataset_test_name=dataset_test_name,
    trained_model_output_dir=trained_model_output_dir,
    prediction_score_threshold=prediction_score_threshold,
    base_lr=base_lr,
    max_iter=max_iter,
    batch_size=batch_size,
)

# Fine-tune from scratch (resume=False ignores any existing checkpoint state).
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
|
faster_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726006373.Legion.31775.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31f639edc57729dc50b05d14ce2101eda3bfb966037a64542bbef8d394cd7f55
|
3 |
+
size 91186
|
faster_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726406883.Legion.19405.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3b2bd51b5d8529ecff5fb5cf2999aa052a1c4958972c0f3728528484813dc02
|
3 |
+
size 364786
|
faster_rcnn_R_101_FPN_3x/training_output/training_output/last_checkpoint
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
model_final.pth
|
faster_rcnn_R_101_FPN_3x/training_output/training_output/metrics.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
faster_rcnn_R_101_FPN_3x/training_output/training_output/model_0004999.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5854879a3f88d2ccc9ac871a4dee4a79ad3d0d7e73dae646d031ec1d2eb4901e
|
3 |
+
size 482234736
|
faster_rcnn_R_101_FPN_3x/training_output/training_output/model_final.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e58306b214ec6c7fef0e5daa14eac84a546d2313322c2dcd4946a6338f02589
|
3 |
+
size 482234736
|
faster_rcnn_R_101_FPN_3x/utils.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from detectron2.utils.visualizer import Visualizer
|
3 |
+
import cv2
|
4 |
+
import numpy
|
5 |
+
from PIL.Image import Image
|
6 |
+
from PIL import Image as image_main
|
7 |
+
from detectron2.utils.logger import setup_logger
|
8 |
+
|
9 |
+
setup_logger()
|
10 |
+
from detectron2 import model_zoo
|
11 |
+
from detectron2.config import get_cfg, CfgNode
|
12 |
+
from detectron2.data import MetadataCatalog
|
13 |
+
from detectron2.data.datasets.register_coco import register_coco_instances
|
14 |
+
|
15 |
+
|
16 |
+
def open_image_pil(image_path: str) -> Image:
    """Open the file at *image_path* and return it as a PIL image."""
    return image_main.open(image_path)
|
18 |
+
|
19 |
+
|
20 |
+
def convert_pil_to_cv(pil_image: Image):
    """Convert a PIL image into an OpenCV-compatible BGR numpy array."""
    # Normalize to RGB first so the channel reordering below is well-defined.
    rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")
    return cv2.cvtColor(numpy.array(rgb_image), cv2.COLOR_RGB2BGR)
|
24 |
+
|
25 |
+
|
26 |
+
def register_publaynet_datasets() -> "tuple[str, str]":
    """Register the PubLayNet train/val splits as COCO-format datasets.

    Returns:
        The (train, test) dataset names under which the splits are
        registered in detectron2's DatasetCatalog / MetadataCatalog.
    """
    dataset_train_name = "publaynet_dataset_train"
    dataset_test_name = "publaynet_dataset_test"
    # Label order must match the category ids in the PubLayNet annotations.
    class_labels = ["text", "title", "list", "table", "figure"]
    register_coco_instances(
        dataset_train_name,
        {},
        "/home/alex/Datasets/PubLayNet/publaynet/train.json",
        "/home/alex/Datasets/PubLayNet/publaynet/train",
    )
    register_coco_instances(
        dataset_test_name,
        {},
        "/home/alex/Datasets/PubLayNet/publaynet/val.json",
        "/home/alex/Datasets/PubLayNet/publaynet/val",
    )

    # Make sure the datasets got registered
    metadata_train = MetadataCatalog.get(dataset_train_name)
    metadata_test = MetadataCatalog.get(dataset_test_name)
    print(metadata_train)
    print(metadata_test)

    # Set labels (reuse the metadata handles fetched above instead of
    # looking them up again).
    metadata_train.thing_classes = class_labels
    metadata_test.thing_classes = class_labels

    return dataset_train_name, dataset_test_name
|
54 |
+
|
55 |
+
|
56 |
+
def build_config(
    model_zoo_config_name: str,
    dataset_train_name: str,
    dataset_test_name: str,
    trained_model_output_dir: str,
    prediction_score_threshold: float,
    base_lr: float,
    max_iter: int,
    batch_size: int,
) -> CfgNode:
    """Build a detectron2 config from a model-zoo baseline.

    Starts from the given model-zoo yaml, points it at the registered
    PubLayNet datasets, and overrides solver/ROI-head settings. If a
    previously fine-tuned ``model_final.pth`` exists in
    *trained_model_output_dir*, it is used as the weights source.
    """
    # os.path.join instead of string concatenation for path portability.
    trained_model_weights_path = os.path.join(
        trained_model_output_dir, "model_final.pth"
    )

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_zoo_config_name))
    cfg.DATASETS.TRAIN = (dataset_train_name,)
    cfg.DATASETS.TEST = (dataset_test_name,)
    cfg.OUTPUT_DIR = trained_model_output_dir
    cfg.DATALOADER.NUM_WORKERS = 8
    # Prefer previously fine-tuned weights when they exist on disk.
    if os.path.exists(trained_model_weights_path):
        cfg.MODEL.WEIGHTS = trained_model_weights_path
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = prediction_score_threshold
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = base_lr
    cfg.SOLVER.MAX_ITER = max_iter
    # Empty STEPS disables the LR decay schedule.
    cfg.SOLVER.STEPS = []
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size
    # PubLayNet has 5 categories: text, title, list, table, figure.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5
    cfg.TEST.DETECTIONS_PER_IMAGE = 100
    # cfg.INPUT.MIN_SIZE_TRAIN = (640, 672, 704, 736, 768, 800)
    # cfg.INPUT.MIN_SIZE_TRAIN = (600, 632, 664, 696, 728, 760)
    # Smaller multiscale range than the zoo default, presumably to fit GPU
    # memory -- TODO confirm.
    cfg.INPUT.MIN_SIZE_TRAIN = (580, 612, 644, 676, 708, 740)
    return cfg
|
88 |
+
|
89 |
+
|
90 |
+
def visualize_outputs(cfg, image_cv, outputs, name):
    """Draw predicted instances over *image_cv* and write the result to *name*."""
    train_metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    # Visualizer expects reversed channel order relative to the cv2 image.
    visualizer = Visualizer(image_cv[:, :, ::-1], train_metadata, scale=1.2)
    drawn = visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))
    # Flip channels back before handing the array to cv2.imwrite.
    cv2.imwrite(name, drawn.get_image()[:, :, ::-1])
|
97 |
+
|
98 |
+
|
99 |
+
def visual_test(cfg: CfgNode, predictor: "DefaultPredictor"):
    """Run *predictor* over a fixed set of PubLayNet pages and save visualizations.

    Each result is written to the working directory as ``image_<i>.jpg``.

    Note: ``DefaultPredictor`` is not imported in this module, so the
    annotation is a string forward reference -- a bare name here would raise
    NameError when the function is defined.
    """
    image_paths = [
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC1500815_00002.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC3162874_00002.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC4203354_00000.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC1247188_00003.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC2829689_00004.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC4520132_00000.jpg",
    ]

    for i, image_path in enumerate(image_paths):
        print("Testing on " + image_path)
        image_pil = open_image_pil(image_path)
        image_cv = convert_pil_to_cv(image_pil)
        outputs = predictor(image_cv)
        visualize_outputs(cfg, image_cv, outputs, f"image_{i}.jpg")
|
mask_rcnn_R_101_FPN_3x/eval.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from detectron2.data import DatasetCatalog, DatasetMapper
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from utils import (
    build_config,
    register_publaynet_datasets,
)

# Register the PubLayNet splits so detectron2 can resolve them by name.
dataset_train_name, dataset_test_name = register_publaynet_datasets()

# Model parameters
model_zoo_config_name = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"
trained_model_output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/mask_rcnn_R_101_FPN_3x/training_output"
prediction_score_threshold = 0.7
base_lr = 0.0001
max_iter = 1000
batch_size = 128

# Detectron config -- keyword arguments make the parameter mapping explicit.
cfg = build_config(
    model_zoo_config_name=model_zoo_config_name,
    dataset_train_name=dataset_train_name,
    dataset_test_name=dataset_test_name,
    trained_model_output_dir=trained_model_output_dir,
    prediction_score_threshold=prediction_score_threshold,
    base_lr=base_lr,
    max_iter=max_iter,
    batch_size=batch_size,
)

# Predictor wrapping the fine-tuned weights (if present in the output dir).
predictor = DefaultPredictor(cfg)

# Run COCO-style evaluation (bbox + segm) over the whole test split.
dataset = DatasetCatalog.get(dataset_test_name)
dataset_mapper = DatasetMapper(cfg, is_train=False)
data_loader = build_detection_test_loader(dataset=dataset, mapper=dataset_mapper)
inference_on_dataset(
    model=predictor.model,
    data_loader=data_loader,
    evaluator=COCOEvaluator(dataset_test_name),
)
|
mask_rcnn_R_101_FPN_3x/evaluation.txt
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[09/26 01:03:24 d2.evaluation.evaluator]: Total inference time: 0:20:57.157306 (0.111847 s / iter per device, on 1 devices)
|
2 |
+
[09/26 01:03:24 d2.evaluation.evaluator]: Total inference pure compute time: 0:16:27 (0.087851 s / iter per device, on 1 devices)
|
3 |
+
[09/26 01:03:24 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
|
4 |
+
[09/26 01:03:24 d2.evaluation.coco_evaluation]: Evaluating predictions with unofficial COCO API...
|
5 |
+
Loading and preparing results...
|
6 |
+
DONE (t=0.06s)
|
7 |
+
creating index...
|
8 |
+
index created!
|
9 |
+
[09/26 01:03:24 d2.evaluation.fast_eval_api]: Evaluate annotation type *bbox*
|
10 |
+
[09/26 01:03:27 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 2.66 seconds.
|
11 |
+
[09/26 01:03:27 d2.evaluation.fast_eval_api]: Accumulating evaluation results...
|
12 |
+
[09/26 01:03:27 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.41 seconds.
|
13 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.867
|
14 |
+
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.948
|
15 |
+
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.923
|
16 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.290
|
17 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.615
|
18 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.916
|
19 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.514
|
20 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.889
|
21 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.898
|
22 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.313
|
23 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.658
|
24 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.942
|
25 |
+
[09/26 01:03:27 d2.evaluation.coco_evaluation]: Evaluation results for bbox:
|
26 |
+
| AP | AP50 | AP75 | APs | APm | APl |
|
27 |
+
|:------:|:------:|:------:|:------:|:------:|:------:|
|
28 |
+
| 86.690 | 94.839 | 92.308 | 29.010 | 61.459 | 91.559 |
|
29 |
+
[09/26 01:03:27 d2.evaluation.coco_evaluation]: Per-category bbox AP:
|
30 |
+
| category | AP | category | AP | category | AP |
|
31 |
+
|:-----------|:-------|:-----------|:-------|:-----------|:-------|
|
32 |
+
| text | 89.822 | title | 79.101 | list | 80.716 |
|
33 |
+
| table | 94.215 | figure | 89.594 | | |
|
34 |
+
Loading and preparing results...
|
35 |
+
DONE (t=1.41s)
|
36 |
+
creating index...
|
37 |
+
index created!
|
38 |
+
[09/26 01:03:30 d2.evaluation.fast_eval_api]: Evaluate annotation type *segm*
|
39 |
+
[09/26 01:03:39 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 8.26 seconds.
|
40 |
+
[09/26 01:03:39 d2.evaluation.fast_eval_api]: Accumulating evaluation results...
|
41 |
+
[09/26 01:03:39 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.39 seconds.
|
42 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.821
|
43 |
+
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.947
|
44 |
+
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.898
|
45 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.280
|
46 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.569
|
47 |
+
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.862
|
48 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.489
|
49 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.854
|
50 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.862
|
51 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.311
|
52 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.620
|
53 |
+
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.898
|
54 |
+
[09/26 01:03:39 d2.evaluation.coco_evaluation]: Evaluation results for segm:
|
55 |
+
| AP | AP50 | AP75 | APs | APm | APl |
|
56 |
+
|:------:|:------:|:------:|:------:|:------:|:------:|
|
57 |
+
| 82.105 | 94.654 | 89.840 | 28.016 | 56.863 | 86.208 |
|
58 |
+
[09/26 01:03:39 d2.evaluation.coco_evaluation]: Per-category segm AP:
|
59 |
+
| category | AP | category | AP | category | AP |
|
60 |
+
|:-----------|:-------|:-----------|:-------|:-----------|:-------|
|
61 |
+
| text | 88.786 | title | 76.630 | list | 62.243 |
|
62 |
+
| table | 93.647 | figure | 89.217 | | |
|
mask_rcnn_R_101_FPN_3x/test.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from detectron2.engine import DefaultPredictor
from utils import build_config, register_publaynet_datasets, visual_test

# Register datasets
dataset_train_name, dataset_test_name = register_publaynet_datasets()

# Model parameters
model_zoo_config_name = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"
trained_model_output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/mask_rcnn_R_101_FPN_3x/training_output"
prediction_score_threshold = 0.7
# Solver values below are placeholders: this script only runs inference, so
# the learning rate / iteration count set on the config are never used.
# NOTE(review): batch_size=0 also sets ROI_HEADS.BATCH_SIZE_PER_IMAGE to 0,
# which is only safe because no training happens here -- confirm if reused.
base_lr = 0
max_iter = 0
batch_size = 0

# Detectron config
cfg = build_config(
    model_zoo_config_name,
    dataset_train_name,
    dataset_test_name,
    trained_model_output_dir,
    prediction_score_threshold,
    base_lr,
    max_iter,
    batch_size,
)

# Detectron predictor
predictor = DefaultPredictor(cfg)
# Run predictions over a fixed set of sample pages and save visualizations.
visual_test(cfg, predictor)
|
mask_rcnn_R_101_FPN_3x/train.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from detectron2.engine import DefaultTrainer
from utils import build_config, register_publaynet_datasets

# Register the PubLayNet splits so detectron2 can resolve them by name.
dataset_train_name, dataset_test_name = register_publaynet_datasets()

# Model parameters
model_zoo_config_name = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"
trained_model_output_dir = "/home/alex/Dev/deep_learning/detectron2-publaynet/mask_rcnn_R_101_FPN_3x/training_output"
prediction_score_threshold = 0.7
base_lr = 0.00001
max_iter = 15000
batch_size = 128

# Detectron config -- keyword arguments make the parameter mapping explicit.
cfg = build_config(
    model_zoo_config_name=model_zoo_config_name,
    dataset_train_name=dataset_train_name,
    dataset_test_name=dataset_test_name,
    trained_model_output_dir=trained_model_output_dir,
    prediction_score_threshold=prediction_score_threshold,
    base_lr=base_lr,
    max_iter=max_iter,
    batch_size=batch_size,
)

# Fine-tune from scratch (resume=False ignores any existing checkpoint state).
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1725897998.Legion.47938.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f13688b1d5c48925ef86b9abfbc8caddf0b77929a4e635754d1f6be4154023d5
|
3 |
+
size 57112
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726172796.Legion.3380.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:260f4db66bc80b5c0a0cf0bc60d3e1837bcee0d0653c76f2f485c578f3741d29
|
3 |
+
size 171412
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726421934.Legion.80102.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ddd5e4f8e0de29661376bbe5124c4e40cca3e6158be0c2dc75a5af118deb8a6
|
3 |
+
size 20536
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726422787.Legion.1493.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0db25f7e76307a7515fd271c972f11a6a8fe8bd9e56232e0811c835850f38cb0
|
3 |
+
size 20536
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726423456.Legion.4616.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32f5311e5c29787029c04da765c5bf0e735c78cd6d0609116a07f9f15ae38c88
|
3 |
+
size 17107
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/events.out.tfevents.1726423883.Legion.6492.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92134b9c0422d1a7ad420c392d6c1659b867e8e0418011d3116e456495f6da6f
|
3 |
+
size 857212
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/last_checkpoint
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
model_final.pth
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/metrics.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0004999.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d0951119dc4443fd4b3fa7b8a963251e6fd043bf1816ee403da5a649f71881b
|
3 |
+
size 503235392
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0009999.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c84659e4c6e5844940acb6909032f7aee1947818f9d1e2098ceabaa15cc3c579
|
3 |
+
size 503235392
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/model_0014999.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df3036f36789631a59f532fe615f13d4ee1ba13aafd420f1c1cef8536d5cb1fc
|
3 |
+
size 503235392
|
mask_rcnn_R_101_FPN_3x/training_output/training_output/model_final.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df3036f36789631a59f532fe615f13d4ee1ba13aafd420f1c1cef8536d5cb1fc
|
3 |
+
size 503235392
|
mask_rcnn_R_101_FPN_3x/utils.py
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import List
|
3 |
+
from detectron2.utils.visualizer import Visualizer
|
4 |
+
import cv2
|
5 |
+
import numpy
|
6 |
+
from PIL.Image import Image
|
7 |
+
from PIL import Image as image_main
|
8 |
+
import detectron2
|
9 |
+
from detectron2.utils.logger import setup_logger
|
10 |
+
|
11 |
+
setup_logger()
|
12 |
+
from detectron2 import model_zoo
|
13 |
+
from detectron2.config import get_cfg, CfgNode
|
14 |
+
from detectron2.engine import DefaultTrainer, DefaultPredictor
|
15 |
+
from detectron2.data import MetadataCatalog
|
16 |
+
from detectron2.data.datasets.register_coco import register_coco_instances
|
17 |
+
|
18 |
+
|
19 |
+
def open_image_pil(image_path: str) -> Image:
    """Open the file at *image_path* and return it as a PIL image."""
    return image_main.open(image_path)
|
21 |
+
|
22 |
+
|
23 |
+
def convert_pil_to_cv(pil_image: Image):
    """Convert a PIL image into an OpenCV-compatible BGR numpy array."""
    # Normalize to RGB first so the channel reordering below is well-defined.
    rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")
    return cv2.cvtColor(numpy.array(rgb_image), cv2.COLOR_RGB2BGR)
|
27 |
+
|
28 |
+
|
29 |
+
def register_publaynet_datasets() -> "tuple[str, str]":
    """Register the PubLayNet train/val splits as COCO-format datasets.

    Returns:
        The (train, test) dataset names under which the splits are
        registered in detectron2's DatasetCatalog / MetadataCatalog.
    """
    dataset_train_name = "publaynet_dataset_train"
    dataset_test_name = "publaynet_dataset_test"
    # Label order must match the category ids in the PubLayNet annotations.
    class_labels = ["text", "title", "list", "table", "figure"]
    register_coco_instances(
        dataset_train_name,
        {},
        "/home/alex/Datasets/PubLayNet/publaynet/train.json",
        "/home/alex/Datasets/PubLayNet/publaynet/train",
    )
    register_coco_instances(
        dataset_test_name,
        {},
        "/home/alex/Datasets/PubLayNet/publaynet/val.json",
        "/home/alex/Datasets/PubLayNet/publaynet/val",
    )

    # Make sure the datasets got registered
    metadata_train = MetadataCatalog.get(dataset_train_name)
    metadata_test = MetadataCatalog.get(dataset_test_name)
    print(metadata_train)
    print(metadata_test)

    # Set labels (reuse the metadata handles fetched above instead of
    # looking them up again).
    metadata_train.thing_classes = class_labels
    metadata_test.thing_classes = class_labels

    return dataset_train_name, dataset_test_name
|
57 |
+
|
58 |
+
|
59 |
+
def build_config(
    model_zoo_config_name: str,
    dataset_train_name: str,
    dataset_test_name: str,
    trained_model_output_dir: str,
    prediction_score_threshold: float,
    base_lr: float,
    max_iter: int,
    batch_size: int,
) -> CfgNode:
    """Build a detectron2 config from a model-zoo baseline.

    Starts from the given model-zoo yaml, points it at the registered
    PubLayNet datasets, and overrides solver/ROI-head settings. If a
    previously fine-tuned ``model_final.pth`` exists in
    *trained_model_output_dir*, it is used as the weights source.
    """
    # os.path.join instead of string concatenation for path portability.
    trained_model_weights_path = os.path.join(
        trained_model_output_dir, "model_final.pth"
    )

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_zoo_config_name))
    cfg.DATASETS.TRAIN = (dataset_train_name,)
    cfg.DATASETS.TEST = (dataset_test_name,)
    cfg.OUTPUT_DIR = trained_model_output_dir
    cfg.DATALOADER.NUM_WORKERS = 8
    # Prefer previously fine-tuned weights when they exist on disk.
    if os.path.exists(trained_model_weights_path):
        cfg.MODEL.WEIGHTS = trained_model_weights_path
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = prediction_score_threshold
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = base_lr
    cfg.SOLVER.MAX_ITER = max_iter
    # Empty STEPS disables the LR decay schedule.
    cfg.SOLVER.STEPS = []
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size
    # PubLayNet has 5 categories: text, title, list, table, figure.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5
    cfg.TEST.DETECTIONS_PER_IMAGE = 100
    # cfg.INPUT.MIN_SIZE_TRAIN = (640, 672, 704, 736, 768, 800)
    # cfg.INPUT.MIN_SIZE_TRAIN = (600, 632, 664, 696, 728, 760)
    # Smaller multiscale range than the zoo default, presumably to fit GPU
    # memory -- TODO confirm.
    cfg.INPUT.MIN_SIZE_TRAIN = (580, 612, 644, 676, 708, 740)
    return cfg
|
91 |
+
|
92 |
+
|
93 |
+
def visualize_outputs(cfg, image_cv, outputs, name):
    """Draw predicted instances over *image_cv* and write the result to *name*."""
    train_metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    # Visualizer expects reversed channel order relative to the cv2 image.
    visualizer = Visualizer(image_cv[:, :, ::-1], train_metadata, scale=1.2)
    drawn = visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))
    # Flip channels back before handing the array to cv2.imwrite.
    cv2.imwrite(name, drawn.get_image()[:, :, ::-1])
|
100 |
+
|
101 |
+
|
102 |
+
def visual_test(cfg: CfgNode, predictor: DefaultPredictor):
    """Run *predictor* over a fixed set of PubLayNet pages and save visualizations.

    Each result is written to the working directory as ``image_<i>.jpg``.
    """
    image_paths = [
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC1500815_00002.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC3162874_00002.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/train/PMC4203354_00000.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC1247188_00003.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC2829689_00004.jpg",
        "/home/alex/Datasets/PubLayNet/publaynet/val/PMC4520132_00000.jpg",
    ]

    for index, image_path in enumerate(image_paths):
        print("Testing on " + image_path)
        page_cv = convert_pil_to_cv(open_image_pil(image_path))
        predictions = predictor(page_cv)
        visualize_outputs(cfg, page_cv, predictions, f"image_{index}.jpg")
|