napatswift committed on
Commit
f8c7387
·
1 Parent(s): 28d6a1a

Add table det

Browse files
main.py CHANGED
@@ -7,11 +7,11 @@ import torch
7
 
8
  print('Loading model...')
9
  device = 'gpu' if torch.cuda.is_available() else 'cpu'
10
- # table_det = init_detector('model/table-det/config.py',
11
- # 'model/table-det/model.pth', device=device)
12
 
13
- ocr = MMOCRInferencer(det='model/det/config.py',
14
- det_weights='model/det/model.pth',
15
  device=device)
16
 
17
  def get_rec(points):
 
7
 
8
  print('Loading model...')
9
  device = 'gpu' if torch.cuda.is_available() else 'cpu'
10
+ table_det = init_detector('model/table-det/config.py',
11
+ 'model/table-det/model.pth', device=device)
12
 
13
+ ocr = MMOCRInferencer(det='model/text-det/config.py',
14
+ det_weights='model/text-det/model.pth',
15
  device=device)
16
 
17
  def get_rec(points):
model/table-det/config.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='MaskRCNN',
3
+ data_preprocessor=dict(
4
+ type='DetDataPreprocessor',
5
+ mean=[103.53, 116.28, 123.675],
6
+ std=[1.0, 1.0, 1.0],
7
+ bgr_to_rgb=False,
8
+ pad_mask=True,
9
+ pad_size_divisor=32),
10
+ backbone=dict(
11
+ type='ResNet',
12
+ depth=50,
13
+ num_stages=4,
14
+ out_indices=(0, 1, 2, 3),
15
+ frozen_stages=1,
16
+ norm_cfg=dict(type='BN', requires_grad=False),
17
+ norm_eval=True,
18
+ style='caffe',
19
+ init_cfg=dict(
20
+ type='Pretrained',
21
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
22
+ neck=dict(
23
+ type='FPN',
24
+ in_channels=[256, 512, 1024, 2048],
25
+ out_channels=256,
26
+ num_outs=5),
27
+ rpn_head=dict(
28
+ type='RPNHead',
29
+ in_channels=256,
30
+ feat_channels=256,
31
+ anchor_generator=dict(
32
+ type='AnchorGenerator',
33
+ scales=[8],
34
+ ratios=[0.5, 1.0, 2.0],
35
+ strides=[4, 8, 16, 32, 64]),
36
+ bbox_coder=dict(
37
+ type='DeltaXYWHBBoxCoder',
38
+ target_means=[0.0, 0.0, 0.0, 0.0],
39
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
40
+ loss_cls=dict(
41
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
42
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
43
+ roi_head=dict(
44
+ type='StandardRoIHead',
45
+ bbox_roi_extractor=dict(
46
+ type='SingleRoIExtractor',
47
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
48
+ out_channels=256,
49
+ featmap_strides=[4, 8, 16, 32]),
50
+ bbox_head=dict(
51
+ type='Shared2FCBBoxHead',
52
+ in_channels=256,
53
+ fc_out_channels=1024,
54
+ roi_feat_size=7,
55
+ num_classes=1,
56
+ bbox_coder=dict(
57
+ type='DeltaXYWHBBoxCoder',
58
+ target_means=[0.0, 0.0, 0.0, 0.0],
59
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
60
+ reg_class_agnostic=False,
61
+ loss_cls=dict(
62
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
63
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
64
+ mask_roi_extractor=dict(
65
+ type='SingleRoIExtractor',
66
+ roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
67
+ out_channels=256,
68
+ featmap_strides=[4, 8, 16, 32]),
69
+ mask_head=dict(
70
+ type='FCNMaskHead',
71
+ num_convs=4,
72
+ in_channels=256,
73
+ conv_out_channels=256,
74
+ num_classes=1,
75
+ loss_mask=dict(
76
+ type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
77
+ train_cfg=dict(
78
+ rpn=dict(
79
+ assigner=dict(
80
+ type='MaxIoUAssigner',
81
+ pos_iou_thr=0.7,
82
+ neg_iou_thr=0.3,
83
+ min_pos_iou=0.3,
84
+ match_low_quality=True,
85
+ ignore_iof_thr=-1),
86
+ sampler=dict(
87
+ type='RandomSampler',
88
+ num=256,
89
+ pos_fraction=0.5,
90
+ neg_pos_ub=-1,
91
+ add_gt_as_proposals=False),
92
+ allowed_border=-1,
93
+ pos_weight=-1,
94
+ debug=False),
95
+ rpn_proposal=dict(
96
+ nms_pre=2000,
97
+ max_per_img=1000,
98
+ nms=dict(type='nms', iou_threshold=0.7),
99
+ min_bbox_size=0),
100
+ rcnn=dict(
101
+ assigner=dict(
102
+ type='MaxIoUAssigner',
103
+ pos_iou_thr=0.5,
104
+ neg_iou_thr=0.5,
105
+ min_pos_iou=0.5,
106
+ match_low_quality=True,
107
+ ignore_iof_thr=-1),
108
+ sampler=dict(
109
+ type='RandomSampler',
110
+ num=512,
111
+ pos_fraction=0.25,
112
+ neg_pos_ub=-1,
113
+ add_gt_as_proposals=True),
114
+ mask_size=28,
115
+ pos_weight=-1,
116
+ debug=False)),
117
+ test_cfg=dict(
118
+ rpn=dict(
119
+ nms_pre=1000,
120
+ max_per_img=1000,
121
+ nms=dict(type='nms', iou_threshold=0.7),
122
+ min_bbox_size=0),
123
+ rcnn=dict(
124
+ score_thr=0.05,
125
+ nms=dict(type='nms', iou_threshold=0.5),
126
+ max_per_img=100,
127
+ mask_thr_binary=0.5)))
128
+ dataset_type = 'CocoDataset'
129
+ data_root = 'data/table-det-740/'
130
+ backend_args = None
131
+ train_pipeline = [
132
+ dict(type='LoadImageFromFile', backend_args=None),
133
+ dict(
134
+ type='LoadAnnotations',
135
+ with_bbox=True,
136
+ with_mask=True,
137
+ poly2mask=False),
138
+ dict(
139
+ type='RandomChoiceResize',
140
+ scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
141
+ (1333, 768), (1333, 800)],
142
+ keep_ratio=True),
143
+ dict(type='RandomFlip', prob=0.5),
144
+ dict(type='PackDetInputs')
145
+ ]
146
+ test_pipeline = [
147
+ dict(type='LoadImageFromFile', backend_args=None),
148
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
149
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
150
+ dict(
151
+ type='PackDetInputs',
152
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
153
+ 'scale_factor'))
154
+ ]
155
+ train_dataloader = dict(
156
+ batch_size=4,
157
+ num_workers=2,
158
+ persistent_workers=True,
159
+ sampler=dict(type='DefaultSampler', shuffle=True),
160
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
161
+ dataset=dict(
162
+ type='CocoDataset',
163
+ data_root='data/table-det-740/',
164
+ ann_file='train_coco.json',
165
+ data_prefix=dict(img=''),
166
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
167
+ pipeline=[
168
+ dict(type='LoadImageFromFile', backend_args=None),
169
+ dict(
170
+ type='LoadAnnotations',
171
+ with_bbox=True,
172
+ with_mask=True,
173
+ poly2mask=False),
174
+ dict(
175
+ type='RandomChoiceResize',
176
+ scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
177
+ (1333, 768), (1333, 800)],
178
+ keep_ratio=True),
179
+ dict(type='RandomFlip', prob=0.5),
180
+ dict(type='PackDetInputs')
181
+ ],
182
+ backend_args=None,
183
+ metainfo=dict(classes=('Table', ), palette=[(220, 20, 60)])))
184
+ val_dataloader = dict(
185
+ batch_size=1,
186
+ num_workers=2,
187
+ persistent_workers=True,
188
+ drop_last=False,
189
+ sampler=dict(type='DefaultSampler', shuffle=False),
190
+ dataset=dict(
191
+ type='CocoDataset',
192
+ data_root='data/table-det-740/',
193
+ ann_file='test_coco.json',
194
+ data_prefix=dict(img=''),
195
+ test_mode=True,
196
+ pipeline=[
197
+ dict(type='LoadImageFromFile', backend_args=None),
198
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
199
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
200
+ dict(
201
+ type='PackDetInputs',
202
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
203
+ 'scale_factor'))
204
+ ],
205
+ backend_args=None,
206
+ metainfo=dict(classes=('Table', ), palette=[(220, 20, 60)])))
207
+ test_dataloader = dict(
208
+ batch_size=1,
209
+ num_workers=2,
210
+ persistent_workers=True,
211
+ drop_last=False,
212
+ sampler=dict(type='DefaultSampler', shuffle=False),
213
+ dataset=dict(
214
+ type='CocoDataset',
215
+ data_root='data/table-det-740/',
216
+ ann_file='test_coco.json',
217
+ data_prefix=dict(img=''),
218
+ test_mode=True,
219
+ pipeline=[
220
+ dict(type='LoadImageFromFile', backend_args=None),
221
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
222
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
223
+ dict(
224
+ type='PackDetInputs',
225
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
226
+ 'scale_factor'))
227
+ ],
228
+ backend_args=None,
229
+ metainfo=dict(classes=('Table', ), palette=[(220, 20, 60)])))
230
+ val_evaluator = dict(
231
+ type='CocoMetric',
232
+ ann_file='data/table-det-740/test_coco.json',
233
+ metric=['bbox', 'segm'],
234
+ format_only=False,
235
+ backend_args=None)
236
+ test_evaluator = dict(
237
+ type='CocoMetric',
238
+ ann_file='data/table-det-740/test_coco.json',
239
+ metric=['bbox', 'segm'],
240
+ format_only=False,
241
+ backend_args=None)
242
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
243
+ val_cfg = dict(type='ValLoop')
244
+ test_cfg = dict(type='TestLoop')
245
+ param_scheduler = [
246
+ dict(
247
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
248
+ dict(
249
+ type='MultiStepLR',
250
+ begin=0,
251
+ end=12,
252
+ by_epoch=True,
253
+ milestones=[8, 11],
254
+ gamma=0.1)
255
+ ]
256
+ optim_wrapper = dict(
257
+ type='OptimWrapper',
258
+ optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
259
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
260
+ default_scope = 'mmdet'
261
+ default_hooks = dict(
262
+ timer=dict(type='IterTimerHook'),
263
+ logger=dict(type='LoggerHook', interval=50),
264
+ param_scheduler=dict(type='ParamSchedulerHook'),
265
+ checkpoint=dict(type='CheckpointHook', interval=1),
266
+ sampler_seed=dict(type='DistSamplerSeedHook'),
267
+ visualization=dict(type='DetVisualizationHook'))
268
+ env_cfg = dict(
269
+ cudnn_benchmark=False,
270
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
271
+ dist_cfg=dict(backend='nccl'))
272
+ vis_backends = [dict(type='LocalVisBackend')]
273
+ visualizer = dict(
274
+ type='DetLocalVisualizer',
275
+ vis_backends=[dict(type='LocalVisBackend')],
276
+ name='visualizer')
277
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
278
+ log_level = 'INFO'
279
+ load_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
280
+ resume = False
281
+ metainfo = dict(classes=('Table', ), palette=[(220, 20, 60)])
282
+ launcher = 'none'
283
+ work_dir = './work_dirs/vote-config'
model/table-det/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580befce0957f18f2dde80eafa744b5128d7ba1fbb08ea0a8ea18bcaab7d0b50
3
+ size 351236105
model/{det → text-det}/config.py RENAMED
File without changes
model/{det → text-det}/model.pth RENAMED
File without changes
model/{recog → text-recog}/config.py RENAMED
File without changes
model/{recog → text-recog}/model.pth RENAMED
File without changes