napatswift committed on
Commit
cc0b98f
·
1 Parent(s): f25f280

Update model

Browse files
Files changed (2) hide show
  1. model/det/config.py +133 -19
  2. model/det/model.pth +2 -2
model/det/config.py CHANGED
@@ -42,9 +42,20 @@ train_pipeline = [
42
  dict(
43
  type='ImgAugWrapper',
44
  args=[['Fliplr', 0.5], {
 
 
 
 
45
  'cls': 'Affine',
46
  'rotate': [-10, 10]
47
- }, ['Resize', [0.5, 3.0]]]),
 
 
 
 
 
 
 
48
  dict(type='RandomCrop', min_side_ratio=0.1),
49
  dict(type='Resize', scale=(640, 640), keep_ratio=True),
50
  dict(type='Pad', size=(640, 640)),
@@ -67,10 +78,10 @@ test_pipeline = [
67
  type='PackTextDetInputs',
68
  meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
69
  ]
70
- icdar2015_textdet_data_root = 'data/det/textdet-thvote'
71
- icdar2015_textdet_train = dict(
72
  type='OCRDataset',
73
- data_root='data/det/textdet-thvote',
74
  ann_file='textdet_train.json',
75
  data_prefix=dict(img_path='imgs/'),
76
  filter_cfg=dict(filter_empty_gt=True, min_size=32),
@@ -92,9 +103,20 @@ icdar2015_textdet_train = dict(
92
  dict(
93
  type='ImgAugWrapper',
94
  args=[['Fliplr', 0.5], {
 
 
 
 
95
  'cls': 'Affine',
96
  'rotate': [-10, 10]
97
- }, ['Resize', [0.5, 3.0]]]),
 
 
 
 
 
 
 
98
  dict(type='RandomCrop', min_side_ratio=0.1),
99
  dict(type='Resize', scale=(640, 640), keep_ratio=True),
100
  dict(type='Pad', size=(640, 640)),
@@ -102,7 +124,22 @@ icdar2015_textdet_train = dict(
102
  type='PackTextDetInputs',
103
  meta_keys=('img_path', 'ori_shape', 'img_shape'))
104
  ])
105
- icdar2015_textdet_test = dict(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  type='OCRDataset',
107
  data_root='data/det/textdet-thvote',
108
  ann_file='textdet_test.json',
@@ -133,7 +170,7 @@ default_hooks = dict(
133
  timer=dict(type='IterTimerHook'),
134
  logger=dict(type='LoggerHook', interval=5),
135
  param_scheduler=dict(type='ParamSchedulerHook'),
136
- checkpoint=dict(type='CheckpointHook', interval=20),
137
  sampler_seed=dict(type='DistSamplerSeedHook'),
138
  sync_buffer=dict(type='SyncBuffersHook'),
139
  visualization=dict(
@@ -146,7 +183,7 @@ default_hooks = dict(
146
  log_level = 'INFO'
147
  log_processor = dict(type='LogProcessor', window_size=10, by_epoch=True)
148
  load_from = None
149
- resume = False
150
  val_evaluator = dict(type='HmeanIOUMetric')
151
  test_evaluator = dict(type='HmeanIOUMetric')
152
  vis_backends = [dict(type='LocalVisBackend')]
@@ -154,21 +191,87 @@ visualizer = dict(
154
  type='TextDetLocalVisualizer',
155
  name='visualizer',
156
  vis_backends=[dict(type='LocalVisBackend')])
 
157
  optim_wrapper = dict(
158
- type='OptimWrapper',
159
- optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001))
160
- train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=1200, val_interval=20)
161
  val_cfg = dict(type='ValLoop')
162
  test_cfg = dict(type='TestLoop')
163
- param_scheduler = [dict(type='PolyLR', power=0.9, eta_min=1e-07, end=1200)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  train_dataloader = dict(
165
- batch_size=16,
166
  num_workers=8,
167
  persistent_workers=True,
168
  sampler=dict(type='DefaultSampler', shuffle=True),
169
  dataset=dict(
170
  type='OCRDataset',
171
- data_root='data/det/textdet-thvote',
172
  ann_file='textdet_train.json',
173
  data_prefix=dict(img_path='imgs/'),
174
  filter_cfg=dict(filter_empty_gt=True, min_size=32),
@@ -190,9 +293,20 @@ train_dataloader = dict(
190
  dict(
191
  type='ImgAugWrapper',
192
  args=[['Fliplr', 0.5], {
 
 
 
 
193
  'cls': 'Affine',
194
  'rotate': [-10, 10]
195
- }, ['Resize', [0.5, 3.0]]]),
 
 
 
 
 
 
 
196
  dict(type='RandomCrop', min_side_ratio=0.1),
197
  dict(type='Resize', scale=(640, 640), keep_ratio=True),
198
  dict(type='Pad', size=(640, 640)),
@@ -201,7 +315,7 @@ train_dataloader = dict(
201
  meta_keys=('img_path', 'ori_shape', 'img_shape'))
202
  ]))
203
  val_dataloader = dict(
204
- batch_size=1,
205
  num_workers=4,
206
  persistent_workers=True,
207
  sampler=dict(type='DefaultSampler', shuffle=False),
@@ -228,7 +342,7 @@ val_dataloader = dict(
228
  'scale_factor'))
229
  ]))
230
  test_dataloader = dict(
231
- batch_size=1,
232
  num_workers=4,
233
  persistent_workers=True,
234
  sampler=dict(type='DefaultSampler', shuffle=False),
@@ -254,6 +368,6 @@ test_dataloader = dict(
254
  meta_keys=('img_path', 'ori_shape', 'img_shape',
255
  'scale_factor'))
256
  ]))
257
- auto_scale_lr = dict(base_batch_size=16)
258
  launcher = 'none'
259
- work_dir = './work_dirs/dbnet_resnet18_fpnc_1200e_icdar2015'
 
42
  dict(
43
  type='ImgAugWrapper',
44
  args=[['Fliplr', 0.5], {
45
+ 'cls': 'CoarseDropout',
46
+ 'p': (0.0005, 0.001),
47
+ 'size_percent': 0.4
48
+ }, {
49
  'cls': 'Affine',
50
  'rotate': [-10, 10]
51
+ }, {
52
+ 'cls': 'GaussianBlur',
53
+ 'sigma': (1, 1.6)
54
+ }, ['Resize', [0.5, 3.0]], {
55
+ 'cls': 'CoarseDropout',
56
+ 'p': (0.0005, 0.001),
57
+ 'size_percent': 0.4
58
+ }]),
59
  dict(type='RandomCrop', min_side_ratio=0.1),
60
  dict(type='Resize', scale=(640, 640), keep_ratio=True),
61
  dict(type='Pad', size=(640, 640)),
 
78
  type='PackTextDetInputs',
79
  meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
80
  ]
81
+ thvc_textdet_data_root = 'data/det/vl+vc-textdet'
82
+ thvc_textdet_train = dict(
83
  type='OCRDataset',
84
+ data_root='data/det/vl+vc-textdet',
85
  ann_file='textdet_train.json',
86
  data_prefix=dict(img_path='imgs/'),
87
  filter_cfg=dict(filter_empty_gt=True, min_size=32),
 
103
  dict(
104
  type='ImgAugWrapper',
105
  args=[['Fliplr', 0.5], {
106
+ 'cls': 'CoarseDropout',
107
+ 'p': (0.0005, 0.001),
108
+ 'size_percent': 0.4
109
+ }, {
110
  'cls': 'Affine',
111
  'rotate': [-10, 10]
112
+ }, {
113
+ 'cls': 'GaussianBlur',
114
+ 'sigma': (1, 1.6)
115
+ }, ['Resize', [0.5, 3.0]], {
116
+ 'cls': 'CoarseDropout',
117
+ 'p': (0.0005, 0.001),
118
+ 'size_percent': 0.4
119
+ }]),
120
  dict(type='RandomCrop', min_side_ratio=0.1),
121
  dict(type='Resize', scale=(640, 640), keep_ratio=True),
122
  dict(type='Pad', size=(640, 640)),
 
124
  type='PackTextDetInputs',
125
  meta_keys=('img_path', 'ori_shape', 'img_shape'))
126
  ])
127
+ thvc_textdet_test = dict(
128
+ type='OCRDataset',
129
+ data_root='data/det/vl+vc-textdet',
130
+ ann_file='textdet_test.json',
131
+ data_prefix=dict(img_path='imgs/'),
132
+ test_mode=True,
133
+ pipeline=None)
134
+ thvote_textdet_data_root = 'data/det/textdet-thvote'
135
+ thvote_textdet_train = dict(
136
+ type='OCRDataset',
137
+ data_root='data/det/textdet-thvote',
138
+ ann_file='textdet_train.json',
139
+ data_prefix=dict(img_path='imgs/'),
140
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
141
+ pipeline=None)
142
+ thvote_textdet_test = dict(
143
  type='OCRDataset',
144
  data_root='data/det/textdet-thvote',
145
  ann_file='textdet_test.json',
 
170
  timer=dict(type='IterTimerHook'),
171
  logger=dict(type='LoggerHook', interval=5),
172
  param_scheduler=dict(type='ParamSchedulerHook'),
173
+ checkpoint=dict(type='CheckpointHook', interval=5),
174
  sampler_seed=dict(type='DistSamplerSeedHook'),
175
  sync_buffer=dict(type='SyncBuffersHook'),
176
  visualization=dict(
 
183
  log_level = 'INFO'
184
  log_processor = dict(type='LogProcessor', window_size=10, by_epoch=True)
185
  load_from = None
186
+ resume = True
187
  val_evaluator = dict(type='HmeanIOUMetric')
188
  test_evaluator = dict(type='HmeanIOUMetric')
189
  vis_backends = [dict(type='LocalVisBackend')]
 
191
  type='TextDetLocalVisualizer',
192
  name='visualizer',
193
  vis_backends=[dict(type='LocalVisBackend')])
194
+ max_epochs = 30
195
  optim_wrapper = dict(
196
+ type='OptimWrapper', optimizer=dict(type='Adam', lr=0.001))
197
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=30, val_interval=10)
 
198
  val_cfg = dict(type='ValLoop')
199
  test_cfg = dict(type='TestLoop')
200
+ param_scheduler = [dict(type='PolyLR', power=0.9, end=30)]
201
+ thvotecount_textdet_train = dict(
202
+ type='OCRDataset',
203
+ data_root='data/det/vl+vc-textdet',
204
+ ann_file='textdet_train.json',
205
+ data_prefix=dict(img_path='imgs/'),
206
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
207
+ pipeline=[
208
+ dict(
209
+ type='LoadImageFromFile',
210
+ file_client_args=dict(backend='disk'),
211
+ color_type='color_ignore_orientation'),
212
+ dict(
213
+ type='LoadOCRAnnotations',
214
+ with_polygon=True,
215
+ with_bbox=True,
216
+ with_label=True),
217
+ dict(
218
+ type='TorchVisionWrapper',
219
+ op='ColorJitter',
220
+ brightness=0.12549019607843137,
221
+ saturation=0.5),
222
+ dict(
223
+ type='ImgAugWrapper',
224
+ args=[['Fliplr', 0.5], {
225
+ 'cls': 'CoarseDropout',
226
+ 'p': (0.0005, 0.001),
227
+ 'size_percent': 0.4
228
+ }, {
229
+ 'cls': 'Affine',
230
+ 'rotate': [-10, 10]
231
+ }, {
232
+ 'cls': 'GaussianBlur',
233
+ 'sigma': (1, 1.6)
234
+ }, ['Resize', [0.5, 3.0]], {
235
+ 'cls': 'CoarseDropout',
236
+ 'p': (0.0005, 0.001),
237
+ 'size_percent': 0.4
238
+ }]),
239
+ dict(type='RandomCrop', min_side_ratio=0.1),
240
+ dict(type='Resize', scale=(640, 640), keep_ratio=True),
241
+ dict(type='Pad', size=(640, 640)),
242
+ dict(
243
+ type='PackTextDetInputs',
244
+ meta_keys=('img_path', 'ori_shape', 'img_shape'))
245
+ ])
246
+ thvotecount_textdet_test = dict(
247
+ type='OCRDataset',
248
+ data_root='data/det/textdet-thvote',
249
+ ann_file='textdet_test.json',
250
+ data_prefix=dict(img_path='imgs/'),
251
+ test_mode=True,
252
+ pipeline=[
253
+ dict(
254
+ type='LoadImageFromFile',
255
+ file_client_args=dict(backend='disk'),
256
+ color_type='color_ignore_orientation'),
257
+ dict(type='Resize', scale=(1333, 736), keep_ratio=True),
258
+ dict(
259
+ type='LoadOCRAnnotations',
260
+ with_polygon=True,
261
+ with_bbox=True,
262
+ with_label=True),
263
+ dict(
264
+ type='PackTextDetInputs',
265
+ meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
266
+ ])
267
  train_dataloader = dict(
268
+ batch_size=40,
269
  num_workers=8,
270
  persistent_workers=True,
271
  sampler=dict(type='DefaultSampler', shuffle=True),
272
  dataset=dict(
273
  type='OCRDataset',
274
+ data_root='data/det/vl+vc-textdet',
275
  ann_file='textdet_train.json',
276
  data_prefix=dict(img_path='imgs/'),
277
  filter_cfg=dict(filter_empty_gt=True, min_size=32),
 
293
  dict(
294
  type='ImgAugWrapper',
295
  args=[['Fliplr', 0.5], {
296
+ 'cls': 'CoarseDropout',
297
+ 'p': (0.0005, 0.001),
298
+ 'size_percent': 0.4
299
+ }, {
300
  'cls': 'Affine',
301
  'rotate': [-10, 10]
302
+ }, {
303
+ 'cls': 'GaussianBlur',
304
+ 'sigma': (1, 1.6)
305
+ }, ['Resize', [0.5, 3.0]], {
306
+ 'cls': 'CoarseDropout',
307
+ 'p': (0.0005, 0.001),
308
+ 'size_percent': 0.4
309
+ }]),
310
  dict(type='RandomCrop', min_side_ratio=0.1),
311
  dict(type='Resize', scale=(640, 640), keep_ratio=True),
312
  dict(type='Pad', size=(640, 640)),
 
315
  meta_keys=('img_path', 'ori_shape', 'img_shape'))
316
  ]))
317
  val_dataloader = dict(
318
+ batch_size=8,
319
  num_workers=4,
320
  persistent_workers=True,
321
  sampler=dict(type='DefaultSampler', shuffle=False),
 
342
  'scale_factor'))
343
  ]))
344
  test_dataloader = dict(
345
+ batch_size=8,
346
  num_workers=4,
347
  persistent_workers=True,
348
  sampler=dict(type='DefaultSampler', shuffle=False),
 
368
  meta_keys=('img_path', 'ori_shape', 'img_shape',
369
  'scale_factor'))
370
  ]))
371
+ auto_scale_lr = dict(base_batch_size=100)
372
  launcher = 'none'
373
+ work_dir = './work_dirs/dbnet_resnet18_fpnc_votecount'
model/det/model.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8de2f2fe74ea3d941fe56373001209d13904d8d313f6bdfbeb4e1142f321e8ec
3
- size 100074073
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4e79675b6bab2c627fbf19196dd0baf57c7016958892e75ffb0eed0bc3c467b
3
+ size 148596609