napatswift committed on
Commit
49aa0b6
·
1 Parent(s): 1d4a6c0

Update weights - 40e

Browse files
model/text-det/psenet.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42af4c913d9af97e9beeed47185fac769bcf65b7a653195e22340748d9eb335
3
- size 352719845
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9fd081c8004a7a8a6f3d1bab370637a819b8dcefe0d9c23c54c2d2502339aa
3
+ size 353251813
model/text-det/psenet.py CHANGED
@@ -29,114 +29,6 @@ model = dict(
29
  std=[58.395, 57.12, 57.375],
30
  bgr_to_rgb=True,
31
  pad_size_divisor=32))
32
- train_pipeline = [
33
- dict(
34
- type='LoadImageFromFile',
35
- file_client_args=dict(backend='disk'),
36
- color_type='color_ignore_orientation'),
37
- dict(
38
- type='LoadOCRAnnotations',
39
- with_polygon=True,
40
- with_bbox=True,
41
- with_label=True),
42
- dict(
43
- type='TorchVisionWrapper',
44
- op='ColorJitter',
45
- brightness=0.12549019607843137,
46
- saturation=0.5),
47
- dict(type='FixInvalidPolygon'),
48
- dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
49
- dict(type='RandomFlip', prob=0.5, direction='horizontal'),
50
- dict(type='RandomRotate', max_angle=10),
51
- dict(type='TextDetRandomCrop', target_size=(736, 736)),
52
- dict(type='Pad', size=(736, 736)),
53
- dict(
54
- type='PackTextDetInputs',
55
- meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
56
- ]
57
- test_pipeline = [
58
- dict(
59
- type='LoadImageFromFile',
60
- file_client_args=dict(backend='disk'),
61
- color_type='color_ignore_orientation'),
62
- dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
63
- dict(
64
- type='LoadOCRAnnotations',
65
- with_polygon=True,
66
- with_bbox=True,
67
- with_label=True),
68
- dict(
69
- type='PackTextDetInputs',
70
- meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
71
- ]
72
- thvc_textdet_data_root = 'data/det/vl+vc-textdet'
73
- thvc_textdet_train = dict(
74
- type='OCRDataset',
75
- data_root='data/det/vl+vc-textdet',
76
- ann_file='textdet_train.json',
77
- data_prefix=dict(img_path='imgs/'),
78
- filter_cfg=dict(filter_empty_gt=True, min_size=32),
79
- pipeline=[
80
- dict(
81
- type='LoadImageFromFile',
82
- file_client_args=dict(backend='disk'),
83
- color_type='color_ignore_orientation'),
84
- dict(
85
- type='LoadOCRAnnotations',
86
- with_polygon=True,
87
- with_bbox=True,
88
- with_label=True),
89
- dict(
90
- type='TorchVisionWrapper',
91
- op='ColorJitter',
92
- brightness=0.12549019607843137,
93
- saturation=0.5),
94
- dict(type='FixInvalidPolygon'),
95
- dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
96
- dict(type='RandomFlip', prob=0.5, direction='horizontal'),
97
- dict(type='RandomRotate', max_angle=10),
98
- dict(type='TextDetRandomCrop', target_size=(736, 736)),
99
- dict(type='Pad', size=(736, 736)),
100
- dict(
101
- type='PackTextDetInputs',
102
- meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
103
- ])
104
- thvc_textdet_test = dict(
105
- type='OCRDataset',
106
- data_root='data/det/vl+vc-textdet',
107
- ann_file='textdet_test.json',
108
- data_prefix=dict(img_path='imgs/'),
109
- test_mode=True,
110
- pipeline=None)
111
- thvote_textdet_data_root = 'data/det/textdet-thvote'
112
- thvote_textdet_train = dict(
113
- type='OCRDataset',
114
- data_root='data/det/textdet-thvote',
115
- ann_file='textdet_train.json',
116
- data_prefix=dict(img_path='imgs/'),
117
- filter_cfg=dict(filter_empty_gt=True, min_size=32),
118
- pipeline=None)
119
- thvote_textdet_test = dict(
120
- type='OCRDataset',
121
- data_root='data/det/textdet-thvote',
122
- ann_file='textdet_test.json',
123
- data_prefix=dict(img_path='imgs/'),
124
- test_mode=True,
125
- pipeline=[
126
- dict(
127
- type='LoadImageFromFile',
128
- file_client_args=dict(backend='disk'),
129
- color_type='color_ignore_orientation'),
130
- dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
131
- dict(
132
- type='LoadOCRAnnotations',
133
- with_polygon=True,
134
- with_bbox=True,
135
- with_label=True),
136
- dict(
137
- type='PackTextDetInputs',
138
- meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
139
- ])
140
  default_scope = 'mmocr'
141
  env_cfg = dict(
142
  cudnn_benchmark=True,
@@ -168,65 +60,13 @@ visualizer = dict(
168
  type='TextDetLocalVisualizer',
169
  name='visualizer',
170
  vis_backends=[dict(type='LocalVisBackend')])
171
- max_epochs = 200
172
  optim_wrapper = dict(
173
  type='OptimWrapper', optimizer=dict(type='Adam', lr=0.001))
174
  train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=50, val_interval=20)
175
  val_cfg = dict(type='ValLoop')
176
  test_cfg = dict(type='TestLoop')
177
- param_scheduler = [dict(type='PolyLR', power=0.9, end=200)]
178
- thvotecount_textdet_train = dict(
179
- type='OCRDataset',
180
- data_root='data/det/vl+vc-textdet',
181
- ann_file='textdet_train.json',
182
- data_prefix=dict(img_path='imgs/'),
183
- filter_cfg=dict(filter_empty_gt=True, min_size=32),
184
- pipeline=[
185
- dict(
186
- type='LoadImageFromFile',
187
- file_client_args=dict(backend='disk'),
188
- color_type='color_ignore_orientation'),
189
- dict(
190
- type='LoadOCRAnnotations',
191
- with_polygon=True,
192
- with_bbox=True,
193
- with_label=True),
194
- dict(
195
- type='TorchVisionWrapper',
196
- op='ColorJitter',
197
- brightness=0.12549019607843137,
198
- saturation=0.5),
199
- dict(type='FixInvalidPolygon'),
200
- dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
201
- dict(type='RandomFlip', prob=0.5, direction='horizontal'),
202
- dict(type='RandomRotate', max_angle=10),
203
- dict(type='TextDetRandomCrop', target_size=(736, 736)),
204
- dict(type='Pad', size=(736, 736)),
205
- dict(
206
- type='PackTextDetInputs',
207
- meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
208
- ])
209
- thvotecount_textdet_test = dict(
210
- type='OCRDataset',
211
- data_root='data/det/textdet-thvote',
212
- ann_file='textdet_test.json',
213
- data_prefix=dict(img_path='imgs/'),
214
- test_mode=True,
215
- pipeline=[
216
- dict(
217
- type='LoadImageFromFile',
218
- file_client_args=dict(backend='disk'),
219
- color_type='color_ignore_orientation'),
220
- dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
221
- dict(
222
- type='LoadOCRAnnotations',
223
- with_polygon=True,
224
- with_bbox=True,
225
- with_label=True),
226
- dict(
227
- type='PackTextDetInputs',
228
- meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
229
- ])
230
  train_dataloader = dict(
231
  batch_size=10,
232
  num_workers=16,
@@ -258,7 +98,6 @@ train_dataloader = dict(
258
  type='ShortScaleAspectJitter',
259
  short_size=736,
260
  scale_divisor=32),
261
- dict(type='RandomFlip', prob=0.5, direction='horizontal'),
262
  dict(type='RandomRotate', max_angle=10),
263
  dict(type='TextDetRandomCrop', target_size=(736, 736)),
264
  dict(type='Pad', size=(736, 736)),
 
29
  std=[58.395, 57.12, 57.375],
30
  bgr_to_rgb=True,
31
  pad_size_divisor=32))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  default_scope = 'mmocr'
33
  env_cfg = dict(
34
  cudnn_benchmark=True,
 
60
  type='TextDetLocalVisualizer',
61
  name='visualizer',
62
  vis_backends=[dict(type='LocalVisBackend')])
63
+ max_epochs = 50
64
  optim_wrapper = dict(
65
  type='OptimWrapper', optimizer=dict(type='Adam', lr=0.001))
66
  train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=50, val_interval=20)
67
  val_cfg = dict(type='ValLoop')
68
  test_cfg = dict(type='TestLoop')
69
+ param_scheduler = [dict(type='PolyLR', power=0.9, end=50)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  train_dataloader = dict(
71
  batch_size=10,
72
  num_workers=16,
 
98
  type='ShortScaleAspectJitter',
99
  short_size=736,
100
  scale_divisor=32),
 
101
  dict(type='RandomRotate', max_angle=10),
102
  dict(type='TextDetRandomCrop', target_size=(736, 736)),
103
  dict(type='Pad', size=(736, 736)),