Spaces:
Running
Running
File size: 4,381 Bytes
29f689c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
Global:
device: gpu
epoch_num: 20
log_smooth_window: 20
print_batch_step: 10
output_dir: ./output/rec/u14m_filter/svtr_base_igtr
save_epoch_step: 1
# evaluation is run every 2000 iterations
eval_batch_step: [0, 500]
eval_epoch_step: [0, 1]
cal_metric_during_train: True
pretrained_model:
checkpoints:
use_tensorboard: false
infer_img:
# for data or label process
character_dict_path: &character_dict_path
# ./tools/utils/EN_symbol_dict.txt # 96en
# ./tools/utils/ppocr_keys_v1.txt # ch
max_text_length: &max_text_length 25
use_space_char: &use_space_char False
save_res_path: ./output/rec/u14m_filter/predicts_svtr_base_igtr.txt
use_amp: True
Optimizer:
name: AdamW
lr: 0.0005 # 2gpus 384bs/gpu
weight_decay: 0.05
filter_bias_and_bn: True
LRScheduler:
name: OneCycleLR
warmup_epoch: 1.5
cycle_momentum: False
Architecture:
model_type: rec
algorithm: IGTR
in_channels: 3
Transform:
Encoder:
name: SVTRNet2DPos
img_size: [32, -1]
out_char_num: 25
out_channels: 256
patch_merging: 'Conv'
embed_dim: [128, 256, 384]
depth: [6, 6, 6]
num_heads: [4, 8, 12]
mixer: ['ConvB','ConvB','ConvB','ConvB','ConvB','ConvB', 'ConvB','ConvB', 'Global','Global','Global','Global','Global','Global','Global','Global','Global','Global']
local_mixer: [[5, 5], [5, 5], [5, 5]]
last_stage: False
prenorm: True
use_first_sub: False
Decoder:
name: IGTRDecoder
dim: 384
num_layer: 1
ar: False
refine_iter: 0
# next_pred: True
next_pred: False
pos2d: True
ds: True
# pos_len: False
# rec_layer: 1
Loss:
name: IGTRLoss
PostProcess:
name: IGTRLabelDecode
character_dict_path: *character_dict_path
use_space_char: *use_space_char
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: RatioDataSet
ds_width: True
padding: &padding False
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
'../Union14M-L-LMDB-Filtered/filter_train_hard',
'../Union14M-L-LMDB-Filtered/filter_train_medium',
'../Union14M-L-LMDB-Filtered/filter_train_normal',
'../Union14M-L-LMDB-Filtered/filter_train_easy',
]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- PARSeqAug:
- IGTRLabelEncode: # Class handling label
k: 8
prompt_error: False
character_dict_path: *character_dict_path
use_space_char: *use_space_char
max_text_length: *max_text_length
- KeepKeys:
keep_keys: ['image', 'label', 'prompt_pos_idx_list',
'prompt_char_idx_list', 'ques_pos_idx_list', 'ques1_answer_list',
'ques2_char_idx_list', 'ques2_answer_list', 'ques3_answer', 'ques4_char_num_list',
'ques_len_list', 'ques2_len_list', 'prompt_len_list', 'length'] # dataloader will return list in this order
sampler:
name: RatioSampler
scales: [[128, 32]] # w, h
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
first_bs: &bs 384
fix_bs: false
divided_factor: [4, 16] # w, h
is_training: True
loader:
shuffle: True
batch_size_per_card: *bs
drop_last: True
max_ratio: &max_ratio 4
num_workers: 4
Eval:
dataset:
name: RatioDataSet
ds_width: True
padding: *padding
data_dir_list: ['../evaluation/CUTE80',
'../evaluation/IC13_857',
'../evaluation/IC15_1811',
'../evaluation/IIIT5k',
'../evaluation/SVT',
'../evaluation/SVTP']
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- ARLabelEncode: # Class handling label
character_dict_path: *character_dict_path
use_space_char: *use_space_char
max_text_length: *max_text_length
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
sampler:
name: RatioSampler
scales: [[128, 32]] # w, h
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
first_bs: 256
fix_bs: false
divided_factor: [4, 16] # w, h
is_training: False
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
max_ratio: *max_ratio
num_workers: 4
|