# OpenOCR-Demo — configs/rec/svtrv2/svtrv2_smtr_gtc_rctc_ch.yml
# (Hugging Face viewer chrome removed; original upload: topdu, revision 29f689c, 4.35 kB)
# Global run settings: device, schedule, checkpoint paths, and the shared
# anchors (&character_dict_path, &max_text_length, &use_space_char) that the
# PostProcess / Train / Eval sections alias.
Global:
  device: gpu
  epoch_num: 100
  log_smooth_window: 20
  print_batch_step: 10
  output_dir: ./output/rec/ch/svtrv2_smtr_gtc_rctc_ch
  save_epoch_step: 1
  # evaluation is run every 2000 iterations
  eval_batch_step: [0, 2000]
  eval_epoch_step: [0, 1]
  cal_metric_during_train: false
  pretrained_model:  # intentionally empty — set to a .pth path to warm-start
  # ./output/rec/u14m_filter/svtrv2_rctc/best.pth
  checkpoints:  # intentionally empty — set to resume training
  use_tensorboard: false
  infer_img:  # intentionally empty — set for single-image inference
  # for data or label process
  character_dict_path: &character_dict_path ./tools/utils/ppocr_keys_v1.txt # ch
  max_text_length: &max_text_length 25
  use_space_char: &use_space_char false
  save_res_path: ./output/rec/ch/predicts_svtrv2_smtr_gtc_rctc_ch.txt
  use_amp: true
# AdamW with weight decay; biases and norm params excluded from decay.
Optimizer:
  name: AdamW
  lr: 0.00065 # for 4gpus bs256/gpu
  weight_decay: 0.05
  filter_bias_and_bn: true
# One-cycle LR schedule with a 5-epoch warmup; momentum cycling disabled
# (AdamW manages betas itself).
LRScheduler:
  name: OneCycleLR
  warmup_epoch: 5
  cycle_momentum: false
# SVTRv2 recognizer: SVTRv2 conv/attention encoder feeding a GTC head that
# trains an SMTR (autoregressive) branch jointly with an RCTC (CTC) branch.
# NOTE(review): nesting reconstructed from a flattened paste — confirm against
# the upstream OpenOCR config.
Architecture:
  model_type: rec
  algorithm: SVTRv2
  in_channels: 3
  Transform:  # intentionally empty — no input transform module
  Encoder:
    name: SVTRv2LNConvTwo33
    use_pos_embed: false
    dims: [128, 256, 384]
    depths: [6, 6, 6]
    num_heads: [4, 8, 12]
    # per-stage token mixers: local conv early, global attention later
    mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],
            ['Conv','Conv','FGlobal','Global','Global','Global'],
            ['Global','Global','Global','Global','Global','Global']]
    local_k: [[5, 5], [5, 5], [-1, -1]]
    sub_k: [[1, 1], [2, 1], [-1, -1]]
    last_stage: false
    feat2d: true
  Decoder:
    name: GTCDecoder
    infer_gtc: true
    detach: false
    gtc_decoder:
      name: SMTRDecoder
      num_layer: 1
      ds: true
      max_len: *max_text_length
      # anchors &next / &subsl are aliased by PostProcess and Train below
      next_mode: &next true
      sub_str_len: &subsl 5
    ctc_decoder:
      name: RCTCDecoder
# Combined loss: SMTR loss plus CTC loss weighted by ctc_weight.
Loss:
  name: GTCLoss
  ctc_weight: 0.1
  gtc_loss:
    name: SMTRLoss
# Decodes both branch outputs back to text; aliases the shared charset anchors
# from Global and &next from the SMTR decoder.
# NOTE(review): nesting reconstructed from a flattened paste — confirm which
# keys belong to GTCLabelDecode vs SMTRLabelDecode upstream.
PostProcess:
  name: GTCLabelDecode
  gtc_label_decode:
    name: SMTRLabelDecode
    next_mode: *next
  character_dict_path: *character_dict_path
  use_space_char: *use_space_char
# Accuracy metric over the GTC (dual-branch) outputs.
Metric:
  name: RecGTCMetric
  main_indicator: acc
  # is_filter: True
# Training pipeline: ratio-bucketed Chinese benchmark data, PARSeq-style
# augmentation, and dual (SMTR + CTC) label encoding.
# NOTE(review): transform-argument nesting reconstructed from a flattened
# paste — confirm against the upstream OpenOCR config.
Train:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    data_dir_list:
      - '../benchmark_bctr/benchmark_bctr_train/document_train'
      - '../benchmark_bctr/benchmark_bctr_train/handwriting_train'
      - '../benchmark_bctr/benchmark_bctr_train/scene_train'
      - '../benchmark_bctr/benchmark_bctr_train/web_train'
    transforms:
      - DecodeImagePIL: # load image
          img_mode: RGB
      - PARSeqAugPIL:
      - GTCLabelEncode: # Class handling label
          gtc_label_encode:
            name: SMTRLabelEncode
            sub_str_len: *subsl
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          max_text_length: *max_text_length
      - KeepKeys:
          keep_keys: ['image', 'label', 'label_subs', 'label_next', 'length_subs',
                      'label_subs_pre', 'label_next_pre', 'length_subs_pre', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
  sampler:
    name: RatioSampler
    scales: [[128, 32]] # w, h
    # divide_factor: to ensure the width and height dimensions can be divided by the downsampling multiple
    first_bs: &bs 256
    fix_bs: false
    divided_factor: [4, 16] # w, h
    is_training: true
  loader:
    shuffle: true
    batch_size_per_card: *bs
    drop_last: true
    max_ratio: &max_ratio 8
    num_workers: 4
# Evaluation pipeline: scene-test subset only, no augmentation, AR label
# encoding for the GTC branch; batch size and max_ratio aliased from Train.
# NOTE(review): transform-argument nesting reconstructed from a flattened
# paste — confirm against the upstream OpenOCR config.
Eval:
  dataset:
    name: RatioDataSetTVResize
    ds_width: true
    padding: false
    data_dir_list:
      - '../benchmark_bctr/benchmark_bctr_test/scene_test'
    transforms:
      - DecodeImagePIL: # load image
          img_mode: RGB
      - GTCLabelEncode: # Class handling label
          gtc_label_encode:
            name: ARLabelEncode
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          max_text_length: *max_text_length
      - KeepKeys:
          keep_keys: ['image', 'label', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
  sampler:
    name: RatioSampler
    scales: [[128, 32]] # w, h
    # divide_factor: to ensure the width and height dimensions can be divided by the downsampling multiple
    first_bs: *bs
    fix_bs: false
    divided_factor: [4, 16] # w, h
    is_training: false
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: *bs
    max_ratio: *max_ratio
    num_workers: 4