# Hugging Face Space: topdu/OpenOCR-Demo (status: Running)
# File: OpenOCR-Demo/configs/rec/svtrs/svtrnet_ctc_syn.yml (2.83 kB)
# Last commit: "openocr demo" by topdu, 29f689c, 2 months ago

	Global:
	device: gpu
	epoch_num: 20
	log_smooth_window: 20
	print_batch_step: 10
	output_dir: ./output/rec/syn/svtr_tiny/
	eval_epoch_step: [0, 1]
	eval_batch_step: [0, 500]
	cal_metric_during_train: True
	pretrained_model:
	checkpoints:
	use_tensorboard: false
	infer_img:
	# for data or label process
	character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
	# ./tools/utils/ppocr_keys_v1.txt # ch
	max_text_length: &max_text_length 25
	use_space_char: &use_space_char False
	save_res_path: ./output/rec/syn/predicts_svtr_tiny.txt
	use_amp: True

	Optimizer:
	name: AdamW
	lr: 0.0005 # for 4gpus bs256/gpu
	weight_decay: 0.05
	filter_bias_and_bn: True

	LRScheduler:
	name: CosineAnnealingLR
	warmup_epoch: 2

	Architecture:
	model_type: rec
	algorithm: SVTR
	Transform:
	Encoder:
	name: SVTRNet
	img_size: [32, 100]
	out_char_num: 25 # W//4 or W//8 or W/12
	out_channels: 192
	patch_merging: 'Conv'
	embed_dim: [64, 128, 256]
	depth: [3, 6, 3]
	num_heads: [2, 4, 8]
	mixer: ['Local','Local','Local','Local','Local','Local','Global','Global','Global','Global','Global','Global']
	local_mixer: [[7, 11], [7, 11], [7, 11]]
	last_stage: True
	prenorm: False
	Decoder:
	name: CTCDecoder

	Loss:
	name: CTCLoss
	zero_infinity: True

	PostProcess:
	name: CTCLabelDecode
	character_dict_path: *character_dict_path
	use_space_char: *use_space_char

	Metric:
	name: RecMetric
	main_indicator: acc

	Train:
	dataset:
	name: STRLMDBDataSet
	data_dir: ./
	transforms:
	- DecodeImage: # load image
	img_mode: BGR
	channel_first: False
	# - SVTRRecAug:
	# aug_type: 0 # or 1
	- CTCLabelEncode: # Class handling label
	character_dict_path: *character_dict_path
	use_space_char: *use_space_char
	max_text_length: *max_text_length
	- SVTRResize:
	image_shape: [3, 32, 100]
	padding: False
	- KeepKeys:
	keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
	loader:
	shuffle: True
	batch_size_per_card: 256
	drop_last: True
	num_workers: 8

	Eval:
	dataset:
	name: LMDBDataSet
	data_dir: ../evaluation/
	transforms:
	- DecodeImage: # load image
	img_mode: BGR
	channel_first: False
	- CTCLabelEncode: # Class handling label
	character_dict_path: *character_dict_path
	use_space_char: *use_space_char
	max_text_length: *max_text_length
	- SVTRResize:
	image_shape: [3, 32, 100]
	padding: False
	- KeepKeys:
	keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
	loader:
	shuffle: False
	drop_last: False
	batch_size_per_card: 256
	num_workers: 2