Spaces:

topdu
/

OpenOCR-Demo

Running

App Files Files Community

OpenOCR-Demo / configs /rec /igtr /svtr_base_ds_igtr.yml

topdu

openocr demo

29f689c about 1 month ago

raw

history blame

4.38 kB

	# Run-wide settings: device, schedule length, logging/checkpoint cadence and
	# the label-processing constants. The anchors defined here
	# (&character_dict_path, &max_text_length, &use_space_char) are reused via
	# aliases by later sections, so their names must not change.
	Global:
	  device: gpu
	  epoch_num: 20
	  log_smooth_window: 20
	  print_batch_step: 10
	  output_dir: ./output/rec/u14m_filter/svtr_base_igtr
	  save_epoch_step: 1
	  # [start_iter, interval]: evaluate every 500 iterations, starting at
	  # iteration 0.
	  eval_batch_step: [0, 500]
	  # NOTE(review): presumably the same [start, interval] form, counted in
	  # epochs - confirm against the trainer.
	  eval_epoch_step: [0, 1]
	  cal_metric_during_train: true
	  # Left unset on purpose; fill in a path to fine-tune or resume.
	  pretrained_model: null
	  checkpoints: null
	  use_tensorboard: false
	  infer_img: null
	  # for data or label process
	  # No dictionary file is configured here (value is null). Candidate files:
	  #   ./tools/utils/EN_symbol_dict.txt  # 96en
	  #   ./tools/utils/ppocr_keys_v1.txt   # ch
	  character_dict_path: &character_dict_path null
	  max_text_length: &max_text_length 25
	  use_space_char: &use_space_char false
	  save_res_path: ./output/rec/u14m_filter/predicts_svtr_base_igtr.txt
	  use_amp: true

	# Optimizer selection and hyper-parameters.
	Optimizer:
	  name: AdamW
	  lr: 0.0005  # 2gpus 384bs/gpu
	  weight_decay: 0.05
	  # NOTE(review): by its name this excludes bias and norm parameters from
	  # weight decay - confirm in the optimizer builder.
	  filter_bias_and_bn: true

	# Learning-rate schedule.
	LRScheduler:
	  name: OneCycleLR
	  warmup_epoch: 1.5  # fractional epochs are allowed
	  cycle_momentum: false

	# Model definition: an SVTRNet2DPos encoder feeding an IGTRDecoder.
	# Encoder and Decoder are separate nested mappings; each has its own `name`.
	Architecture:
	  model_type: rec
	  algorithm: IGTR
	  in_channels: 3
	  Transform: null  # no transform module configured
	  Encoder:
	    name: SVTRNet2DPos
	    img_size: [32, -1]
	    out_char_num: 25
	    out_channels: 256
	    patch_merging: 'Conv'
	    embed_dim: [128, 256, 384]
	    depth: [6, 6, 6]
	    num_heads: [4, 8, 12]
	    # 18 entries, matching sum(depth) = 6 + 6 + 6: 8 x ConvB then 10 x Global.
	    # NOTE(review): presumably one mixer type per block, in order - confirm.
	    mixer: [
	      'ConvB', 'ConvB', 'ConvB', 'ConvB', 'ConvB', 'ConvB',
	      'ConvB', 'ConvB', 'Global', 'Global', 'Global', 'Global',
	      'Global', 'Global', 'Global', 'Global', 'Global', 'Global',
	    ]
	    local_mixer: [[5, 5], [5, 5], [5, 5]]
	    last_stage: false
	    prenorm: true
	    use_first_sub: false
	  Decoder:
	    name: IGTRDecoder
	    dim: 384  # equals the last encoder embed_dim above
	    num_layer: 1
	    ar: false
	    refine_iter: 0
	    # next_pred: True
	    next_pred: false
	    pos2d: true
	    ds: true
	    # pos_len: False
	    # rec_layer: 1


	# Training loss; `name` selects the loss class.
	Loss:
	  name: IGTRLoss

	# Decoding of model output back to text. The dictionary path and space
	# handling are aliases of the anchors defined under Global, so they always
	# match the label-encoding settings.
	PostProcess:
	  name: IGTRLabelDecode
	  character_dict_path: *character_dict_path
	  use_space_char: *use_space_char

	# Evaluation metric; `main_indicator` names the reported field to track.
	Metric:
	  name: RecMetric
	  main_indicator: acc

	# Training data pipeline: dataset + per-sample transforms, the ratio-based
	# sampler, and the loader. Anchors &padding, &bs and &max_ratio defined here
	# are reused by aliases elsewhere in the file.
	Train:
	  dataset:
	    name: RatioDataSet
	    ds_width: true
	    padding: &padding false
	    data_dir_list:
	      - '../Union14M-L-LMDB-Filtered/filter_train_challenging'
	      - '../Union14M-L-LMDB-Filtered/filter_train_hard'
	      - '../Union14M-L-LMDB-Filtered/filter_train_medium'
	      - '../Union14M-L-LMDB-Filtered/filter_train_normal'
	      - '../Union14M-L-LMDB-Filtered/filter_train_easy'
	    # Applied in the order listed.
	    transforms:
	      - DecodeImage:  # load image
	          img_mode: BGR
	          channel_first: false
	      - PARSeqAug: null  # no arguments
	      - IGTRLabelEncode:  # Class handling label
	          k: 8
	          prompt_error: false
	          character_dict_path: *character_dict_path
	          use_space_char: *use_space_char
	          max_text_length: *max_text_length
	      - KeepKeys:
	          # dataloader will return list in this order
	          keep_keys:
	            - 'image'
	            - 'label'
	            - 'prompt_pos_idx_list'
	            - 'prompt_char_idx_list'
	            - 'ques_pos_idx_list'
	            - 'ques1_answer_list'
	            - 'ques2_char_idx_list'
	            - 'ques2_answer_list'
	            - 'ques3_answer'
	            - 'ques4_char_num_list'
	            - 'ques_len_list'
	            - 'ques2_len_list'
	            - 'prompt_len_list'
	            - 'length'
	  sampler:
	    name: RatioSampler
	    scales: [[128, 32]]  # w, h
	    first_bs: &bs 384
	    fix_bs: false
	    # divided_factor: ensures the width and height dimensions are divisible
	    # by the downsampling multiple
	    divided_factor: [4, 16]  # w, h
	    is_training: true
	  loader:
	    shuffle: true
	    batch_size_per_card: *bs  # same value as sampler.first_bs
	    drop_last: true
	    max_ratio: &max_ratio 4
	    num_workers: 4

	# Evaluation data pipeline over six benchmark sets. Uses ARLabelEncode
	# rather than the IGTR-specific encoder used for training, and keeps only
	# image, label and length. `padding` and `max_ratio` are aliases of anchors
	# defined in the Train section.
	Eval:
	  dataset:
	    name: RatioDataSet
	    ds_width: true
	    padding: *padding
	    data_dir_list:
	      - '../evaluation/CUTE80'
	      - '../evaluation/IC13_857'
	      - '../evaluation/IC15_1811'
	      - '../evaluation/IIIT5k'
	      - '../evaluation/SVT'
	      - '../evaluation/SVTP'
	    # Applied in the order listed.
	    transforms:
	      - DecodeImage:  # load image
	          img_mode: BGR
	          channel_first: false
	      - ARLabelEncode:  # Class handling label
	          character_dict_path: *character_dict_path
	          use_space_char: *use_space_char
	          max_text_length: *max_text_length
	      - KeepKeys:
	          # dataloader will return list in this order
	          keep_keys: ['image', 'label', 'length']
	  sampler:
	    name: RatioSampler
	    scales: [[128, 32]]  # w, h
	    first_bs: 256
	    fix_bs: false
	    # divided_factor: ensures the width and height dimensions are divisible
	    # by the downsampling multiple
	    divided_factor: [4, 16]  # w, h
	    is_training: false
	  loader:
	    shuffle: false
	    drop_last: false
	    batch_size_per_card: 256  # same value as sampler.first_bs
	    max_ratio: *max_ratio
	    num_workers: 4