Spaces:

topdu
/

OpenOCR-Demo

Running

App Files Files Community

OpenOCR-Demo / configs /rec /svtrv2 /repsvtr_ch.yml

topdu

openocr demo

29f689c about 2 months ago

raw

history blame

3.12 kB

	# Global: top-level run settings (device, schedule, logging, paths).
	# The anchors defined here (&character_dict_path, &max_text_length,
	# &use_space_char) are referenced by the PostProcess, Train and Eval sections.
	Global:
	  device: gpu
	  epoch_num: 20
	  log_smooth_window: 20
	  print_batch_step: 10
	  output_dir: ./output/rec/repsvtr_ch/
	  save_epoch_step: 1
	  # NOTE(review): an earlier comment here said "every 2000 iterations", which
	  # does not match the value below. Presumably the pairs are
	  # [start, interval], i.e. evaluation every 500 iterations - confirm
	  # against the trainer.
	  eval_epoch_step: [0, 1]
	  eval_batch_step: [0, 500]
	  cal_metric_during_train: true
	  pretrained_model: ./openocr_repsvtr_ch.pth
	  # Explicit null (same parsed value as a bare key): nothing configured.
	  checkpoints: null
	  use_tensorboard: false
	  infer_img: null
	  # for data or label process
	  character_dict_path: &character_dict_path ./tools/utils/ppocr_keys_v1.txt
	  max_text_length: &max_text_length 25
	  use_space_char: &use_space_char true
	  # NOTE(review): this path and project_name do not mention "repsvtr_ch"
	  # like output_dir does - confirm they are intended for this config.
	  save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_ctc.txt
	  use_amp: true
	  project_name: resvtr_ctc_nosgm_ds

	# Optimizer: optimizer selection and its hyper-parameters.
	Optimizer:
	  name: AdamW
	  lr: 0.00065  # for 4gpus bs256/gpu
	  weight_decay: 0.05
	  filter_bias_and_bn: true

	# LRScheduler: learning-rate schedule selection and its parameters.
	LRScheduler:
	  name: OneCycleLR
	  warmup_epoch: 1.5  # pct_start 0.075*20 = 1.5ep
	  cycle_momentum: false

	# Architecture: model definition (recognition model; encoder + CTC decoder).
	Architecture:
	  model_type: rec
	  algorithm: SVTRv2_mobile
	  # Explicit null (same parsed value as a bare key): no transform configured.
	  Transform: null
	  Encoder:
	    name: RepSVTREncoder
	  Decoder:
	    name: CTCDecoder
	    # NOTE(review): nesting was lost in the extracted source; svtr_encoder is
	    # placed under Decoder as a sub-mapping - confirm against the decoder's
	    # accepted arguments.
	    svtr_encoder:
	      dims: 256
	      depth: 2
	      hidden_dims: 256
	      kernel_size: [1, 3]
	      use_guide: true

	# Loss: training loss selection and its options.
	Loss:
	  name: CTCLoss
	  zero_infinity: true

	# PostProcess: decoding of model output; reuses the dictionary path anchored
	# in Global.
	PostProcess:
	  name: CTCLabelDecode
	  character_dict_path: *character_dict_path

	# Metric: evaluation metric selection; main_indicator names the reported key.
	Metric:
	  name: RecMetric
	  main_indicator: acc
	  ignore_space: false
	  # is_filter: True

	# Train: training data pipeline - dataset, per-sample transforms, batch
	# sampler and data loader.
	Train:
	  dataset:
	    name: RatioDataSetTVResize
	    ds_width: true
	    padding: false
	    base_shape: [[32, 32], [64, 32], [96, 32], [128, 32]]
	    data_dir_list:
	      - '../Union14M-L-LMDB-Filtered/filter_train_challenging'
	      - '../Union14M-L-LMDB-Filtered/filter_train_hard'
	      - '../Union14M-L-LMDB-Filtered/filter_train_medium'
	      - '../Union14M-L-LMDB-Filtered/filter_train_normal'
	      - '../Union14M-L-LMDB-Filtered/filter_train_easy'
	    transforms:
	      - DecodeImagePIL:  # load image
	          img_mode: RGB
	      - PARSeqAugPIL:
	      - CTCLabelEncode:  # Class handling label
	          character_dict_path: *character_dict_path
	          use_space_char: *use_space_char
	          max_text_length: *max_text_length
	      - KeepKeys:
	          keep_keys: ['image', 'label', 'length']
	  sampler:
	    name: RatioSampler
	    scales: [[128, 32]]  # w, h
	    # divided_factor: to ensure the width and height dimensions can be
	    # divided by the downsampling multiple
	    first_bs: &bs 256
	    fix_bs: false
	    divided_factor: [4, 16]  # w, h
	    is_training: true
	  loader:
	    shuffle: true
	    batch_size_per_card: *bs
	    drop_last: true
	    max_ratio: 4
	    num_workers: 4

	# Eval: evaluation data pipeline - dataset, per-sample transforms and data
	# loader.
	Eval:
	  dataset:
	    name: LMDBDataSet
	    data_dir: ../evaluation
	    transforms:
	      - DecodeImage:  # load image
	          img_mode: BGR
	      - CTCLabelEncode:  # Class handling label
	          character_dict_path: *character_dict_path
	          use_space_char: *use_space_char
	          max_text_length: *max_text_length
	      - RecDynamicResize:
	          image_shape: [48, 320]
	          padding: false
	      - KeepKeys:
	          keep_keys: ['image', 'label', 'length']
	  loader:
	    shuffle: false
	    drop_last: false
	    batch_size_per_card: 1
	    num_workers: 4