Spaces:

topdu
/

OpenOCR-Demo

Running

App Files Files Community

OpenOCR-Demo / configs /det /dbnet /repvit_db.yml

topdu

update app

695a4a4 about 2 months ago

raw

history blame

3.6 kB

	# Run-wide settings: device, schedule length, logging/saving cadence and I/O paths.
	Global:
	  device: gpu
	  # Anchored so other sections can reuse the value as *epoch_num
	  # (the commented-out Train transforms reference it).
	  epoch_num: &epoch_num 500
	  log_smooth_window: 20
	  print_batch_step: 100
	  save_model_dir: ./output/det_repsvtr_db
	  save_epoch_step: 10
	  # NOTE(review): presumably [start_step, interval] — confirm against the trainer.
	  eval_batch_step:
	    - 0
	    - 1000
	  cal_metric_during_train: false
	  # No checkpoint path is set.
	  checkpoints: null
	  pretrained_model: openocr_det_repvit_ch.pth
	  save_inference_dir: null
	  use_visualdl: false
	  infer_img: ./testA
	  save_res_path: ./checkpoints/det_db/predicts_db.txt
	  distributed: true
	  model_type: det

	# Model definition: DB algorithm assembled from a backbone, a neck and a head.
	# Each component's options are nested under that component so the three
	# `name` keys do not collide.
	Architecture:
	  algorithm: DB
	  Backbone:
	    name: RepSVTR_det
	  Neck:
	    name: RSEFPN
	    out_channels: 96
	    shortcut: true
	  Head:
	    name: DBHead
	    k: 50

	# Loss:
	#   name: DBLoss
	#   balance_loss: true
	#   main_loss_type: DiceLoss
	#   alpha: 5
	#   beta: 10
	#   ohem_ratio: 3

	# Optimizer:
	#   name: Adam
	#   beta1: 0.9
	#   beta2: 0.999
	#   lr:
	#     name: Cosine
	#     learning_rate: 0.001 #(8*8c)
	#     warmup_epoch: 2
	#   regularizer:
	#     name: L2
	#     factor: 5.0e-05

	# Post-processing step (DBPostProcess) and its thresholds/limits.
	PostProcess:
	  name: DBPostProcess
	  thresh: 0.3
	  box_thresh: 0.6
	  max_candidates: 1000
	  unclip_ratio: 1.5
	  score_mode: 'slow'

	# Metric:
	#   name: DetMetric
	#   main_indicator: hmean

	# Train:
	#   dataset:
	#     name: SimpleDataSet
	#     data_dir: ./train_data/icdar2015/text_localization/
	#     label_file_list:
	#       - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
	#     ratio_list: [1.0]
	#     transforms:
	#       - DecodeImage:
	#           img_mode: BGR
	#           channel_first: false
	#       - DetLabelEncode: null
	#       - CopyPaste: null
	#       - IaaAugment:
	#           augmenter_args:
	#             - type: Fliplr
	#               args:
	#                 p: 0.5
	#             - type: Affine
	#               args:
	#                 rotate:
	#                   - -10
	#                   - 10
	#             - type: Resize
	#               args:
	#                 size:
	#                   - 0.5
	#                   - 3
	#       - EastRandomCropData:
	#           size:
	#             - 640
	#             - 640
	#           max_tries: 50
	#           keep_ratio: true
	#       - MakeBorderMap:
	#           shrink_ratio: 0.4
	#           thresh_min: 0.3
	#           thresh_max: 0.7
	#           total_epoch: *epoch_num
	#       - MakeShrinkMap:
	#           shrink_ratio: 0.4
	#           min_text_size: 8
	#           total_epoch: *epoch_num
	#       - NormalizeImage:
	#           scale: 1./255.
	#           mean:
	#             - 0.485
	#             - 0.456
	#             - 0.406
	#           std:
	#             - 0.229
	#             - 0.224
	#             - 0.225
	#           order: hwc
	#       - ToCHWImage: null
	#       - KeepKeys:
	#           keep_keys:
	#             - image
	#             - threshold_map
	#             - threshold_mask
	#             - shrink_map
	#             - shrink_mask
	#   loader:
	#     shuffle: true
	#     drop_last: false
	#     batch_size_per_card: 8
	#     num_workers: 8

	# Evaluation data pipeline: dataset source, ordered per-sample transforms,
	# and data-loader settings.
	Eval:
	  dataset:
	    name: SimpleDataSet
	    data_dir: ./train_data/icdar2015/text_localization/
	    label_file_list:
	      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
	    # Transforms are listed in order; each item is a single-key mapping of
	    # transform name to its arguments (null when it takes none).
	    transforms:
	      - DecodeImage:
	          img_mode: BGR
	          channel_first: false
	      - DetLabelEncode: null
	      - DetResizeForTest:
	          # Alternative fixed-shape resize options, currently disabled:
	          # image_shape: [1280, 1280]
	          # keep_ratio: True
	          # padding: True
	          limit_side_len: 960
	          limit_type: max
	      - NormalizeImage:
	          # Parsed by YAML as the string "1./255.", not a number.
	          # NOTE(review): presumably evaluated by the consumer — confirm.
	          scale: 1./255.
	          mean:
	            - 0.485
	            - 0.456
	            - 0.406
	          std:
	            - 0.229
	            - 0.224
	            - 0.225
	          order: hwc
	      - ToCHWImage: null
	      - KeepKeys:
	          keep_keys:
	            - image
	            - shape
	            - polys
	            - ignore_tags
	  loader:
	    shuffle: false
	    drop_last: false
	    batch_size_per_card: 1
	    num_workers: 2
	# No profiler options are configured (null).
	profiler_options: null