# Source: OpenOCR-Demo/configs/det/dbnet/repvit_db.yml
# Uploaded by topdu — commit 695a4a4 ("update app"), 3.6 kB
# Run-wide settings: device, schedule length, logging/saving cadence and
# the paths used for weights, inference input and prediction output.
Global:
  device: gpu
  # Anchored for reuse via `*epoch_num`; the only visible references are in
  # the commented-out Train section.
  epoch_num: &epoch_num 500
  log_smooth_window: 20
  print_batch_step: 100
  save_model_dir: ./output/det_repsvtr_db
  save_epoch_step: 10
  # NOTE(review): presumably [start_step, interval] — confirm with the trainer.
  eval_batch_step:
    - 0
    - 1000
  cal_metric_during_train: false
  # Explicit null instead of a bare empty value: no checkpoint is configured.
  checkpoints: null
  pretrained_model: openocr_det_repvit_ch.pth
  save_inference_dir: null
  use_visualdl: false
  infer_img: ./testA
  save_res_path: ./checkpoints/det_db/predicts_db.txt
  distributed: true
  # NOTE(review): placed under Global because it precedes `Architecture:`;
  # confirm it is not meant to be a top-level key.
  model_type: det
# Model definition: the DB algorithm assembled from a Backbone, Neck and Head.
# Each component is selected by its `name`; the remaining keys are that
# component's own options.
Architecture:
  algorithm: DB
  Backbone:
    name: RepSVTR_det
  Neck:
    name: RSEFPN
    out_channels: 96
    shortcut: true
  Head:
    name: DBHead
    # NOTE(review): presumably the DB binarization steepness factor —
    # confirm against the DBHead implementation.
    k: 50
# Loss:
#   name: DBLoss
#   balance_loss: true
#   main_loss_type: DiceLoss
#   alpha: 5
#   beta: 10
#   ohem_ratio: 3
# Optimizer:
#   name: Adam
#   beta1: 0.9
#   beta2: 0.999
#   lr:
#     name: Cosine
#     learning_rate: 0.001  # (8*8c)
#     warmup_epoch: 2
#   regularizer:
#     name: L2
#     factor: 5.0e-05
# Post-processing step selected by `name` (DBPostProcess) and its thresholds.
PostProcess:
  name: DBPostProcess
  thresh: 0.3
  box_thresh: 0.6
  max_candidates: 1000
  unclip_ratio: 1.5
  # Quoted so the value is always read as a string.
  score_mode: 'slow'
# Metric:
#   name: DetMetric
#   main_indicator: hmean
# Train:
#   dataset:
#     name: SimpleDataSet
#     data_dir: ./train_data/icdar2015/text_localization/
#     label_file_list:
#       - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
#     ratio_list: [1.0]
#     transforms:
#       - DecodeImage:
#           img_mode: BGR
#           channel_first: false
#       - DetLabelEncode: null
#       - CopyPaste: null
#       - IaaAugment:
#           augmenter_args:
#             - type: Fliplr
#               args:
#                 p: 0.5
#             - type: Affine
#               args:
#                 rotate:
#                   - -10
#                   - 10
#             - type: Resize
#               args:
#                 size:
#                   - 0.5
#                   - 3
#       - EastRandomCropData:
#           size:
#             - 640
#             - 640
#           max_tries: 50
#           keep_ratio: true
#       - MakeBorderMap:
#           shrink_ratio: 0.4
#           thresh_min: 0.3
#           thresh_max: 0.7
#           total_epoch: *epoch_num
#       - MakeShrinkMap:
#           shrink_ratio: 0.4
#           min_text_size: 8
#           total_epoch: *epoch_num
#       - NormalizeImage:
#           scale: 1./255.
#           mean:
#             - 0.485
#             - 0.456
#             - 0.406
#           std:
#             - 0.229
#             - 0.224
#             - 0.225
#           order: hwc
#       - ToCHWImage: null
#       - KeepKeys:
#           keep_keys:
#             - image
#             - threshold_map
#             - threshold_mask
#             - shrink_map
#             - shrink_mask
#   loader:
#     shuffle: true
#     drop_last: false
#     batch_size_per_card: 8
#     num_workers: 8
# Evaluation pipeline: the dataset to read, the ordered list of transforms
# applied to each sample, and the data-loader settings.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    # Transforms are listed in order; each entry maps an operator name to
    # its arguments (null means no arguments are given).
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null
      - DetResizeForTest:
          # image_shape: [1280, 1280]
          # keep_ratio: True
          # padding: True
          limit_side_len: 960
          limit_type: max
      - NormalizeImage:
          # YAML reads this plain scalar as a string, not a float.
          # NOTE(review): presumably the consumer evaluates the expression —
          # confirm before changing its form.
          scale: 1./255.
          mean:
            - 0.485
            - 0.456
            - 0.406
          std:
            - 0.229
            - 0.224
            - 0.225
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - shape
            - polys
            - ignore_tags
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 1
    num_workers: 2
# Profiler options; null leaves them unset.
# NOTE(review): presumably this disables profiling — confirm with the consumer.
profiler_options: null