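# Training-run configuration; the key layout follows AutoGluon MultiModal's
# model / data / optimization / env sections (backbone, preprocessing,
# training loop, and runtime settings, respectively).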
model:
  names:
    - timm_image
  timm_image:
    checkpoint_name: caformer_b36.sail_in22k_ft_in1k
    mix_choice: all_logits
    data_types:
      - image
    train_transforms:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transforms:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    image_size: null
    max_img_num_per_col: 2
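# Per-column-type preprocessing. Only the image modality is consumed by the
# model above; the other entries are defaults for unused column types.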
data:
  image:
    missing_value_strategy: zero
  text:
    normalize_text: false
  categorical:
    minimum_cat_count: 100
    maximum_num_cat: 20
    convert_to_text: false
  numerical:
    convert_to_text: false
    scaler_with_mean: true
    scaler_with_std: true
  document:
    missing_value_strategy: zero
  label:
    numerical_label_preprocessing: standardscaler
  pos_label: null
  column_features_pooling_mode: concat
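  # Mixup/CutMix augmentation; inactive while turn_on is false, and switched
  # off after turn_off_epoch epochs when enabled.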
  mixup:
    turn_on: false
    mixup_alpha: 0.8
    cutmix_alpha: 1.0
    cutmix_minmax: null
    prob: 1.0
    switch_prob: 0.5
    mode: batch
    turn_off_epoch: 5
    label_smoothing: 0.1
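  # Prompt templates for text columns (disabled); preset_templates names a
  # promptsource collection and subset.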
  templates:
    turn_on: false
    num_templates: 30
    template_length: 2048
    preset_templates:
      - super_glue
      - rte
    custom_templates: null
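# Optimizer, layer-wise LR decay, cosine schedule, early stopping (patience),
# and checkpoint averaging; greedy_soup greedily averages the best top_k checkpoints.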
optimization:
  optim_type: adamw
  learning_rate: 0.0001
  weight_decay: 0.001
  lr_choice: layerwise_decay
  lr_decay: 0.9
  lr_schedule: cosine_decay
  max_epochs: 20
  max_steps: -1
  warmup_steps: 0.1
  end_lr: 0
  lr_mult: 1
  patience: 10
  val_check_interval: 0.5
  check_val_every_n_epoch: 1
  skip_final_val: false
  gradient_clip_val: 1
  gradient_clip_algorithm: norm
  track_grad_norm: -1
  log_every_n_steps: 10
  top_k: 3
  top_k_average_method: greedy_soup
  efficient_finetune: null
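  # LoRA settings take effect only when efficient_finetune selects a LoRA-based
  # method; with efficient_finetune: null, all parameters are fine-tuned.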
  lora:
    module_filter: null
    filter:
      - query
      - value
      - ^q$
      - ^v$
      - ^k$
      - ^o$
    r: 8
    alpha: 8
    conv_lora_expert_num: 8
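  # Loss selection; the focal_loss and mask2former_loss blocks apply only when
  # the corresponding loss_function is in use.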
  loss_function: auto
  focal_loss:
    alpha: null
    gamma: 2.0
    reduction: mean
  mask2former_loss:
    loss_cross_entropy_weight: 10.0
    loss_mask_weight: 5.0
    loss_dice_weight: 5.0
  extra_trainable_params: []
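# Runtime settings. num_gpus: 0 runs on CPU; batch_size is the effective batch
# size, reached from per_gpu_batch_size via gradient accumulation.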
env:
  num_gpus: 0
  num_nodes: 1
  batch_size: 128
  per_gpu_batch_size: 8
  eval_batch_size_ratio: 4
  per_gpu_batch_size_evaluation: null
  precision: 32
  num_workers: 2
  num_workers_evaluation: 2
  accelerator: auto
  fast_dev_run: false
  deterministic: false
  auto_select_gpus: true
  strategy: auto
  deepspeed_allgather_size: 1000000000.0
  deepspeed_allreduce_size: 1000000000.0
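  # torch.compile options (disabled here).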
  compile:
    turn_on: false
    mode: default
    dynamic: true
    backend: inductor
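# Minimal usage sketch, assuming this file is consumed through AutoGluon
# MultiModal's hyperparameters argument; the DataFrame and label column below
# are illustrative, not taken from this config:
#
#   from autogluon.multimodal import MultiModalPredictor
#
#   predictor = MultiModalPredictor(label="label")
#   predictor.fit(
#       train_data=train_df,  # table with an image-path column and a label column
#       hyperparameters={
#           "model.names": ["timm_image"],
#           "model.timm_image.checkpoint_name": "caformer_b36.sail_in22k_ft_in1k",
#           "optimization.learning_rate": 1.0e-4,
#           "env.num_gpus": 0,
#       },
#   )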