model: | |
names: | |
- timm_image | |
timm_image: | |
checkpoint_name: caformer_b36.sail_in22k_ft_in1k | |
mix_choice: all_logits | |
data_types: | |
- image | |
train_transforms: | |
- resize_shorter_side | |
- center_crop | |
- trivial_augment | |
val_transforms: | |
- resize_shorter_side | |
- center_crop | |
image_norm: imagenet | |
image_size: null | |
max_img_num_per_col: 2 | |
data: | |
image: | |
missing_value_strategy: zero | |
text: | |
normalize_text: false | |
categorical: | |
minimum_cat_count: 100 | |
maximum_num_cat: 20 | |
convert_to_text: false | |
numerical: | |
convert_to_text: false | |
scaler_with_mean: true | |
scaler_with_std: true | |
document: | |
missing_value_strategy: zero | |
label: | |
numerical_label_preprocessing: standardscaler | |
pos_label: null | |
column_features_pooling_mode: concat | |
mixup: | |
turn_on: false | |
mixup_alpha: 0.8 | |
cutmix_alpha: 1.0 | |
cutmix_minmax: null | |
prob: 1.0 | |
switch_prob: 0.5 | |
mode: batch | |
turn_off_epoch: 5 | |
label_smoothing: 0.1 | |
templates: | |
turn_on: false | |
num_templates: 30 | |
template_length: 2048 | |
preset_templates: | |
- super_glue | |
- rte | |
custom_templates: null | |
optimization: | |
optim_type: adamw | |
learning_rate: 0.0001 | |
weight_decay: 0.001 | |
lr_choice: layerwise_decay | |
lr_decay: 0.9 | |
lr_schedule: cosine_decay | |
max_epochs: 20 | |
max_steps: -1 | |
warmup_steps: 0.1 | |
end_lr: 0 | |
lr_mult: 1 | |
patience: 10 | |
val_check_interval: 0.5 | |
check_val_every_n_epoch: 1 | |
skip_final_val: false | |
gradient_clip_val: 1 | |
gradient_clip_algorithm: norm | |
track_grad_norm: -1 | |
log_every_n_steps: 10 | |
top_k: 3 | |
top_k_average_method: greedy_soup | |
efficient_finetune: null | |
lora: | |
module_filter: null | |
filter: | |
- query | |
- value | |
- ^q$ | |
- ^v$ | |
- ^k$ | |
- ^o$ | |
r: 8 | |
alpha: 8 | |
conv_lora_expert_num: 8 | |
loss_function: auto | |
focal_loss: | |
alpha: null | |
gamma: 2.0 | |
reduction: mean | |
mask2former_loss: | |
loss_cross_entropy_weight: 10.0 | |
loss_mask_weight: 5.0 | |
loss_dice_weight: 5.0 | |
extra_trainable_params: [] | |
env: | |
num_gpus: 0 | |
num_nodes: 1 | |
batch_size: 128 | |
per_gpu_batch_size: 8 | |
eval_batch_size_ratio: 4 | |
per_gpu_batch_size_evaluation: null | |
precision: 32 | |
num_workers: 2 | |
num_workers_evaluation: 2 | |
accelerator: auto | |
fast_dev_run: false | |
deterministic: false | |
auto_select_gpus: true | |
strategy: auto | |
deepspeed_allgather_size: 1000000000.0 | |
deepspeed_allreduce_size: 1000000000.0 | |
compile: | |
turn_on: false | |
mode: default | |
dynamic: true | |
backend: inductor | |