File size: 2,639 Bytes
dbf644c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Model section. `names` lists a single entry, `timm_image`, and the sibling
# key of the same name holds that entry's settings.
model:
  names:
  - timm_image
  # Settings for the one entry listed in `names`.
  timm_image:
    # NOTE(review): looks like a timm pretrained-weights identifier
    # (architecture.tag form) — confirm against the consumer.
    checkpoint_name: caformer_b36.sail_in22k_ft_in1k
    mix_choice: all_logits
    # Only the image modality is listed for this entry.
    data_types:
    - image
    # The training list has one extra step (trivial_augment) compared with
    # the validation list; the first two steps are the same in both.
    train_transforms:
    - resize_shorter_side
    - center_crop
    - trivial_augment
    val_transforms:
    - resize_shorter_side
    - center_crop
    image_norm: imagenet
    # No explicit size is set in this file.
    # NOTE(review): presumably the consumer derives the input size from the
    # checkpoint when this is null — confirm.
    image_size: null
    max_img_num_per_col: 2
# Data section. Holds one sub-mapping per column kind (image, text,
# categorical, numerical, document, label), two scalar options, and the
# `mixup` and `templates` groups, both of which are switched off here.
data:
  image:
    missing_value_strategy: zero
  text:
    normalize_text: false
  categorical:
    minimum_cat_count: 100
    maximum_num_cat: 20
    convert_to_text: false
  numerical:
    convert_to_text: false
    # Both scaler flags are enabled.
    scaler_with_mean: true
    scaler_with_std: true
  document:
    # Same strategy value as the `image` entry above.
    missing_value_strategy: zero
  label:
    numerical_label_preprocessing: standardscaler
  # No positive label is specified in this file.
  pos_label: null
  column_features_pooling_mode: concat
  mixup:
    # Disabled. NOTE(review): presumably the remaining keys in this group
    # only take effect when turn_on is true — confirm.
    turn_on: false
    mixup_alpha: 0.8
    cutmix_alpha: 1.0
    cutmix_minmax: null
    prob: 1.0
    switch_prob: 0.5
    mode: batch
    turn_off_epoch: 5
    label_smoothing: 0.1
  templates:
    # Disabled. NOTE(review): presumably the remaining keys in this group
    # only take effect when turn_on is true — confirm.
    turn_on: false
    num_templates: 30
    template_length: 2048
    preset_templates:
    - super_glue
    - rte
    # No custom templates are supplied.
    custom_templates: null
# Optimization section. Covers optimizer and learning-rate settings, the
# training length and validation cadence, gradient clipping, checkpoint
# averaging, and the loss configuration.
optimization:
  optim_type: adamw
  learning_rate: 0.0001
  weight_decay: 0.001
  lr_choice: layerwise_decay
  lr_decay: 0.9
  lr_schedule: cosine_decay
  max_epochs: 20
  # NOTE(review): -1 presumably means "no step limit", leaving max_epochs as
  # the stopping bound — confirm.
  max_steps: -1
  # Fractional value, not an integer count.
  # NOTE(review): presumably read as a proportion of total training steps —
  # confirm.
  warmup_steps: 0.1
  end_lr: 0
  lr_mult: 1
  patience: 10
  val_check_interval: 0.5
  check_val_every_n_epoch: 1
  skip_final_val: false
  gradient_clip_val: 1
  gradient_clip_algorithm: norm
  # NOTE(review): -1 presumably disables gradient-norm tracking — confirm.
  track_grad_norm: -1
  log_every_n_steps: 10
  top_k: 3
  top_k_average_method: greedy_soup
  # No efficient-finetuning method is selected.
  # NOTE(review): presumably the `lora` group below is unused while this is
  # null — confirm.
  efficient_finetune: null
  lora:
    module_filter: null
    # Two plain names followed by four entries anchored with ^ and $.
    # NOTE(review): the anchored entries look like regular expressions meant
    # to match a whole name — confirm how the consumer applies them.
    filter:
    - query
    - value
    - ^q$
    - ^v$
    - ^k$
    - ^o$
    r: 8
    alpha: 8
    conv_lora_expert_num: 8
  # NOTE(review): "auto" presumably lets the consumer choose the loss —
  # confirm. The loss-specific groups below carry their own parameters.
  loss_function: auto
  focal_loss:
    alpha: null
    gamma: 2.0
    reduction: mean
  mask2former_loss:
    loss_cross_entropy_weight: 10.0
    loss_mask_weight: 5.0
    loss_dice_weight: 5.0
  # Explicit empty list (not null).
  extra_trainable_params: []
# Environment section. Covers device counts, batch sizing, numeric
# precision, data-loader workers, and runtime/compile switches.
env:
  # Zero GPUs are recorded here.
  # NOTE(review): presumably this configuration was resolved on a CPU-only
  # machine — confirm before reusing on GPU hardware.
  num_gpus: 0
  num_nodes: 1
  # batch_size (128) is larger than per_gpu_batch_size (8).
  # NOTE(review): presumably the gap is covered by gradient accumulation —
  # confirm.
  batch_size: 128
  per_gpu_batch_size: 8
  eval_batch_size_ratio: 4
  # No explicit evaluation batch size is set.
  per_gpu_batch_size_evaluation: null
  precision: 32
  num_workers: 2
  num_workers_evaluation: 2
  accelerator: auto
  fast_dev_run: false
  deterministic: false
  auto_select_gpus: true
  strategy: auto
  # Both sizes are written as floats (1e9), not integers.
  deepspeed_allgather_size: 1000000000.0
  deepspeed_allreduce_size: 1000000000.0
  compile:
    # Disabled. NOTE(review): presumably mode/dynamic/backend only take
    # effect when turn_on is true — confirm.
    turn_on: false
    mode: default
    dynamic: true
    backend: inductor