# File size: 1,910 Bytes; revision e88b58f
# Experiment configuration: one flat mapping, every key at the top level.
# NOTE(review): the keys are grouped by apparent purpose, judged from their
# names alone. The code that reads this file is not visible here, so treat the
# section headings as a reading aid and confirm them against the consumer.
# YAML mappings are unordered, so this grouping does not change what is loaded.

# --- Run identity ---
name: joint and multitask training defaults
name_suffix: ''
model_family_name: baseline
replacement_family_name: ''
deployment_round: 3
seed: 835313
only_seed: false

# --- Previous round ---
# NOTE(review): past_round is -1 while both past_* paths reference r2_baseline;
# confirm that -1 is an intentional sentinel rather than a stale value.
past_round: -1
past_name_suffix: ''
past_checkpoint_dir: /home/mog29/compgen_saved_files/experiments/joint_training/r2_baseline/run/checkpoints
past_logdir: /home/mog29/compgen_saved_files/experiments/joint_training/r2_baseline/run/logging

# --- Filesystem locations (absolute, machine-specific paths) ---
base_folder: /home/mog29/compgen_saved_files/experiments/joint_training
expdir: /home/mog29/compgen_saved_files/experiments/joint_training/r3_baseline/run
checkpoint_dir: /home/mog29/compgen_saved_files/experiments/joint_training/r3_baseline/run/checkpoints
logdir: /home/mog29/compgen_saved_files/experiments/joint_training/r3_baseline/run/logging
data_dir: /home/mog29/compgen_saved_files/kilogram/dataset
img_dir: /home/mog29/compgen_saved_files/kilogram/dataset/square-black-imgs
split_dir: /home/mog29/compgen_saved_files/split_info/

# --- Task selection, prompts and model initialisation ---
training_type: multitask
evaluation_type: multitask
comprehension_prompt: verbose_instruction
generation_prompt: information_after
shared_parameters: true
from_scratch: true
load_from_checkpoint: false

# --- Data loading and filtering ---
batch_size: 2
test_batch_size: 2
num_workers: 4
no_shuffling: false
use_separate_dataloaders: false
context_size: 10
anno_len_threshold: 40
listener_filter: ''
speaker_filter: ''
noise_filter: ''

# --- Optimisation and schedule ---
learning_rate: 0.0001
weight_decay: 0.1
gradient_accumulation_steps: 32
gradient_clip_norm: 1
n_epochs: 15
num_training_steps: 17500
num_warmup_steps: 0
patience_cutoff: 5
save_each_epoch: true

# --- Listener / speaker weighting and IPS settings ---
listener_lambda: 0.5
speaker_lambda: 0.5
ips_clip: 5
ref_strat: no_ips_for_pos

# --- LoRA ---
lora_r: 16
lora_dropout: 0.05
lora_subset: vision_resampler
no_lora: false

# --- Sampling ---
# NOTE(review): top_p is 1 while sampling_type is nucleus; confirm that leaving
# the nucleus cut-off at its maximum is intended.
# NOTE(review): max_steps and num_samples are placed here as a guess; verify
# which component actually reads them.
sampling_type: nucleus
temperature: 0.7
top_k: 50
top_p: 1
repetition_penalty: 1
max_steps: 30
num_samples: 10

# --- Weights & Biases logging ---
use_wandb: true
wandb_experiment_name: r3_baseline_may
wandb_project_name: tangram_continual_learning_final