#!/usr/bin/env bash
# Fine-tunes a pretrained simplex diffusion LM checkpoint on GLUE tasks,
# launching one Beaker job per task via gantry.

# Beaker dataset ID and sub-path for the pretrained checkpoint; mounted at /model below.
checkpoint_mount="01H4KVBDMMN284JQ6G2N6GS2EV:checkpoint-10000"

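# Six GLUE classification tasks, each fine-tuned on 5k train examples and
# evaluated on 500; all jobs share the same hyperparameters and diffusion
# settings (simplex_value, num_diffusion_steps, num_inference_diffusion_steps).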
for task in mnli mrpc qnli qqp rte sst2
do
    EXP_NAME="${task}_fixed_eval_5k_data"
    gantry run -y -n $EXP_NAME -t $EXP_NAME --allow-dirty \
        --workspace ai2/tess2 \
        --nfs \
        --gpus 1 \
        --priority normal \
        --cluster ai2/allennlp-cirrascale \
        --env 'HF_HOME=/net/nfs.cirrascale/allennlp/hamishi/.hf' \
        --env 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' \
        --beaker-image 'ai2/pytorch2.0.0-cuda11.8-python3.10' \
        --dataset "${checkpoint_mount}:/model" \
        --venv 'base' \
        --pip requirements.txt \
        -- python -m sdlm.run_glue \
            --model_name_or_path /model \
            --dataset_name $task \
            --output_dir /results \
            --do_train \
            --do_eval \
            --max_seq_length 128 \
            --skip_special_tokens False \
            --per_device_train_batch_size 32 \
            --per_device_eval_batch_size 32 \
            --evaluation_strategy epoch \
            --save_strategy steps \
            --report_to tensorboard \
            --overwrite_output_dir \
            --pad_to_max_length \
            --simplex_value 5 \
            --max_train_samples 5000 \
            --num_train_epochs 5 \
            --num_diffusion_steps 5000 \
            --num_inference_diffusion_steps 500 \
            --conditional_generation seq2seq \
            --learning_rate 3e-5 \
            --gradient_accumulation_steps 1 \
            --lr_scheduler_type cosine \
            --beta_schedule squaredcos_improved_ddpm \
            --top_p 0.99 \
            --warmup_ratio 0.03 \
            --logging_steps 50 \
            --save_total_limit 1 \
            --max_eval_samples 500
done

# stsb needs longer sequences, so it runs separately with max_seq_length 256
# (vs. 128 above) and a smaller training budget.
task="stsb"
EXP_NAME="${task}_orig_100k_c4_roberta_base_fixed_shuffle_smol_data_pretrained_str_label"
gantry run -y -n $EXP_NAME -t $EXP_NAME --allow-dirty \
    --workspace ai2/tess2 \
    --nfs \
    --gpus 1 \
    --priority normal \
    --cluster ai2/allennlp-cirrascale \
    --env 'HF_HOME=/net/nfs.cirrascale/allennlp/hamishi/.hf' \
    --env 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' \
    --beaker-image 'ai2/pytorch2.0.0-cuda11.8-python3.10' \
    --venv 'base' \
    --dataset "${checkpoint_mount}:/model" \
    --pip requirements.txt \
    -- python -m sdlm.run_glue \
        --model_name_or_path /model \
        --dataset_name $task \
        --output_dir /results \
        --do_train \
        --do_eval \
        --max_seq_length 256 \
        --per_device_train_batch_size 32 \
        --per_device_eval_batch_size 32 \
        --skip_special_tokens False \
        --evaluation_strategy epoch \
        --save_strategy steps \
        --report_to tensorboard \
        --overwrite_output_dir \
        --pad_to_max_length \
        --simplex_value 5 \
        --max_train_samples 1000 \
        --num_train_epochs 3 \
        --num_diffusion_steps 5000 \
        --num_inference_diffusion_steps 500 \
        --conditional_generation seq2seq \
        --learning_rate 3e-5 \
        --gradient_accumulation_steps 1 \
        --lr_scheduler_type cosine \
        --beta_schedule squaredcos_improved_ddpm \
        --top_p 0.99 \
        --warmup_ratio 0.03 \
        --logging_steps 50 \
        --save_total_limit 1 \
        --max_eval_samples 500
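
# Each job writes to /results, which gantry captures as the experiment's
# Beaker results dataset.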