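# Beaker dataset ID (with checkpoint subdirectory) of the model to fine-tune; mounted at /model via --dataset below.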
checkpoint_mount="01H4KVBDMMN284JQ6G2N6GS2EV:checkpoint-10000"
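
# Launch one gantry job per GLUE task, fine-tuning the mounted checkpoint on a 5k-example training subset.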
for task in mnli mrpc qnli qqp rte sst2
do
    EXP_NAME="${task}_fixed_eval_5k_data"
    gantry run -y -n $EXP_NAME -t $EXP_NAME --allow-dirty \
        --workspace ai2/tess2 \
        --nfs \
        --gpus 1 \
        --priority normal \
        --cluster ai2/allennlp-cirrascale \
        --env 'HF_HOME=/net/nfs.cirrascale/allennlp/hamishi/.hf' \
        --env 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' \
        --beaker-image 'ai2/pytorch2.0.0-cuda11.8-python3.10' \
        --dataset "${checkpoint_mount}:/model" \
        --venv 'base' \
        --pip requirements.txt \
        -- python -m sdlm.run_glue \
        --model_name_or_path /model \
        --dataset_name $task \
        --output_dir /results \
        --do_train \
        --do_eval \
        --max_seq_length 128 \
        --skip_special_tokens False \
        --per_device_train_batch_size 32 \
        --per_device_eval_batch_size 32 \
        --evaluation_strategy epoch \
        --save_strategy steps \
        --report_to tensorboard \
        --overwrite_output_dir \
        --pad_to_max_length \
        --simplex_value 5 \
        --max_train_samples 5000 \
        --num_train_epochs 5 \
        --num_diffusion_steps 5000 \
        --num_inference_diffusion_steps 500 \
        --conditional_generation seq2seq \
        --learning_rate 3e-5 \
        --gradient_accumulation_steps 1 \
        --lr_scheduler_type cosine \
        --beta_schedule squaredcos_improved_ddpm \
        --top_p 0.99 \
        --warmup_ratio 0.03 \
        --logging_steps 50 \
        --save_total_limit 1 \
        --max_eval_samples 500
done

# stsb needs longer sequences
task="stsb"
EXP_NAME="${task}_orig_100k_c4_roberta_base_fixed_shuffle_smol_data_pretrained_str_label"
gantry run -y -n $EXP_NAME -t $EXP_NAME --allow-dirty \
    --workspace ai2/tess2 \
    --nfs \
    --gpus 1 \
    --priority normal \
    --cluster ai2/allennlp-cirrascale \
    --env 'HF_HOME=/net/nfs.cirrascale/allennlp/hamishi/.hf' \
    --env 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' \
    --beaker-image 'ai2/pytorch2.0.0-cuda11.8-python3.10' \
    --venv 'base' \
    --dataset "${checkpoint_mount}:/model" \
    --pip requirements.txt \
    -- python -m sdlm.run_glue \
    --model_name_or_path /model \
    --dataset_name $task \
    --output_dir /results \
    --do_train \
    --do_eval \
    --max_seq_length 256 \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 32 \
    --skip_special_tokens False \
    --evaluation_strategy epoch \
    --save_strategy steps \
    --report_to tensorboard \
    --overwrite_output_dir \
    --pad_to_max_length \
    --simplex_value 5 \
    --max_train_samples 1000 \
    --num_train_epochs 3 \
    --num_diffusion_steps 5000 \
    --num_inference_diffusion_steps 500 \
    --conditional_generation seq2seq \
    --learning_rate 3e-5 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --beta_schedule squaredcos_improved_ddpm \
    --top_p 0.99 \
    --warmup_ratio 0.03 \
    --logging_steps 50 \
    --save_total_limit 1 \
    --max_eval_samples 500