Spaces:

hamishivi
/

tess-2-demo

Sleeping

App Files Files Community

tess-2-demo / shell_scripts /run_glue.sh

hamishivi

commit

17ff0d8 verified about 2 months ago

raw

history blame contribute delete

3.4 kB


	# Checkpoint source handed to gantry below as "--dataset <this>:/model".
	# NOTE(review): presumably "<beaker-dataset-id>:<sub-path>" -- confirm.
	checkpoint_mount="01H4KVBDMMN284JQ6G2N6GS2EV:checkpoint-10000"

	# Submit one gantry job per GLUE task in the list. Each job runs
	# `python -m sdlm.run_glue` against /model with --max_seq_length 128,
	# --max_train_samples 5000, --num_train_epochs 5 and --max_eval_samples 500.
	# Arguments before the bare `--` are gantry options; everything after it is
	# the command line executed by the job.
	# Expansions are quoted so the experiment/task names always reach gantry as
	# single words (ShellCheck SC2086).
	for task in mnli mrpc qnli qqp rte sst2; do
		# The same string is used for both the gantry -n and -t options.
		EXP_NAME="${task}_fixed_eval_5k_data"
		gantry run -y -n "$EXP_NAME" -t "$EXP_NAME" --allow-dirty \
			--workspace ai2/tess2 \
			--nfs \
			--gpus 1 \
			--priority normal \
			--cluster ai2/allennlp-cirrascale \
			--env 'HF_HOME=/net/nfs.cirrascale/allennlp/hamishi/.hf' \
			--env 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' \
			--beaker-image 'ai2/pytorch2.0.0-cuda11.8-python3.10' \
			--dataset "${checkpoint_mount}:/model" \
			--venv 'base' \
			--pip requirements.txt \
			-- python -m sdlm.run_glue \
			--model_name_or_path /model \
			--dataset_name "$task" \
			--output_dir /results \
			--do_train \
			--do_eval \
			--max_seq_length 128 \
			--skip_special_tokens False \
			--per_device_train_batch_size 32 \
			--per_device_eval_batch_size 32 \
			--evaluation_strategy epoch \
			--save_strategy steps \
			--report_to tensorboard \
			--overwrite_output_dir \
			--pad_to_max_length \
			--simplex_value 5 \
			--max_train_samples 5000 \
			--num_train_epochs 5 \
			--num_diffusion_steps 5000 \
			--num_inference_diffusion_steps 500 \
			--conditional_generation seq2seq \
			--learning_rate 3e-5 \
			--gradient_accumulation_steps 1 \
			--lr_scheduler_type cosine \
			--beta_schedule squaredcos_improved_ddpm \
			--top_p 0.99 \
			--warmup_ratio 0.03 \
			--logging_steps 50 \
			--save_total_limit 1 \
			--max_eval_samples 500
	done

	# stsb needs longer sequences, so it is submitted separately with
	# --max_seq_length 256 (the other GLUE tasks in this script use 128).
	# NOTE(review): this run also uses --max_train_samples 1000 and
	# --num_train_epochs 3, whereas the task loop uses 5000 and 5, and the
	# experiment name follows a different scheme -- confirm this is intended.
	# Relies on checkpoint_mount being set earlier in this script.
	# Flag order mirrors the task-loop invocation so the two can be diffed
	# line by line; expansions are quoted (ShellCheck SC2086).
	task="stsb"
	EXP_NAME="${task}_orig_100k_c4_roberta_base_fixed_shuffle_smol_data_pretrained_str_label"
	gantry run -y -n "$EXP_NAME" -t "$EXP_NAME" --allow-dirty \
		--workspace ai2/tess2 \
		--nfs \
		--gpus 1 \
		--priority normal \
		--cluster ai2/allennlp-cirrascale \
		--env 'HF_HOME=/net/nfs.cirrascale/allennlp/hamishi/.hf' \
		--env 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' \
		--beaker-image 'ai2/pytorch2.0.0-cuda11.8-python3.10' \
		--dataset "${checkpoint_mount}:/model" \
		--venv 'base' \
		--pip requirements.txt \
		-- python -m sdlm.run_glue \
		--model_name_or_path /model \
		--dataset_name "$task" \
		--output_dir /results \
		--do_train \
		--do_eval \
		--max_seq_length 256 \
		--skip_special_tokens False \
		--per_device_train_batch_size 32 \
		--per_device_eval_batch_size 32 \
		--evaluation_strategy epoch \
		--save_strategy steps \
		--report_to tensorboard \
		--overwrite_output_dir \
		--pad_to_max_length \
		--simplex_value 5 \
		--max_train_samples 1000 \
		--num_train_epochs 3 \
		--num_diffusion_steps 5000 \
		--num_inference_diffusion_steps 500 \
		--conditional_generation seq2seq \
		--learning_rate 3e-5 \
		--gradient_accumulation_steps 1 \
		--lr_scheduler_type cosine \
		--beta_schedule squaredcos_improved_ddpm \
		--top_p 0.99 \
		--warmup_ratio 0.03 \
		--logging_steps 50 \
		--save_total_limit 1 \
		--max_eval_samples 500