Spaces:
Sleeping
Sleeping
File size: 2,530 Bytes
17ff0d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
#!/usr/bin/env bash
# tulu command for eval.
#
# Usage: <script> <run_name> <checkpoint_mount> <eval_dataset_name>
#   run_name           passed to `gantry run` as both -n and -t; only used
#                      when BEAKER is set.
#   checkpoint_mount   with BEAKER set: mounted into the job as
#                      "<checkpoint_mount>:/model"; otherwise passed directly
#                      as --model_name_or_path.
#   eval_dataset_name  value for --eval_dataset_name.
# Environment:
#   BEAKER  when non-empty, submit the job through gantry instead of running
#           the command locally.
#
# The command line is kept in an array (not one big string) so that argument
# values containing spaces or glob characters reach the launched program
# intact.

# Print a one-line usage message to stderr.
usage() {
  printf 'Usage: %s <run_name> <checkpoint_mount> <eval_dataset_name>\n' \
    "${0##*/}" >&2
}

#######################################
# Fill the global array CMD with the launch command shared by both modes.
# Globals:   CMD (written)
# Arguments: $1 - eval dataset name
#######################################
build_eval_cmd() {
  CMD=(
    accelerate launch
    --mixed_precision bf16 -m sdlm.run_tulu
    --dataset_name allenai/tulu-v2-sft-mixture
    --per_device_train_batch_size 8
    --per_device_eval_batch_size 8
    --evaluation_strategy steps
    --do_eval
    --num_train_epochs 2
    --report_to tensorboard
    --max_seq_length 2048
    --simplex_value 5
    --num_diffusion_steps 5000
    --lr_scheduler_type cosine
    --learning_rate 2e-5
    --pad_to_max_length
    --beta_schedule squaredcos_improved_ddpm
    --top_p 0.99
    --warmup_ratio 0.03
    --logging_steps 50
    --save_total_limit 2
    --save_strategy steps
    --conditional_generation seq2seq
    --self_condition logits_mean
    --self_condition_mix_before_weights
    --bf16
    --optim adamw_torch_fused
    --gradient_checkpointing
    --use_flash_attention2
    --is_causal false
    --line_by_line true
    --mask_padding_in_loss false
    --skip_special_tokens false
    --eval_dataset_name "$1"
  )
}

#######################################
# Validate the arguments, then launch the eval run (gantry or local).
# Globals:   BEAKER (read), CMD (written via build_eval_cmd)
# Arguments: $1 - run name, $2 - checkpoint mount/path, $3 - eval dataset name
# Returns:   2 on missing arguments, otherwise the launcher's exit status
#######################################
main() {
  local run_name=${1:-}
  local checkpoint_mount=${2:-}
  local eval_dataset_name=${3:-}

  # Fail early with a usage message instead of launching with dangling flags.
  if [[ -z "$checkpoint_mount" || -z "$eval_dataset_name" ]]; then
    usage
    return 2
  fi

  build_eval_cmd "$eval_dataset_name"

  # for ai2/jupiter-cirrascale-2 cluster
  if [[ -n "${BEAKER:-}" ]]; then
    # The run name is only consumed by gantry, so only require it here.
    if [[ -z "$run_name" ]]; then
      usage
      return 2
    fi
    gantry run -y -n "$run_name" -t "$run_name" --allow-dirty \
      --workspace ai2/tess2 \
      --gpus 8 \
      --priority normal \
      --budget ai2/allennlp \
      --preemptible \
      --no-nfs \
      --cluster ai2/jupiter-cirrascale-2 \
      --env 'HF_HOME=/net/weka/reviz/jaket/.hf' \
      --env 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' \
      --env 'IS_ALPACA_EVAL_2=False' \
      --env-secret OPENAI_API_KEY=OPENAI_API_KEY \
      --beaker-image 'ai2/pytorch2.0.0-cuda11.8-python3.10' \
      --dataset "${checkpoint_mount}:/model" \
      --venv 'base' \
      --pip requirements.txt \
      -- "${CMD[@]}" \
      --model_name_or_path /model \
      --eval_steps 1000 \
      --save_steps 1000 \
      --max_eval_samples 1000 \
      --gradient_accumulation_steps 1 \
      --num_inference_diffusion_steps 100 250 \
      --overwrite_output_dir false \
      --beaker \
      --output_dir /results
  else
    "${CMD[@]}" \
      --model_name_or_path "$checkpoint_mount" \
      --eval_steps 3 \
      --save_steps 5 \
      --max_eval_samples 5000 \
      --gradient_accumulation_steps 1 \
      --num_inference_diffusion_steps 100 \
      --output_dir outputs/test_gsm8k_direct \
      --overwrite_output_dir true \
      --load_states_in_eval_from_model_path=False
  fi
}

# Last command: the script's exit status is whatever main returns.
main "$@"