Spaces:
Sleeping
Sleeping
File size: 1,262 Bytes
17ff0d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# Launches the sdlm.run_pretrain_ar module through `accelerate launch`.
#
# The argument list is accumulated in the positional parameters ("$@") in
# small groups so that each group can carry a comment; the final command
# receives exactly the same arguments, in the same order, as a single
# backslash-continued invocation would. Only POSIX sh features are used
# because the file carries no shebang.
#
# NOTE(review): --max_seq_length 1, --num_diffusion_steps 0 and
# --output_dir outputs/test look like smoke-test settings -- confirm before
# reusing this script for a real run.

# Start from an empty list so arguments passed to this script are ignored.
set --

# Options for the launcher itself, followed by the module to run.
set -- "$@" --mixed_precision bf16 -m sdlm.run_pretrain_ar

# Batch sizes, run modes, logging and reporting.
set -- "$@" --per_device_train_batch_size 1
set -- "$@" --per_device_eval_batch_size 1
set -- "$@" --do_train
set -- "$@" --do_eval
set -- "$@" --log_level info
set -- "$@" --evaluation_strategy steps
set -- "$@" --report_to tensorboard

# Sequence length, learning-rate schedule and step budget.
set -- "$@" --max_seq_length 1
set -- "$@" --lr_scheduler_type constant_with_warmup
set -- "$@" --learning_rate 1e-5
set -- "$@" --pad_to_max_length
set -- "$@" --max_steps 10000000
set -- "$@" --warmup_steps 5000
set -- "$@" --logging_steps 50
set -- "$@" --save_total_limit 1

# Dataset selection (streamed).
set -- "$@" --dataset_name emozilla/dolma-v1_7-305B
set -- "$@" --streaming

# Precision, optimizer and attention/checkpointing switches.
set -- "$@" --bf16
set -- "$@" --optim adamw_torch_fused
set -- "$@" --gradient_checkpointing
set -- "$@" --use_flash_attention2
set -- "$@" --ddp_find_unused_parameters false
set -- "$@" --without_compute_metrics true

# Data loading and preprocessing.
set -- "$@" --dataloader_num_workers 8
set -- "$@" --remove_unused_columns true
set -- "$@" --dispatch_batches false
set -- "$@" --shuffle true
set -- "$@" --preprocessing_num_workers 16

# Base model, pinned to a specific revision hash.
set -- "$@" --model_name_or_path mistralai/Mistral-7B-v0.1
set -- "$@" --model_revision 26bca36bde8333b5d7f72e9ed20ccda6a618af24

# Evaluation / checkpoint cadence and gradient accumulation.
set -- "$@" --eval_steps 10
set -- "$@" --save_steps 50
set -- "$@" --max_eval_samples 16
set -- "$@" --gradient_accumulation_steps 1

# Output location, tokenizer padding side and diffusion steps.
set -- "$@" --output_dir outputs/test
set -- "$@" --overwrite_output_dir true
set -- "$@" --tokenizer_padding_side "left"
set -- "$@" --num_diffusion_steps 0

accelerate launch "$@"
|