jondurbin
/

mpt-30b-qlora-compatible

Text Generation

text-generation-inference

Inference Endpoints

Model card · Files and versions · Community

jondurbin committed on Jun 24, 2023

Commit

3f09a99

·

1 Parent(s): 7870f62

Update README.md

Files changed (1) hide show

README.md +2 -2

README.md CHANGED Viewed

@@ -21,7 +21,7 @@ export WANDB_PROJECT=airoboros-mpt-30b-gpt4-1.4
 python qlora.py \
     --model_name_or_path ./mpt-30b \
     --output_dir ./$WANDB_PROJECT-checkpoints \
-    --num_train_epochs 4 \
     --logging_steps 1 \
     --save_strategy steps \
     --data_seed 11422 \
@@ -48,7 +48,7 @@ python qlora.py \
     --model_max_len 8192 \
     --gradient_checkpointing \
     --per_device_train_batch_size 6 \
-    --gradient_accumulation_steps 16 \
     --learning_rate 0.0001 \
     --adam_beta2 0.999 \
     --max_grad_norm 0.3 \

 python qlora.py \
     --model_name_or_path ./mpt-30b \
     --output_dir ./$WANDB_PROJECT-checkpoints \
+    --num_train_epochs 3 \
     --logging_steps 1 \
     --save_strategy steps \
     --data_seed 11422 \
     --model_max_len 8192 \
     --gradient_checkpointing \
     --per_device_train_batch_size 6 \
+    --gradient_accumulation_steps 1 \
     --learning_rate 0.0001 \
     --adam_beta2 0.999 \
     --max_grad_norm 0.3 \