lora_r: 128
lora_alpha: 128
lora_dropout: 0.25
bias: lora_only
task_type: CAUSAL_LM
per_device_train_batch_size: 1
gradient_accumulation_steps: 6
warmup_steps: 40
max_steps: 250
learning_rate: 0.0006
fp16: False
logging_steps: 1
optim: paged_adamw_8bit
output_dir: /home/p/pramukas/work/results/GPT 2/local-0/checkpoints/
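
For context, the sketch below shows how these hyperparameters would plausibly map onto the HuggingFace peft and transformers APIs, assuming this config drives a LoRA fine-tuning run through those libraries. Only the values come from the config above; any surrounding model and dataset wiring would be additional.

```python
# Minimal sketch (assumption: the config above is consumed by peft/transformers).
# Only the hyperparameter values are taken from the config; everything else
# here is illustrative scaffolding.
from peft import LoraConfig
from transformers import TrainingArguments

lora_config = LoraConfig(
    r=128,
    lora_alpha=128,          # LoRA scaling is alpha / r, i.e. 1.0 here
    lora_dropout=0.25,
    bias="lora_only",        # train only the biases of the LoRA layers
    task_type="CAUSAL_LM",
)

training_args = TrainingArguments(
    output_dir="/home/p/pramukas/work/results/GPT 2/local-0/checkpoints/",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=6,   # effective batch size of 6 per device
    warmup_steps=40,
    max_steps=250,
    learning_rate=0.0006,
    fp16=False,
    logging_steps=1,                 # log the loss at every optimizer step
    optim="paged_adamw_8bit",        # 8-bit paged AdamW; needs bitsandbytes
)
```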