---
# Training configuration for fine-tuning CodeLlama-7b-Python on the
# code-refinement dataset (open-instruct style finetune script arguments).
# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps
# = 2 * 32 = 64 per device.

add_bos: false
checkpointing_steps: null  # no intermediate checkpoints
clip_grad_norm: -1  # -1 disables gradient clipping
config_name: null
dataset_config_name: null
dataset_name: null  # unused; training data comes from train_file below
gradient_accumulation_steps: 32
gradient_checkpointing: true  # trade compute for memory
learning_rate: 2.0e-05
logging_steps: 1
lora_alpha: 16  # LoRA settings below only take effect if use_lora/use_qlora is true
lora_dropout: 0.1
lora_rank: 64
low_cpu_mem_usage: false
lr_scheduler_type: linear
max_seq_length: 700
max_train_steps: 28  # hard cap on optimizer steps; may end training before num_train_epochs
model_name_or_path: codellama/CodeLlama-7b-Python-hf
num_train_epochs: 4
output_dir: outputs/refinement_7b_4ep/
overwrite_cache: false
per_device_train_batch_size: 2
preprocessing_num_workers: 16
reduce_loss: mean
report_to: tensorboard
resume_from_checkpoint: null
seed: null  # NOTE(review): unset seed means runs are not reproducible — confirm intentional
timeout: 1800  # seconds
tokenizer_name: codellama/CodeLlama-7b-Python-hf
train_file: /local1/mohsenfayyaz/projects/cs247/cs247/xq_scripts/data_refinement_train.jsonl
trust_remote_code: false
use_8bit_optimizer: false
use_flash_attn: true
use_lora: false  # full fine-tune: both LoRA and QLoRA disabled
use_qlora: false
use_slow_tokenizer: true
warmup_ratio: 0.03  # 3% of total steps used for LR warmup
weight_decay: 0.0
with_tracking: true