{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9982668977469671, "eval_steps": 18, "global_step": 360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04991334488734835, "grad_norm": 0.4083130955696106, "learning_rate": 0.002, "loss": 1.2589, "step": 18 }, { "epoch": 0.0998266897746967, "grad_norm": 0.3094785809516907, "learning_rate": 0.0008944271909999159, "loss": 1.0636, "step": 36 }, { "epoch": 0.14974003466204505, "grad_norm": 0.37615087628364563, "learning_rate": 0.0006488856845230502, "loss": 0.8574, "step": 54 }, { "epoch": 0.1996533795493934, "grad_norm": 0.2839493453502655, "learning_rate": 0.0005345224838248488, "loss": 0.8292, "step": 72 }, { "epoch": 0.24956672443674177, "grad_norm": 0.2941131889820099, "learning_rate": 0.00046499055497527714, "loss": 0.7934, "step": 90 }, { "epoch": 0.2994800693240901, "grad_norm": 0.3659161627292633, "learning_rate": 0.0004170288281141495, "loss": 0.7727, "step": 108 }, { "epoch": 0.3493934142114385, "grad_norm": 0.3437303304672241, "learning_rate": 0.00038138503569823694, "loss": 0.7557, "step": 126 }, { "epoch": 0.3993067590987868, "grad_norm": 0.2811639904975891, "learning_rate": 0.00035355339059327376, "loss": 0.7285, "step": 144 }, { "epoch": 0.44922010398613516, "grad_norm": 0.35479724407196045, "learning_rate": 0.00033104235544094716, "loss": 0.7107, "step": 162 }, { "epoch": 0.49913344887348354, "grad_norm": 0.3011772036552429, "learning_rate": 0.0003123475237772121, "loss": 0.7186, "step": 180 }, { "epoch": 0.5490467937608319, "grad_norm": 0.29623347520828247, "learning_rate": 0.00029649972666444046, "loss": 0.6818, "step": 198 }, { "epoch": 0.5989601386481802, "grad_norm": 0.3092997074127197, "learning_rate": 0.000282842712474619, "loss": 0.6701, "step": 216 }, { "epoch": 0.6488734835355287, "grad_norm": 0.32858991622924805, "learning_rate": 0.00027091418459143857, "loss": 0.6733, "step": 234 }, { "epoch": 0.698786828422877, "grad_norm": 0.3046702742576599, "learning_rate": 0.0002603778219616478, "loss": 0.6523, "step": 252 }, { "epoch": 0.7487001733102253, "grad_norm": 0.41049444675445557, "learning_rate": 0.00025098232205526344, "loss": 0.6473, "step": 270 }, { "epoch": 0.7986135181975736, "grad_norm": 0.35009312629699707, "learning_rate": 0.00024253562503633296, "loss": 0.6309, "step": 288 }, { "epoch": 0.848526863084922, "grad_norm": 0.3388204276561737, "learning_rate": 0.0002348880878058814, "loss": 0.6264, "step": 306 }, { "epoch": 0.8984402079722703, "grad_norm": 0.28809213638305664, "learning_rate": 0.0002279211529192759, "loss": 0.6046, "step": 324 }, { "epoch": 0.9483535528596188, "grad_norm": 0.3333686292171478, "learning_rate": 0.0002215395102486845, "loss": 0.5891, "step": 342 }, { "epoch": 0.9982668977469671, "grad_norm": 0.3479894697666168, "learning_rate": 0.00021566554640687683, "loss": 0.5952, "step": 360 } ], "logging_steps": 18, "max_steps": 360, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 18, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.656326962122588e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }