{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9953917050691244,
  "eval_steps": 500,
  "global_step": 27,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03686635944700461,
      "grad_norm": 0.2873728095047169,
      "learning_rate": 0.0,
      "loss": 1.4734,
      "step": 1
    },
    {
      "epoch": 0.07373271889400922,
      "grad_norm": 0.27419596643661837,
      "learning_rate": 0.0003,
      "loss": 1.279,
      "step": 2
    },
    {
      "epoch": 0.11059907834101383,
      "grad_norm": 0.3351799319426796,
      "learning_rate": 0.0003,
      "loss": 1.4582,
      "step": 3
    },
    {
      "epoch": 0.14746543778801843,
      "grad_norm": 0.2789893810816952,
      "learning_rate": 0.0003,
      "loss": 1.1216,
      "step": 4
    },
    {
      "epoch": 0.18433179723502305,
      "grad_norm": 0.14190144952821382,
      "learning_rate": 0.0003,
      "loss": 1.0121,
      "step": 5
    },
    {
      "epoch": 0.22119815668202766,
      "grad_norm": 0.1542021738719797,
      "learning_rate": 0.0003,
      "loss": 1.0723,
      "step": 6
    },
    {
      "epoch": 0.25806451612903225,
      "grad_norm": 0.17476255091645262,
      "learning_rate": 0.0003,
      "loss": 0.8962,
      "step": 7
    },
    {
      "epoch": 0.29493087557603687,
      "grad_norm": 0.30183805297227384,
      "learning_rate": 0.0003,
      "loss": 0.9577,
      "step": 8
    },
    {
      "epoch": 0.3317972350230415,
      "grad_norm": 0.2945209326852545,
      "learning_rate": 0.0003,
      "loss": 0.9143,
      "step": 9
    },
    {
      "epoch": 0.3686635944700461,
      "grad_norm": 0.21480966699076806,
      "learning_rate": 0.0003,
      "loss": 0.7952,
      "step": 10
    },
    {
      "epoch": 0.4055299539170507,
      "grad_norm": 0.18078986894945484,
      "learning_rate": 0.0003,
      "loss": 0.8234,
      "step": 11
    },
    {
      "epoch": 0.4423963133640553,
      "grad_norm": 0.15453708977718567,
      "learning_rate": 0.0003,
      "loss": 0.7589,
      "step": 12
    },
    {
      "epoch": 0.4792626728110599,
      "grad_norm": 0.1631172234239537,
      "learning_rate": 0.0003,
      "loss": 0.7419,
      "step": 13
    },
    {
      "epoch": 0.5161290322580645,
      "grad_norm": 0.09781085387100458,
      "learning_rate": 0.0003,
      "loss": 0.7239,
      "step": 14
    },
    {
      "epoch": 0.5529953917050692,
      "grad_norm": 0.09897379010199117,
      "learning_rate": 0.0003,
      "loss": 0.7673,
      "step": 15
    },
    {
      "epoch": 0.5898617511520737,
      "grad_norm": 0.11558640849854486,
      "learning_rate": 0.0003,
      "loss": 0.7533,
      "step": 16
    },
    {
      "epoch": 0.6267281105990783,
      "grad_norm": 0.11345769354838794,
      "learning_rate": 0.0003,
      "loss": 0.7581,
      "step": 17
    },
    {
      "epoch": 0.663594470046083,
      "grad_norm": 0.1013501193678853,
      "learning_rate": 0.0003,
      "loss": 0.7224,
      "step": 18
    },
    {
      "epoch": 0.7004608294930875,
      "grad_norm": 0.09930580785134363,
      "learning_rate": 0.0003,
      "loss": 0.7597,
      "step": 19
    },
    {
      "epoch": 0.7373271889400922,
      "grad_norm": 0.10206714996240562,
      "learning_rate": 0.0003,
      "loss": 0.704,
      "step": 20
    },
    {
      "epoch": 0.7741935483870968,
      "grad_norm": 0.10775281367207125,
      "learning_rate": 0.0003,
      "loss": 0.6639,
      "step": 21
    },
    {
      "epoch": 0.8110599078341014,
      "grad_norm": 0.12015377273414085,
      "learning_rate": 0.0003,
      "loss": 0.7494,
      "step": 22
    },
    {
      "epoch": 0.847926267281106,
      "grad_norm": 0.08770642908913276,
      "learning_rate": 0.0003,
      "loss": 0.7115,
      "step": 23
    },
    {
      "epoch": 0.8847926267281107,
      "grad_norm": 0.135245894998221,
      "learning_rate": 0.0003,
      "loss": 0.7169,
      "step": 24
    },
    {
      "epoch": 0.9216589861751152,
      "grad_norm": 0.0993611544536447,
      "learning_rate": 0.0003,
      "loss": 0.6667,
      "step": 25
    },
    {
      "epoch": 0.9585253456221198,
      "grad_norm": 0.09795283307056235,
      "learning_rate": 0.0003,
      "loss": 0.6859,
      "step": 26
    },
    {
      "epoch": 0.9953917050691244,
      "grad_norm": 0.10408097730031732,
      "learning_rate": 0.0003,
      "loss": 0.6844,
      "step": 27
    },
    {
      "epoch": 0.9953917050691244,
      "step": 27,
      "total_flos": 5728527974400.0,
      "train_loss": 0.8656142420238919,
      "train_runtime": 448.8389,
      "train_samples_per_second": 1.934,
      "train_steps_per_second": 0.06
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 27,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5728527974400.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}