File size: 2,661 Bytes

2efc4ed
 
 
4cea407
2efc4ed
4cea407
2efc4ed
 
 
 
 
 
eefc788
4cea407
eefc788
2efc4ed
 
 
 
eefc788
4cea407
eefc788
2efc4ed
 
 
 
eefc788
4cea407
eefc788
2efc4ed
 
 
 
eefc788
4cea407
eefc788
2efc4ed
 
 
 
eefc788
4cea407
eefc788
2efc4ed
 
 
 
eefc788
 
 
2efc4ed
 
 
 
eefc788
 
 
2efc4ed
 
 
 
eefc788
 
 
2efc4ed
 
 
 
eefc788
 
 
2efc4ed
 
 
 
eefc788
 
 
2efc4ed
 
 
4cea407
 
 
eefc788
 
 
 
2efc4ed
 
 
4cea407
2efc4ed
4cea407
2efc4ed
 
 
 
 
 
 
 
 
 
 
 
 
4cea407
2efc4ed

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.6666666666666665,
  "eval_steps": 500,
  "global_step": 21,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 7.614686012268066,
      "learning_rate": 9.047619047619048e-05,
      "loss": 9.6721,
      "step": 2
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 10.107391357421875,
      "learning_rate": 8.095238095238096e-05,
      "loss": 9.4969,
      "step": 4
    },
    {
      "epoch": 0.8,
      "grad_norm": 11.758378028869629,
      "learning_rate": 7.142857142857143e-05,
      "loss": 9.3036,
      "step": 6
    },
    {
      "epoch": 1.0,
      "grad_norm": 8.324995994567871,
      "learning_rate": 6.19047619047619e-05,
      "loss": 6.8191,
      "step": 8
    },
    {
      "epoch": 1.2666666666666666,
      "grad_norm": 15.543440818786621,
      "learning_rate": 5.2380952380952384e-05,
      "loss": 8.8411,
      "step": 10
    },
    {
      "epoch": 1.5333333333333332,
      "grad_norm": 16.783220291137695,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 8.5329,
      "step": 12
    },
    {
      "epoch": 1.8,
      "grad_norm": 19.108684539794922,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 8.2567,
      "step": 14
    },
    {
      "epoch": 2.0,
      "grad_norm": 8.789897918701172,
      "learning_rate": 2.380952380952381e-05,
      "loss": 6.1492,
      "step": 16
    },
    {
      "epoch": 2.2666666666666666,
      "grad_norm": 17.98725128173828,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 8.0423,
      "step": 18
    },
    {
      "epoch": 2.533333333333333,
      "grad_norm": 14.338889122009277,
      "learning_rate": 4.7619047619047615e-06,
      "loss": 7.9496,
      "step": 20
    },
    {
      "epoch": 2.6666666666666665,
      "step": 21,
      "total_flos": 82187025809664.0,
      "train_loss": 8.28742908296131,
      "train_runtime": 577.7205,
      "train_samples_per_second": 0.623,
      "train_steps_per_second": 0.036
    }
  ],
  "logging_steps": 2,
  "max_steps": 21,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 82187025809664.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}