File size: 1,904 Bytes
22b26ad
 
 
 
 
 
 
 
 
 
 
 
0c65bb7
22b26ad
0c65bb7
22b26ad
 
 
 
0c65bb7
22b26ad
0c65bb7
22b26ad
 
 
 
0c65bb7
22b26ad
0c65bb7
22b26ad
 
 
 
0c65bb7
22b26ad
0c65bb7
22b26ad
 
 
 
0c65bb7
22b26ad
0c65bb7
22b26ad
 
 
 
0c65bb7
22b26ad
0c65bb7
22b26ad
 
 
 
 
0c65bb7
 
 
 
 
22b26ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c65bb7
22b26ad
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 12,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.64,
      "grad_norm": 8.833975791931152,
      "learning_rate": 0.0001666666666666667,
      "loss": 76.2267,
      "step": 2
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7217656373977661,
      "learning_rate": 0.00013333333333333334,
      "loss": 41.8397,
      "step": 4
    },
    {
      "epoch": 1.6400000000000001,
      "grad_norm": 16.44710350036621,
      "learning_rate": 0.0001,
      "loss": 70.0907,
      "step": 6
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9887499809265137,
      "learning_rate": 6.666666666666667e-05,
      "loss": 38.6177,
      "step": 8
    },
    {
      "epoch": 2.64,
      "grad_norm": 15.259599685668945,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 65.9877,
      "step": 10
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.4123783111572266,
      "learning_rate": 0.0,
      "loss": 36.3016,
      "step": 12
    },
    {
      "epoch": 3.0,
      "step": 12,
      "total_flos": 40668535028568.0,
      "train_loss": 54.844011306762695,
      "train_runtime": 47.4541,
      "train_samples_per_second": 4.215,
      "train_steps_per_second": 0.253
    }
  ],
  "logging_steps": 2,
  "max_steps": 12,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 40668535028568.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}