File size: 2,593 Bytes
ce30918
 
 
0e2e36c
ce30918
0e2e36c
ce30918
 
 
 
 
0e2e36c
 
87c1092
ce30918
 
 
0e2e36c
 
87c1092
ce30918
 
 
0e2e36c
 
87c1092
ce30918
 
 
0e2e36c
87c1092
 
 
 
ce30918
 
 
0e2e36c
 
87c1092
ce30918
 
 
0e2e36c
 
87c1092
ce30918
 
 
0e2e36c
87c1092
 
 
 
ce30918
 
 
0e2e36c
 
87c1092
ce30918
 
 
0e2e36c
 
87c1092
0e2e36c
 
 
 
87c1092
 
 
 
0e2e36c
 
 
 
 
87c1092
0e2e36c
 
 
 
 
87c1092
0e2e36c
 
 
 
87c1092
 
 
 
0e2e36c
 
 
 
 
 
87c1092
 
 
0e2e36c
ce30918
 
 
0e2e36c
ce30918
 
 
0e2e36c
 
ce30918
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9905660377358491,
  "eval_steps": 10,
  "global_step": 42,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 4.9930094929529506e-05,
      "loss": 1.0696,
      "step": 1
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.827184371610511e-05,
      "loss": 1.063,
      "step": 5
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.332629679574566e-05,
      "loss": 1.0776,
      "step": 10
    },
    {
      "epoch": 0.24,
      "eval_loss": 1.05855131149292,
      "eval_runtime": 25.0203,
      "eval_samples_per_second": 5.156,
      "eval_steps_per_second": 1.719,
      "step": 10
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.5847093477938956e-05,
      "loss": 1.0711,
      "step": 15
    },
    {
      "epoch": 0.47,
      "learning_rate": 2.686825233966061e-05,
      "loss": 1.0657,
      "step": 20
    },
    {
      "epoch": 0.47,
      "eval_loss": 1.0499261617660522,
      "eval_runtime": 25.0117,
      "eval_samples_per_second": 5.158,
      "eval_steps_per_second": 1.719,
      "step": 20
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.7631120639727393e-05,
      "loss": 1.0523,
      "step": 25
    },
    {
      "epoch": 0.71,
      "learning_rate": 9.412754953531663e-06,
      "loss": 1.0643,
      "step": 30
    },
    {
      "epoch": 0.71,
      "eval_loss": 1.0445888042449951,
      "eval_runtime": 25.0257,
      "eval_samples_per_second": 5.155,
      "eval_steps_per_second": 1.718,
      "step": 30
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.3493649053890326e-06,
      "loss": 1.053,
      "step": 35
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.7922934437178695e-07,
      "loss": 1.0473,
      "step": 40
    },
    {
      "epoch": 0.94,
      "eval_loss": 1.043198585510254,
      "eval_runtime": 25.0118,
      "eval_samples_per_second": 5.158,
      "eval_steps_per_second": 1.719,
      "step": 40
    },
    {
      "epoch": 0.99,
      "step": 42,
      "total_flos": 6.447644673468006e+16,
      "train_loss": 1.0616472789219447,
      "train_runtime": 3232.5069,
      "train_samples_per_second": 1.573,
      "train_steps_per_second": 0.013
    }
  ],
  "logging_steps": 5,
  "max_steps": 42,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 20,
  "total_flos": 6.447644673468006e+16,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}