w11wo committed on
Commit
6562024
·
1 Parent(s): 2332da0

Added logs

Browse files
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.36827531884478026,
4
+ "train_runtime": 2516.6862,
5
+ "train_samples": 48777,
6
+ "train_samples_per_second": 193.814,
7
+ "train_steps_per_second": 1.518
8
+ }
logs/byt5-small-wikipron-eng-latn-us-broad/log.json ADDED
The diff for this file is too large to render. See raw diff
 
logs/byt5-small-wikipron-eng-latn-us-broad/metrics.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ PER: 0.157
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.36827531884478026,
4
+ "train_runtime": 2516.6862,
5
+ "train_samples": 48777,
6
+ "train_samples_per_second": 193.814,
7
+ "train_steps_per_second": 1.518
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 3820,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 0.0002,
13
+ "loss": 1.9462,
14
+ "step": 382
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_gen_len": 15.453,
19
+ "eval_loss": 0.2963322103023529,
20
+ "eval_per": 0.3495,
21
+ "eval_runtime": 122.3144,
22
+ "eval_samples_per_second": 56.976,
23
+ "eval_steps_per_second": 1.782,
24
+ "step": 382
25
+ },
26
+ {
27
+ "epoch": 2.0,
28
+ "learning_rate": 0.00017777777777777779,
29
+ "loss": 0.2979,
30
+ "step": 764
31
+ },
32
+ {
33
+ "epoch": 2.0,
34
+ "eval_gen_len": 15.6759,
35
+ "eval_loss": 0.21439459919929504,
36
+ "eval_per": 0.2941,
37
+ "eval_runtime": 121.944,
38
+ "eval_samples_per_second": 57.149,
39
+ "eval_steps_per_second": 1.788,
40
+ "step": 764
41
+ },
42
+ {
43
+ "epoch": 3.0,
44
+ "learning_rate": 0.00015555555555555556,
45
+ "loss": 0.2319,
46
+ "step": 1146
47
+ },
48
+ {
49
+ "epoch": 3.0,
50
+ "eval_gen_len": 15.7219,
51
+ "eval_loss": 0.18984708189964294,
52
+ "eval_per": 0.2765,
53
+ "eval_runtime": 120.5224,
54
+ "eval_samples_per_second": 57.823,
55
+ "eval_steps_per_second": 1.809,
56
+ "step": 1146
57
+ },
58
+ {
59
+ "epoch": 4.0,
60
+ "learning_rate": 0.00013333333333333334,
61
+ "loss": 0.2042,
62
+ "step": 1528
63
+ },
64
+ {
65
+ "epoch": 4.0,
66
+ "eval_gen_len": 15.7235,
67
+ "eval_loss": 0.1799211949110031,
68
+ "eval_per": 0.2719,
69
+ "eval_runtime": 119.8528,
70
+ "eval_samples_per_second": 58.146,
71
+ "eval_steps_per_second": 1.819,
72
+ "step": 1528
73
+ },
74
+ {
75
+ "epoch": 5.0,
76
+ "learning_rate": 0.00011111111111111112,
77
+ "loss": 0.1879,
78
+ "step": 1910
79
+ },
80
+ {
81
+ "epoch": 5.0,
82
+ "eval_gen_len": 15.7054,
83
+ "eval_loss": 0.1743946522474289,
84
+ "eval_per": 0.2657,
85
+ "eval_runtime": 120.4255,
86
+ "eval_samples_per_second": 57.87,
87
+ "eval_steps_per_second": 1.81,
88
+ "step": 1910
89
+ },
90
+ {
91
+ "epoch": 6.0,
92
+ "learning_rate": 8.888888888888889e-05,
93
+ "loss": 0.1763,
94
+ "step": 2292
95
+ },
96
+ {
97
+ "epoch": 6.0,
98
+ "eval_gen_len": 15.7406,
99
+ "eval_loss": 0.1728605031967163,
100
+ "eval_per": 0.2645,
101
+ "eval_runtime": 119.6482,
102
+ "eval_samples_per_second": 58.246,
103
+ "eval_steps_per_second": 1.822,
104
+ "step": 2292
105
+ },
106
+ {
107
+ "epoch": 7.0,
108
+ "learning_rate": 6.666666666666667e-05,
109
+ "loss": 0.1677,
110
+ "step": 2674
111
+ },
112
+ {
113
+ "epoch": 7.0,
114
+ "eval_gen_len": 15.7269,
115
+ "eval_loss": 0.1693088412284851,
116
+ "eval_per": 0.2619,
117
+ "eval_runtime": 119.4883,
118
+ "eval_samples_per_second": 58.324,
119
+ "eval_steps_per_second": 1.824,
120
+ "step": 2674
121
+ },
122
+ {
123
+ "epoch": 8.0,
124
+ "learning_rate": 4.4444444444444447e-05,
125
+ "loss": 0.1613,
126
+ "step": 3056
127
+ },
128
+ {
129
+ "epoch": 8.0,
130
+ "eval_gen_len": 15.7371,
131
+ "eval_loss": 0.1667601615190506,
132
+ "eval_per": 0.2604,
133
+ "eval_runtime": 118.43,
134
+ "eval_samples_per_second": 58.845,
135
+ "eval_steps_per_second": 1.841,
136
+ "step": 3056
137
+ },
138
+ {
139
+ "epoch": 9.0,
140
+ "learning_rate": 2.2222222222222223e-05,
141
+ "loss": 0.1559,
142
+ "step": 3438
143
+ },
144
+ {
145
+ "epoch": 9.0,
146
+ "eval_gen_len": 15.7383,
147
+ "eval_loss": 0.16669157147407532,
148
+ "eval_per": 0.2585,
149
+ "eval_runtime": 118.8757,
150
+ "eval_samples_per_second": 58.624,
151
+ "eval_steps_per_second": 1.834,
152
+ "step": 3438
153
+ },
154
+ {
155
+ "epoch": 10.0,
156
+ "learning_rate": 0.0,
157
+ "loss": 0.1534,
158
+ "step": 3820
159
+ },
160
+ {
161
+ "epoch": 10.0,
162
+ "eval_gen_len": 15.7318,
163
+ "eval_loss": 0.16682308912277222,
164
+ "eval_per": 0.2588,
165
+ "eval_runtime": 118.3076,
166
+ "eval_samples_per_second": 58.906,
167
+ "eval_steps_per_second": 1.843,
168
+ "step": 3820
169
+ },
170
+ {
171
+ "epoch": 10.0,
172
+ "step": 3820,
173
+ "total_flos": 5.601731015983104e+16,
174
+ "train_loss": 0.36827531884478026,
175
+ "train_runtime": 2516.6862,
176
+ "train_samples_per_second": 193.814,
177
+ "train_steps_per_second": 1.518
178
+ }
179
+ ],
180
+ "max_steps": 3820,
181
+ "num_train_epochs": 10,
182
+ "total_flos": 5.601731015983104e+16,
183
+ "trial_name": null,
184
+ "trial_params": null
185
+ }