Pittawat Taveekitworachai committed on
Commit
e8b20bd
·
1 Parent(s): 5c76aa4

chore: update default temperature

Browse files
all_results.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "epoch": 1.9936479128856623,
3
- "eval_loss": 0.4997712969779968,
4
- "eval_runtime": 472.9101,
5
- "eval_samples_per_second": 5.887,
6
- "eval_steps_per_second": 0.736,
7
- "total_flos": 192523449925632.0,
8
- "train_loss": 0.47888606233023556,
9
- "train_runtime": 10712.2216,
10
- "train_samples_per_second": 9.875,
11
- "train_steps_per_second": 0.034
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_results.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "epoch": 1.9936479128856623,
3
- "eval_loss": 0.4997712969779968,
4
- "eval_runtime": 472.9101,
5
- "eval_samples_per_second": 5.887,
6
- "eval_steps_per_second": 0.736
7
- }
 
 
 
 
 
 
 
 
generation_config.json CHANGED
@@ -11,7 +11,7 @@
11
  ],
12
  "gamma": 1.0,
13
  "kl_temperature": 1.0,
14
- "temperature": 0.7,
15
  "top_p": 0.95,
16
  "transformers_version": "4.46.1"
17
- }
 
11
  ],
12
  "gamma": 1.0,
13
  "kl_temperature": 1.0,
14
+ "temperature": 0.4,
15
  "top_p": 0.95,
16
  "transformers_version": "4.46.1"
17
+ }
train_results.json DELETED
@@ -1,8 +0,0 @@
1
- {
2
- "epoch": 1.9936479128856623,
3
- "total_flos": 192523449925632.0,
4
- "train_loss": 0.47888606233023556,
5
- "train_runtime": 10712.2216,
6
- "train_samples_per_second": 9.875,
7
- "train_steps_per_second": 0.034
8
- }
 
 
 
 
 
 
 
 
 
trainer_log.jsonl DELETED
@@ -1,37 +0,0 @@
1
- {"current_steps": 10, "total_steps": 366, "loss": 0.6313, "lr": 1.9963183634476757e-05, "epoch": 0.0544464609800363, "percentage": 2.73, "elapsed_time": "0:04:56", "remaining_time": "2:55:58"}
2
- {"current_steps": 20, "total_steps": 366, "loss": 0.5461, "lr": 1.985300562686109e-05, "epoch": 0.1088929219600726, "percentage": 5.46, "elapsed_time": "0:09:48", "remaining_time": "2:49:45"}
3
- {"current_steps": 30, "total_steps": 366, "loss": 0.5357, "lr": 1.9670277247913205e-05, "epoch": 0.16333938294010888, "percentage": 8.2, "elapsed_time": "0:14:40", "remaining_time": "2:44:18"}
4
- {"current_steps": 40, "total_steps": 366, "loss": 0.5269, "lr": 1.941634397659126e-05, "epoch": 0.2177858439201452, "percentage": 10.93, "elapsed_time": "0:19:32", "remaining_time": "2:39:13"}
5
- {"current_steps": 50, "total_steps": 366, "loss": 0.5195, "lr": 1.909307559292236e-05, "epoch": 0.27223230490018147, "percentage": 13.66, "elapsed_time": "0:24:24", "remaining_time": "2:34:13"}
6
- {"current_steps": 60, "total_steps": 366, "loss": 0.5296, "lr": 1.8702852410301556e-05, "epoch": 0.32667876588021777, "percentage": 16.39, "elapsed_time": "0:29:16", "remaining_time": "2:29:15"}
7
- {"current_steps": 70, "total_steps": 366, "loss": 0.5077, "lr": 1.8248547748594246e-05, "epoch": 0.3811252268602541, "percentage": 19.13, "elapsed_time": "0:34:08", "remaining_time": "2:24:21"}
8
- {"current_steps": 80, "total_steps": 366, "loss": 0.518, "lr": 1.77335067770973e-05, "epoch": 0.4355716878402904, "percentage": 21.86, "elapsed_time": "0:39:00", "remaining_time": "2:19:27"}
9
- {"current_steps": 90, "total_steps": 366, "loss": 0.5148, "lr": 1.7161521883143936e-05, "epoch": 0.4900181488203267, "percentage": 24.59, "elapsed_time": "0:43:52", "remaining_time": "2:14:32"}
10
- {"current_steps": 100, "total_steps": 366, "loss": 0.512, "lr": 1.653680474772006e-05, "epoch": 0.5444646098003629, "percentage": 27.32, "elapsed_time": "0:48:44", "remaining_time": "2:09:39"}
11
- {"current_steps": 110, "total_steps": 366, "loss": 0.5029, "lr": 1.586395533370696e-05, "epoch": 0.5989110707803993, "percentage": 30.05, "elapsed_time": "0:53:36", "remaining_time": "2:04:44"}
12
- {"current_steps": 120, "total_steps": 366, "loss": 0.5089, "lr": 1.5147928015098309e-05, "epoch": 0.6533575317604355, "percentage": 32.79, "elapsed_time": "0:58:27", "remaining_time": "1:59:50"}
13
- {"current_steps": 130, "total_steps": 366, "loss": 0.5084, "lr": 1.4393995096591415e-05, "epoch": 0.7078039927404719, "percentage": 35.52, "elapsed_time": "1:03:19", "remaining_time": "1:54:57"}
14
- {"current_steps": 140, "total_steps": 366, "loss": 0.4943, "lr": 1.3607707992167836e-05, "epoch": 0.7622504537205081, "percentage": 38.25, "elapsed_time": "1:08:11", "remaining_time": "1:50:05"}
15
- {"current_steps": 150, "total_steps": 366, "loss": 0.5068, "lr": 1.2794856348516095e-05, "epoch": 0.8166969147005445, "percentage": 40.98, "elapsed_time": "1:13:03", "remaining_time": "1:45:12"}
16
- {"current_steps": 160, "total_steps": 366, "loss": 0.5003, "lr": 1.196142541428197e-05, "epoch": 0.8711433756805808, "percentage": 43.72, "elapsed_time": "1:17:54", "remaining_time": "1:40:18"}
17
- {"current_steps": 170, "total_steps": 366, "loss": 0.5053, "lr": 1.1113551969048088e-05, "epoch": 0.925589836660617, "percentage": 46.45, "elapsed_time": "1:22:47", "remaining_time": "1:35:26"}
18
- {"current_steps": 180, "total_steps": 366, "loss": 0.5017, "lr": 1.0257479136549889e-05, "epoch": 0.9800362976406534, "percentage": 49.18, "elapsed_time": "1:27:39", "remaining_time": "1:30:34"}
19
- {"current_steps": 190, "total_steps": 366, "loss": 0.4806, "lr": 9.399510414850518e-06, "epoch": 1.0353901996370236, "percentage": 51.91, "elapsed_time": "1:32:31", "remaining_time": "1:25:42"}
20
- {"current_steps": 200, "total_steps": 366, "loss": 0.4415, "lr": 8.545963261963102e-06, "epoch": 1.08983666061706, "percentage": 54.64, "elapsed_time": "1:37:23", "remaining_time": "1:20:49"}
21
- {"current_steps": 210, "total_steps": 366, "loss": 0.4406, "lr": 7.703122578682047e-06, "epoch": 1.144283121597096, "percentage": 57.38, "elapsed_time": "1:42:14", "remaining_time": "1:15:57"}
22
- {"current_steps": 220, "total_steps": 366, "loss": 0.4345, "lr": 6.877194431142055e-06, "epoch": 1.1987295825771325, "percentage": 60.11, "elapsed_time": "1:47:06", "remaining_time": "1:11:05"}
23
- {"current_steps": 230, "total_steps": 366, "loss": 0.4443, "lr": 6.074260353858283e-06, "epoch": 1.2531760435571688, "percentage": 62.84, "elapsed_time": "1:51:58", "remaining_time": "1:06:12"}
24
- {"current_steps": 240, "total_steps": 366, "loss": 0.4329, "lr": 5.300232569726805e-06, "epoch": 1.3076225045372052, "percentage": 65.57, "elapsed_time": "1:56:50", "remaining_time": "1:01:20"}
25
- {"current_steps": 250, "total_steps": 366, "loss": 0.4376, "lr": 4.560810456712754e-06, "epoch": 1.3620689655172413, "percentage": 68.31, "elapsed_time": "2:01:42", "remaining_time": "0:56:28"}
26
- {"current_steps": 260, "total_steps": 366, "loss": 0.4375, "lr": 3.86143858177388e-06, "epoch": 1.4165154264972777, "percentage": 71.04, "elapsed_time": "2:06:34", "remaining_time": "0:51:36"}
27
- {"current_steps": 270, "total_steps": 366, "loss": 0.439, "lr": 3.207266611027069e-06, "epoch": 1.470961887477314, "percentage": 73.77, "elapsed_time": "2:11:25", "remaining_time": "0:46:43"}
28
- {"current_steps": 280, "total_steps": 366, "loss": 0.4437, "lr": 2.6031113913503337e-06, "epoch": 1.5254083484573502, "percentage": 76.5, "elapsed_time": "2:16:17", "remaining_time": "0:41:51"}
29
- {"current_steps": 290, "total_steps": 366, "loss": 0.4413, "lr": 2.0534214826237486e-06, "epoch": 1.5798548094373865, "percentage": 79.23, "elapsed_time": "2:21:09", "remaining_time": "0:36:59"}
30
- {"current_steps": 300, "total_steps": 366, "loss": 0.4321, "lr": 1.5622444017681438e-06, "epoch": 1.634301270417423, "percentage": 81.97, "elapsed_time": "2:26:02", "remaining_time": "0:32:07"}
31
- {"current_steps": 310, "total_steps": 366, "loss": 0.4335, "lr": 1.1331968197725985e-06, "epoch": 1.6887477313974593, "percentage": 84.7, "elapsed_time": "2:30:54", "remaining_time": "0:27:15"}
32
- {"current_steps": 320, "total_steps": 366, "loss": 0.437, "lr": 7.694379311582401e-07, "epoch": 1.7431941923774956, "percentage": 87.43, "elapsed_time": "2:35:46", "remaining_time": "0:22:23"}
33
- {"current_steps": 330, "total_steps": 366, "loss": 0.4404, "lr": 4.73646191966175e-07, "epoch": 1.7976406533575318, "percentage": 90.16, "elapsed_time": "2:40:37", "remaining_time": "0:17:31"}
34
- {"current_steps": 340, "total_steps": 366, "loss": 0.4364, "lr": 2.479995975541749e-07, "epoch": 1.852087114337568, "percentage": 92.9, "elapsed_time": "2:45:29", "remaining_time": "0:12:39"}
35
- {"current_steps": 350, "total_steps": 366, "loss": 0.422, "lr": 9.415964542203059e-08, "epoch": 1.9065335753176043, "percentage": 95.63, "elapsed_time": "2:50:21", "remaining_time": "0:07:47"}
36
- {"current_steps": 360, "total_steps": 366, "loss": 0.4297, "lr": 1.325910115169471e-08, "epoch": 1.9609800362976406, "percentage": 98.36, "elapsed_time": "2:55:13", "remaining_time": "0:02:55"}
37
- {"current_steps": 366, "total_steps": 366, "epoch": 1.9936479128856623, "percentage": 100.0, "elapsed_time": "2:58:30", "remaining_time": "0:00:00"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
trainer_state.json DELETED
@@ -1,294 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.9936479128856623,
5
- "eval_steps": 400,
6
- "global_step": 366,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0544464609800363,
13
- "grad_norm": 0.5656834527948309,
14
- "learning_rate": 1.9963183634476757e-05,
15
- "loss": 0.6313,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.1088929219600726,
20
- "grad_norm": 0.38873067732428473,
21
- "learning_rate": 1.985300562686109e-05,
22
- "loss": 0.5461,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.16333938294010888,
27
- "grad_norm": 0.3356665320772086,
28
- "learning_rate": 1.9670277247913205e-05,
29
- "loss": 0.5357,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.2177858439201452,
34
- "grad_norm": 0.3461983668544632,
35
- "learning_rate": 1.941634397659126e-05,
36
- "loss": 0.5269,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.27223230490018147,
41
- "grad_norm": 0.32613528937825276,
42
- "learning_rate": 1.909307559292236e-05,
43
- "loss": 0.5195,
44
- "step": 50
45
- },
46
- {
47
- "epoch": 0.32667876588021777,
48
- "grad_norm": 0.36288090303738013,
49
- "learning_rate": 1.8702852410301556e-05,
50
- "loss": 0.5296,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.3811252268602541,
55
- "grad_norm": 0.34412368424241435,
56
- "learning_rate": 1.8248547748594246e-05,
57
- "loss": 0.5077,
58
- "step": 70
59
- },
60
- {
61
- "epoch": 0.4355716878402904,
62
- "grad_norm": 0.3623487490391009,
63
- "learning_rate": 1.77335067770973e-05,
64
- "loss": 0.518,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.4900181488203267,
69
- "grad_norm": 0.352530554726309,
70
- "learning_rate": 1.7161521883143936e-05,
71
- "loss": 0.5148,
72
- "step": 90
73
- },
74
- {
75
- "epoch": 0.5444646098003629,
76
- "grad_norm": 0.33271158256626093,
77
- "learning_rate": 1.653680474772006e-05,
78
- "loss": 0.512,
79
- "step": 100
80
- },
81
- {
82
- "epoch": 0.5989110707803993,
83
- "grad_norm": 0.3411018751679214,
84
- "learning_rate": 1.586395533370696e-05,
85
- "loss": 0.5029,
86
- "step": 110
87
- },
88
- {
89
- "epoch": 0.6533575317604355,
90
- "grad_norm": 0.3194587165760062,
91
- "learning_rate": 1.5147928015098309e-05,
92
- "loss": 0.5089,
93
- "step": 120
94
- },
95
- {
96
- "epoch": 0.7078039927404719,
97
- "grad_norm": 0.3254055350803121,
98
- "learning_rate": 1.4393995096591415e-05,
99
- "loss": 0.5084,
100
- "step": 130
101
- },
102
- {
103
- "epoch": 0.7622504537205081,
104
- "grad_norm": 0.3258921051684478,
105
- "learning_rate": 1.3607707992167836e-05,
106
- "loss": 0.4943,
107
- "step": 140
108
- },
109
- {
110
- "epoch": 0.8166969147005445,
111
- "grad_norm": 0.3270931532081039,
112
- "learning_rate": 1.2794856348516095e-05,
113
- "loss": 0.5068,
114
- "step": 150
115
- },
116
- {
117
- "epoch": 0.8711433756805808,
118
- "grad_norm": 0.3230906659414158,
119
- "learning_rate": 1.196142541428197e-05,
120
- "loss": 0.5003,
121
- "step": 160
122
- },
123
- {
124
- "epoch": 0.925589836660617,
125
- "grad_norm": 0.2935800627644772,
126
- "learning_rate": 1.1113551969048088e-05,
127
- "loss": 0.5053,
128
- "step": 170
129
- },
130
- {
131
- "epoch": 0.9800362976406534,
132
- "grad_norm": 0.3326717159128385,
133
- "learning_rate": 1.0257479136549889e-05,
134
- "loss": 0.5017,
135
- "step": 180
136
- },
137
- {
138
- "epoch": 1.0353901996370236,
139
- "grad_norm": 0.32878956222583294,
140
- "learning_rate": 9.399510414850518e-06,
141
- "loss": 0.4806,
142
- "step": 190
143
- },
144
- {
145
- "epoch": 1.08983666061706,
146
- "grad_norm": 0.34757108365501066,
147
- "learning_rate": 8.545963261963102e-06,
148
- "loss": 0.4415,
149
- "step": 200
150
- },
151
- {
152
- "epoch": 1.144283121597096,
153
- "grad_norm": 0.3284219242043158,
154
- "learning_rate": 7.703122578682047e-06,
155
- "loss": 0.4406,
156
- "step": 210
157
- },
158
- {
159
- "epoch": 1.1987295825771325,
160
- "grad_norm": 0.32386273566803453,
161
- "learning_rate": 6.877194431142055e-06,
162
- "loss": 0.4345,
163
- "step": 220
164
- },
165
- {
166
- "epoch": 1.2531760435571688,
167
- "grad_norm": 0.3142506822733221,
168
- "learning_rate": 6.074260353858283e-06,
169
- "loss": 0.4443,
170
- "step": 230
171
- },
172
- {
173
- "epoch": 1.3076225045372052,
174
- "grad_norm": 0.3003800601330196,
175
- "learning_rate": 5.300232569726805e-06,
176
- "loss": 0.4329,
177
- "step": 240
178
- },
179
- {
180
- "epoch": 1.3620689655172413,
181
- "grad_norm": 0.3069697265508909,
182
- "learning_rate": 4.560810456712754e-06,
183
- "loss": 0.4376,
184
- "step": 250
185
- },
186
- {
187
- "epoch": 1.4165154264972777,
188
- "grad_norm": 0.293876545095534,
189
- "learning_rate": 3.86143858177388e-06,
190
- "loss": 0.4375,
191
- "step": 260
192
- },
193
- {
194
- "epoch": 1.470961887477314,
195
- "grad_norm": 0.30476524182675285,
196
- "learning_rate": 3.207266611027069e-06,
197
- "loss": 0.439,
198
- "step": 270
199
- },
200
- {
201
- "epoch": 1.5254083484573502,
202
- "grad_norm": 0.30324429690826615,
203
- "learning_rate": 2.6031113913503337e-06,
204
- "loss": 0.4437,
205
- "step": 280
206
- },
207
- {
208
- "epoch": 1.5798548094373865,
209
- "grad_norm": 0.29418290787317664,
210
- "learning_rate": 2.0534214826237486e-06,
211
- "loss": 0.4413,
212
- "step": 290
213
- },
214
- {
215
- "epoch": 1.634301270417423,
216
- "grad_norm": 0.2916751951846426,
217
- "learning_rate": 1.5622444017681438e-06,
218
- "loss": 0.4321,
219
- "step": 300
220
- },
221
- {
222
- "epoch": 1.6887477313974593,
223
- "grad_norm": 0.2931814539684998,
224
- "learning_rate": 1.1331968197725985e-06,
225
- "loss": 0.4335,
226
- "step": 310
227
- },
228
- {
229
- "epoch": 1.7431941923774956,
230
- "grad_norm": 0.2911760868779227,
231
- "learning_rate": 7.694379311582401e-07,
232
- "loss": 0.437,
233
- "step": 320
234
- },
235
- {
236
- "epoch": 1.7976406533575318,
237
- "grad_norm": 0.3025253032847626,
238
- "learning_rate": 4.73646191966175e-07,
239
- "loss": 0.4404,
240
- "step": 330
241
- },
242
- {
243
- "epoch": 1.852087114337568,
244
- "grad_norm": 0.2836449089434229,
245
- "learning_rate": 2.479995975541749e-07,
246
- "loss": 0.4364,
247
- "step": 340
248
- },
249
- {
250
- "epoch": 1.9065335753176043,
251
- "grad_norm": 0.2777013740756467,
252
- "learning_rate": 9.415964542203059e-08,
253
- "loss": 0.422,
254
- "step": 350
255
- },
256
- {
257
- "epoch": 1.9609800362976406,
258
- "grad_norm": 0.29069644830400626,
259
- "learning_rate": 1.325910115169471e-08,
260
- "loss": 0.4297,
261
- "step": 360
262
- },
263
- {
264
- "epoch": 1.9936479128856623,
265
- "step": 366,
266
- "total_flos": 192523449925632.0,
267
- "train_loss": 0.47888606233023556,
268
- "train_runtime": 10712.2216,
269
- "train_samples_per_second": 9.875,
270
- "train_steps_per_second": 0.034
271
- }
272
- ],
273
- "logging_steps": 10,
274
- "max_steps": 366,
275
- "num_input_tokens_seen": 0,
276
- "num_train_epochs": 2,
277
- "save_steps": 400,
278
- "stateful_callbacks": {
279
- "TrainerControl": {
280
- "args": {
281
- "should_epoch_stop": false,
282
- "should_evaluate": false,
283
- "should_log": false,
284
- "should_save": true,
285
- "should_training_stop": true
286
- },
287
- "attributes": {}
288
- }
289
- },
290
- "total_flos": 192523449925632.0,
291
- "train_batch_size": 6,
292
- "trial_name": null,
293
- "trial_params": null
294
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bff2f2a55e5e287584c6c848ef23be2862cfb47d6614fbf6580562c88c552ca
3
- size 7224
 
 
 
 
training_loss.png DELETED
Binary file (34.4 kB)