nlparabic committed on
Commit
dd92e82
·
verified ·
1 Parent(s): 864d781

End of training

Browse files
README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.8827
22
- - Bleu: 0.2463
23
- - Rouge1: 0.6087
24
- - Rouge2: 0.3631
25
- - Rougel: 0.6049
26
 
27
  ## Model description
28
 
 
18
 
19
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.7316
22
+ - Bleu: 0.2459
23
+ - Rouge1: 0.6017
24
+ - Rouge2: 0.3519
25
+ - Rougel: 0.5983
26
 
27
  ## Model description
28
 
all_results.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
- "epoch": 9.0,
3
  "eval_bleu": 0.24587227576979195,
4
  "eval_loss": 0.7315686941146851,
5
  "eval_rouge1": 0.6017197427075045,
6
  "eval_rouge2": 0.3518746485163118,
7
  "eval_rougeL": 0.5982542515796094,
8
- "eval_runtime": 47.1591,
9
  "eval_samples": 14212,
10
- "eval_samples_per_second": 301.363,
11
- "eval_steps_per_second": 37.681,
12
  "perplexity": 2.078338328926906,
13
- "total_flos": 3.3423104950272e+16,
14
- "train_loss": 0.5779510789562912,
15
- "train_runtime": 8660.6869,
16
  "train_samples": 56851,
17
- "train_samples_per_second": 131.285,
18
- "train_steps_per_second": 16.412
19
  }
 
1
  {
2
+ "epoch": 14.0,
3
  "eval_bleu": 0.24587227576979195,
4
  "eval_loss": 0.7315686941146851,
5
  "eval_rouge1": 0.6017197427075045,
6
  "eval_rouge2": 0.3518746485163118,
7
  "eval_rougeL": 0.5982542515796094,
8
+ "eval_runtime": 47.0307,
9
  "eval_samples": 14212,
10
+ "eval_samples_per_second": 302.185,
11
+ "eval_steps_per_second": 37.784,
12
  "perplexity": 2.078338328926906,
13
+ "total_flos": 5.1991496589312e+16,
14
+ "train_loss": 0.09352104669935263,
15
+ "train_runtime": 3380.5822,
16
  "train_samples": 56851,
17
+ "train_samples_per_second": 336.339,
18
+ "train_steps_per_second": 42.046
19
  }
egy_training_log.txt CHANGED
@@ -317,3 +317,5 @@ INFO:root:Epoch 13.0: Train Loss = 0.2586, Eval Loss = 0.8498404026031494
317
  INFO:absl:Using default tokenizer.
318
  INFO:root:Epoch 14.0: Train Loss = 0.2392, Eval Loss = 0.867546021938324
319
  INFO:absl:Using default tokenizer.
 
 
 
317
  INFO:absl:Using default tokenizer.
318
  INFO:root:Epoch 14.0: Train Loss = 0.2392, Eval Loss = 0.867546021938324
319
  INFO:absl:Using default tokenizer.
320
+ INFO:__main__:*** Evaluate ***
321
+ INFO:absl:Using default tokenizer.
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 9.0,
3
  "eval_bleu": 0.24587227576979195,
4
  "eval_loss": 0.7315686941146851,
5
  "eval_rouge1": 0.6017197427075045,
6
  "eval_rouge2": 0.3518746485163118,
7
  "eval_rougeL": 0.5982542515796094,
8
- "eval_runtime": 47.1591,
9
  "eval_samples": 14212,
10
- "eval_samples_per_second": 301.363,
11
- "eval_steps_per_second": 37.681,
12
  "perplexity": 2.078338328926906
13
  }
 
1
  {
2
+ "epoch": 14.0,
3
  "eval_bleu": 0.24587227576979195,
4
  "eval_loss": 0.7315686941146851,
5
  "eval_rouge1": 0.6017197427075045,
6
  "eval_rouge2": 0.3518746485163118,
7
  "eval_rougeL": 0.5982542515796094,
8
+ "eval_runtime": 47.0307,
9
  "eval_samples": 14212,
10
+ "eval_samples_per_second": 302.185,
11
+ "eval_steps_per_second": 37.784,
12
  "perplexity": 2.078338328926906
13
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 9.0,
3
- "total_flos": 3.3423104950272e+16,
4
- "train_loss": 0.5779510789562912,
5
- "train_runtime": 8660.6869,
6
  "train_samples": 56851,
7
- "train_samples_per_second": 131.285,
8
- "train_steps_per_second": 16.412
9
  }
 
1
  {
2
+ "epoch": 14.0,
3
+ "total_flos": 5.1991496589312e+16,
4
+ "train_loss": 0.09352104669935263,
5
+ "train_runtime": 3380.5822,
6
  "train_samples": 56851,
7
+ "train_samples_per_second": 336.339,
8
+ "train_steps_per_second": 42.046
9
  }
train_vs_val_loss.png CHANGED
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7315686941146851,
3
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg/checkpoint-28428",
4
- "epoch": 9.0,
5
  "eval_steps": 500,
6
- "global_step": 63963,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -180,13 +180,108 @@
180
  "step": 63963
181
  },
182
  {
183
- "epoch": 9.0,
184
- "step": 63963,
185
- "total_flos": 3.3423104950272e+16,
186
- "train_loss": 0.5779510789562912,
187
- "train_runtime": 8660.6869,
188
- "train_samples_per_second": 131.285,
189
- "train_steps_per_second": 16.412
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  }
191
  ],
192
  "logging_steps": 500,
@@ -215,7 +310,7 @@
215
  "attributes": {}
216
  }
217
  },
218
- "total_flos": 3.3423104950272e+16,
219
  "train_batch_size": 8,
220
  "trial_name": null,
221
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7315686941146851,
3
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg/checkpoint-28428",
4
+ "epoch": 14.0,
5
  "eval_steps": 500,
6
+ "global_step": 99498,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
180
  "step": 63963
181
  },
182
  {
183
+ "epoch": 10.0,
184
+ "grad_norm": 2.328486442565918,
185
+ "learning_rate": 2.5088251906241178e-05,
186
+ "loss": 0.3074,
187
+ "step": 71070
188
+ },
189
+ {
190
+ "epoch": 10.0,
191
+ "eval_bleu": 0.24784142265151649,
192
+ "eval_loss": 0.8155524134635925,
193
+ "eval_rouge1": 0.6101301030440347,
194
+ "eval_rouge2": 0.364415924488246,
195
+ "eval_rougeL": 0.606258270290786,
196
+ "eval_runtime": 82.2965,
197
+ "eval_samples_per_second": 172.693,
198
+ "eval_steps_per_second": 21.593,
199
+ "step": 71070
200
+ },
201
+ {
202
+ "epoch": 11.0,
203
+ "grad_norm": 2.1735293865203857,
204
+ "learning_rate": 2.2579426715617057e-05,
205
+ "loss": 0.2813,
206
+ "step": 78177
207
+ },
208
+ {
209
+ "epoch": 11.0,
210
+ "eval_bleu": 0.24877766407334076,
211
+ "eval_loss": 0.8325821757316589,
212
+ "eval_rouge1": 0.6108646154589945,
213
+ "eval_rouge2": 0.36567755520636375,
214
+ "eval_rougeL": 0.6071552064184781,
215
+ "eval_runtime": 46.9697,
216
+ "eval_samples_per_second": 302.578,
217
+ "eval_steps_per_second": 37.833,
218
+ "step": 78177
219
+ },
220
+ {
221
+ "epoch": 12.0,
222
+ "grad_norm": 2.380600929260254,
223
+ "learning_rate": 2.0070601524992942e-05,
224
+ "loss": 0.2586,
225
+ "step": 85284
226
+ },
227
+ {
228
+ "epoch": 12.0,
229
+ "eval_bleu": 0.24613125712387748,
230
+ "eval_loss": 0.8498404026031494,
231
+ "eval_rouge1": 0.6082478770786404,
232
+ "eval_rouge2": 0.3630077370066766,
233
+ "eval_rougeL": 0.604462114793365,
234
+ "eval_runtime": 46.8897,
235
+ "eval_samples_per_second": 303.094,
236
+ "eval_steps_per_second": 37.897,
237
+ "step": 85284
238
+ },
239
+ {
240
+ "epoch": 13.0,
241
+ "grad_norm": 2.3239755630493164,
242
+ "learning_rate": 1.7561776334368824e-05,
243
+ "loss": 0.2392,
244
+ "step": 92391
245
+ },
246
+ {
247
+ "epoch": 13.0,
248
+ "eval_bleu": 0.24701514132558042,
249
+ "eval_loss": 0.867546021938324,
250
+ "eval_rouge1": 0.6094521129523778,
251
+ "eval_rouge2": 0.364158573223837,
252
+ "eval_rougeL": 0.6054673467586391,
253
+ "eval_runtime": 169.8152,
254
+ "eval_samples_per_second": 83.691,
255
+ "eval_steps_per_second": 10.464,
256
+ "step": 92391
257
+ },
258
+ {
259
+ "epoch": 14.0,
260
+ "grad_norm": 2.4565744400024414,
261
+ "learning_rate": 1.5052951143744705e-05,
262
+ "loss": 0.2227,
263
+ "step": 99498
264
+ },
265
+ {
266
+ "epoch": 14.0,
267
+ "eval_bleu": 0.24626598003060837,
268
+ "eval_loss": 0.8826896548271179,
269
+ "eval_rouge1": 0.6087178921955774,
270
+ "eval_rouge2": 0.36314063359901005,
271
+ "eval_rougeL": 0.6048623164787159,
272
+ "eval_runtime": 170.1544,
273
+ "eval_samples_per_second": 83.524,
274
+ "eval_steps_per_second": 10.443,
275
+ "step": 99498
276
+ },
277
+ {
278
+ "epoch": 14.0,
279
+ "step": 99498,
280
+ "total_flos": 5.1991496589312e+16,
281
+ "train_loss": 0.09352104669935263,
282
+ "train_runtime": 3380.5822,
283
+ "train_samples_per_second": 336.339,
284
+ "train_steps_per_second": 42.046
285
  }
286
  ],
287
  "logging_steps": 500,
 
310
  "attributes": {}
311
  }
312
  },
313
+ "total_flos": 5.1991496589312e+16,
314
  "train_batch_size": 8,
315
  "trial_name": null,
316
  "trial_params": null