nlparabic committed on
Commit
f37522a
1 Parent(s): 31a1255

End of training

README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
 
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-03B](https://huggingface.co/riotu-lab/ArabianGPT-03B) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.3964
- - Bleu: 0.4567
- - Rouge1: 0.7127
- - Rouge2: 0.4779
- - Rougel: 0.7120
+ - Loss: 0.3495
+ - Bleu: 0.4355
+ - Rouge1: 0.6808
+ - Rouge2: 0.4218
+ - Rougel: 0.6799
 
  ## Model description
 
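For context, a minimal usage sketch, not taken from this repository, for loading the fine-tuned checkpoint with transformers. The repository id "nlparabic/res_nw_gulf_03" is an assumption based on the committer name and the checkpoint path in trainer_state.json, and the snippet presumes the tokenizer was pushed alongside the model weights.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "nlparabic/res_nw_gulf_03"  # assumed id; replace with the actual repo name

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Generate a short continuation for an example Arabic prompt.
inputs = tokenizer("اكتب جملة قصيرة:", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```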
all_results.json ADDED
@@ -0,0 +1,19 @@
+ {
+     "epoch": 7.0,
+     "eval_bleu": 0.43550162080921356,
+     "eval_loss": 0.3495321273803711,
+     "eval_rouge1": 0.6807566585005222,
+     "eval_rouge2": 0.42184236623206983,
+     "eval_rougeL": 0.6798697145255593,
+     "eval_runtime": 135.4262,
+     "eval_samples": 1672,
+     "eval_samples_per_second": 12.346,
+     "eval_steps_per_second": 1.543,
+     "perplexity": 1.4184037610379865,
+     "total_flos": 1.086463725010944e+16,
+     "train_loss": 0.29683067467794466,
+     "train_runtime": 4067.7631,
+     "train_samples": 6685,
+     "train_samples_per_second": 32.868,
+     "train_steps_per_second": 4.11
+ }
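The "perplexity" entry above is consistent with being exp(eval_loss). A quick check, not part of this commit:

```python
import json
import math

# Read the metrics file added in this commit.
with open("all_results.json") as f:
    results = json.load(f)

# exp(0.3495321273803711) ≈ 1.4184037610379865, matching the stored value.
print(math.exp(results["eval_loss"]))
print(results["perplexity"])
```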
egy_training_log.txt CHANGED
@@ -154,3 +154,5 @@ INFO:root:Epoch 6.0: Train Loss = 0.11, Eval Loss = 0.3791240453720093
  INFO:absl:Using default tokenizer.
  INFO:root:Epoch 7.0: Train Loss = 0.0981, Eval Loss = 0.39224255084991455
  INFO:absl:Using default tokenizer.
+ INFO:__main__:*** Evaluate ***
+ INFO:absl:Using default tokenizer.
eval_results.json ADDED
@@ -0,0 +1,13 @@
+ {
+     "epoch": 7.0,
+     "eval_bleu": 0.43550162080921356,
+     "eval_loss": 0.3495321273803711,
+     "eval_rouge1": 0.6807566585005222,
+     "eval_rouge2": 0.42184236623206983,
+     "eval_rougeL": 0.6798697145255593,
+     "eval_runtime": 135.4262,
+     "eval_samples": 1672,
+     "eval_samples_per_second": 12.346,
+     "eval_steps_per_second": 1.543,
+     "perplexity": 1.4184037610379865
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 7.0,
+     "total_flos": 1.086463725010944e+16,
+     "train_loss": 0.29683067467794466,
+     "train_runtime": 4067.7631,
+     "train_samples": 6685,
+     "train_samples_per_second": 32.868,
+     "train_steps_per_second": 4.11
+ }
train_vs_val_loss.png ADDED
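A sketch, assumed rather than taken from this repository, of how a figure like train_vs_val_loss.png can be regenerated from the log_history entries in the trainer_state.json added below:

```python
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    log_history = json.load(f)["log_history"]

# Per-epoch training loss entries carry "loss"; evaluation entries carry "eval_loss".
train = [(e["epoch"], e["loss"]) for e in log_history if "loss" in e]
evals = [(e["epoch"], e["eval_loss"]) for e in log_history if "eval_loss" in e]

plt.plot(*zip(*train), marker="o", label="train loss")
plt.plot(*zip(*evals), marker="o", label="eval loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.savefig("train_vs_val_loss.png")
```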
trainer_state.json ADDED
@@ -0,0 +1,184 @@
+ {
+   "best_metric": 0.3495321273803711,
+   "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_gulf_03/checkpoint-1672",
+   "epoch": 7.0,
+   "eval_steps": 500,
+   "global_step": 5852,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "grad_norm": 1.8581334352493286,
+       "learning_rate": 4.896424167694204e-05,
+       "loss": 1.1759,
+       "step": 836
+     },
+     {
+       "epoch": 1.0,
+       "eval_bleu": 0.41556477975575595,
+       "eval_loss": 0.37735649943351746,
+       "eval_rouge1": 0.6364308285085805,
+       "eval_rouge2": 0.36296002645709646,
+       "eval_rougeL": 0.6351682159497727,
+       "eval_runtime": 16.4559,
+       "eval_samples_per_second": 101.605,
+       "eval_steps_per_second": 12.701,
+       "step": 836
+     },
+     {
+       "epoch": 2.0,
+       "grad_norm": 2.0768027305603027,
+       "learning_rate": 4.638717632552405e-05,
+       "loss": 0.2824,
+       "step": 1672
+     },
+     {
+       "epoch": 2.0,
+       "eval_bleu": 0.43550162080921356,
+       "eval_loss": 0.3495321273803711,
+       "eval_rouge1": 0.6807566585005222,
+       "eval_rouge2": 0.42184236623206983,
+       "eval_rougeL": 0.6798697145255593,
+       "eval_runtime": 12.3396,
+       "eval_samples_per_second": 135.499,
+       "eval_steps_per_second": 16.937,
+       "step": 1672
+     },
+     {
+       "epoch": 3.0,
+       "grad_norm": 1.4890309572219849,
+       "learning_rate": 4.3810110974106046e-05,
+       "loss": 0.1846,
+       "step": 2508
+     },
+     {
+       "epoch": 3.0,
+       "eval_bleu": 0.44798984557044996,
+       "eval_loss": 0.35791030526161194,
+       "eval_rouge1": 0.6988772317351744,
+       "eval_rouge2": 0.4541195944585126,
+       "eval_rougeL": 0.6983599524493045,
+       "eval_runtime": 13.092,
+       "eval_samples_per_second": 127.712,
+       "eval_steps_per_second": 15.964,
+       "step": 2508
+     },
+     {
+       "epoch": 4.0,
+       "grad_norm": 0.9949209690093994,
+       "learning_rate": 4.1233045622688044e-05,
+       "loss": 0.1351,
+       "step": 3344
+     },
+     {
+       "epoch": 4.0,
+       "eval_bleu": 0.4449484201171957,
+       "eval_loss": 0.36742404103279114,
+       "eval_rouge1": 0.6992663461784911,
+       "eval_rouge2": 0.45557653256736347,
+       "eval_rougeL": 0.6984837283971637,
+       "eval_runtime": 12.3128,
+       "eval_samples_per_second": 135.794,
+       "eval_steps_per_second": 16.974,
+       "step": 3344
+     },
+     {
+       "epoch": 5.0,
+       "grad_norm": 1.245552897453308,
+       "learning_rate": 3.8655980271270036e-05,
+       "loss": 0.11,
+       "step": 4180
+     },
+     {
+       "epoch": 5.0,
+       "eval_bleu": 0.4488174409372482,
+       "eval_loss": 0.3791240453720093,
+       "eval_rouge1": 0.7066508462378398,
+       "eval_rouge2": 0.46813923665655116,
+       "eval_rougeL": 0.7055578408590141,
+       "eval_runtime": 18.2521,
+       "eval_samples_per_second": 91.606,
+       "eval_steps_per_second": 11.451,
+       "step": 4180
+     },
+     {
+       "epoch": 6.0,
+       "grad_norm": 1.253645896911621,
+       "learning_rate": 3.6078914919852034e-05,
+       "loss": 0.0981,
+       "step": 5016
+     },
+     {
+       "epoch": 6.0,
+       "eval_bleu": 0.4582553582864403,
+       "eval_loss": 0.39224255084991455,
+       "eval_rouge1": 0.7135223253955782,
+       "eval_rouge2": 0.4789126140106226,
+       "eval_rougeL": 0.7129027733719826,
+       "eval_runtime": 65.9112,
+       "eval_samples_per_second": 25.367,
+       "eval_steps_per_second": 3.171,
+       "step": 5016
+     },
+     {
+       "epoch": 7.0,
+       "grad_norm": 0.6491497755050659,
+       "learning_rate": 3.350184956843403e-05,
+       "loss": 0.0917,
+       "step": 5852
+     },
+     {
+       "epoch": 7.0,
+       "eval_bleu": 0.4567065309085701,
+       "eval_loss": 0.3964388072490692,
+       "eval_rouge1": 0.7126519996998933,
+       "eval_rouge2": 0.47787123299802714,
+       "eval_rougeL": 0.7120354739021442,
+       "eval_runtime": 21.7898,
+       "eval_samples_per_second": 76.733,
+       "eval_steps_per_second": 9.592,
+       "step": 5852
+     },
+     {
+       "epoch": 7.0,
+       "step": 5852,
+       "total_flos": 1.086463725010944e+16,
+       "train_loss": 0.29683067467794466,
+       "train_runtime": 4067.7631,
+       "train_samples_per_second": 32.868,
+       "train_steps_per_second": 4.11
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 16720,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 20,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 5,
+         "early_stopping_threshold": 0.0
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 0
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1.086463725010944e+16,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
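The stateful_callbacks block above records early stopping with patience 5 on the best eval_loss (0.3495 at step 1672, epoch 2); eval_loss does not improve from epoch 3 through epoch 7, so training stops after 7 of the configured 20 epochs (should_training_stop is true). A minimal sketch, not part of this commit, of how that setup is typically expressed with the Hugging Face Trainer; argument values mirror trainer_state.json, everything else is assumed:

```python
from transformers import TrainingArguments, EarlyStoppingCallback

# Early stopping as recorded in "stateful_callbacks" above.
early_stop = EarlyStoppingCallback(
    early_stopping_patience=5,
    early_stopping_threshold=0.0,
)

training_args = TrainingArguments(
    output_dir="res_nw_gulf_03",        # assumed, taken from best_model_checkpoint
    num_train_epochs=20,                # matches num_train_epochs
    per_device_train_batch_size=8,      # matches train_batch_size
    eval_strategy="epoch",              # "evaluation_strategy" on older transformers releases
    save_strategy="epoch",
    load_best_model_at_end=True,        # required for EarlyStoppingCallback
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# These would be passed to Trainer(model=..., args=training_args,
# callbacks=[early_stop]); with the eval losses logged above, the best model is
# the epoch-2 checkpoint and training stops after epoch 7.
```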