Recep Bulbul committed on
Commit
d8bbdc5
1 Parent(s): acc1f42

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Turkish-NLP/t5-efficient-base-turkish",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 36,
22
+ "num_heads": 12,
23
+ "num_layers": 36,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.42.4",
30
+ "use_cache": true,
31
+ "vocab_size": 32128
32
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.42.4"
7
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:269dbf2213fadf2fab87d4e6cb9754109bbb43bf28ee2ee952a420ea0870b34f
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b097d732cba2ad47b9977234e3349f0a6ee2c416c7a8fb6f99d6ff0b3dd99027
3
+ size 1064
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ca0ade8cf006bd16c9d7dafebce0f7fad8ba4018f3e70947e972cf10a2b3b03
3
+ size 839200
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "special_tokens_map_file": null, "name_or_path": "./", "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 15010,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1665556295802798,
13
+ "grad_norm": 1.6862213611602783,
14
+ "learning_rate": 0.00048334443704197203,
15
+ "loss": 2.122,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.3331112591605596,
20
+ "grad_norm": 0.9895781874656677,
21
+ "learning_rate": 0.00046668887408394405,
22
+ "loss": 1.6754,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.4996668887408394,
27
+ "grad_norm": 1.4011154174804688,
28
+ "learning_rate": 0.00045003331112591607,
29
+ "loss": 1.5258,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.6662225183211192,
34
+ "grad_norm": 1.263901948928833,
35
+ "learning_rate": 0.0004333777481678881,
36
+ "loss": 1.3949,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.832778147901399,
41
+ "grad_norm": 0.9926736354827881,
42
+ "learning_rate": 0.0004167221852098601,
43
+ "loss": 1.323,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.9993337774816788,
48
+ "grad_norm": 1.2104265689849854,
49
+ "learning_rate": 0.0004000666222518321,
50
+ "loss": 1.2978,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 1.0,
55
+ "eval_loss": 1.1156065464019775,
56
+ "eval_runtime": 37.2553,
57
+ "eval_samples_per_second": 80.579,
58
+ "eval_steps_per_second": 10.093,
59
+ "step": 3002
60
+ },
61
+ {
62
+ "epoch": 1.1658894070619588,
63
+ "grad_norm": 0.9220499992370605,
64
+ "learning_rate": 0.00038341105929380414,
65
+ "loss": 0.964,
66
+ "step": 3500
67
+ },
68
+ {
69
+ "epoch": 1.3324450366422385,
70
+ "grad_norm": 1.2684720754623413,
71
+ "learning_rate": 0.00036675549633577616,
72
+ "loss": 0.9784,
73
+ "step": 4000
74
+ },
75
+ {
76
+ "epoch": 1.4990006662225184,
77
+ "grad_norm": 1.3914306163787842,
78
+ "learning_rate": 0.0003500999333777481,
79
+ "loss": 0.9265,
80
+ "step": 4500
81
+ },
82
+ {
83
+ "epoch": 1.6655562958027983,
84
+ "grad_norm": 1.0393187999725342,
85
+ "learning_rate": 0.0003334443704197202,
86
+ "loss": 0.9401,
87
+ "step": 5000
88
+ },
89
+ {
90
+ "epoch": 1.832111925383078,
91
+ "grad_norm": 1.1595275402069092,
92
+ "learning_rate": 0.0003167888074616922,
93
+ "loss": 0.9091,
94
+ "step": 5500
95
+ },
96
+ {
97
+ "epoch": 1.9986675549633577,
98
+ "grad_norm": 1.2401949167251587,
99
+ "learning_rate": 0.00030013324450366423,
100
+ "loss": 0.9271,
101
+ "step": 6000
102
+ },
103
+ {
104
+ "epoch": 2.0,
105
+ "eval_loss": 0.9488099217414856,
106
+ "eval_runtime": 37.415,
107
+ "eval_samples_per_second": 80.235,
108
+ "eval_steps_per_second": 10.049,
109
+ "step": 6004
110
+ },
111
+ {
112
+ "epoch": 2.1652231845436374,
113
+ "grad_norm": 0.6324372887611389,
114
+ "learning_rate": 0.00028347768154563625,
115
+ "loss": 0.6576,
116
+ "step": 6500
117
+ },
118
+ {
119
+ "epoch": 2.3317788141239175,
120
+ "grad_norm": 1.377943992614746,
121
+ "learning_rate": 0.00026682211858760827,
122
+ "loss": 0.6499,
123
+ "step": 7000
124
+ },
125
+ {
126
+ "epoch": 2.498334443704197,
127
+ "grad_norm": 0.9929794669151306,
128
+ "learning_rate": 0.0002501665556295803,
129
+ "loss": 0.6509,
130
+ "step": 7500
131
+ },
132
+ {
133
+ "epoch": 2.664890073284477,
134
+ "grad_norm": 1.331009030342102,
135
+ "learning_rate": 0.0002335109926715523,
136
+ "loss": 0.6492,
137
+ "step": 8000
138
+ },
139
+ {
140
+ "epoch": 2.831445702864757,
141
+ "grad_norm": 0.9260538816452026,
142
+ "learning_rate": 0.00021685542971352432,
143
+ "loss": 0.6765,
144
+ "step": 8500
145
+ },
146
+ {
147
+ "epoch": 2.9980013324450367,
148
+ "grad_norm": 1.6844342947006226,
149
+ "learning_rate": 0.00020019986675549634,
150
+ "loss": 0.6452,
151
+ "step": 9000
152
+ },
153
+ {
154
+ "epoch": 3.0,
155
+ "eval_loss": 0.9022971391677856,
156
+ "eval_runtime": 37.548,
157
+ "eval_samples_per_second": 79.951,
158
+ "eval_steps_per_second": 10.014,
159
+ "step": 9006
160
+ },
161
+ {
162
+ "epoch": 3.1645569620253164,
163
+ "grad_norm": 0.6371086835861206,
164
+ "learning_rate": 0.00018354430379746836,
165
+ "loss": 0.4646,
166
+ "step": 9500
167
+ },
168
+ {
169
+ "epoch": 3.331112591605596,
170
+ "grad_norm": 1.005698323249817,
171
+ "learning_rate": 0.00016688874083944038,
172
+ "loss": 0.4706,
173
+ "step": 10000
174
+ },
175
+ {
176
+ "epoch": 3.497668221185876,
177
+ "grad_norm": 0.990774393081665,
178
+ "learning_rate": 0.0001502331778814124,
179
+ "loss": 0.434,
180
+ "step": 10500
181
+ },
182
+ {
183
+ "epoch": 3.664223850766156,
184
+ "grad_norm": 0.7444645762443542,
185
+ "learning_rate": 0.00013357761492338441,
186
+ "loss": 0.4682,
187
+ "step": 11000
188
+ },
189
+ {
190
+ "epoch": 3.8307794803464357,
191
+ "grad_norm": 1.1938289403915405,
192
+ "learning_rate": 0.00011692205196535643,
193
+ "loss": 0.4428,
194
+ "step": 11500
195
+ },
196
+ {
197
+ "epoch": 3.9973351099267154,
198
+ "grad_norm": 1.2232627868652344,
199
+ "learning_rate": 0.00010026648900732845,
200
+ "loss": 0.4446,
201
+ "step": 12000
202
+ },
203
+ {
204
+ "epoch": 4.0,
205
+ "eval_loss": 0.9124976396560669,
206
+ "eval_runtime": 37.6584,
207
+ "eval_samples_per_second": 79.717,
208
+ "eval_steps_per_second": 9.985,
209
+ "step": 12008
210
+ },
211
+ {
212
+ "epoch": 4.1638907395069955,
213
+ "grad_norm": 1.035305380821228,
214
+ "learning_rate": 8.361092604930047e-05,
215
+ "loss": 0.3292,
216
+ "step": 12500
217
+ },
218
+ {
219
+ "epoch": 4.330446369087275,
220
+ "grad_norm": 1.409875512123108,
221
+ "learning_rate": 6.695536309127249e-05,
222
+ "loss": 0.321,
223
+ "step": 13000
224
+ },
225
+ {
226
+ "epoch": 4.497001998667555,
227
+ "grad_norm": 1.468259334564209,
228
+ "learning_rate": 5.0299800133244506e-05,
229
+ "loss": 0.3161,
230
+ "step": 13500
231
+ },
232
+ {
233
+ "epoch": 4.663557628247835,
234
+ "grad_norm": 1.00761878490448,
235
+ "learning_rate": 3.3644237175216524e-05,
236
+ "loss": 0.3214,
237
+ "step": 14000
238
+ },
239
+ {
240
+ "epoch": 4.830113257828114,
241
+ "grad_norm": 0.7190210223197937,
242
+ "learning_rate": 1.698867421718854e-05,
243
+ "loss": 0.3037,
244
+ "step": 14500
245
+ },
246
+ {
247
+ "epoch": 4.996668887408394,
248
+ "grad_norm": 0.6449595093727112,
249
+ "learning_rate": 3.3311125916055966e-07,
250
+ "loss": 0.3045,
251
+ "step": 15000
252
+ }
253
+ ],
254
+ "logging_steps": 500,
255
+ "max_steps": 15010,
256
+ "num_input_tokens_seen": 0,
257
+ "num_train_epochs": 5,
258
+ "save_steps": 10000,
259
+ "stateful_callbacks": {
260
+ "TrainerControl": {
261
+ "args": {
262
+ "should_epoch_stop": false,
263
+ "should_evaluate": false,
264
+ "should_log": false,
265
+ "should_save": true,
266
+ "should_training_stop": true
267
+ },
268
+ "attributes": {}
269
+ }
270
+ },
271
+ "total_flos": 6083104659545088.0,
272
+ "train_batch_size": 4,
273
+ "trial_name": null,
274
+ "trial_params": null
275
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bc3740d66ce7f13a9bf52c0f372ab07710a244dec0c4cce502fad110e30edb3
3
+ size 5048