iHateNLP committed on
Commit
8dcc16b
·
verified ·
1 Parent(s): 92daccc

Training Examples: 10-20k

Browse files
README.md CHANGED
@@ -1,8 +1,6 @@
1
  ---
2
  base_model: unsloth/Phi-3.5-mini-instruct
3
  library_name: peft
4
- tags:
5
- - unsloth
6
  ---
7
 
8
  # Model Card for Model ID
 
1
  ---
2
  base_model: unsloth/Phi-3.5-mini-instruct
3
  library_name: peft
 
 
4
  ---
5
 
6
  # Model Card for Model ID
adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "q_proj",
27
- "down_proj",
28
- "gate_proj",
29
  "o_proj",
 
 
30
  "up_proj",
31
  "k_proj",
32
- "v_proj"
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
26
  "o_proj",
27
+ "q_proj",
28
+ "v_proj",
29
  "up_proj",
30
  "k_proj",
31
+ "down_proj",
32
+ "gate_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f43d47027386dc4eafd698323a31d8c6c40fbd882c2bfe9dd43b1122c186929f
3
  size 119597408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec128fc25c61bba289ed835cd135bd259f2e6b0e7461a47f722930c775371b4d
3
  size 119597408
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83438eaa9a3c3886c6e226e34f99c9abf71eeff99962b4cc773061653827afbd
3
  size 61227348
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b04f3fdac6e766bf2c5cf4423858daf620bd0e5672f6b35021ccd14bdf331a4
3
  size 61227348
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d847fa8b77307ec3f023cf0a917c492c209c15c78747caba0466134193ab7151
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:195e5184004bc0db9b878c771cd5c8a05988dae6ecbf71bd37c791801c0f3c2f
3
  size 988
trainer_state.json CHANGED
@@ -11,345 +11,345 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.00020742584526031943,
14
- "grad_norm": 0.572188675403595,
15
  "learning_rate": 0.0,
16
- "loss": 0.9048,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.020742584526031945,
21
- "grad_norm": 0.8375779390335083,
22
  "learning_rate": 2.0518134715025907e-05,
23
- "loss": 0.6388,
24
  "step": 100
25
  },
26
  {
27
  "epoch": 0.04148516905206389,
28
- "grad_norm": 0.80202317237854,
29
  "learning_rate": 4.1243523316062174e-05,
30
- "loss": 0.5854,
31
  "step": 200
32
  },
33
  {
34
  "epoch": 0.06222775357809583,
35
- "grad_norm": 0.7062171101570129,
36
  "learning_rate": 6.196891191709845e-05,
37
- "loss": 0.4707,
38
  "step": 300
39
  },
40
  {
41
  "epoch": 0.08297033810412778,
42
- "grad_norm": 0.644052267074585,
43
  "learning_rate": 8.269430051813471e-05,
44
- "loss": 0.3773,
45
  "step": 400
46
  },
47
  {
48
  "epoch": 0.10371292263015972,
49
- "grad_norm": 0.6500552296638489,
50
  "learning_rate": 0.000103419689119171,
51
- "loss": 0.339,
52
  "step": 500
53
  },
54
  {
55
  "epoch": 0.12445550715619166,
56
- "grad_norm": 0.9881173968315125,
57
- "learning_rate": 0.00012414507772020726,
58
- "loss": 0.3264,
59
  "step": 600
60
  },
61
  {
62
  "epoch": 0.1451980916822236,
63
- "grad_norm": 0.8607994318008423,
64
- "learning_rate": 0.00014487046632124352,
65
- "loss": 0.3395,
66
  "step": 700
67
  },
68
  {
69
  "epoch": 0.16594067620825556,
70
- "grad_norm": 1.1204837560653687,
71
- "learning_rate": 0.0001655958549222798,
72
- "loss": 0.356,
73
  "step": 800
74
  },
75
  {
76
  "epoch": 0.18668326073428748,
77
- "grad_norm": 1.4915101528167725,
78
- "learning_rate": 0.00018632124352331608,
79
- "loss": 0.3399,
80
  "step": 900
81
  },
82
  {
83
  "epoch": 0.20742584526031943,
84
- "grad_norm": 1.2340389490127563,
85
- "learning_rate": 0.00019996163583391267,
86
- "loss": 0.371,
87
  "step": 1000
88
  },
89
  {
90
  "epoch": 0.22816842978635138,
91
- "grad_norm": 0.657108724117279,
92
- "learning_rate": 0.00019940464789344699,
93
- "loss": 0.3402,
94
  "step": 1100
95
  },
96
  {
97
  "epoch": 0.24891101431238333,
98
- "grad_norm": 1.1355221271514893,
99
- "learning_rate": 0.00019818819435915685,
100
- "loss": 0.3604,
101
  "step": 1200
102
  },
103
  {
104
  "epoch": 0.2696535988384153,
105
- "grad_norm": 0.8293834924697876,
106
- "learning_rate": 0.00019632034536930397,
107
- "loss": 0.355,
108
  "step": 1300
109
  },
110
  {
111
  "epoch": 0.2903961833644472,
112
- "grad_norm": 1.1846222877502441,
113
- "learning_rate": 0.00019381349251894317,
114
- "loss": 0.3562,
115
  "step": 1400
116
  },
117
  {
118
  "epoch": 0.3111387678904792,
119
- "grad_norm": 0.7575041055679321,
120
- "learning_rate": 0.0001906842666521912,
121
- "loss": 0.3853,
122
  "step": 1500
123
  },
124
  {
125
  "epoch": 0.3318813524165111,
126
- "grad_norm": 0.8805419206619263,
127
- "learning_rate": 0.0001869534275306705,
128
- "loss": 0.3789,
129
  "step": 1600
130
  },
131
  {
132
  "epoch": 0.352623936942543,
133
- "grad_norm": 0.5712432861328125,
134
- "learning_rate": 0.00018264572611008452,
135
- "loss": 0.4197,
136
  "step": 1700
137
  },
138
  {
139
  "epoch": 0.37336652146857496,
140
- "grad_norm": 1.414759874343872,
141
- "learning_rate": 0.00017778974033860325,
142
- "loss": 0.3831,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 0.3941091059946069,
147
- "grad_norm": 1.0402040481567383,
148
- "learning_rate": 0.00017241768556639647,
149
- "loss": 0.4039,
150
  "step": 1900
151
  },
152
  {
153
  "epoch": 0.41485169052063886,
154
- "grad_norm": 0.68588787317276,
155
- "learning_rate": 0.0001665652008240878,
156
- "loss": 0.3909,
157
  "step": 2000
158
  },
159
  {
160
  "epoch": 0.4355942750466708,
161
- "grad_norm": 0.967073380947113,
162
- "learning_rate": 0.00016027111238799057,
163
- "loss": 0.4252,
164
  "step": 2100
165
  },
166
  {
167
  "epoch": 0.45633685957270276,
168
- "grad_norm": 1.229313850402832,
169
- "learning_rate": 0.00015357717620066938,
170
- "loss": 0.4296,
171
  "step": 2200
172
  },
173
  {
174
  "epoch": 0.4770794440987347,
175
- "grad_norm": 1.2722011804580688,
176
- "learning_rate": 0.00014652780085564921,
177
- "loss": 0.4027,
178
  "step": 2300
179
  },
180
  {
181
  "epoch": 0.49782202862476665,
182
- "grad_norm": 0.9985523819923401,
183
- "learning_rate": 0.00013916975298403346,
184
- "loss": 0.4733,
185
  "step": 2400
186
  },
187
  {
188
  "epoch": 0.5185646131507986,
189
- "grad_norm": 1.0977421998977661,
190
- "learning_rate": 0.00013155184699754102,
191
- "loss": 0.4848,
192
  "step": 2500
193
  },
194
  {
195
  "epoch": 0.5393071976768306,
196
- "grad_norm": 0.9423943758010864,
197
- "learning_rate": 0.00012372462124625452,
198
- "loss": 0.4491,
199
  "step": 2600
200
  },
201
  {
202
  "epoch": 0.5600497822028625,
203
- "grad_norm": 1.0384944677352905,
204
- "learning_rate": 0.00011574000273949858,
205
- "loss": 0.4421,
206
  "step": 2700
207
  },
208
  {
209
  "epoch": 0.5807923667288944,
210
- "grad_norm": 0.6461535692214966,
211
- "learning_rate": 0.00010765096265414077,
212
- "loss": 0.4887,
213
  "step": 2800
214
  },
215
  {
216
  "epoch": 0.6015349512549264,
217
- "grad_norm": 0.7776329517364502,
218
- "learning_rate": 9.95111649157258e-05,
219
- "loss": 0.5306,
220
  "step": 2900
221
  },
222
  {
223
  "epoch": 0.6222775357809583,
224
- "grad_norm": 0.6103058457374573,
225
- "learning_rate": 9.137461018380963e-05,
226
- "loss": 0.4847,
227
  "step": 3000
228
  },
229
  {
230
  "epoch": 0.6430201203069903,
231
- "grad_norm": 1.384641170501709,
232
- "learning_rate": 8.329527760334861e-05,
233
- "loss": 0.5293,
234
  "step": 3100
235
  },
236
  {
237
  "epoch": 0.6637627048330222,
238
- "grad_norm": 1.2251664400100708,
239
- "learning_rate": 7.532676669881955e-05,
240
- "loss": 0.55,
241
  "step": 3200
242
  },
243
  {
244
  "epoch": 0.6845052893590542,
245
- "grad_norm": 0.5400375127792358,
246
- "learning_rate": 6.752194178680041e-05,
247
- "loss": 0.5329,
248
  "step": 3300
249
  },
250
  {
251
  "epoch": 0.705247873885086,
252
- "grad_norm": 1.0325515270233154,
253
- "learning_rate": 5.9932581266031694e-05,
254
- "loss": 0.5638,
255
  "step": 3400
256
  },
257
  {
258
  "epoch": 0.725990458411118,
259
- "grad_norm": 0.4699115455150604,
260
  "learning_rate": 5.2680792652421385e-05,
261
- "loss": 0.5527,
262
  "step": 3500
263
  },
264
  {
265
  "epoch": 0.7467330429371499,
266
- "grad_norm": 0.5923639535903931,
267
  "learning_rate": 4.5668266493661425e-05,
268
- "loss": 0.5647,
269
  "step": 3600
270
  },
271
  {
272
  "epoch": 0.7674756274631819,
273
- "grad_norm": 1.6123884916305542,
274
  "learning_rate": 3.901618534083994e-05,
275
- "loss": 0.6208,
276
  "step": 3700
277
  },
278
  {
279
  "epoch": 0.7882182119892138,
280
- "grad_norm": 0.882792055606842,
281
  "learning_rate": 3.2768680114799956e-05,
282
- "loss": 0.585,
283
  "step": 3800
284
  },
285
  {
286
  "epoch": 0.8089607965152458,
287
- "grad_norm": 0.8842360973358154,
288
  "learning_rate": 2.696719771798648e-05,
289
- "loss": 0.6046,
290
  "step": 3900
291
  },
292
  {
293
  "epoch": 0.8297033810412777,
294
- "grad_norm": 0.9587863087654114,
295
  "learning_rate": 2.1650226069374525e-05,
296
- "loss": 0.6127,
297
  "step": 4000
298
  },
299
  {
300
  "epoch": 0.8504459655673097,
301
- "grad_norm": 0.6551477909088135,
302
  "learning_rate": 1.6853038769745467e-05,
303
- "loss": 0.6291,
304
  "step": 4100
305
  },
306
  {
307
  "epoch": 0.8711885500933416,
308
- "grad_norm": 0.7264061570167542,
309
  "learning_rate": 1.2607461091239803e-05,
310
- "loss": 0.6627,
311
  "step": 4200
312
  },
313
  {
314
  "epoch": 0.8919311346193736,
315
- "grad_norm": 0.40014514327049255,
316
  "learning_rate": 8.941658843648237e-06,
317
- "loss": 0.6575,
318
  "step": 4300
319
  },
320
  {
321
  "epoch": 0.9126737191454055,
322
- "grad_norm": 1.0279369354248047,
323
  "learning_rate": 5.879951518134263e-06,
324
- "loss": 0.7132,
325
  "step": 4400
326
  },
327
  {
328
  "epoch": 0.9334163036714375,
329
- "grad_norm": 0.559190034866333,
330
  "learning_rate": 3.4426509480207646e-06,
331
- "loss": 0.6866,
332
  "step": 4500
333
  },
334
  {
335
  "epoch": 0.9541588881974694,
336
- "grad_norm": 1.0593820810317993,
337
  "learning_rate": 1.6459265569902738e-06,
338
- "loss": 0.6781,
339
  "step": 4600
340
  },
341
  {
342
  "epoch": 0.9749014727235014,
343
- "grad_norm": 0.7888472080230713,
344
  "learning_rate": 5.016980886622169e-07,
345
- "loss": 0.7098,
346
  "step": 4700
347
  },
348
  {
349
  "epoch": 0.9956440572495333,
350
- "grad_norm": 1.0247892141342163,
351
  "learning_rate": 1.755652919597228e-08,
352
- "loss": 0.6915,
353
  "step": 4800
354
  }
355
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.00020742584526031943,
14
+ "grad_norm": 0.6779253482818604,
15
  "learning_rate": 0.0,
16
+ "loss": 0.9216,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.020742584526031945,
21
+ "grad_norm": 0.9759976267814636,
22
  "learning_rate": 2.0518134715025907e-05,
23
+ "loss": 0.5876,
24
  "step": 100
25
  },
26
  {
27
  "epoch": 0.04148516905206389,
28
+ "grad_norm": 1.1005536317825317,
29
  "learning_rate": 4.1243523316062174e-05,
30
+ "loss": 0.4583,
31
  "step": 200
32
  },
33
  {
34
  "epoch": 0.06222775357809583,
35
+ "grad_norm": 1.5758947134017944,
36
  "learning_rate": 6.196891191709845e-05,
37
+ "loss": 0.3009,
38
  "step": 300
39
  },
40
  {
41
  "epoch": 0.08297033810412778,
42
+ "grad_norm": 1.0562893152236938,
43
  "learning_rate": 8.269430051813471e-05,
44
+ "loss": 0.2051,
45
  "step": 400
46
  },
47
  {
48
  "epoch": 0.10371292263015972,
49
+ "grad_norm": 0.8340764045715332,
50
  "learning_rate": 0.000103419689119171,
51
+ "loss": 0.1822,
52
  "step": 500
53
  },
54
  {
55
  "epoch": 0.12445550715619166,
56
+ "grad_norm": 0.7953233122825623,
57
+ "learning_rate": 0.0001239378238341969,
58
+ "loss": 0.1671,
59
  "step": 600
60
  },
61
  {
62
  "epoch": 0.1451980916822236,
63
+ "grad_norm": 0.6487672924995422,
64
+ "learning_rate": 0.00014466321243523318,
65
+ "loss": 0.1672,
66
  "step": 700
67
  },
68
  {
69
  "epoch": 0.16594067620825556,
70
+ "grad_norm": 1.0472800731658936,
71
+ "learning_rate": 0.00016538860103626943,
72
+ "loss": 0.167,
73
  "step": 800
74
  },
75
  {
76
  "epoch": 0.18668326073428748,
77
+ "grad_norm": 1.3705922365188599,
78
+ "learning_rate": 0.0001861139896373057,
79
+ "loss": 0.1519,
80
  "step": 900
81
  },
82
  {
83
  "epoch": 0.20742584526031943,
84
+ "grad_norm": 1.5635592937469482,
85
+ "learning_rate": 0.00019996385922862659,
86
+ "loss": 0.1767,
87
  "step": 1000
88
  },
89
  {
90
  "epoch": 0.22816842978635138,
91
+ "grad_norm": 0.9496662616729736,
92
+ "learning_rate": 0.00019941349192574383,
93
+ "loss": 0.1558,
94
  "step": 1100
95
  },
96
  {
97
  "epoch": 0.24891101431238333,
98
+ "grad_norm": 0.8596046566963196,
99
+ "learning_rate": 0.00019820360035637763,
100
+ "loss": 0.1647,
101
  "step": 1200
102
  },
103
  {
104
  "epoch": 0.2696535988384153,
105
+ "grad_norm": 0.9286707043647766,
106
+ "learning_rate": 0.0001963422111257136,
107
+ "loss": 0.1737,
108
  "step": 1300
109
  },
110
  {
111
  "epoch": 0.2903961833644472,
112
+ "grad_norm": 1.3370907306671143,
113
+ "learning_rate": 0.000193841672973779,
114
+ "loss": 0.1646,
115
  "step": 1400
116
  },
117
  {
118
  "epoch": 0.3111387678904792,
119
+ "grad_norm": 0.9638619422912598,
120
+ "learning_rate": 0.00019071857485201819,
121
+ "loss": 0.1841,
122
  "step": 1500
123
  },
124
  {
125
  "epoch": 0.3318813524165111,
126
+ "grad_norm": 1.182265043258667,
127
+ "learning_rate": 0.0001869936358696588,
128
+ "loss": 0.1736,
129
  "step": 1600
130
  },
131
  {
132
  "epoch": 0.352623936942543,
133
+ "grad_norm": 0.7590048909187317,
134
+ "learning_rate": 0.00018269156783998138,
135
+ "loss": 0.2114,
136
  "step": 1700
137
  },
138
  {
139
  "epoch": 0.37336652146857496,
140
+ "grad_norm": 1.873295545578003,
141
+ "learning_rate": 0.00017784091133838138,
142
+ "loss": 0.1915,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 0.3941091059946069,
147
+ "grad_norm": 1.4994107484817505,
148
+ "learning_rate": 0.00017247384635983953,
149
+ "loss": 0.2032,
150
  "step": 1900
151
  },
152
  {
153
  "epoch": 0.41485169052063886,
154
+ "grad_norm": 0.8668350577354431,
155
+ "learning_rate": 0.00016662597883192732,
156
+ "loss": 0.1936,
157
  "step": 2000
158
  },
159
  {
160
  "epoch": 0.4355942750466708,
161
+ "grad_norm": 1.6480194330215454,
162
+ "learning_rate": 0.00016033610439965355,
163
+ "loss": 0.2191,
164
  "step": 2100
165
  },
166
  {
167
  "epoch": 0.45633685957270276,
168
+ "grad_norm": 1.6885930299758911,
169
+ "learning_rate": 0.00015364595104923887,
170
+ "loss": 0.2218,
171
  "step": 2200
172
  },
173
  {
174
  "epoch": 0.4770794440987347,
175
+ "grad_norm": 1.2831690311431885,
176
+ "learning_rate": 0.0001465999022782913,
177
+ "loss": 0.2198,
178
  "step": 2300
179
  },
180
  {
181
  "epoch": 0.49782202862476665,
182
+ "grad_norm": 1.4296784400939941,
183
+ "learning_rate": 0.0001392447026489145,
184
+ "loss": 0.2638,
185
  "step": 2400
186
  },
187
  {
188
  "epoch": 0.5185646131507986,
189
+ "grad_norm": 1.3762316703796387,
190
+ "learning_rate": 0.00013162914767715403,
191
+ "loss": 0.2742,
192
  "step": 2500
193
  },
194
  {
195
  "epoch": 0.5393071976768306,
196
+ "grad_norm": 0.78104567527771,
197
+ "learning_rate": 0.00012380376011610222,
198
+ "loss": 0.2495,
199
  "step": 2600
200
  },
201
  {
202
  "epoch": 0.5600497822028625,
203
+ "grad_norm": 1.5943220853805542,
204
+ "learning_rate": 0.00011582045478024929,
205
+ "loss": 0.2562,
206
  "step": 2700
207
  },
208
  {
209
  "epoch": 0.5807923667288944,
210
+ "grad_norm": 0.9165586829185486,
211
+ "learning_rate": 0.00010773219413468682,
212
+ "loss": 0.2905,
213
  "step": 2800
214
  },
215
  {
216
  "epoch": 0.6015349512549264,
217
+ "grad_norm": 0.923703670501709,
218
+ "learning_rate": 9.959263693403704e-05,
219
+ "loss": 0.3557,
220
  "step": 2900
221
  },
222
  {
223
  "epoch": 0.6222775357809583,
224
+ "grad_norm": 0.8286433219909668,
225
+ "learning_rate": 9.145578224209172e-05,
226
+ "loss": 0.3085,
227
  "step": 3000
228
  },
229
  {
230
  "epoch": 0.6430201203069903,
231
+ "grad_norm": 1.4901179075241089,
232
+ "learning_rate": 8.337561119378777e-05,
233
+ "loss": 0.3469,
234
  "step": 3100
235
  },
236
  {
237
  "epoch": 0.6637627048330222,
238
+ "grad_norm": 1.5367130041122437,
239
+ "learning_rate": 7.540572887612554e-05,
240
+ "loss": 0.3813,
241
  "step": 3200
242
  },
243
  {
244
  "epoch": 0.6845052893590542,
245
+ "grad_norm": 0.7450740933418274,
246
+ "learning_rate": 6.759900870384683e-05,
247
+ "loss": 0.3884,
248
  "step": 3300
249
  },
250
  {
251
  "epoch": 0.705247873885086,
252
+ "grad_norm": 1.3471629619598389,
253
+ "learning_rate": 6.00072416491378e-05,
254
+ "loss": 0.4307,
255
  "step": 3400
256
  },
257
  {
258
  "epoch": 0.725990458411118,
259
+ "grad_norm": 0.6345047354698181,
260
  "learning_rate": 5.2680792652421385e-05,
261
+ "loss": 0.4336,
262
  "step": 3500
263
  },
264
  {
265
  "epoch": 0.7467330429371499,
266
+ "grad_norm": 0.8957846164703369,
267
  "learning_rate": 4.5668266493661425e-05,
268
+ "loss": 0.4618,
269
  "step": 3600
270
  },
271
  {
272
  "epoch": 0.7674756274631819,
273
+ "grad_norm": 1.8546568155288696,
274
  "learning_rate": 3.901618534083994e-05,
275
+ "loss": 0.5159,
276
  "step": 3700
277
  },
278
  {
279
  "epoch": 0.7882182119892138,
280
+ "grad_norm": 1.1825199127197266,
281
  "learning_rate": 3.2768680114799956e-05,
282
+ "loss": 0.5052,
283
  "step": 3800
284
  },
285
  {
286
  "epoch": 0.8089607965152458,
287
+ "grad_norm": 1.0945169925689697,
288
  "learning_rate": 2.696719771798648e-05,
289
+ "loss": 0.539,
290
  "step": 3900
291
  },
292
  {
293
  "epoch": 0.8297033810412777,
294
+ "grad_norm": 1.0912768840789795,
295
  "learning_rate": 2.1650226069374525e-05,
296
+ "loss": 0.5648,
297
  "step": 4000
298
  },
299
  {
300
  "epoch": 0.8504459655673097,
301
+ "grad_norm": 0.7788926362991333,
302
  "learning_rate": 1.6853038769745467e-05,
303
+ "loss": 0.5894,
304
  "step": 4100
305
  },
306
  {
307
  "epoch": 0.8711885500933416,
308
+ "grad_norm": 0.9643399119377136,
309
  "learning_rate": 1.2607461091239803e-05,
310
+ "loss": 0.6307,
311
  "step": 4200
312
  },
313
  {
314
  "epoch": 0.8919311346193736,
315
+ "grad_norm": 0.5643659234046936,
316
  "learning_rate": 8.941658843648237e-06,
317
+ "loss": 0.6353,
318
  "step": 4300
319
  },
320
  {
321
  "epoch": 0.9126737191454055,
322
+ "grad_norm": 1.2083373069763184,
323
  "learning_rate": 5.879951518134263e-06,
324
+ "loss": 0.7055,
325
  "step": 4400
326
  },
327
  {
328
  "epoch": 0.9334163036714375,
329
+ "grad_norm": 0.8048790097236633,
330
  "learning_rate": 3.4426509480207646e-06,
331
+ "loss": 0.6827,
332
  "step": 4500
333
  },
334
  {
335
  "epoch": 0.9541588881974694,
336
+ "grad_norm": 1.1787850856781006,
337
  "learning_rate": 1.6459265569902738e-06,
338
+ "loss": 0.6791,
339
  "step": 4600
340
  },
341
  {
342
  "epoch": 0.9749014727235014,
343
+ "grad_norm": 0.9065990447998047,
344
  "learning_rate": 5.016980886622169e-07,
345
+ "loss": 0.7151,
346
  "step": 4700
347
  },
348
  {
349
  "epoch": 0.9956440572495333,
350
+ "grad_norm": 1.174241542816162,
351
  "learning_rate": 1.755652919597228e-08,
352
+ "loss": 0.6988,
353
  "step": 4800
354
  }
355
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:621a556d1da2455eed4e6cd8bccd4f42a7c4222b1175c833beedf9b39ece534d
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1490bdd6543d7012eebfc68c6a93d950ca298ac26298e504a163253dfb7c948
3
  size 5560