xinyanghuang commited on
Commit
2d966ee
·
verified ·
1 Parent(s): ef1258e

Delete checkpoint-7000

Browse files
checkpoint-7000/checkpoint-7000/adapter_config.json DELETED
@@ -1,18 +0,0 @@
1
- {
2
- "base_model_name_or_path": "Qwen/Qwen-7B-Chat",
3
- "bias": "none",
4
- "fan_in_fan_out": false,
5
- "inference_mode": false,
6
- "init_lora_weights": true,
7
- "lora_alpha": 32,
8
- "lora_dropout": 0.1,
9
- "modules_to_save": null,
10
- "peft_type": "LORA",
11
- "r": 128,
12
- "target_modules": [
13
- "c_attn",
14
- "w1",
15
- "w2"
16
- ],
17
- "task_type": "CAUSAL_LM"
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-7000/checkpoint-7000/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec69714a183fca4b61032438a74423a558d21b16af0507c38bd1d30c8717c349
3
- size 763431181
 
 
 
 
checkpoint-7000/checkpoint-7000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b168e87e12d24dbef145953697ab52eecbeee73dc09fd31202ba7512f5e45c1e
3
- size 1612910405
 
 
 
 
checkpoint-7000/checkpoint-7000/other_params.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a23f357e944105f9e1814f0ddc317ec25a7e2981a02503c6b29b32430f81784
3
- size 43009725
 
 
 
 
checkpoint-7000/checkpoint-7000/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d22e1c1f9dcc565cf6884c3e8f5ae98131cd3919a7b401d2958b6d5d72680fce
3
- size 17655
 
 
 
 
checkpoint-7000/checkpoint-7000/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ee1a6d444a26b3153d226442177110c8c5949bfe7a582d1583081613b56cffe
3
- size 17655
 
 
 
 
checkpoint-7000/checkpoint-7000/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:58379c3e95dc9fa5cbd8dadccaf699da8720e1613e0f1f9268ee8898218a1a57
3
- size 17655
 
 
 
 
checkpoint-7000/checkpoint-7000/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:26340e115f8e048c2e52d34b4766dba594848350c7915ba3dd2ea82564ace1c5
3
- size 17655
 
 
 
 
checkpoint-7000/checkpoint-7000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c17cec678cdfc08e2ee6daa1c23cfc32d3a4ee58795129200a07f8092a63bfd
3
- size 627
 
 
 
 
checkpoint-7000/checkpoint-7000/trainer_state.json DELETED
@@ -1,523 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.3878975950349108,
5
- "eval_steps": 500,
6
- "global_step": 7000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.00554139421478444,
13
- "grad_norm": 0.7744311690330505,
14
- "learning_rate": 2.9916879086778235e-05,
15
- "loss": 2.174,
16
- "step": 100
17
- },
18
- {
19
- "epoch": 0.01108278842956888,
20
- "grad_norm": 1.1890419721603394,
21
- "learning_rate": 2.983375817355647e-05,
22
- "loss": 1.7489,
23
- "step": 200
24
- },
25
- {
26
- "epoch": 0.01662418264435332,
27
- "grad_norm": 2.874730348587036,
28
- "learning_rate": 2.97506372603347e-05,
29
- "loss": 1.6593,
30
- "step": 300
31
- },
32
- {
33
- "epoch": 0.02216557685913776,
34
- "grad_norm": 0.4631105363368988,
35
- "learning_rate": 2.9667516347112933e-05,
36
- "loss": 1.5928,
37
- "step": 400
38
- },
39
- {
40
- "epoch": 0.0277069710739222,
41
- "grad_norm": 1.1168841123580933,
42
- "learning_rate": 2.9584395433891167e-05,
43
- "loss": 1.5494,
44
- "step": 500
45
- },
46
- {
47
- "epoch": 0.03324836528870664,
48
- "grad_norm": 0.8050689101219177,
49
- "learning_rate": 2.95012745206694e-05,
50
- "loss": 1.5041,
51
- "step": 600
52
- },
53
- {
54
- "epoch": 0.038789759503491075,
55
- "grad_norm": 2.871495008468628,
56
- "learning_rate": 2.9418153607447632e-05,
57
- "loss": 1.4829,
58
- "step": 700
59
- },
60
- {
61
- "epoch": 0.04433115371827552,
62
- "grad_norm": 0.6971802711486816,
63
- "learning_rate": 2.9335032694225866e-05,
64
- "loss": 1.4492,
65
- "step": 800
66
- },
67
- {
68
- "epoch": 0.04987254793305996,
69
- "grad_norm": 1.1272006034851074,
70
- "learning_rate": 2.92519117810041e-05,
71
- "loss": 1.45,
72
- "step": 900
73
- },
74
- {
75
- "epoch": 0.0554139421478444,
76
- "grad_norm": 0.9110287427902222,
77
- "learning_rate": 2.9168790867782334e-05,
78
- "loss": 1.4553,
79
- "step": 1000
80
- },
81
- {
82
- "epoch": 0.060955336362628836,
83
- "grad_norm": 2.7382473945617676,
84
- "learning_rate": 2.9085669954560568e-05,
85
- "loss": 1.4244,
86
- "step": 1100
87
- },
88
- {
89
- "epoch": 0.06649673057741327,
90
- "grad_norm": 2.678687334060669,
91
- "learning_rate": 2.9002549041338802e-05,
92
- "loss": 1.4399,
93
- "step": 1200
94
- },
95
- {
96
- "epoch": 0.07203812479219772,
97
- "grad_norm": 0.8868110775947571,
98
- "learning_rate": 2.8919428128117036e-05,
99
- "loss": 1.408,
100
- "step": 1300
101
- },
102
- {
103
- "epoch": 0.07757951900698215,
104
- "grad_norm": 0.7350853085517883,
105
- "learning_rate": 2.883630721489527e-05,
106
- "loss": 1.4128,
107
- "step": 1400
108
- },
109
- {
110
- "epoch": 0.0831209132217666,
111
- "grad_norm": 2.349001169204712,
112
- "learning_rate": 2.8753186301673504e-05,
113
- "loss": 1.4154,
114
- "step": 1500
115
- },
116
- {
117
- "epoch": 0.08866230743655104,
118
- "grad_norm": 2.588608503341675,
119
- "learning_rate": 2.8670065388451735e-05,
120
- "loss": 1.4102,
121
- "step": 1600
122
- },
123
- {
124
- "epoch": 0.09420370165133547,
125
- "grad_norm": 3.461966037750244,
126
- "learning_rate": 2.858694447522997e-05,
127
- "loss": 1.3899,
128
- "step": 1700
129
- },
130
- {
131
- "epoch": 0.09974509586611992,
132
- "grad_norm": 0.9384596943855286,
133
- "learning_rate": 2.8503823562008203e-05,
134
- "loss": 1.3946,
135
- "step": 1800
136
- },
137
- {
138
- "epoch": 0.10528649008090435,
139
- "grad_norm": 2.6963839530944824,
140
- "learning_rate": 2.8420702648786437e-05,
141
- "loss": 1.3625,
142
- "step": 1900
143
- },
144
- {
145
- "epoch": 0.1108278842956888,
146
- "grad_norm": 1.2156896591186523,
147
- "learning_rate": 2.8337581735564668e-05,
148
- "loss": 1.3998,
149
- "step": 2000
150
- },
151
- {
152
- "epoch": 0.11636927851047324,
153
- "grad_norm": 0.9613581299781799,
154
- "learning_rate": 2.82544608223429e-05,
155
- "loss": 1.3672,
156
- "step": 2100
157
- },
158
- {
159
- "epoch": 0.12191067272525767,
160
- "grad_norm": 0.8994789719581604,
161
- "learning_rate": 2.8171339909121136e-05,
162
- "loss": 1.3587,
163
- "step": 2200
164
- },
165
- {
166
- "epoch": 0.1274520669400421,
167
- "grad_norm": 1.7463105916976929,
168
- "learning_rate": 2.808821899589937e-05,
169
- "loss": 1.3688,
170
- "step": 2300
171
- },
172
- {
173
- "epoch": 0.13299346115482655,
174
- "grad_norm": 0.7639077305793762,
175
- "learning_rate": 2.8005098082677604e-05,
176
- "loss": 1.3798,
177
- "step": 2400
178
- },
179
- {
180
- "epoch": 0.138534855369611,
181
- "grad_norm": 1.313306450843811,
182
- "learning_rate": 2.7921977169455834e-05,
183
- "loss": 1.3713,
184
- "step": 2500
185
- },
186
- {
187
- "epoch": 0.14407624958439544,
188
- "grad_norm": 1.796030044555664,
189
- "learning_rate": 2.783885625623407e-05,
190
- "loss": 1.3589,
191
- "step": 2600
192
- },
193
- {
194
- "epoch": 0.14961764379917988,
195
- "grad_norm": 0.40334078669548035,
196
- "learning_rate": 2.7755735343012302e-05,
197
- "loss": 1.3527,
198
- "step": 2700
199
- },
200
- {
201
- "epoch": 0.1551590380139643,
202
- "grad_norm": 0.7727888822555542,
203
- "learning_rate": 2.7672614429790536e-05,
204
- "loss": 1.338,
205
- "step": 2800
206
- },
207
- {
208
- "epoch": 0.16070043222874875,
209
- "grad_norm": 0.8815018534660339,
210
- "learning_rate": 2.7589493516568767e-05,
211
- "loss": 1.3638,
212
- "step": 2900
213
- },
214
- {
215
- "epoch": 0.1662418264435332,
216
- "grad_norm": 1.1675957441329956,
217
- "learning_rate": 2.7506372603347e-05,
218
- "loss": 1.3462,
219
- "step": 3000
220
- },
221
- {
222
- "epoch": 0.17178322065831764,
223
- "grad_norm": 0.9449867606163025,
224
- "learning_rate": 2.7423251690125235e-05,
225
- "loss": 1.3286,
226
- "step": 3100
227
- },
228
- {
229
- "epoch": 0.17732461487310208,
230
- "grad_norm": 0.4934576749801636,
231
- "learning_rate": 2.734013077690347e-05,
232
- "loss": 1.3326,
233
- "step": 3200
234
- },
235
- {
236
- "epoch": 0.1828660090878865,
237
- "grad_norm": 1.0242582559585571,
238
- "learning_rate": 2.7257009863681703e-05,
239
- "loss": 1.3423,
240
- "step": 3300
241
- },
242
- {
243
- "epoch": 0.18840740330267094,
244
- "grad_norm": 1.253219723701477,
245
- "learning_rate": 2.7173888950459934e-05,
246
- "loss": 1.3473,
247
- "step": 3400
248
- },
249
- {
250
- "epoch": 0.1939487975174554,
251
- "grad_norm": 2.4441049098968506,
252
- "learning_rate": 2.7090768037238168e-05,
253
- "loss": 1.3321,
254
- "step": 3500
255
- },
256
- {
257
- "epoch": 0.19949019173223984,
258
- "grad_norm": 1.119807481765747,
259
- "learning_rate": 2.7007647124016405e-05,
260
- "loss": 1.3282,
261
- "step": 3600
262
- },
263
- {
264
- "epoch": 0.20503158594702428,
265
- "grad_norm": 0.5911545157432556,
266
- "learning_rate": 2.692452621079464e-05,
267
- "loss": 1.329,
268
- "step": 3700
269
- },
270
- {
271
- "epoch": 0.2105729801618087,
272
- "grad_norm": 1.959672451019287,
273
- "learning_rate": 2.684140529757287e-05,
274
- "loss": 1.3322,
275
- "step": 3800
276
- },
277
- {
278
- "epoch": 0.21611437437659314,
279
- "grad_norm": 0.9169633984565735,
280
- "learning_rate": 2.6758284384351104e-05,
281
- "loss": 1.3248,
282
- "step": 3900
283
- },
284
- {
285
- "epoch": 0.2216557685913776,
286
- "grad_norm": 0.8207225203514099,
287
- "learning_rate": 2.6675163471129338e-05,
288
- "loss": 1.3303,
289
- "step": 4000
290
- },
291
- {
292
- "epoch": 0.22719716280616203,
293
- "grad_norm": 1.0271358489990234,
294
- "learning_rate": 2.6592042557907572e-05,
295
- "loss": 1.3231,
296
- "step": 4100
297
- },
298
- {
299
- "epoch": 0.23273855702094648,
300
- "grad_norm": 1.2483460903167725,
301
- "learning_rate": 2.6508921644685806e-05,
302
- "loss": 1.3234,
303
- "step": 4200
304
- },
305
- {
306
- "epoch": 0.2382799512357309,
307
- "grad_norm": 0.8641893863677979,
308
- "learning_rate": 2.6425800731464037e-05,
309
- "loss": 1.2856,
310
- "step": 4300
311
- },
312
- {
313
- "epoch": 0.24382134545051534,
314
- "grad_norm": 1.6275527477264404,
315
- "learning_rate": 2.634267981824227e-05,
316
- "loss": 1.3039,
317
- "step": 4400
318
- },
319
- {
320
- "epoch": 0.2493627396652998,
321
- "grad_norm": 2.3479559421539307,
322
- "learning_rate": 2.6259558905020505e-05,
323
- "loss": 1.331,
324
- "step": 4500
325
- },
326
- {
327
- "epoch": 0.2549041338800842,
328
- "grad_norm": 2.3728585243225098,
329
- "learning_rate": 2.617643799179874e-05,
330
- "loss": 1.3115,
331
- "step": 4600
332
- },
333
- {
334
- "epoch": 0.26044552809486865,
335
- "grad_norm": 1.7304351329803467,
336
- "learning_rate": 2.609331707857697e-05,
337
- "loss": 1.32,
338
- "step": 4700
339
- },
340
- {
341
- "epoch": 0.2659869223096531,
342
- "grad_norm": 3.1707870960235596,
343
- "learning_rate": 2.6010196165355203e-05,
344
- "loss": 1.3166,
345
- "step": 4800
346
- },
347
- {
348
- "epoch": 0.27152831652443754,
349
- "grad_norm": 0.6431360840797424,
350
- "learning_rate": 2.5927075252133437e-05,
351
- "loss": 1.3117,
352
- "step": 4900
353
- },
354
- {
355
- "epoch": 0.277069710739222,
356
- "grad_norm": 0.7438284754753113,
357
- "learning_rate": 2.584395433891167e-05,
358
- "loss": 1.3049,
359
- "step": 5000
360
- },
361
- {
362
- "epoch": 0.28261110495400643,
363
- "grad_norm": 0.8397496342658997,
364
- "learning_rate": 2.5760833425689905e-05,
365
- "loss": 1.3013,
366
- "step": 5100
367
- },
368
- {
369
- "epoch": 0.2881524991687909,
370
- "grad_norm": 1.3787566423416138,
371
- "learning_rate": 2.5677712512468136e-05,
372
- "loss": 1.32,
373
- "step": 5200
374
- },
375
- {
376
- "epoch": 0.2936938933835753,
377
- "grad_norm": 0.6342838406562805,
378
- "learning_rate": 2.559459159924637e-05,
379
- "loss": 1.3265,
380
- "step": 5300
381
- },
382
- {
383
- "epoch": 0.29923528759835977,
384
- "grad_norm": 1.0924850702285767,
385
- "learning_rate": 2.5511470686024604e-05,
386
- "loss": 1.3224,
387
- "step": 5400
388
- },
389
- {
390
- "epoch": 0.3047766818131442,
391
- "grad_norm": 3.104914665222168,
392
- "learning_rate": 2.5428349772802838e-05,
393
- "loss": 1.2897,
394
- "step": 5500
395
- },
396
- {
397
- "epoch": 0.3103180760279286,
398
- "grad_norm": 1.219292163848877,
399
- "learning_rate": 2.534522885958107e-05,
400
- "loss": 1.3013,
401
- "step": 5600
402
- },
403
- {
404
- "epoch": 0.31585947024271305,
405
- "grad_norm": 0.743425726890564,
406
- "learning_rate": 2.5262107946359303e-05,
407
- "loss": 1.3069,
408
- "step": 5700
409
- },
410
- {
411
- "epoch": 0.3214008644574975,
412
- "grad_norm": 3.0548131465911865,
413
- "learning_rate": 2.5178987033137537e-05,
414
- "loss": 1.3006,
415
- "step": 5800
416
- },
417
- {
418
- "epoch": 0.32694225867228194,
419
- "grad_norm": 2.6230878829956055,
420
- "learning_rate": 2.5095866119915774e-05,
421
- "loss": 1.2954,
422
- "step": 5900
423
- },
424
- {
425
- "epoch": 0.3324836528870664,
426
- "grad_norm": 1.5894700288772583,
427
- "learning_rate": 2.5012745206694005e-05,
428
- "loss": 1.2918,
429
- "step": 6000
430
- },
431
- {
432
- "epoch": 0.33802504710185083,
433
- "grad_norm": 2.298469066619873,
434
- "learning_rate": 2.492962429347224e-05,
435
- "loss": 1.2974,
436
- "step": 6100
437
- },
438
- {
439
- "epoch": 0.3435664413166353,
440
- "grad_norm": 2.3570539951324463,
441
- "learning_rate": 2.4846503380250473e-05,
442
- "loss": 1.2943,
443
- "step": 6200
444
- },
445
- {
446
- "epoch": 0.3491078355314197,
447
- "grad_norm": 0.7656165361404419,
448
- "learning_rate": 2.4763382467028707e-05,
449
- "loss": 1.2774,
450
- "step": 6300
451
- },
452
- {
453
- "epoch": 0.35464922974620416,
454
- "grad_norm": 0.9744777679443359,
455
- "learning_rate": 2.468026155380694e-05,
456
- "loss": 1.3062,
457
- "step": 6400
458
- },
459
- {
460
- "epoch": 0.3601906239609886,
461
- "grad_norm": 2.929172992706299,
462
- "learning_rate": 2.459714064058517e-05,
463
- "loss": 1.2871,
464
- "step": 6500
465
- },
466
- {
467
- "epoch": 0.365732018175773,
468
- "grad_norm": 1.1587573289871216,
469
- "learning_rate": 2.4514019727363405e-05,
470
- "loss": 1.3037,
471
- "step": 6600
472
- },
473
- {
474
- "epoch": 0.37127341239055744,
475
- "grad_norm": 1.055335521697998,
476
- "learning_rate": 2.443089881414164e-05,
477
- "loss": 1.2823,
478
- "step": 6700
479
- },
480
- {
481
- "epoch": 0.3768148066053419,
482
- "grad_norm": 2.3376619815826416,
483
- "learning_rate": 2.4347777900919873e-05,
484
- "loss": 1.2854,
485
- "step": 6800
486
- },
487
- {
488
- "epoch": 0.38235620082012634,
489
- "grad_norm": 0.94229656457901,
490
- "learning_rate": 2.4264656987698104e-05,
491
- "loss": 1.3007,
492
- "step": 6900
493
- },
494
- {
495
- "epoch": 0.3878975950349108,
496
- "grad_norm": 1.2927848100662231,
497
- "learning_rate": 2.4181536074476338e-05,
498
- "loss": 1.278,
499
- "step": 7000
500
- }
501
- ],
502
- "logging_steps": 100,
503
- "max_steps": 36092,
504
- "num_input_tokens_seen": 0,
505
- "num_train_epochs": 2,
506
- "save_steps": 1000,
507
- "stateful_callbacks": {
508
- "TrainerControl": {
509
- "args": {
510
- "should_epoch_stop": false,
511
- "should_evaluate": false,
512
- "should_log": false,
513
- "should_save": true,
514
- "should_training_stop": false
515
- },
516
- "attributes": {}
517
- }
518
- },
519
- "total_flos": 0.0,
520
- "train_batch_size": 4,
521
- "trial_name": null,
522
- "trial_params": null
523
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-7000/checkpoint-7000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a725a323205e8f7faf8ef9c3151f198ebf1f0262412da00544cb74991be2dedd
3
- size 4667