MeedoSam commited on
Commit
d9f7088
1 Parent(s): 433f15a

Uploaded checkpoint-3000

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:529afbfd214e67f0cd6ed38c46a882f69bf7229384ab1df0ce60cb1f5e4f2965
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8153c1ec03df5594efef9482f36db85ac0366821320fba1ba8aae357d7d7188
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0cf9aa45dd205493bc09191810b39b693ad29080f038d14b0cea034dec265cb
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48f7a51201319a8a132484b82cfc2452f1693df833826e55a13467cec23fc927
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6953db975b289c1d4893316fe618b5891abd5920bad079a04f9bc032f0d6a4f
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a6ecb1fb2aa6a3c4d8bd6fdae6076f15725b87d99a6f3bffd86e06ab5951a4
3
+ size 14180
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78180a777fabc8bdc6e37d70a05529fbfe5bbeb093c49dd0124cc2bdeb32db78
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:770db92ac44ccb712216aece2abb8a41e68fd6d952c7ae7884e9032fb3cc3f81
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6266154930680661,
5
  "eval_steps": 100,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -307,6 +307,156 @@
307
  "eval_samples_per_second": 5.199,
308
  "eval_steps_per_second": 5.199,
309
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  }
311
  ],
312
  "logging_steps": 100,
@@ -314,7 +464,7 @@
314
  "num_input_tokens_seen": 0,
315
  "num_train_epochs": 2,
316
  "save_steps": 1000,
317
- "total_flos": 3.2204251987968e+16,
318
  "train_batch_size": 1,
319
  "trial_name": null,
320
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9399232396020991,
5
  "eval_steps": 100,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
307
  "eval_samples_per_second": 5.199,
308
  "eval_steps_per_second": 5.199,
309
  "step": 2000
310
+ },
311
+ {
312
+ "epoch": 0.66,
313
+ "grad_norm": 1.1315308809280396,
314
+ "learning_rate": 1.288888888888889e-05,
315
+ "loss": 0.0523,
316
+ "step": 2100
317
+ },
318
+ {
319
+ "epoch": 0.66,
320
+ "eval_loss": 0.019370460882782936,
321
+ "eval_runtime": 192.4625,
322
+ "eval_samples_per_second": 5.196,
323
+ "eval_steps_per_second": 5.196,
324
+ "step": 2100
325
+ },
326
+ {
327
+ "epoch": 0.69,
328
+ "grad_norm": 0.0026583941653370857,
329
+ "learning_rate": 1.2444444444444446e-05,
330
+ "loss": 0.0553,
331
+ "step": 2200
332
+ },
333
+ {
334
+ "epoch": 0.69,
335
+ "eval_loss": 0.01725778914988041,
336
+ "eval_runtime": 192.3261,
337
+ "eval_samples_per_second": 5.2,
338
+ "eval_steps_per_second": 5.2,
339
+ "step": 2200
340
+ },
341
+ {
342
+ "epoch": 0.72,
343
+ "grad_norm": 0.004530389327555895,
344
+ "learning_rate": 1.2e-05,
345
+ "loss": 0.0538,
346
+ "step": 2300
347
+ },
348
+ {
349
+ "epoch": 0.72,
350
+ "eval_loss": 0.022032534703612328,
351
+ "eval_runtime": 192.3926,
352
+ "eval_samples_per_second": 5.198,
353
+ "eval_steps_per_second": 5.198,
354
+ "step": 2300
355
+ },
356
+ {
357
+ "epoch": 0.75,
358
+ "grad_norm": 0.0024464745074510574,
359
+ "learning_rate": 1.1555555555555556e-05,
360
+ "loss": 0.0456,
361
+ "step": 2400
362
+ },
363
+ {
364
+ "epoch": 0.75,
365
+ "eval_loss": 0.030184298753738403,
366
+ "eval_runtime": 192.2035,
367
+ "eval_samples_per_second": 5.203,
368
+ "eval_steps_per_second": 5.203,
369
+ "step": 2400
370
+ },
371
+ {
372
+ "epoch": 0.78,
373
+ "grad_norm": 0.0019896693993359804,
374
+ "learning_rate": 1.1111111111111113e-05,
375
+ "loss": 0.0499,
376
+ "step": 2500
377
+ },
378
+ {
379
+ "epoch": 0.78,
380
+ "eval_loss": 0.01628696359694004,
381
+ "eval_runtime": 192.4383,
382
+ "eval_samples_per_second": 5.196,
383
+ "eval_steps_per_second": 5.196,
384
+ "step": 2500
385
+ },
386
+ {
387
+ "epoch": 0.81,
388
+ "grad_norm": 0.001884658238850534,
389
+ "learning_rate": 1.0666666666666667e-05,
390
+ "loss": 0.0493,
391
+ "step": 2600
392
+ },
393
+ {
394
+ "epoch": 0.81,
395
+ "eval_loss": 0.025292817503213882,
396
+ "eval_runtime": 192.2059,
397
+ "eval_samples_per_second": 5.203,
398
+ "eval_steps_per_second": 5.203,
399
+ "step": 2600
400
+ },
401
+ {
402
+ "epoch": 0.85,
403
+ "grad_norm": 0.04386008903384209,
404
+ "learning_rate": 1.0222222222222223e-05,
405
+ "loss": 0.0452,
406
+ "step": 2700
407
+ },
408
+ {
409
+ "epoch": 0.85,
410
+ "eval_loss": 0.022591974586248398,
411
+ "eval_runtime": 192.8163,
412
+ "eval_samples_per_second": 5.186,
413
+ "eval_steps_per_second": 5.186,
414
+ "step": 2700
415
+ },
416
+ {
417
+ "epoch": 0.88,
418
+ "grad_norm": 0.0019288246985524893,
419
+ "learning_rate": 9.777777777777779e-06,
420
+ "loss": 0.0438,
421
+ "step": 2800
422
+ },
423
+ {
424
+ "epoch": 0.88,
425
+ "eval_loss": 0.0226399265229702,
426
+ "eval_runtime": 192.6394,
427
+ "eval_samples_per_second": 5.191,
428
+ "eval_steps_per_second": 5.191,
429
+ "step": 2800
430
+ },
431
+ {
432
+ "epoch": 0.91,
433
+ "grad_norm": 0.056311335414648056,
434
+ "learning_rate": 9.333333333333334e-06,
435
+ "loss": 0.0579,
436
+ "step": 2900
437
+ },
438
+ {
439
+ "epoch": 0.91,
440
+ "eval_loss": 0.02137078531086445,
441
+ "eval_runtime": 192.3652,
442
+ "eval_samples_per_second": 5.198,
443
+ "eval_steps_per_second": 5.198,
444
+ "step": 2900
445
+ },
446
+ {
447
+ "epoch": 0.94,
448
+ "grad_norm": 0.003287563333287835,
449
+ "learning_rate": 8.888888888888888e-06,
450
+ "loss": 0.036,
451
+ "step": 3000
452
+ },
453
+ {
454
+ "epoch": 0.94,
455
+ "eval_loss": 0.015787875279784203,
456
+ "eval_runtime": 192.7182,
457
+ "eval_samples_per_second": 5.189,
458
+ "eval_steps_per_second": 5.189,
459
+ "step": 3000
460
  }
461
  ],
462
  "logging_steps": 100,
 
464
  "num_input_tokens_seen": 0,
465
  "num_train_epochs": 2,
466
  "save_steps": 1000,
467
+ "total_flos": 4.8306377981952e+16,
468
  "train_batch_size": 1,
469
  "trial_name": null,
470
  "trial_params": null