lesso16 committed on
Commit
3a31ef6
·
verified ·
1 Parent(s): 1e661c4

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be5216629ea69f9fb4c2ebc6d91de6cdb39bf7b47bfe3c2a4614c14c6392ac46
3
  size 2373352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06519740f41f0f97d141a51cf88e3b990f6b4f56e300f52d8f44e7672b1bc600
3
  size 2373352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:527549a60a00a7ba0dd154d42fcaf1b952119a73cc41120b662d298de646af32
3
  size 4899962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11c2aa02c3cd70fa400e08b77e41df284e6d1a6be911d6f2c2816beefe521972
3
  size 4899962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8a7f3dff33da40653102d2b55173b54aa49a8e44d93fe6c8855bd0382b20f1c
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2e607c8dd8f0a3c6f93906d571c6ecc9de742cab47229951f8ad32026900fc
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:930c624ce7c523ea80e34eda4c97c8070b2558cdda7cf31f871a20b9da39f272
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a767c4c5b1aaa59f0fd0570fdb5104ecc494d72638ae3ce76b6ca2641c80f69
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 11.538701057434082,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8500",
4
- "epoch": 2.443406395975566,
5
  "eval_steps": 500,
6
- "global_step": 8500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1341,6 +1341,84 @@
1341
  "eval_samples_per_second": 64.56,
1342
  "eval_steps_per_second": 16.143,
1343
  "step": 8500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1344
  }
1345
  ],
1346
  "logging_steps": 50,
@@ -1355,7 +1433,7 @@
1355
  "early_stopping_threshold": 0.0
1356
  },
1357
  "attributes": {
1358
- "early_stopping_patience_counter": 0
1359
  }
1360
  },
1361
  "TrainerControl": {
@@ -1364,12 +1442,12 @@
1364
  "should_evaluate": false,
1365
  "should_log": false,
1366
  "should_save": true,
1367
- "should_training_stop": false
1368
  },
1369
  "attributes": {}
1370
  }
1371
  },
1372
- "total_flos": 9372731424768000.0,
1373
  "train_batch_size": 4,
1374
  "trial_name": null,
1375
  "trial_params": null
 
1
  {
2
  "best_metric": 11.538701057434082,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8500",
4
+ "epoch": 2.5871361839741285,
5
  "eval_steps": 500,
6
+ "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1341
  "eval_samples_per_second": 64.56,
1342
  "eval_steps_per_second": 16.143,
1343
  "step": 8500
1344
+ },
1345
+ {
1346
+ "epoch": 2.457779374775422,
1347
+ "grad_norm": 0.24602361023426056,
1348
+ "learning_rate": 1.3596438587477278e-06,
1349
+ "loss": 11.5591,
1350
+ "step": 8550
1351
+ },
1352
+ {
1353
+ "epoch": 2.4721523535752783,
1354
+ "grad_norm": 0.21134650707244873,
1355
+ "learning_rate": 1.074760864438935e-06,
1356
+ "loss": 11.5513,
1357
+ "step": 8600
1358
+ },
1359
+ {
1360
+ "epoch": 2.486525332375135,
1361
+ "grad_norm": 0.22006399929523468,
1362
+ "learning_rate": 8.231843471170714e-07,
1363
+ "loss": 11.5595,
1364
+ "step": 8650
1365
+ },
1366
+ {
1367
+ "epoch": 2.500898311174991,
1368
+ "grad_norm": 0.20436766743659973,
1369
+ "learning_rate": 6.049926711379494e-07,
1370
+ "loss": 11.558,
1371
+ "step": 8700
1372
+ },
1373
+ {
1374
+ "epoch": 2.515271289974847,
1375
+ "grad_norm": 0.21755096316337585,
1376
+ "learning_rate": 4.2025380170877514e-07,
1377
+ "loss": 11.5426,
1378
+ "step": 8750
1379
+ },
1380
+ {
1381
+ "epoch": 2.5296442687747036,
1382
+ "grad_norm": 0.1975020170211792,
1383
+ "learning_rate": 2.690252837175113e-07,
1384
+ "loss": 11.5594,
1385
+ "step": 8800
1386
+ },
1387
+ {
1388
+ "epoch": 2.5440172475745597,
1389
+ "grad_norm": 0.23776116967201233,
1390
+ "learning_rate": 1.5135422380793571e-07,
1391
+ "loss": 11.5562,
1392
+ "step": 8850
1393
+ },
1394
+ {
1395
+ "epoch": 2.5583902263744163,
1396
+ "grad_norm": 0.23030632734298706,
1397
+ "learning_rate": 6.727727570635444e-08,
1398
+ "loss": 11.5548,
1399
+ "step": 8900
1400
+ },
1401
+ {
1402
+ "epoch": 2.5727632051742724,
1403
+ "grad_norm": 0.2245226800441742,
1404
+ "learning_rate": 1.6820628804155024e-08,
1405
+ "loss": 11.5547,
1406
+ "step": 8950
1407
+ },
1408
+ {
1409
+ "epoch": 2.5871361839741285,
1410
+ "grad_norm": 0.2259846031665802,
1411
+ "learning_rate": 0.0,
1412
+ "loss": 11.5524,
1413
+ "step": 9000
1414
+ },
1415
+ {
1416
+ "epoch": 2.5871361839741285,
1417
+ "eval_loss": 11.53870677947998,
1418
+ "eval_runtime": 91.574,
1419
+ "eval_samples_per_second": 63.981,
1420
+ "eval_steps_per_second": 15.998,
1421
+ "step": 9000
1422
  }
1423
  ],
1424
  "logging_steps": 50,
 
1433
  "early_stopping_threshold": 0.0
1434
  },
1435
  "attributes": {
1436
+ "early_stopping_patience_counter": 1
1437
  }
1438
  },
1439
  "TrainerControl": {
 
1442
  "should_evaluate": false,
1443
  "should_log": false,
1444
  "should_save": true,
1445
+ "should_training_stop": true
1446
  },
1447
  "attributes": {}
1448
  }
1449
  },
1450
+ "total_flos": 9924177936384000.0,
1451
  "train_batch_size": 4,
1452
  "trial_name": null,
1453
  "trial_params": null