rakhman-llm commited on
Commit
3128e81
·
verified ·
1 Parent(s): 82ac1fb

Training in progress, step 15500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e05637d9fe00567351aebe30b8907548391539066a69466b08d62fb0de2c8b6a
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47ec1fd68430fdacb80810ecd09adfa63b256edd752390b582d7134186c11fe5
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd2dde3b7d6cb9a958c80e4da86c9ac7e84d7b0aad33d337c26e27372676e0e8
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dedc049b8b109e2a1e1d8ca3aa0a4db96cc41f55ad87c5be1b4f1d3b32922bb
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ac6d446adeddd129c374743386b9fda911e1104accc0a9ad12d81db0a9913ff
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc7b29b924630649498d282773659d36f4c79fef1453c3983d39d7e7e53066c4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e468fb4a523a6bf7dace3eac71fcc8bc1ed6b95078548573228e864e9505bcd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0adcdf1185fa7d1b89a354568f52beda1398968ea15296ac3bc948fc9b42fcfd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.08158940076828003,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-12500",
4
- "epoch": 2.4,
5
  "eval_steps": 500,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2347,6 +2347,84 @@
2347
  "eval_samples_per_second": 17.145,
2348
  "eval_steps_per_second": 2.143,
2349
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2350
  }
2351
  ],
2352
  "logging_steps": 50,
@@ -2366,7 +2444,7 @@
2366
  "attributes": {}
2367
  }
2368
  },
2369
- "total_flos": 7.30749468672e+16,
2370
  "train_batch_size": 8,
2371
  "trial_name": null,
2372
  "trial_params": null
 
1
  {
2
  "best_metric": 0.08158940076828003,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-12500",
4
+ "epoch": 2.48,
5
  "eval_steps": 500,
6
+ "global_step": 15500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2347
  "eval_samples_per_second": 17.145,
2348
  "eval_steps_per_second": 2.143,
2349
  "step": 15000
2350
+ },
2351
+ {
2352
+ "epoch": 2.408,
2353
+ "grad_norm": 7706.4375,
2354
+ "learning_rate": 5.92e-06,
2355
+ "loss": 0.0513,
2356
+ "step": 15050
2357
+ },
2358
+ {
2359
+ "epoch": 2.416,
2360
+ "grad_norm": 6188.396484375,
2361
+ "learning_rate": 5.84e-06,
2362
+ "loss": 0.0511,
2363
+ "step": 15100
2364
+ },
2365
+ {
2366
+ "epoch": 2.424,
2367
+ "grad_norm": 6621.79345703125,
2368
+ "learning_rate": 5.76e-06,
2369
+ "loss": 0.0506,
2370
+ "step": 15150
2371
+ },
2372
+ {
2373
+ "epoch": 2.432,
2374
+ "grad_norm": 5284.65185546875,
2375
+ "learning_rate": 5.68e-06,
2376
+ "loss": 0.0486,
2377
+ "step": 15200
2378
+ },
2379
+ {
2380
+ "epoch": 2.44,
2381
+ "grad_norm": 6653.84716796875,
2382
+ "learning_rate": 5.600000000000001e-06,
2383
+ "loss": 0.053,
2384
+ "step": 15250
2385
+ },
2386
+ {
2387
+ "epoch": 2.448,
2388
+ "grad_norm": 6338.93505859375,
2389
+ "learning_rate": 5.52e-06,
2390
+ "loss": 0.0517,
2391
+ "step": 15300
2392
+ },
2393
+ {
2394
+ "epoch": 2.456,
2395
+ "grad_norm": 6020.87548828125,
2396
+ "learning_rate": 5.4400000000000004e-06,
2397
+ "loss": 0.0524,
2398
+ "step": 15350
2399
+ },
2400
+ {
2401
+ "epoch": 2.464,
2402
+ "grad_norm": 7275.64697265625,
2403
+ "learning_rate": 5.36e-06,
2404
+ "loss": 0.0516,
2405
+ "step": 15400
2406
+ },
2407
+ {
2408
+ "epoch": 2.472,
2409
+ "grad_norm": 5086.87744140625,
2410
+ "learning_rate": 5.279999999999999e-06,
2411
+ "loss": 0.0514,
2412
+ "step": 15450
2413
+ },
2414
+ {
2415
+ "epoch": 2.48,
2416
+ "grad_norm": 4989.05078125,
2417
+ "learning_rate": 5.2e-06,
2418
+ "loss": 0.0526,
2419
+ "step": 15500
2420
+ },
2421
+ {
2422
+ "epoch": 2.48,
2423
+ "eval_loss": 0.08169461041688919,
2424
+ "eval_runtime": 116.7302,
2425
+ "eval_samples_per_second": 17.134,
2426
+ "eval_steps_per_second": 2.142,
2427
+ "step": 15500
2428
  }
2429
  ],
2430
  "logging_steps": 50,
 
2444
  "attributes": {}
2445
  }
2446
  },
2447
+ "total_flos": 7.551077842944e+16,
2448
  "train_batch_size": 8,
2449
  "trial_name": null,
2450
  "trial_params": null