ashanhr commited on
Commit
5443140
·
verified ·
1 Parent(s): c586717

Training in progress, step 15700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8d9f8936035594d661114b781a4eb14f9085f107b414f8cdb04fae2af1f5af1
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebec8683b94d94fc8d9612c1c65b13eb71fe6bb5bb5cae5646d15330e2237b15
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4e230029c183533b0b9fe6b84165574a15e9c485917cb915f98f6e96e5f4e98
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59bf8a8143b87e68fd71c8a270a03cfa582fc1bfdd2b29a7d15ff8d8b4213d41
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e7906e6327d11d85d338ea4339351ae9c0034f62b94b4da68358eaacc6ab527
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da9ed2936cadc331d13cb99e9dd4c8e3574daaac258fa92af0538fb2ee9612b5
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9b0b65f0420889adb98f67d37d7d10f35fcca76dcf8679e43243891e5dcf6a5
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a6d63bd7c4e166ca59e260fbbde05182634cc6b1f0a4d12136bc73308ef55e3
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d310350e2d89a95e16a26ae72f1a3c391fc38d88fcd18e2b9a8d9e4038006a55
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726650f2459dfa6278e31becc2531b3a3d91514d7f35b02bbac401fc857ec409
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.524942117448958,
5
  "eval_steps": 100,
6
- "global_step": 15500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2487,6 +2487,38 @@
2487
  "eval_samples_per_second": 26.833,
2488
  "eval_steps_per_second": 3.355,
2489
  "step": 15500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2490
  }
2491
  ],
2492
  "logging_steps": 100,
@@ -2494,7 +2526,7 @@
2494
  "num_input_tokens_seen": 0,
2495
  "num_train_epochs": 30,
2496
  "save_steps": 100,
2497
- "total_flos": 1.6980516265683495e+20,
2498
  "train_batch_size": 8,
2499
  "trial_name": null,
2500
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.609134918964428,
5
  "eval_steps": 100,
6
+ "global_step": 15700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2487
  "eval_samples_per_second": 26.833,
2488
  "eval_steps_per_second": 3.355,
2489
  "step": 15500
2490
+ },
2491
+ {
2492
+ "epoch": 6.57,
2493
+ "grad_norm": 5.287090301513672,
2494
+ "learning_rate": 3.933639575971731e-05,
2495
+ "loss": 1.6601,
2496
+ "step": 15600
2497
+ },
2498
+ {
2499
+ "epoch": 6.57,
2500
+ "eval_cer": 0.4719262054671307,
2501
+ "eval_loss": 2.0440595149993896,
2502
+ "eval_runtime": 377.8824,
2503
+ "eval_samples_per_second": 25.082,
2504
+ "eval_steps_per_second": 3.136,
2505
+ "step": 15600
2506
+ },
2507
+ {
2508
+ "epoch": 6.61,
2509
+ "grad_norm": 2.0426106452941895,
2510
+ "learning_rate": 3.926572438162545e-05,
2511
+ "loss": 1.9773,
2512
+ "step": 15700
2513
+ },
2514
+ {
2515
+ "epoch": 6.61,
2516
+ "eval_cer": 0.4639239763794924,
2517
+ "eval_loss": 2.3498713970184326,
2518
+ "eval_runtime": 365.2548,
2519
+ "eval_samples_per_second": 25.949,
2520
+ "eval_steps_per_second": 3.244,
2521
+ "step": 15700
2522
  }
2523
  ],
2524
  "logging_steps": 100,
 
2526
  "num_input_tokens_seen": 0,
2527
  "num_train_epochs": 30,
2528
  "save_steps": 100,
2529
+ "total_flos": 1.7201807515766456e+20,
2530
  "train_batch_size": 8,
2531
  "trial_name": null,
2532
  "trial_params": null