ashanhr committed on
Commit
76ebff2
1 Parent(s): b28b371

Training in progress, step 12500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:990f79ce5d5fce5b54c543410da49311c8727e5393eaf8de5beb75ddea62f025
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e66cbff19b835e3a4efcd7383600618ff3c16476f0e6cd4eea579eddd1014bc
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:348350ce5be165c3b33d0652bff5953348b5181192242ec398df2e8b058bc2bb
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc5198d3d1c6fadab280de2b6cc23fa58362393be6da3bf2b3a8b16408384ea7
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fda19efc8188b89a824ef6b745bab7a4b2df0fcc62fc3ee12571612ab5443e8
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55d601ccae8eead4998f520c0fb37f262fdc4af9674f4598e11e05776a91d57d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac3b1bd46b3911f03359a3982a0c03f865d3787800599fe7d28e536bbc352b08
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0247765af733f816c82cb6abb0bd3fc92237d543333cca727d37eb273ded1c69
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fca80c682586ea565475c8cb2e3f5097ebcafda0408dbe21093035fc5d9ba92
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f487427df0aa8bfe421a7abbb408985bddad611dfcbca3d7c23ae5d19832455
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.093664491685961,
5
  "eval_steps": 100,
6
- "global_step": 12100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1943,6 +1943,70 @@
1943
  "eval_samples_per_second": 25.772,
1944
  "eval_steps_per_second": 3.222,
1945
  "step": 12100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1946
  }
1947
  ],
1948
  "logging_steps": 100,
@@ -1950,7 +2014,7 @@
1950
  "num_input_tokens_seen": 0,
1951
  "num_train_epochs": 30,
1952
  "save_steps": 100,
1953
- "total_flos": 1.3265676741232484e+20,
1954
  "train_batch_size": 8,
1955
  "trial_name": null,
1956
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.262050094716901,
5
  "eval_steps": 100,
6
+ "global_step": 12500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1943
  "eval_samples_per_second": 25.772,
1944
  "eval_steps_per_second": 3.222,
1945
  "step": 12100
1946
+ },
1947
+ {
1948
+ "epoch": 5.14,
1949
+ "grad_norm": 3.436452865600586,
1950
+ "learning_rate": 4.173851590106007e-05,
1951
+ "loss": 1.4205,
1952
+ "step": 12200
1953
+ },
1954
+ {
1955
+ "epoch": 5.14,
1956
+ "eval_cer": 0.48146582065621213,
1957
+ "eval_loss": 2.3205745220184326,
1958
+ "eval_runtime": 384.4394,
1959
+ "eval_samples_per_second": 24.654,
1960
+ "eval_steps_per_second": 3.082,
1961
+ "step": 12200
1962
+ },
1963
+ {
1964
+ "epoch": 5.18,
1965
+ "grad_norm": 1.96918523311615,
1966
+ "learning_rate": 4.16678445229682e-05,
1967
+ "loss": 1.3964,
1968
+ "step": 12300
1969
+ },
1970
+ {
1971
+ "epoch": 5.18,
1972
+ "eval_cer": 0.4692033944702984,
1973
+ "eval_loss": 2.9126245975494385,
1974
+ "eval_runtime": 358.8304,
1975
+ "eval_samples_per_second": 26.414,
1976
+ "eval_steps_per_second": 3.302,
1977
+ "step": 12300
1978
+ },
1979
+ {
1980
+ "epoch": 5.22,
1981
+ "grad_norm": 6.869575500488281,
1982
+ "learning_rate": 4.159717314487633e-05,
1983
+ "loss": 2.0721,
1984
+ "step": 12400
1985
+ },
1986
+ {
1987
+ "epoch": 5.22,
1988
+ "eval_cer": 0.4858066755308748,
1989
+ "eval_loss": 3.2426090240478516,
1990
+ "eval_runtime": 377.7703,
1991
+ "eval_samples_per_second": 25.089,
1992
+ "eval_steps_per_second": 3.137,
1993
+ "step": 12400
1994
+ },
1995
+ {
1996
+ "epoch": 5.26,
1997
+ "grad_norm": 2.511401414871216,
1998
+ "learning_rate": 4.1526501766784455e-05,
1999
+ "loss": 1.747,
2000
+ "step": 12500
2001
+ },
2002
+ {
2003
+ "epoch": 5.26,
2004
+ "eval_cer": 0.5091607680575652,
2005
+ "eval_loss": 3.057870864868164,
2006
+ "eval_runtime": 381.9144,
2007
+ "eval_samples_per_second": 24.817,
2008
+ "eval_steps_per_second": 3.103,
2009
+ "step": 12500
2010
  }
2011
  ],
2012
  "logging_steps": 100,
 
2014
  "num_input_tokens_seen": 0,
2015
  "num_train_epochs": 30,
2016
  "save_steps": 100,
2017
+ "total_flos": 1.3698617507925189e+20,
2018
  "train_batch_size": 8,
2019
  "trial_name": null,
2020
  "trial_params": null