plip committed on
Commit
326b0df
1 Parent(s): bc6719d

Training in progress, step 260000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bcf163d4c24e7cc42f1a074e3cd2b5b764177304bfde57871568392f90420af
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406ff60c9fbc96f08fa188658defda9ad6b5e381b6cf799e20366bdf81afbd2a
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da2ade6d09e2d53692039b0c86af61bee174f1818f4f81e3d0671d8e99591ab1
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e0c29fd1ad93ebeb1e95f532ec268fb56806f7c232e1c94e4622474be46744f
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b91b2dfc358c1cb0a487d1d59c48a632d3a28a8d92f2e4877dc656b73845941
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b91b2dfc358c1cb0a487d1d59c48a632d3a28a8d92f2e4877dc656b73845941
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b91b2dfc358c1cb0a487d1d59c48a632d3a28a8d92f2e4877dc656b73845941
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b91b2dfc358c1cb0a487d1d59c48a632d3a28a8d92f2e4877dc656b73845941
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b91b2dfc358c1cb0a487d1d59c48a632d3a28a8d92f2e4877dc656b73845941
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b91b2dfc358c1cb0a487d1d59c48a632d3a28a8d92f2e4877dc656b73845941
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b91b2dfc358c1cb0a487d1d59c48a632d3a28a8d92f2e4877dc656b73845941
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b91b2dfc358c1cb0a487d1d59c48a632d3a28a8d92f2e4877dc656b73845941
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:438f3db980ad0547f739432602e1f85cc46b6fbef312b9261fd3b355ceeb97af
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b7fe86b6bf62db9f7989d6e264b9b70447a29a8d4bbea419af77ab1989ca356
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.824926023439502,
5
- "global_step": 250000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5006,11 +5006,211 @@
5006
  "eval_samples_per_second": 1542.185,
5007
  "eval_steps_per_second": 24.557,
5008
  "step": 250000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5009
  }
5010
  ],
5011
  "max_steps": 500000,
5012
  "num_train_epochs": 12,
5013
- "total_flos": 7.987165984407966e+21,
5014
  "trial_name": null,
5015
  "trial_params": null
5016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.057923064377082,
5
+ "global_step": 260000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5006
  "eval_samples_per_second": 1542.185,
5007
  "eval_steps_per_second": 24.557,
5008
  "step": 250000
5009
+ },
5010
+ {
5011
+ "epoch": 5.84,
5012
+ "learning_rate": 0.00016649607200138356,
5013
+ "loss": 0.2728,
5014
+ "step": 250500
5015
+ },
5016
+ {
5017
+ "epoch": 5.85,
5018
+ "learning_rate": 0.00016601801319007743,
5019
+ "loss": 0.2729,
5020
+ "step": 251000
5021
+ },
5022
+ {
5023
+ "epoch": 5.85,
5024
+ "eval_loss": 0.25545501708984375,
5025
+ "eval_runtime": 1.4675,
5026
+ "eval_samples_per_second": 1497.796,
5027
+ "eval_steps_per_second": 23.85,
5028
+ "step": 251000
5029
+ },
5030
+ {
5031
+ "epoch": 5.86,
5032
+ "learning_rate": 0.00016553983388754428,
5033
+ "loss": 0.2725,
5034
+ "step": 251500
5035
+ },
5036
+ {
5037
+ "epoch": 5.87,
5038
+ "learning_rate": 0.00016506153932307636,
5039
+ "loss": 0.2723,
5040
+ "step": 252000
5041
+ },
5042
+ {
5043
+ "epoch": 5.87,
5044
+ "eval_loss": 0.25281623005867004,
5045
+ "eval_runtime": 1.4602,
5046
+ "eval_samples_per_second": 1505.31,
5047
+ "eval_steps_per_second": 23.97,
5048
+ "step": 252000
5049
+ },
5050
+ {
5051
+ "epoch": 5.88,
5052
+ "learning_rate": 0.00016458313472722638,
5053
+ "loss": 0.2722,
5054
+ "step": 252500
5055
+ },
5056
+ {
5057
+ "epoch": 5.89,
5058
+ "learning_rate": 0.00016410462533175045,
5059
+ "loss": 0.2722,
5060
+ "step": 253000
5061
+ },
5062
+ {
5063
+ "epoch": 5.89,
5064
+ "eval_loss": 0.2557053565979004,
5065
+ "eval_runtime": 1.4444,
5066
+ "eval_samples_per_second": 1521.717,
5067
+ "eval_steps_per_second": 24.231,
5068
+ "step": 253000
5069
+ },
5070
+ {
5071
+ "epoch": 5.91,
5072
+ "learning_rate": 0.00016362601636955049,
5073
+ "loss": 0.2719,
5074
+ "step": 253500
5075
+ },
5076
+ {
5077
+ "epoch": 5.92,
5078
+ "learning_rate": 0.00016314731307461754,
5079
+ "loss": 0.2722,
5080
+ "step": 254000
5081
+ },
5082
+ {
5083
+ "epoch": 5.92,
5084
+ "eval_loss": 0.2575713098049164,
5085
+ "eval_runtime": 1.4237,
5086
+ "eval_samples_per_second": 1543.898,
5087
+ "eval_steps_per_second": 24.584,
5088
+ "step": 254000
5089
+ },
5090
+ {
5091
+ "epoch": 5.93,
5092
+ "learning_rate": 0.0001626685206819742,
5093
+ "loss": 0.2723,
5094
+ "step": 254500
5095
+ },
5096
+ {
5097
+ "epoch": 5.94,
5098
+ "learning_rate": 0.0001621896444276172,
5099
+ "loss": 0.2718,
5100
+ "step": 255000
5101
+ },
5102
+ {
5103
+ "epoch": 5.94,
5104
+ "eval_loss": 0.25522297620773315,
5105
+ "eval_runtime": 1.5788,
5106
+ "eval_samples_per_second": 1392.174,
5107
+ "eval_steps_per_second": 22.168,
5108
+ "step": 255000
5109
+ },
5110
+ {
5111
+ "epoch": 5.95,
5112
+ "learning_rate": 0.00016171068954846067,
5113
+ "loss": 0.2721,
5114
+ "step": 255500
5115
+ },
5116
+ {
5117
+ "epoch": 5.96,
5118
+ "learning_rate": 0.00016123166128227835,
5119
+ "loss": 0.2715,
5120
+ "step": 256000
5121
+ },
5122
+ {
5123
+ "epoch": 5.96,
5124
+ "eval_loss": 0.25572481751441956,
5125
+ "eval_runtime": 1.4307,
5126
+ "eval_samples_per_second": 1536.298,
5127
+ "eval_steps_per_second": 24.463,
5128
+ "step": 256000
5129
+ },
5130
+ {
5131
+ "epoch": 5.98,
5132
+ "learning_rate": 0.0001607525648676467,
5133
+ "loss": 0.2725,
5134
+ "step": 256500
5135
+ },
5136
+ {
5137
+ "epoch": 5.99,
5138
+ "learning_rate": 0.0001602734055438873,
5139
+ "loss": 0.2725,
5140
+ "step": 257000
5141
+ },
5142
+ {
5143
+ "epoch": 5.99,
5144
+ "eval_loss": 0.2527320981025696,
5145
+ "eval_runtime": 1.4262,
5146
+ "eval_samples_per_second": 1541.159,
5147
+ "eval_steps_per_second": 24.541,
5148
+ "step": 257000
5149
+ },
5150
+ {
5151
+ "epoch": 6.0,
5152
+ "learning_rate": 0.00015979418855100963,
5153
+ "loss": 0.2721,
5154
+ "step": 257500
5155
+ },
5156
+ {
5157
+ "epoch": 6.01,
5158
+ "learning_rate": 0.00015931491912965417,
5159
+ "loss": 0.2717,
5160
+ "step": 258000
5161
+ },
5162
+ {
5163
+ "epoch": 6.01,
5164
+ "eval_loss": 0.2517436146736145,
5165
+ "eval_runtime": 1.7752,
5166
+ "eval_samples_per_second": 1238.2,
5167
+ "eval_steps_per_second": 19.717,
5168
+ "step": 258000
5169
+ },
5170
+ {
5171
+ "epoch": 6.02,
5172
+ "learning_rate": 0.0001588356025210344,
5173
+ "loss": 0.2717,
5174
+ "step": 258500
5175
+ },
5176
+ {
5177
+ "epoch": 6.03,
5178
+ "learning_rate": 0.00015835624396688,
5179
+ "loss": 0.272,
5180
+ "step": 259000
5181
+ },
5182
+ {
5183
+ "epoch": 6.03,
5184
+ "eval_loss": 0.2557397484779358,
5185
+ "eval_runtime": 1.4282,
5186
+ "eval_samples_per_second": 1538.991,
5187
+ "eval_steps_per_second": 24.506,
5188
+ "step": 259000
5189
+ },
5190
+ {
5191
+ "epoch": 6.05,
5192
+ "learning_rate": 0.00015787684870937924,
5193
+ "loss": 0.2717,
5194
+ "step": 259500
5195
+ },
5196
+ {
5197
+ "epoch": 6.06,
5198
+ "learning_rate": 0.00015739742199112196,
5199
+ "loss": 0.2717,
5200
+ "step": 260000
5201
+ },
5202
+ {
5203
+ "epoch": 6.06,
5204
+ "eval_loss": 0.25458213686943054,
5205
+ "eval_runtime": 1.4334,
5206
+ "eval_samples_per_second": 1533.386,
5207
+ "eval_steps_per_second": 24.417,
5208
+ "step": 260000
5209
  }
5210
  ],
5211
  "max_steps": 500000,
5212
  "num_train_epochs": 12,
5213
+ "total_flos": 8.30665062619156e+21,
5214
  "trial_name": null,
5215
  "trial_params": null
5216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da2ade6d09e2d53692039b0c86af61bee174f1818f4f81e3d0671d8e99591ab1
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e0c29fd1ad93ebeb1e95f532ec268fb56806f7c232e1c94e4622474be46744f
3
  size 102501541