plip commited on
Commit
80774d1
1 Parent(s): 423ce3d

Training in progress, step 460000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c75b6a012ab82fb16d27738e0a95c3b1b8ef7e5d1f6ee0116a8c50549bb847f8
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:583b1f1834e02218d89c3b697d47dd796ee6e2f54da76dbfa4b2b4a392308637
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78b24f59351cac08b9824b3f79acd0f1dc0a89b48a8f25dc820166f982b53fa7
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b1ac07a8fd5b7f18f94228f9f3e9b88d618a44073c792161d704c98526bced7
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.484866842191105,
5
- "global_step": 450000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9006,11 +9006,211 @@
9006
  "eval_samples_per_second": 1528.604,
9007
  "eval_steps_per_second": 24.341,
9008
  "step": 450000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9009
  }
9010
  ],
9011
  "max_steps": 500000,
9012
  "num_train_epochs": 12,
9013
- "total_flos": 1.4376896274943432e+22,
9014
  "trial_name": null,
9015
  "trial_params": null
9016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.717863883128684,
5
+ "global_step": 460000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9006
  "eval_samples_per_second": 1528.604,
9007
  "eval_steps_per_second": 24.341,
9008
  "step": 450000
9009
+ },
9010
+ {
9011
+ "epoch": 10.5,
9012
+ "learning_rate": 1.770155470293445e-05,
9013
+ "loss": 0.2519,
9014
+ "step": 450500
9015
+ },
9016
+ {
9017
+ "epoch": 10.51,
9018
+ "learning_rate": 1.7548110926261522e-05,
9019
+ "loss": 0.2518,
9020
+ "step": 451000
9021
+ },
9022
+ {
9023
+ "epoch": 10.51,
9024
+ "eval_loss": 0.23447871208190918,
9025
+ "eval_runtime": 1.4307,
9026
+ "eval_samples_per_second": 1536.276,
9027
+ "eval_steps_per_second": 24.463,
9028
+ "step": 451000
9029
+ },
9030
+ {
9031
+ "epoch": 10.52,
9032
+ "learning_rate": 1.7396170301425777e-05,
9033
+ "loss": 0.2518,
9034
+ "step": 451500
9035
+ },
9036
+ {
9037
+ "epoch": 10.53,
9038
+ "learning_rate": 1.7245734490025544e-05,
9039
+ "loss": 0.252,
9040
+ "step": 452000
9041
+ },
9042
+ {
9043
+ "epoch": 10.53,
9044
+ "eval_loss": 0.23647595942020416,
9045
+ "eval_runtime": 1.4161,
9046
+ "eval_samples_per_second": 1552.16,
9047
+ "eval_steps_per_second": 24.716,
9048
+ "step": 452000
9049
+ },
9050
+ {
9051
+ "epoch": 10.54,
9052
+ "learning_rate": 1.7096805137202738e-05,
9053
+ "loss": 0.2518,
9054
+ "step": 452500
9055
+ },
9056
+ {
9057
+ "epoch": 10.55,
9058
+ "learning_rate": 1.6949383871624917e-05,
9059
+ "loss": 0.2517,
9060
+ "step": 453000
9061
+ },
9062
+ {
9063
+ "epoch": 10.55,
9064
+ "eval_loss": 0.23766835033893585,
9065
+ "eval_runtime": 1.4533,
9066
+ "eval_samples_per_second": 1512.402,
9067
+ "eval_steps_per_second": 24.083,
9068
+ "step": 453000
9069
+ },
9070
+ {
9071
+ "epoch": 10.57,
9072
+ "learning_rate": 1.6803472305467368e-05,
9073
+ "loss": 0.2515,
9074
+ "step": 453500
9075
+ },
9076
+ {
9077
+ "epoch": 10.58,
9078
+ "learning_rate": 1.665907203439568e-05,
9079
+ "loss": 0.2516,
9080
+ "step": 454000
9081
+ },
9082
+ {
9083
+ "epoch": 10.58,
9084
+ "eval_loss": 0.23621916770935059,
9085
+ "eval_runtime": 1.476,
9086
+ "eval_samples_per_second": 1489.127,
9087
+ "eval_steps_per_second": 23.712,
9088
+ "step": 454000
9089
+ },
9090
+ {
9091
+ "epoch": 10.59,
9092
+ "learning_rate": 1.6516184637548058e-05,
9093
+ "loss": 0.2515,
9094
+ "step": 454500
9095
+ },
9096
+ {
9097
+ "epoch": 10.6,
9098
+ "learning_rate": 1.6374811677518142e-05,
9099
+ "loss": 0.2517,
9100
+ "step": 455000
9101
+ },
9102
+ {
9103
+ "epoch": 10.6,
9104
+ "eval_loss": 0.23542185127735138,
9105
+ "eval_runtime": 1.4176,
9106
+ "eval_samples_per_second": 1550.543,
9107
+ "eval_steps_per_second": 24.69,
9108
+ "step": 455000
9109
+ },
9110
+ {
9111
+ "epoch": 10.61,
9112
+ "learning_rate": 1.6234954700338025e-05,
9113
+ "loss": 0.2516,
9114
+ "step": 455500
9115
+ },
9116
+ {
9117
+ "epoch": 10.62,
9118
+ "learning_rate": 1.6096615235461148e-05,
9119
+ "loss": 0.2515,
9120
+ "step": 456000
9121
+ },
9122
+ {
9123
+ "epoch": 10.62,
9124
+ "eval_loss": 0.2339724451303482,
9125
+ "eval_runtime": 1.3908,
9126
+ "eval_samples_per_second": 1580.393,
9127
+ "eval_steps_per_second": 25.165,
9128
+ "step": 456000
9129
+ },
9130
+ {
9131
+ "epoch": 10.64,
9132
+ "learning_rate": 1.59597947957458e-05,
9133
+ "loss": 0.2519,
9134
+ "step": 456500
9135
+ },
9136
+ {
9137
+ "epoch": 10.65,
9138
+ "learning_rate": 1.5824494877438344e-05,
9139
+ "loss": 0.2515,
9140
+ "step": 457000
9141
+ },
9142
+ {
9143
+ "epoch": 10.65,
9144
+ "eval_loss": 0.23481862246990204,
9145
+ "eval_runtime": 1.4326,
9146
+ "eval_samples_per_second": 1534.248,
9147
+ "eval_steps_per_second": 24.431,
9148
+ "step": 457000
9149
+ },
9150
+ {
9151
+ "epoch": 10.66,
9152
+ "learning_rate": 1.569071696015702e-05,
9153
+ "loss": 0.2513,
9154
+ "step": 457500
9155
+ },
9156
+ {
9157
+ "epoch": 10.67,
9158
+ "learning_rate": 1.555846250687569e-05,
9159
+ "loss": 0.2517,
9160
+ "step": 458000
9161
+ },
9162
+ {
9163
+ "epoch": 10.67,
9164
+ "eval_loss": 0.23701262474060059,
9165
+ "eval_runtime": 1.4718,
9166
+ "eval_samples_per_second": 1493.443,
9167
+ "eval_steps_per_second": 23.781,
9168
+ "step": 458000
9169
+ },
9170
+ {
9171
+ "epoch": 10.68,
9172
+ "learning_rate": 1.542773296390789e-05,
9173
+ "loss": 0.2518,
9174
+ "step": 458500
9175
+ },
9176
+ {
9177
+ "epoch": 10.69,
9178
+ "learning_rate": 1.5298529760890945e-05,
9179
+ "loss": 0.2514,
9180
+ "step": 459000
9181
+ },
9182
+ {
9183
+ "epoch": 10.69,
9184
+ "eval_loss": 0.23413069546222687,
9185
+ "eval_runtime": 1.4496,
9186
+ "eval_samples_per_second": 1516.277,
9187
+ "eval_steps_per_second": 24.145,
9188
+ "step": 459000
9189
+ },
9190
+ {
9191
+ "epoch": 10.71,
9192
+ "learning_rate": 1.5170854310770376e-05,
9193
+ "loss": 0.251,
9194
+ "step": 459500
9195
+ },
9196
+ {
9197
+ "epoch": 10.72,
9198
+ "learning_rate": 1.5044708009784457e-05,
9199
+ "loss": 0.2514,
9200
+ "step": 460000
9201
+ },
9202
+ {
9203
+ "epoch": 10.72,
9204
+ "eval_loss": 0.2361001968383789,
9205
+ "eval_runtime": 1.4246,
9206
+ "eval_samples_per_second": 1542.873,
9207
+ "eval_steps_per_second": 24.568,
9208
+ "step": 460000
9209
  }
9210
  ],
9211
  "max_steps": 500000,
9212
  "num_train_epochs": 12,
9213
+ "total_flos": 1.4696383413717932e+22,
9214
  "trial_name": null,
9215
  "trial_params": null
9216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78b24f59351cac08b9824b3f79acd0f1dc0a89b48a8f25dc820166f982b53fa7
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b1ac07a8fd5b7f18f94228f9f3e9b88d618a44073c792161d704c98526bced7
3
  size 102501541