Training in progress, epoch 12, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2372346
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41341b31785ed0a2fbc8630488a4926f852161ef06ea08cc89540c9c37dc6630
|
3 |
size 2372346
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 990409330
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7137ffdadd3d98f6eb4df58e0d93207f78d2d3a226609b6aa51f825be2c62570
|
3 |
size 990409330
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81575766e0f892673e3292549b8eae61ee2367f845a1b2b10898b9e83a3ef05f
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fc3d9b89093dc44b3f8ae65848b5c79017341da78ff50d3adcc276738c797e7
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 1.5654487609863281,
|
3 |
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1050,13 +1050,104 @@
|
|
1050 |
"eval_steps_per_second": 1.037,
|
1051 |
"eval_translation_length": 52469,
|
1052 |
"step": 69553
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1053 |
}
|
1054 |
],
|
1055 |
"logging_steps": 500,
|
1056 |
"max_steps": 126460,
|
1057 |
"num_train_epochs": 20,
|
1058 |
"save_steps": 500,
|
1059 |
-
"total_flos":
|
1060 |
"trial_name": null,
|
1061 |
"trial_params": null
|
1062 |
}
|
|
|
1 |
{
|
2 |
"best_metric": 1.5654487609863281,
|
3 |
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
|
4 |
+
"epoch": 12.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 75876,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1050 |
"eval_steps_per_second": 1.037,
|
1051 |
"eval_translation_length": 52469,
|
1052 |
"step": 69553
|
1053 |
+
},
|
1054 |
+
{
|
1055 |
+
"epoch": 11.07,
|
1056 |
+
"learning_rate": 2.0952479625603017e-06,
|
1057 |
+
"loss": 1.7783,
|
1058 |
+
"step": 70000
|
1059 |
+
},
|
1060 |
+
{
|
1061 |
+
"epoch": 11.15,
|
1062 |
+
"learning_rate": 2.0645150514646657e-06,
|
1063 |
+
"loss": 1.7443,
|
1064 |
+
"step": 70500
|
1065 |
+
},
|
1066 |
+
{
|
1067 |
+
"epoch": 11.23,
|
1068 |
+
"learning_rate": 2.0338498642707977e-06,
|
1069 |
+
"loss": 1.7678,
|
1070 |
+
"step": 71000
|
1071 |
+
},
|
1072 |
+
{
|
1073 |
+
"epoch": 11.31,
|
1074 |
+
"learning_rate": 2.0032571698372577e-06,
|
1075 |
+
"loss": 1.7786,
|
1076 |
+
"step": 71500
|
1077 |
+
},
|
1078 |
+
{
|
1079 |
+
"epoch": 11.39,
|
1080 |
+
"learning_rate": 1.9727417257489874e-06,
|
1081 |
+
"loss": 1.7768,
|
1082 |
+
"step": 72000
|
1083 |
+
},
|
1084 |
+
{
|
1085 |
+
"epoch": 11.47,
|
1086 |
+
"learning_rate": 1.9423082775774337e-06,
|
1087 |
+
"loss": 1.7953,
|
1088 |
+
"step": 72500
|
1089 |
+
},
|
1090 |
+
{
|
1091 |
+
"epoch": 11.55,
|
1092 |
+
"learning_rate": 1.9119615581425524e-06,
|
1093 |
+
"loss": 1.7715,
|
1094 |
+
"step": 73000
|
1095 |
+
},
|
1096 |
+
{
|
1097 |
+
"epoch": 11.62,
|
1098 |
+
"learning_rate": 1.881706286776785e-06,
|
1099 |
+
"loss": 1.8047,
|
1100 |
+
"step": 73500
|
1101 |
+
},
|
1102 |
+
{
|
1103 |
+
"epoch": 11.7,
|
1104 |
+
"learning_rate": 1.8515471685911402e-06,
|
1105 |
+
"loss": 1.7781,
|
1106 |
+
"step": 74000
|
1107 |
+
},
|
1108 |
+
{
|
1109 |
+
"epoch": 11.78,
|
1110 |
+
"learning_rate": 1.821488893743488e-06,
|
1111 |
+
"loss": 1.8197,
|
1112 |
+
"step": 74500
|
1113 |
+
},
|
1114 |
+
{
|
1115 |
+
"epoch": 11.86,
|
1116 |
+
"learning_rate": 1.7915361367091677e-06,
|
1117 |
+
"loss": 1.8159,
|
1118 |
+
"step": 75000
|
1119 |
+
},
|
1120 |
+
{
|
1121 |
+
"epoch": 11.94,
|
1122 |
+
"learning_rate": 1.7616935555540475e-06,
|
1123 |
+
"loss": 1.8004,
|
1124 |
+
"step": 75500
|
1125 |
+
},
|
1126 |
+
{
|
1127 |
+
"epoch": 12.0,
|
1128 |
+
"eval_bleu": 1.0,
|
1129 |
+
"eval_brevity_penalty": 1.0,
|
1130 |
+
"eval_length_ratio": 1.0,
|
1131 |
+
"eval_loss": 1.5739296674728394,
|
1132 |
+
"eval_precisions": [
|
1133 |
+
1.0,
|
1134 |
+
1.0,
|
1135 |
+
1.0,
|
1136 |
+
1.0
|
1137 |
+
],
|
1138 |
+
"eval_reference_length": 52457,
|
1139 |
+
"eval_runtime": 676.9983,
|
1140 |
+
"eval_samples_per_second": 4.152,
|
1141 |
+
"eval_steps_per_second": 1.038,
|
1142 |
+
"eval_translation_length": 52457,
|
1143 |
+
"step": 75876
|
1144 |
}
|
1145 |
],
|
1146 |
"logging_steps": 500,
|
1147 |
"max_steps": 126460,
|
1148 |
"num_train_epochs": 20,
|
1149 |
"save_steps": 500,
|
1150 |
+
"total_flos": 2.078183784091484e+17,
|
1151 |
"trial_name": null,
|
1152 |
"trial_params": null
|
1153 |
}
|