linshoufan commited on
Commit
1f06204
·
verified ·
1 Parent(s): 086e820

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f238cd0948b63bcc38c31fc5ffac173dfbf9e4dd7ae0e58b3939dce2014b3334
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a09121d6d3eee2936978fd6c56b1964e1bbfa8c1b2ba09cbdd38897b94a77ae
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae4bdb35e2cfa6ff7cea529359f7d062822060c3b215ebacd7bc00e24a36204f
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:707058d813facf683e36e5998c502da22a8bda798044802fa93b8818e84308e9
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69855359c3b57278083d84101af49ff74c6759028b4bce6d7d9917534de6a5b9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d674cc410420434f566f66b73fcc1d8861fc1ac2b81a6518f434e4af5aa0734f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5af4d52c904e89f604184d696deb9b3da1bca80a50fa03227f0b0b8aea3e335
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7613bb05f834433908984d2661d965a40a71dcb872e212158c34c801216b6c4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 32.49766635551407,
3
- "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-3500",
4
- "epoch": 1.1250401800064287,
5
  "eval_steps": 500,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1050,6 +1050,155 @@
1050
  "eval_samples_per_second": 2.4,
1051
  "eval_steps_per_second": 0.3,
1052
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1053
  }
1054
  ],
1055
  "logging_steps": 25,
@@ -1057,7 +1206,7 @@
1057
  "num_input_tokens_seen": 0,
1058
  "num_train_epochs": 2,
1059
  "save_steps": 500,
1060
- "total_flos": 1.616078241792e+19,
1061
  "train_batch_size": 16,
1062
  "trial_name": null,
1063
  "trial_params": null
 
1
  {
2
+ "best_metric": 31.99093212428324,
3
+ "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-4000",
4
+ "epoch": 1.2857602057216329,
5
  "eval_steps": 500,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1050
  "eval_samples_per_second": 2.4,
1051
  "eval_steps_per_second": 0.3,
1052
  "step": 3500
1053
+ },
1054
+ {
1055
+ "epoch": 1.13,
1056
+ "grad_norm": 9.02902603149414,
1057
+ "learning_rate": 4.405423064358053e-06,
1058
+ "loss": 0.2382,
1059
+ "step": 3525
1060
+ },
1061
+ {
1062
+ "epoch": 1.14,
1063
+ "grad_norm": 8.05671215057373,
1064
+ "learning_rate": 4.364586736360667e-06,
1065
+ "loss": 0.222,
1066
+ "step": 3550
1067
+ },
1068
+ {
1069
+ "epoch": 1.15,
1070
+ "grad_norm": 10.546473503112793,
1071
+ "learning_rate": 4.32375040836328e-06,
1072
+ "loss": 0.2131,
1073
+ "step": 3575
1074
+ },
1075
+ {
1076
+ "epoch": 1.16,
1077
+ "grad_norm": 8.884702682495117,
1078
+ "learning_rate": 4.282914080365894e-06,
1079
+ "loss": 0.1886,
1080
+ "step": 3600
1081
+ },
1082
+ {
1083
+ "epoch": 1.17,
1084
+ "grad_norm": 7.569803237915039,
1085
+ "learning_rate": 4.242077752368508e-06,
1086
+ "loss": 0.222,
1087
+ "step": 3625
1088
+ },
1089
+ {
1090
+ "epoch": 1.17,
1091
+ "grad_norm": 6.256328105926514,
1092
+ "learning_rate": 4.201241424371121e-06,
1093
+ "loss": 0.2083,
1094
+ "step": 3650
1095
+ },
1096
+ {
1097
+ "epoch": 1.18,
1098
+ "grad_norm": 6.724915027618408,
1099
+ "learning_rate": 4.1604050963737345e-06,
1100
+ "loss": 0.2452,
1101
+ "step": 3675
1102
+ },
1103
+ {
1104
+ "epoch": 1.19,
1105
+ "grad_norm": 11.219491004943848,
1106
+ "learning_rate": 4.119568768376348e-06,
1107
+ "loss": 0.2217,
1108
+ "step": 3700
1109
+ },
1110
+ {
1111
+ "epoch": 1.2,
1112
+ "grad_norm": 6.6789469718933105,
1113
+ "learning_rate": 4.078732440378961e-06,
1114
+ "loss": 0.1958,
1115
+ "step": 3725
1116
+ },
1117
+ {
1118
+ "epoch": 1.21,
1119
+ "grad_norm": 7.929986476898193,
1120
+ "learning_rate": 4.037896112381575e-06,
1121
+ "loss": 0.1863,
1122
+ "step": 3750
1123
+ },
1124
+ {
1125
+ "epoch": 1.21,
1126
+ "grad_norm": 8.032015800476074,
1127
+ "learning_rate": 3.997059784384189e-06,
1128
+ "loss": 0.2153,
1129
+ "step": 3775
1130
+ },
1131
+ {
1132
+ "epoch": 1.22,
1133
+ "grad_norm": 8.176934242248535,
1134
+ "learning_rate": 3.956223456386802e-06,
1135
+ "loss": 0.1971,
1136
+ "step": 3800
1137
+ },
1138
+ {
1139
+ "epoch": 1.23,
1140
+ "grad_norm": 10.322613716125488,
1141
+ "learning_rate": 3.9153871283894155e-06,
1142
+ "loss": 0.1991,
1143
+ "step": 3825
1144
+ },
1145
+ {
1146
+ "epoch": 1.24,
1147
+ "grad_norm": 7.837410926818848,
1148
+ "learning_rate": 3.874550800392029e-06,
1149
+ "loss": 0.2043,
1150
+ "step": 3850
1151
+ },
1152
+ {
1153
+ "epoch": 1.25,
1154
+ "grad_norm": 12.684860229492188,
1155
+ "learning_rate": 3.833714472394642e-06,
1156
+ "loss": 0.2008,
1157
+ "step": 3875
1158
+ },
1159
+ {
1160
+ "epoch": 1.25,
1161
+ "grad_norm": 7.476794242858887,
1162
+ "learning_rate": 3.7928781443972564e-06,
1163
+ "loss": 0.2199,
1164
+ "step": 3900
1165
+ },
1166
+ {
1167
+ "epoch": 1.26,
1168
+ "grad_norm": 4.45359992980957,
1169
+ "learning_rate": 3.7520418163998693e-06,
1170
+ "loss": 0.196,
1171
+ "step": 3925
1172
+ },
1173
+ {
1174
+ "epoch": 1.27,
1175
+ "grad_norm": 9.373842239379883,
1176
+ "learning_rate": 3.7112054884024835e-06,
1177
+ "loss": 0.1939,
1178
+ "step": 3950
1179
+ },
1180
+ {
1181
+ "epoch": 1.28,
1182
+ "grad_norm": 6.383950233459473,
1183
+ "learning_rate": 3.670369160405097e-06,
1184
+ "loss": 0.2066,
1185
+ "step": 3975
1186
+ },
1187
+ {
1188
+ "epoch": 1.29,
1189
+ "grad_norm": 5.862789154052734,
1190
+ "learning_rate": 3.6295328324077102e-06,
1191
+ "loss": 0.1806,
1192
+ "step": 4000
1193
+ },
1194
+ {
1195
+ "epoch": 1.29,
1196
+ "eval_cer": 31.99093212428324,
1197
+ "eval_loss": 0.4084797203540802,
1198
+ "eval_runtime": 1898.761,
1199
+ "eval_samples_per_second": 2.336,
1200
+ "eval_steps_per_second": 0.292,
1201
+ "step": 4000
1202
  }
1203
  ],
1204
  "logging_steps": 25,
 
1206
  "num_input_tokens_seen": 0,
1207
  "num_train_epochs": 2,
1208
  "save_steps": 500,
1209
+ "total_flos": 1.846946562048e+19,
1210
  "train_batch_size": 16,
1211
  "trial_name": null,
1212
  "trial_params": null