amazingvince commited on
Commit
22d81fe
1 Parent(s): e0ad69a

Upload folder using huggingface_hub

Browse files
latest CHANGED
@@ -1 +1 @@
1
- global_step1000
 
1
+ global_step1200
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44c5dd58db09ef6c8d26f575bb1c52fc43bb2da069752a940c3c786d5296ab76
3
  size 4944210912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b692de05056f10e069f7e9c0915bb02734793c59dca77041aa1eea315cd3a34
3
  size 4944210912
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fc780af17679805c0358ac7f2f9355f4108dcfad591c2b4c924313d821ed4f2
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01a591f9840a3d2a9ee3c6234e6fd9ab33bcc1c98a7b45790346153f338a5b2f
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:780efe7446c48ba32e596540697fa9c2036e9ced694d2d25e4cba8622a7085b1
3
  size 4541564920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d2eebebc0731faba3082c7171851b35cf53492353b5c7014bd348a3bae4aaa3
3
  size 4541564920
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.04061841537407022,
5
  "eval_steps": 400,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1229,6 +1229,254 @@
1229
  "learning_rate": 1.995293084834134e-06,
1230
  "loss": 0.7101,
1231
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1232
  }
1233
  ],
1234
  "logging_steps": 5,
@@ -1236,7 +1484,7 @@
1236
  "num_input_tokens_seen": 0,
1237
  "num_train_epochs": 1,
1238
  "save_steps": 200,
1239
- "total_flos": 138834746810368.0,
1240
  "trial_name": null,
1241
  "trial_params": null
1242
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04874209844888426,
5
  "eval_steps": 400,
6
+ "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1229
  "learning_rate": 1.995293084834134e-06,
1230
  "loss": 0.7101,
1231
  "step": 1000
1232
+ },
1233
+ {
1234
+ "epoch": 0.04,
1235
+ "learning_rate": 1.9952304183703893e-06,
1236
+ "loss": 0.7109,
1237
+ "step": 1005
1238
+ },
1239
+ {
1240
+ "epoch": 0.04,
1241
+ "learning_rate": 1.9951673384965835e-06,
1242
+ "loss": 0.7103,
1243
+ "step": 1010
1244
+ },
1245
+ {
1246
+ "epoch": 0.04,
1247
+ "learning_rate": 1.99510384523892e-06,
1248
+ "loss": 0.7677,
1249
+ "step": 1015
1250
+ },
1251
+ {
1252
+ "epoch": 0.04,
1253
+ "learning_rate": 1.995039938623773e-06,
1254
+ "loss": 0.7371,
1255
+ "step": 1020
1256
+ },
1257
+ {
1258
+ "epoch": 0.04,
1259
+ "learning_rate": 1.9949756186776893e-06,
1260
+ "loss": 0.7204,
1261
+ "step": 1025
1262
+ },
1263
+ {
1264
+ "epoch": 0.04,
1265
+ "learning_rate": 1.9949108854273855e-06,
1266
+ "loss": 0.7271,
1267
+ "step": 1030
1268
+ },
1269
+ {
1270
+ "epoch": 0.04,
1271
+ "learning_rate": 1.9948457388997528e-06,
1272
+ "loss": 0.7031,
1273
+ "step": 1035
1274
+ },
1275
+ {
1276
+ "epoch": 0.04,
1277
+ "learning_rate": 1.994780179121851e-06,
1278
+ "loss": 0.7612,
1279
+ "step": 1040
1280
+ },
1281
+ {
1282
+ "epoch": 0.04,
1283
+ "learning_rate": 1.994714206120914e-06,
1284
+ "loss": 0.7234,
1285
+ "step": 1045
1286
+ },
1287
+ {
1288
+ "epoch": 0.04,
1289
+ "learning_rate": 1.9946478199243466e-06,
1290
+ "loss": 0.7187,
1291
+ "step": 1050
1292
+ },
1293
+ {
1294
+ "epoch": 0.04,
1295
+ "learning_rate": 1.9945810205597246e-06,
1296
+ "loss": 0.7004,
1297
+ "step": 1055
1298
+ },
1299
+ {
1300
+ "epoch": 0.04,
1301
+ "learning_rate": 1.9945138080547957e-06,
1302
+ "loss": 0.6932,
1303
+ "step": 1060
1304
+ },
1305
+ {
1306
+ "epoch": 0.04,
1307
+ "learning_rate": 1.99444618243748e-06,
1308
+ "loss": 0.7135,
1309
+ "step": 1065
1310
+ },
1311
+ {
1312
+ "epoch": 0.04,
1313
+ "learning_rate": 1.994378143735868e-06,
1314
+ "loss": 0.7056,
1315
+ "step": 1070
1316
+ },
1317
+ {
1318
+ "epoch": 0.04,
1319
+ "learning_rate": 1.9943096919782225e-06,
1320
+ "loss": 0.6984,
1321
+ "step": 1075
1322
+ },
1323
+ {
1324
+ "epoch": 0.04,
1325
+ "learning_rate": 1.994240827192978e-06,
1326
+ "loss": 0.6947,
1327
+ "step": 1080
1328
+ },
1329
+ {
1330
+ "epoch": 0.04,
1331
+ "learning_rate": 1.9941715494087408e-06,
1332
+ "loss": 0.7332,
1333
+ "step": 1085
1334
+ },
1335
+ {
1336
+ "epoch": 0.04,
1337
+ "learning_rate": 1.9941018586542866e-06,
1338
+ "loss": 0.6963,
1339
+ "step": 1090
1340
+ },
1341
+ {
1342
+ "epoch": 0.04,
1343
+ "learning_rate": 1.9940317549585665e-06,
1344
+ "loss": 0.7245,
1345
+ "step": 1095
1346
+ },
1347
+ {
1348
+ "epoch": 0.04,
1349
+ "learning_rate": 1.9939612383506993e-06,
1350
+ "loss": 0.769,
1351
+ "step": 1100
1352
+ },
1353
+ {
1354
+ "epoch": 0.04,
1355
+ "learning_rate": 1.993890308859978e-06,
1356
+ "loss": 0.7245,
1357
+ "step": 1105
1358
+ },
1359
+ {
1360
+ "epoch": 0.05,
1361
+ "learning_rate": 1.9938189665158654e-06,
1362
+ "loss": 0.6868,
1363
+ "step": 1110
1364
+ },
1365
+ {
1366
+ "epoch": 0.05,
1367
+ "learning_rate": 1.9937472113479966e-06,
1368
+ "loss": 0.7072,
1369
+ "step": 1115
1370
+ },
1371
+ {
1372
+ "epoch": 0.05,
1373
+ "learning_rate": 1.9936750433861787e-06,
1374
+ "loss": 0.7415,
1375
+ "step": 1120
1376
+ },
1377
+ {
1378
+ "epoch": 0.05,
1379
+ "learning_rate": 1.993602462660389e-06,
1380
+ "loss": 0.71,
1381
+ "step": 1125
1382
+ },
1383
+ {
1384
+ "epoch": 0.05,
1385
+ "learning_rate": 1.993529469200777e-06,
1386
+ "loss": 0.7006,
1387
+ "step": 1130
1388
+ },
1389
+ {
1390
+ "epoch": 0.05,
1391
+ "learning_rate": 1.993456063037664e-06,
1392
+ "loss": 0.6957,
1393
+ "step": 1135
1394
+ },
1395
+ {
1396
+ "epoch": 0.05,
1397
+ "learning_rate": 1.9933822442015416e-06,
1398
+ "loss": 0.733,
1399
+ "step": 1140
1400
+ },
1401
+ {
1402
+ "epoch": 0.05,
1403
+ "learning_rate": 1.993308012723074e-06,
1404
+ "loss": 0.7156,
1405
+ "step": 1145
1406
+ },
1407
+ {
1408
+ "epoch": 0.05,
1409
+ "learning_rate": 1.993233368633096e-06,
1410
+ "loss": 0.6977,
1411
+ "step": 1150
1412
+ },
1413
+ {
1414
+ "epoch": 0.05,
1415
+ "learning_rate": 1.993158311962614e-06,
1416
+ "loss": 0.6911,
1417
+ "step": 1155
1418
+ },
1419
+ {
1420
+ "epoch": 0.05,
1421
+ "learning_rate": 1.9930828427428066e-06,
1422
+ "loss": 0.7124,
1423
+ "step": 1160
1424
+ },
1425
+ {
1426
+ "epoch": 0.05,
1427
+ "learning_rate": 1.9930069610050224e-06,
1428
+ "loss": 0.7197,
1429
+ "step": 1165
1430
+ },
1431
+ {
1432
+ "epoch": 0.05,
1433
+ "learning_rate": 1.9929306667807823e-06,
1434
+ "loss": 0.7129,
1435
+ "step": 1170
1436
+ },
1437
+ {
1438
+ "epoch": 0.05,
1439
+ "learning_rate": 1.992853960101778e-06,
1440
+ "loss": 0.6775,
1441
+ "step": 1175
1442
+ },
1443
+ {
1444
+ "epoch": 0.05,
1445
+ "learning_rate": 1.9927768409998733e-06,
1446
+ "loss": 0.7333,
1447
+ "step": 1180
1448
+ },
1449
+ {
1450
+ "epoch": 0.05,
1451
+ "learning_rate": 1.992699309507102e-06,
1452
+ "loss": 0.6704,
1453
+ "step": 1185
1454
+ },
1455
+ {
1456
+ "epoch": 0.05,
1457
+ "learning_rate": 1.992621365655671e-06,
1458
+ "loss": 0.7136,
1459
+ "step": 1190
1460
+ },
1461
+ {
1462
+ "epoch": 0.05,
1463
+ "learning_rate": 1.9925430094779566e-06,
1464
+ "loss": 0.696,
1465
+ "step": 1195
1466
+ },
1467
+ {
1468
+ "epoch": 0.05,
1469
+ "learning_rate": 1.9924642410065075e-06,
1470
+ "loss": 0.7362,
1471
+ "step": 1200
1472
+ },
1473
+ {
1474
+ "epoch": 0.05,
1475
+ "eval_loss": 0.6821444630622864,
1476
+ "eval_runtime": 138.0679,
1477
+ "eval_samples_per_second": 17.136,
1478
+ "eval_steps_per_second": 2.861,
1479
+ "step": 1200
1480
  }
1481
  ],
1482
  "logging_steps": 5,
 
1484
  "num_input_tokens_seen": 0,
1485
  "num_train_epochs": 1,
1486
  "save_steps": 200,
1487
+ "total_flos": 166374297772032.0,
1488
  "trial_name": null,
1489
  "trial_params": null
1490
  }