sedrickkeh commited on
Commit
099ae7c
·
verified ·
1 Parent(s): 35eedd9

Training in progress, epoch 1

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1512c2c7e37de5e4838f561c1901175ff569e5657cd5d8c603eb92e6ac507386
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7745a4aa56d839271a284e9ab1b37ca3403713a76f45d5ca8362cf39eb8f41
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51bf6bbd3e16c646652b5e9150cc803b2b5f2f5994e0af03f145162bf926e8c7
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5a197e00c1b187743529f8631134b84017e1136b0efebdd21656b58cb4e85cc
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e73e2b950df19e5309ac13974ff27116786af3f7124351f687e3fce1cf854cf1
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f11ef5fcf3873784e494189b3a9515862064acfc7a2203730affa9acdc736a
3
  size 4540516344
trainer_log.jsonl CHANGED
@@ -66,3 +66,71 @@
66
  {"current_steps": 660, "total_steps": 2013, "loss": 0.4661, "lr": 4.5311606533359665e-06, "epoch": 0.9823255813953489, "percentage": 32.79, "elapsed_time": "5:48:44", "remaining_time": "11:54:54"}
67
  {"current_steps": 670, "total_steps": 2013, "loss": 0.4722, "lr": 4.50561965229053e-06, "epoch": 0.9972093023255814, "percentage": 33.28, "elapsed_time": "5:54:02", "remaining_time": "11:49:39"}
68
  {"current_steps": 671, "total_steps": 2013, "eval_loss": 0.058389123529195786, "epoch": 0.9986976744186047, "percentage": 33.33, "elapsed_time": "6:02:30", "remaining_time": "12:05:01"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  {"current_steps": 660, "total_steps": 2013, "loss": 0.4661, "lr": 4.5311606533359665e-06, "epoch": 0.9823255813953489, "percentage": 32.79, "elapsed_time": "5:48:44", "remaining_time": "11:54:54"}
67
  {"current_steps": 670, "total_steps": 2013, "loss": 0.4722, "lr": 4.50561965229053e-06, "epoch": 0.9972093023255814, "percentage": 33.28, "elapsed_time": "5:54:02", "remaining_time": "11:49:39"}
68
  {"current_steps": 671, "total_steps": 2013, "eval_loss": 0.058389123529195786, "epoch": 0.9986976744186047, "percentage": 33.33, "elapsed_time": "6:02:30", "remaining_time": "12:05:01"}
69
+ {"current_steps": 680, "total_steps": 2013, "loss": 0.4052, "lr": 4.479486792650245e-06, "epoch": 1.0130232558139536, "percentage": 33.78, "elapsed_time": "6:08:14", "remaining_time": "12:01:52"}
70
+ {"current_steps": 690, "total_steps": 2013, "loss": 0.3906, "lr": 4.452770884385747e-06, "epoch": 1.027906976744186, "percentage": 34.28, "elapsed_time": "6:13:29", "remaining_time": "11:56:07"}
71
+ {"current_steps": 700, "total_steps": 2013, "loss": 0.393, "lr": 4.4254809340263875e-06, "epoch": 1.0427906976744186, "percentage": 34.77, "elapsed_time": "6:18:45", "remaining_time": "11:50:25"}
72
+ {"current_steps": 710, "total_steps": 2013, "loss": 0.3946, "lr": 4.397626141623928e-06, "epoch": 1.0576744186046512, "percentage": 35.27, "elapsed_time": "6:24:00", "remaining_time": "11:44:45"}
73
+ {"current_steps": 720, "total_steps": 2013, "loss": 0.3944, "lr": 4.369215897650995e-06, "epoch": 1.0725581395348838, "percentage": 35.77, "elapsed_time": "6:29:15", "remaining_time": "11:39:03"}
74
+ {"current_steps": 730, "total_steps": 2013, "loss": 0.3921, "lr": 4.340259779835348e-06, "epoch": 1.0874418604651164, "percentage": 36.26, "elapsed_time": "6:34:29", "remaining_time": "11:33:19"}
75
+ {"current_steps": 740, "total_steps": 2013, "loss": 0.3977, "lr": 4.310767549931009e-06, "epoch": 1.1023255813953488, "percentage": 36.76, "elapsed_time": "6:39:43", "remaining_time": "11:27:37"}
76
+ {"current_steps": 750, "total_steps": 2013, "loss": 0.394, "lr": 4.28074915042736e-06, "epoch": 1.1172093023255814, "percentage": 37.26, "elapsed_time": "6:44:56", "remaining_time": "11:21:55"}
77
+ {"current_steps": 760, "total_steps": 2013, "loss": 0.3972, "lr": 4.250214701197305e-06, "epoch": 1.132093023255814, "percentage": 37.75, "elapsed_time": "6:50:12", "remaining_time": "11:16:17"}
78
+ {"current_steps": 770, "total_steps": 2013, "loss": 0.3983, "lr": 4.219174496085636e-06, "epoch": 1.1469767441860466, "percentage": 38.25, "elapsed_time": "6:55:25", "remaining_time": "11:10:37"}
79
+ {"current_steps": 780, "total_steps": 2013, "loss": 0.3976, "lr": 4.18763899943875e-06, "epoch": 1.1618604651162792, "percentage": 38.75, "elapsed_time": "7:00:41", "remaining_time": "11:05:00"}
80
+ {"current_steps": 790, "total_steps": 2013, "loss": 0.3946, "lr": 4.155618842576882e-06, "epoch": 1.1767441860465115, "percentage": 39.24, "elapsed_time": "7:05:58", "remaining_time": "10:59:26"}
81
+ {"current_steps": 800, "total_steps": 2013, "loss": 0.395, "lr": 4.1231248202100474e-06, "epoch": 1.1916279069767441, "percentage": 39.74, "elapsed_time": "7:11:13", "remaining_time": "10:53:51"}
82
+ {"current_steps": 810, "total_steps": 2013, "loss": 0.3991, "lr": 4.09016788679891e-06, "epoch": 1.2065116279069767, "percentage": 40.24, "elapsed_time": "7:16:28", "remaining_time": "10:48:15"}
83
+ {"current_steps": 820, "total_steps": 2013, "loss": 0.4013, "lr": 4.056759152861782e-06, "epoch": 1.2213953488372093, "percentage": 40.74, "elapsed_time": "7:21:43", "remaining_time": "10:42:39"}
84
+ {"current_steps": 830, "total_steps": 2013, "loss": 0.3951, "lr": 4.022909881229028e-06, "epoch": 1.236279069767442, "percentage": 41.23, "elapsed_time": "7:26:59", "remaining_time": "10:37:05"}
85
+ {"current_steps": 840, "total_steps": 2013, "loss": 0.3981, "lr": 3.988631483246104e-06, "epoch": 1.2511627906976743, "percentage": 41.73, "elapsed_time": "7:32:15", "remaining_time": "10:31:32"}
86
+ {"current_steps": 850, "total_steps": 2013, "loss": 0.3962, "lr": 3.953935514926546e-06, "epoch": 1.266046511627907, "percentage": 42.23, "elapsed_time": "7:37:30", "remaining_time": "10:25:59"}
87
+ {"current_steps": 860, "total_steps": 2013, "loss": 0.3951, "lr": 3.9188336730561745e-06, "epoch": 1.2809302325581395, "percentage": 42.72, "elapsed_time": "7:42:47", "remaining_time": "10:20:27"}
88
+ {"current_steps": 870, "total_steps": 2013, "loss": 0.3934, "lr": 3.8833377912498494e-06, "epoch": 1.2958139534883721, "percentage": 43.22, "elapsed_time": "7:48:03", "remaining_time": "10:14:55"}
89
+ {"current_steps": 880, "total_steps": 2013, "loss": 0.3959, "lr": 3.847459835962095e-06, "epoch": 1.3106976744186047, "percentage": 43.72, "elapsed_time": "7:53:17", "remaining_time": "10:09:21"}
90
+ {"current_steps": 890, "total_steps": 2013, "loss": 0.3926, "lr": 3.8112119024529367e-06, "epoch": 1.3255813953488373, "percentage": 44.21, "elapsed_time": "7:58:33", "remaining_time": "10:03:51"}
91
+ {"current_steps": 900, "total_steps": 2013, "loss": 0.3966, "lr": 3.774606210710323e-06, "epoch": 1.3404651162790697, "percentage": 44.71, "elapsed_time": "8:03:50", "remaining_time": "9:58:21"}
92
+ {"current_steps": 910, "total_steps": 2013, "loss": 0.3963, "lr": 3.737655101330493e-06, "epoch": 1.3553488372093023, "percentage": 45.21, "elapsed_time": "8:09:07", "remaining_time": "9:52:51"}
93
+ {"current_steps": 920, "total_steps": 2013, "loss": 0.3982, "lr": 3.700371031357687e-06, "epoch": 1.370232558139535, "percentage": 45.7, "elapsed_time": "8:14:23", "remaining_time": "9:47:21"}
94
+ {"current_steps": 930, "total_steps": 2013, "loss": 0.3933, "lr": 3.6627665700845944e-06, "epoch": 1.3851162790697673, "percentage": 46.2, "elapsed_time": "8:19:39", "remaining_time": "9:41:51"}
95
+ {"current_steps": 940, "total_steps": 2013, "loss": 0.3983, "lr": 3.624854394814972e-06, "epoch": 1.4, "percentage": 46.7, "elapsed_time": "8:24:56", "remaining_time": "9:36:22"}
96
+ {"current_steps": 950, "total_steps": 2013, "loss": 0.3947, "lr": 3.5866472865898326e-06, "epoch": 1.4148837209302325, "percentage": 47.19, "elapsed_time": "8:30:13", "remaining_time": "9:30:54"}
97
+ {"current_steps": 960, "total_steps": 2013, "loss": 0.3923, "lr": 3.5481581258786783e-06, "epoch": 1.4297674418604651, "percentage": 47.69, "elapsed_time": "8:35:30", "remaining_time": "9:25:26"}
98
+ {"current_steps": 970, "total_steps": 2013, "loss": 0.3888, "lr": 3.5093998882372038e-06, "epoch": 1.4446511627906977, "percentage": 48.19, "elapsed_time": "8:40:46", "remaining_time": "9:19:57"}
99
+ {"current_steps": 980, "total_steps": 2013, "loss": 0.3949, "lr": 3.470385639932944e-06, "epoch": 1.4595348837209303, "percentage": 48.68, "elapsed_time": "8:46:01", "remaining_time": "9:14:28"}
100
+ {"current_steps": 990, "total_steps": 2013, "loss": 0.3928, "lr": 3.4311285335403495e-06, "epoch": 1.474418604651163, "percentage": 49.18, "elapsed_time": "8:51:19", "remaining_time": "9:09:02"}
101
+ {"current_steps": 1000, "total_steps": 2013, "loss": 0.3912, "lr": 3.3916418035067527e-06, "epoch": 1.4893023255813953, "percentage": 49.68, "elapsed_time": "8:56:33", "remaining_time": "9:03:32"}
102
+ {"current_steps": 1010, "total_steps": 2013, "loss": 0.3938, "lr": 3.3519387616907477e-06, "epoch": 1.504186046511628, "percentage": 50.17, "elapsed_time": "9:01:49", "remaining_time": "8:58:04"}
103
+ {"current_steps": 1020, "total_steps": 2013, "loss": 0.3911, "lr": 3.312032792874458e-06, "epoch": 1.5190697674418605, "percentage": 50.67, "elapsed_time": "9:07:05", "remaining_time": "8:52:36"}
104
+ {"current_steps": 1030, "total_steps": 2013, "loss": 0.3913, "lr": 3.2719373502512326e-06, "epoch": 1.5339534883720929, "percentage": 51.17, "elapsed_time": "9:12:19", "remaining_time": "8:47:07"}
105
+ {"current_steps": 1040, "total_steps": 2013, "loss": 0.3884, "lr": 3.231665950890274e-06, "epoch": 1.5488372093023255, "percentage": 51.66, "elapsed_time": "9:17:35", "remaining_time": "8:41:39"}
106
+ {"current_steps": 1050, "total_steps": 2013, "loss": 0.3897, "lr": 3.1912321711797285e-06, "epoch": 1.563720930232558, "percentage": 52.16, "elapsed_time": "9:22:51", "remaining_time": "8:36:13"}
107
+ {"current_steps": 1060, "total_steps": 2013, "loss": 0.3926, "lr": 3.1506496422497856e-06, "epoch": 1.5786046511627907, "percentage": 52.66, "elapsed_time": "9:28:08", "remaining_time": "8:30:47"}
108
+ {"current_steps": 1070, "total_steps": 2013, "loss": 0.3918, "lr": 3.1099320453773195e-06, "epoch": 1.5934883720930233, "percentage": 53.15, "elapsed_time": "9:33:25", "remaining_time": "8:25:21"}
109
+ {"current_steps": 1080, "total_steps": 2013, "loss": 0.3849, "lr": 3.0690931073736233e-06, "epoch": 1.608372093023256, "percentage": 53.65, "elapsed_time": "9:38:44", "remaining_time": "8:19:57"}
110
+ {"current_steps": 1090, "total_steps": 2013, "loss": 0.3926, "lr": 3.0281465959567974e-06, "epoch": 1.6232558139534885, "percentage": 54.15, "elapsed_time": "9:44:02", "remaining_time": "8:14:33"}
111
+ {"current_steps": 1100, "total_steps": 2013, "loss": 0.3912, "lr": 2.9871063151103395e-06, "epoch": 1.6381395348837209, "percentage": 54.64, "elapsed_time": "9:49:20", "remaining_time": "8:09:09"}
112
+ {"current_steps": 1110, "total_steps": 2013, "loss": 0.3883, "lr": 2.945986100429519e-06, "epoch": 1.6530232558139535, "percentage": 55.14, "elapsed_time": "9:54:37", "remaining_time": "8:03:44"}
113
+ {"current_steps": 1120, "total_steps": 2013, "loss": 0.3852, "lr": 2.9047998144570836e-06, "epoch": 1.667906976744186, "percentage": 55.64, "elapsed_time": "9:59:53", "remaining_time": "7:58:18"}
114
+ {"current_steps": 1130, "total_steps": 2013, "loss": 0.389, "lr": 2.8635613420098922e-06, "epoch": 1.6827906976744185, "percentage": 56.14, "elapsed_time": "10:05:09", "remaining_time": "7:52:52"}
115
+ {"current_steps": 1140, "total_steps": 2013, "loss": 0.387, "lr": 2.8222845854980257e-06, "epoch": 1.697674418604651, "percentage": 56.63, "elapsed_time": "10:10:24", "remaining_time": "7:47:26"}
116
+ {"current_steps": 1150, "total_steps": 2013, "loss": 0.3883, "lr": 2.7809834602379822e-06, "epoch": 1.7125581395348837, "percentage": 57.13, "elapsed_time": "10:15:41", "remaining_time": "7:42:02"}
117
+ {"current_steps": 1160, "total_steps": 2013, "loss": 0.3889, "lr": 2.739671889761507e-06, "epoch": 1.7274418604651163, "percentage": 57.63, "elapsed_time": "10:20:58", "remaining_time": "7:36:37"}
118
+ {"current_steps": 1170, "total_steps": 2013, "loss": 0.3824, "lr": 2.698363801121661e-06, "epoch": 1.7423255813953489, "percentage": 58.12, "elapsed_time": "10:26:15", "remaining_time": "7:31:13"}
119
+ {"current_steps": 1180, "total_steps": 2013, "loss": 0.3874, "lr": 2.657073120197702e-06, "epoch": 1.7572093023255815, "percentage": 58.62, "elapsed_time": "10:31:32", "remaining_time": "7:25:49"}
120
+ {"current_steps": 1190, "total_steps": 2013, "loss": 0.383, "lr": 2.6158137670003563e-06, "epoch": 1.772093023255814, "percentage": 59.12, "elapsed_time": "10:36:48", "remaining_time": "7:20:24"}
121
+ {"current_steps": 1200, "total_steps": 2013, "loss": 0.382, "lr": 2.574599650979073e-06, "epoch": 1.7869767441860465, "percentage": 59.61, "elapsed_time": "10:42:04", "remaining_time": "7:15:00"}
122
+ {"current_steps": 1210, "total_steps": 2013, "loss": 0.3847, "lr": 2.5334446663328414e-06, "epoch": 1.801860465116279, "percentage": 60.11, "elapsed_time": "10:47:20", "remaining_time": "7:09:36"}
123
+ {"current_steps": 1220, "total_steps": 2013, "loss": 0.3836, "lr": 2.492362687326143e-06, "epoch": 1.8167441860465117, "percentage": 60.61, "elapsed_time": "10:52:37", "remaining_time": "7:04:12"}
124
+ {"current_steps": 1230, "total_steps": 2013, "loss": 0.3848, "lr": 2.4513675636116257e-06, "epoch": 1.831627906976744, "percentage": 61.1, "elapsed_time": "10:57:53", "remaining_time": "6:58:48"}
125
+ {"current_steps": 1240, "total_steps": 2013, "loss": 0.3796, "lr": 2.4104731155610806e-06, "epoch": 1.8465116279069766, "percentage": 61.6, "elapsed_time": "11:03:09", "remaining_time": "6:53:24"}
126
+ {"current_steps": 1250, "total_steps": 2013, "loss": 0.3824, "lr": 2.369693129606284e-06, "epoch": 1.8613953488372093, "percentage": 62.1, "elapsed_time": "11:08:26", "remaining_time": "6:48:00"}
127
+ {"current_steps": 1260, "total_steps": 2013, "loss": 0.3871, "lr": 2.329041353591282e-06, "epoch": 1.8762790697674419, "percentage": 62.59, "elapsed_time": "11:13:42", "remaining_time": "6:42:37"}
128
+ {"current_steps": 1270, "total_steps": 2013, "loss": 0.3805, "lr": 2.288531492137687e-06, "epoch": 1.8911627906976745, "percentage": 63.09, "elapsed_time": "11:18:59", "remaining_time": "6:37:14"}
129
+ {"current_steps": 1280, "total_steps": 2013, "loss": 0.382, "lr": 2.248177202024544e-06, "epoch": 1.906046511627907, "percentage": 63.59, "elapsed_time": "11:24:15", "remaining_time": "6:31:50"}
130
+ {"current_steps": 1290, "total_steps": 2013, "loss": 0.3807, "lr": 2.207992087584323e-06, "epoch": 1.9209302325581397, "percentage": 64.08, "elapsed_time": "11:29:31", "remaining_time": "6:26:27"}
131
+ {"current_steps": 1300, "total_steps": 2013, "loss": 0.3849, "lr": 2.167989696116599e-06, "epoch": 1.935813953488372, "percentage": 64.58, "elapsed_time": "11:34:49", "remaining_time": "6:21:04"}
132
+ {"current_steps": 1310, "total_steps": 2013, "loss": 0.3823, "lr": 2.1281835133209493e-06, "epoch": 1.9506976744186046, "percentage": 65.08, "elapsed_time": "11:40:03", "remaining_time": "6:15:40"}
133
+ {"current_steps": 1320, "total_steps": 2013, "loss": 0.3811, "lr": 2.0885869587506267e-06, "epoch": 1.9655813953488372, "percentage": 65.57, "elapsed_time": "11:45:21", "remaining_time": "6:10:18"}
134
+ {"current_steps": 1330, "total_steps": 2013, "loss": 0.3784, "lr": 2.0492133812885277e-06, "epoch": 1.9804651162790696, "percentage": 66.07, "elapsed_time": "11:50:38", "remaining_time": "6:04:56"}
135
+ {"current_steps": 1340, "total_steps": 2013, "loss": 0.3816, "lr": 2.0100760546469864e-06, "epoch": 1.9953488372093022, "percentage": 66.57, "elapsed_time": "11:55:55", "remaining_time": "5:59:34"}
136
+ {"current_steps": 1342, "total_steps": 2013, "eval_loss": 0.05558985471725464, "epoch": 1.9983255813953489, "percentage": 66.67, "elapsed_time": "12:04:53", "remaining_time": "6:02:26"}