Commit
·
836ec68
1
Parent(s):
6bbc5c6
Training in progress, step 2500
Browse files- all_results.json +14 -0
- post-training eval_results.json +8 -0
- prediction_output.jsonl +1 -0
- pytorch_model.bin +1 -1
- train_results.json +8 -0
- trainer_state.json +382 -0
- training_args.bin +1 -1
all_results.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"eval_samples": 98,
|
4 |
+
"test_f1": 0.5503861750639598,
|
5 |
+
"test_loss": 1.6987024545669556,
|
6 |
+
"test_runtime": 4.9602,
|
7 |
+
"test_samples_per_second": 19.757,
|
8 |
+
"test_steps_per_second": 19.757,
|
9 |
+
"train_loss": 0.6695007115008145,
|
10 |
+
"train_runtime": 916.4977,
|
11 |
+
"train_samples": 702,
|
12 |
+
"train_samples_per_second": 3.83,
|
13 |
+
"train_steps_per_second": 3.83
|
14 |
+
}
|
post-training eval_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eval_samples": 98,
|
3 |
+
"test_f1": 0.5503861750639598,
|
4 |
+
"test_loss": 1.6987024545669556,
|
5 |
+
"test_runtime": 4.9602,
|
6 |
+
"test_samples_per_second": 19.757,
|
7 |
+
"test_steps_per_second": 19.757
|
8 |
+
}
|
prediction_output.jsonl
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pred": 10.019140243530273, "label": 0.0}, {"pred": 0.6532726287841797, "label": 5.0}, {"pred": -1.5070127248764038, "label": 2.0}, {"pred": 0.24536748230457306, "label": 4.0}, {"pred": -2.25246524810791, "label": 2.0}, {"pred": -2.66007924079895, "label": 8.0}, {"pred": -1.6002302169799805, "label": 8.0}, {"pred": -2.7276155948638916, "label": 2.0}, {"pred": -1.574631690979004, "label": 8.0}, {"pred": -0.33556482195854187, "label": 2.0}, {"pred": -3.09733247756958, "label": 2.0}, {"pred": 0.4764690697193146, "label": 2.0}, {"pred": 6.7093048095703125, "label": 5.0}, {"pred": -2.442629337310791, "label": 5.0}, {"pred": 2.1584315299987793, "label": 5.0}, {"pred": -1.3713687658309937, "label": 5.0}, {"pred": 0.45947328209877014, "label": 5.0}, {"pred": -3.8779995441436768, "label": 6.0}, {"pred": 0.47429022192955017, "label": 6.0}, {"pred": 1.6603984832763672, "label": 5.0}, {"pred": 8.309249877929688, "label": 8.0}, {"pred": 3.2501542568206787, "label": 8.0}, {"pred": -2.717172384262085, "label": 2.0}, {"pred": -0.39201176166534424, "label": 5.0}, {"pred": -4.595953464508057, "label": 5.0}, {"pred": -2.235063076019287, "label": 5.0}, {"pred": -2.44830584526062, "label": 2.0}, {"pred": -0.0069770533591508865, "label": 2.0}, {"pred": 0.969771146774292, "label": 2.0}, {"pred": 7.553024768829346, "label": 3.0}, {"pred": 2.6176557540893555, "label": 3.0}, {"pred": 0.691173255443573, "label": 5.0}, {"pred": 0.06238013133406639, "label": 0.0}, {"pred": -4.164968013763428, "label": 2.0}, {"pred": -3.2204692363739014, "label": 3.0}, {"pred": -3.3176817893981934, "label": 3.0}, {"pred": -2.7758054733276367, "label": 3.0}, {"pred": -0.1663847714662552, "label": 7.0}, {"pred": 9.606531143188477, "label": 8.0}, {"pred": -1.0946831703186035, "label": 7.0}, {"pred": -0.14866292476654053, "label": 7.0}, {"pred": 0.30310508608818054, "label": 2.0}, {"pred": -3.1653261184692383, "label": 2.0}, {"pred": -1.565669298171997, "label": 3.0}, {"pred": -0.18176725506782532, "label": 8.0}, {"pred": -1.1957494020462036, "label": 0.0}, {"pred": -1.6685035228729248, "label": 2.0}, {"pred": -0.043660327792167664, "label": 2.0}, {"pred": -1.7757004499435425, "label": 2.0}, {"pred": -1.4604750871658325, "label": 2.0}, {"pred": 1.0374609231948853, "label": 2.0}, {"pred": -1.8439505100250244, "label": 2.0}, {"pred": -0.7517821788787842, "label": 0.0}, {"pred": 8.289470672607422, "label": 7.0}, {"pred": -1.8440061807632446, "label": 7.0}, {"pred": -1.2976378202438354, "label": 7.0}, {"pred": -0.34481102228164673, "label": 7.0}, {"pred": -2.668887138366699, "label": 8.0}, {"pred": -1.2527223825454712, "label": 8.0}, {"pred": -0.075716033577919, "label": 8.0}, {"pred": -1.3003135919570923, "label": 3.0}, {"pred": -1.1259794235229492, "label": 6.0}, {"pred": 10.510120391845703, "label": 0.0}, {"pred": 2.0237245559692383, "label": 2.0}, {"pred": 1.3917936086654663, "label": 3.0}, {"pred": 6.7284111976623535, "label": 7.0}, {"pred": 4.734694957733154, "label": 0.0}, {"pred": -3.3061137199401855, "label": 5.0}, {"pred": -1.8624444007873535, "label": 2.0}, {"pred": -4.001344203948975, "label": 2.0}, {"pred": -1.8468009233474731, "label": 7.0}, {"pred": -3.2456326484680176, "label": 2.0}, {"pred": 0.39871928095817566, "label": 2.0}, {"pred": -2.057805299758911, "label": 2.0}, {"pred": 0.13218283653259277, "label": 8.0}, {"pred": -0.892180323600769, "label": 8.0}, {"pred": -1.2932769060134888, "label": 3.0}, {"pred": 0.1954576075077057, "label": 0.0}, {"pred": -2.229828357696533, "label": 0.0}, {"pred": -2.0269217491149902, "label": 2.0}, {"pred": 8.243239402770996, "label": 2.0}, {"pred": -2.59175968170166, "label": 2.0}, {"pred": 0.8506417870521545, "label": 8.0}, {"pred": 9.682441711425781, "label": 6.0}, {"pred": -1.4374347925186157, "label": 2.0}, {"pred": 0.009745392948389053, "label": 2.0}, {"pred": -0.4231429994106293, "label": 8.0}, {"pred": -2.4760589599609375, "label": 0.0}, {"pred": -1.4600414037704468, "label": 2.0}, {"pred": -0.09000376611948013, "label": 3.0}, {"pred": -3.15801739692688, "label": 3.0}, {"pred": -1.641605257987976, "label": 2.0}, {"pred": 1.6113959550857544, "label": 6.0}, {"pred": 1.35031259059906, "label": 0.0}, {"pred": 1.4748491048812866, "label": 0.0}, {"pred": 8.973405838012695, "label": 5.0}, {"pred": -0.44036799669265747, "label": 5.0}, {"pred": -2.8449594974517822, "label": 5.0}, {"pred": -2.715689182281494, "label": 5.0}, {"pred": -0.5715784430503845, "label": 5.0}, {"pred": -1.1643404960632324, "label": 2.0}, {"pred": 2.7366137504577637, "label": 2.0}, {"pred": 4.114213466644287, "label": 2.0}, {"pred": -1.6327688694000244, "label": 5.0}, {"pred": 3.206129312515259, "label": 4.0}, {"pred": 1.428622841835022, "label": 8.0}, {"pred": -1.9255108833312988, "label": 4.0}, {"pred": -5.721708297729492, "label": 4.0}, {"pred": -1.8454394340515137, "label": 4.0}, {"pred": -0.906534731388092, "label": 8.0}, {"pred": 0.5136074423789978, "label": 2.0}, {"pred": 0.31771302223205566, "label": 3.0}, {"pred": 6.170832633972168, "label": 2.0}, {"pred": 5.061820983886719, "label": 5.0}, {"pred": -2.271714448928833, "label": 5.0}, {"pred": -1.850223422050476, "label": 5.0}, {"pred": -3.501612901687622, "label": 0.0}, {"pred": -2.0070955753326416, "label": 6.0}, {"pred": -1.9781445264816284, "label": 5.0}, {"pred": -1.6112139225006104, "label": 5.0}, {"pred": -1.830712914466858, "label": 5.0}, {"pred": -0.9544073939323425, "label": 2.0}, {"pred": 3.8555877208709717, "label": 2.0}, {"pred": 4.870663166046143, "label": 2.0}, {"pred": -1.867636799812317, "label": 2.0}, {"pred": 1.9673343896865845, "label": 2.0}, {"pred": -0.6791107058525085, "label": 2.0}, {"pred": -1.7167493104934692, "label": 5.0}, {"pred": -2.843881368637085, "label": 5.0}, {"pred": -1.1429674625396729, "label": 6.0}, {"pred": 2.594322443008423, "label": 6.0}, {"pred": 7.284641265869141, "label": 0.0}, {"pred": 2.655223846435547, "label": 2.0}, {"pred": -2.549745798110962, "label": 2.0}, {"pred": -3.192248821258545, "label": 6.0}, {"pred": 0.08405379951000214, "label": 2.0}, {"pred": -3.116849184036255, "label": 5.0}, {"pred": -0.5237486958503723, "label": 5.0}, {"pred": 2.366180419921875, "label": 6.0}, {"pred": -1.2737070322036743, "label": 2.0}, {"pred": 2.989075183868408, "label": 5.0}, {"pred": 5.9830098152160645, "label": 2.0}, {"pred": -2.1102664470672607, "label": 8.0}, {"pred": -3.5376365184783936, "label": 2.0}, {"pred": -0.9231218099594116, "label": 2.0}, {"pred": -2.3496975898742676, "label": 5.0}, {"pred": -0.7576992511749268, "label": 8.0}, {"pred": 0.5094679594039917, "label": 5.0}, {"pred": -0.6562840342521667, "label": 5.0}, {"pred": 2.5642170906066895, "label": 5.0}, {"pred": 7.409799575805664, "label": 5.0}, {"pred": -2.4087905883789062, "label": 5.0}, {"pred": -2.6217246055603027, "label": 6.0}, {"pred": -0.6317837834358215, "label": 8.0}, {"pred": 0.762174665927887, "label": 6.0}, {"pred": 0.018339872360229492, "label": 7.0}, {"pred": -0.6353716254234314, "label": 6.0}, {"pred": -2.467911958694458, "label": 8.0}, {"pred": -1.4698598384857178, "label": 0.0}, {"pred": 6.675502777099609, "label": 2.0}, {"pred": -1.6566411256790161, "label": 6.0}, {"pred": -1.6938785314559937, "label": 5.0}, {"pred": 2.345128297805786, "label": 5.0}, {"pred": -0.5916361808776855, "label": 2.0}, {"pred": -3.186824083328247, "label": 2.0}, {"pred": -0.9120825529098511, "label": 8.0}, {"pred": -0.8187981247901917, "label": 8.0}, {"pred": -0.8101270198822021, "label": 0.0}, {"pred": 7.5780930519104, "label": 2.0}, {"pred": -2.928427219390869, "label": 0.0}, {"pred": -1.3859901428222656, "label": 7.0}, {"pred": 3.0276236534118652, "label": 2.0}, {"pred": -1.8224083185195923, "label": 2.0}, {"pred": -4.104769706726074, "label": 8.0}, {"pred": 0.03777565062046051, "label": 6.0}, {"pred": -0.014997448772192001, "label": 6.0}, {"pred": 1.0839900970458984, "label": 5.0}, {"pred": 6.335562229156494, "label": 5.0}, {"pred": -3.4330081939697266, "label": 5.0}, {"pred": -2.156536817550659, "label": 5.0}, {"pred": -1.3603081703186035, "label": 5.0}, {"pred": -1.5407929420471191, "label": 5.0}, {"pred": -0.07066483795642853, "label": 5.0}, {"pred": -1.6604976654052734, "label": 6.0}, {"pred": -1.440683364868164, "label": 8.0}, {"pred": 0.9588245153427124, "label": 5.0}, {"pred": -1.7472809553146362, "label": 5.0}, {"pred": -1.087075114250183, "label": 3.0}, {"pred": 8.69987678527832, "label": 3.0}, {"pred": -1.4591203927993774, "label": 6.0}, {"pred": -1.2280433177947998, "label": 2.0}, {"pred": -0.5524106621742249, "label": 3.0}, {"pred": -2.392406463623047, "label": 2.0}, {"pred": -1.3709814548492432, "label": 2.0}, {"pred": -0.1875690072774887, "label": 2.0}, {"pred": -1.5981941223144531, "label": 0.0}, {"pred": -1.1282113790512085, "label": 2.0}, {"pred": 10.509876251220703, "label": 5.0}, {"pred": -0.5587414503097534, "label": 5.0}, {"pred": -0.11731800436973572, "label": 5.0}, {"pred": 8.627680778503418, "label": 2.0}, {"pred": 4.010296821594238, "label": 6.0}, {"pred": -3.5236639976501465, "label": 3.0}, {"pred": -2.157024383544922, "label": 3.0}, {"pred": -1.5002130270004272, "label": 6.0}, {"pred": -1.053414225578308, "label": 3.0}, {"pred": -3.027621030807495, "label": 2.0}, {"pred": 3.2739715576171875, "label": 8.0}, {"pred": -2.5486247539520264, "label": 8.0}, {"pred": -3.9105663299560547, "label": 0.0}, {"pred": 0.9814618825912476, "label": 1.0}, {"pred": -0.46406108140945435, "label": 2.0}, {"pred": 2.880763053894043, "label": 3.0}, {"pred": 5.093158721923828, "label": 6.0}, {"pred": -3.3170182704925537, "label": 8.0}, {"pred": -2.14131498336792, "label": 7.0}, {"pred": -1.5651869773864746, "label": 0.0}, {"pred": 0.6249361038208008, "label": 8.0}, {"pred": 3.9404008388519287, "label": 2.0}, {"pred": 2.718013048171997, "label": 2.0}, {"pred": -2.9450290203094482, "label": 5.0}, {"pred": 6.749814033508301, "label": 5.0}, {"pred": -1.64478600025177, "label": 7.0}, {"pred": -4.171202659606934, "label": 2.0}, {"pred": -3.1550910472869873, "label": 2.0}, {"pred": -2.1020801067352295, "label": 8.0}, {"pred": -1.6797502040863037, "label": 5.0}, {"pred": 6.253066062927246, "label": 5.0}, {"pred": 4.325278282165527, "label": 2.0}, {"pred": -3.1421334743499756, "label": 6.0}, {"pred": 4.086713790893555, "label": 6.0}, {"pred": -1.0735156536102295, "label": 2.0}, {"pred": -3.3857691287994385, "label": 5.0}, {"pred": -2.6780574321746826, "label": 2.0}, {"pred": -2.3147029876708984, "label": 5.0}, {"pred": 0.46940743923187256, "label": 5.0}, {"pred": 9.495539665222168, "label": 8.0}, {"pred": -1.889843463897705, "label": 5.0}, {"pred": 0.36386972665786743, "label": 2.0}, {"pred": -0.4371775984764099, "label": 6.0}, {"pred": -2.8748319149017334, "label": 6.0}, {"pred": -1.1083565950393677, "label": 5.0}, {"pred": 0.13744854927062988, "label": 6.0}, {"pred": -1.4635956287384033, "label": 2.0}, {"pred": -0.5616008043289185, "label": 2.0}, {"pred": 8.173700332641602, "label": 5.0}, {"pred": 4.250307083129883, "label": 5.0}, {"pred": -2.4926605224609375, "label": 0.0}, {"pred": -0.31131792068481445, "label": 5.0}, {"pred": -3.5435919761657715, "label": 2.0}, {"pred": -0.29817143082618713, "label": 2.0}, {"pred": -2.634324073791504, "label": 2.0}, {"pred": -1.6608585119247437, "label": 5.0}, {"pred": -1.39112389087677, "label": 2.0}, {"pred": 7.657371997833252, "label": 6.0}, {"pred": 0.5764560699462891, "label": 6.0}, {"pred": -0.8287283182144165, "label": 6.0}, {"pred": 4.195872783660889, "label": 6.0}, {"pred": -0.7789970636367798, "label": 5.0}, {"pred": -2.123466730117798, "label": 5.0}, {"pred": -3.2755870819091797, "label": 5.0}, {"pred": -2.2995500564575195, "label": 5.0}, {"pred": -4.09137487411499, "label": 5.0}, {"pred": 3.905306577682495, "label": 2.0}, {"pred": 5.917882442474365, "label": 5.0}, {"pred": -2.399942636489868, "label": 8.0}, {"pred": 0.7929872274398804, "label": 8.0}, {"pred": -0.8256539106369019, "label": 0.0}, {"pred": 1.5062816143035889, "label": 3.0}, {"pred": -2.261814594268799, "label": 2.0}, {"pred": -0.10297468304634094, "label": 2.0}, {"pred": -0.8380897641181946, "label": 2.0}, {"pred": 1.7574100494384766, "label": 8.0}, {"pred": -0.9829455614089966, "label": 8.0}, {"pred": -1.2617555856704712, "label": 0.0}, {"pred": 6.876880168914795, "label": 2.0}, {"pred": 1.2606760263442993, "label": 0.0}, {"pred": -3.835230588912964, "label": 7.0}, {"pred": -1.5416922569274902, "label": 0.0}, {"pred": -3.7833173274993896, "label": 0.0}, {"pred": -1.4478707313537598, "label": 0.0}, {"pred": 3.486963987350464, "label": 8.0}, {"pred": 1.1780611276626587, "label": 8.0}, {"pred": -1.6854732036590576, "label": 0.0}, {"pred": 5.843297004699707, "label": 0.0}, {"pred": 3.6902544498443604, "label": 2.0}, {"pred": -2.237409830093384, "label": 2.0}, {"pred": -3.392303943634033, "label": 0.0}]
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 714922721
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb947af03d15814808e3e8d8715807fc9620fa797908c7f3fb658d3cd2b7cdc6
|
3 |
size 714922721
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"train_loss": 0.6695007115008145,
|
4 |
+
"train_runtime": 916.4977,
|
5 |
+
"train_samples": 702,
|
6 |
+
"train_samples_per_second": 3.83,
|
7 |
+
"train_steps_per_second": 3.83
|
8 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,382 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.0,
|
5 |
+
"global_step": 3510,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.14,
|
12 |
+
"eval_f1": 0.3722687284743791,
|
13 |
+
"eval_loss": 1.3361328840255737,
|
14 |
+
"eval_runtime": 3.2985,
|
15 |
+
"eval_samples_per_second": 29.71,
|
16 |
+
"eval_steps_per_second": 29.71,
|
17 |
+
"step": 100
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 0.28,
|
21 |
+
"eval_f1": 0.4344699929794222,
|
22 |
+
"eval_loss": 1.1836130619049072,
|
23 |
+
"eval_runtime": 3.274,
|
24 |
+
"eval_samples_per_second": 29.933,
|
25 |
+
"eval_steps_per_second": 29.933,
|
26 |
+
"step": 200
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 0.43,
|
30 |
+
"eval_f1": 0.3996935122704078,
|
31 |
+
"eval_loss": 1.1635534763336182,
|
32 |
+
"eval_runtime": 3.2772,
|
33 |
+
"eval_samples_per_second": 29.904,
|
34 |
+
"eval_steps_per_second": 29.904,
|
35 |
+
"step": 300
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"epoch": 0.57,
|
39 |
+
"eval_f1": 0.5028054395723479,
|
40 |
+
"eval_loss": 1.353499174118042,
|
41 |
+
"eval_runtime": 3.2926,
|
42 |
+
"eval_samples_per_second": 29.764,
|
43 |
+
"eval_steps_per_second": 29.764,
|
44 |
+
"step": 400
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.71,
|
48 |
+
"learning_rate": 2.572649572649573e-05,
|
49 |
+
"loss": 1.2064,
|
50 |
+
"step": 500
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"epoch": 0.71,
|
54 |
+
"eval_f1": 0.47071964928887344,
|
55 |
+
"eval_loss": 1.2940737009048462,
|
56 |
+
"eval_runtime": 3.3744,
|
57 |
+
"eval_samples_per_second": 29.042,
|
58 |
+
"eval_steps_per_second": 29.042,
|
59 |
+
"step": 500
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 0.85,
|
63 |
+
"eval_f1": 0.49370416278560797,
|
64 |
+
"eval_loss": 1.2891041040420532,
|
65 |
+
"eval_runtime": 3.3094,
|
66 |
+
"eval_samples_per_second": 29.613,
|
67 |
+
"eval_steps_per_second": 29.613,
|
68 |
+
"step": 600
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"epoch": 1.0,
|
72 |
+
"eval_f1": 0.47736617527595926,
|
73 |
+
"eval_loss": 1.2047343254089355,
|
74 |
+
"eval_runtime": 3.3145,
|
75 |
+
"eval_samples_per_second": 29.567,
|
76 |
+
"eval_steps_per_second": 29.567,
|
77 |
+
"step": 700
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 1.14,
|
81 |
+
"eval_f1": 0.4943830125990876,
|
82 |
+
"eval_loss": 1.2190661430358887,
|
83 |
+
"eval_runtime": 3.3377,
|
84 |
+
"eval_samples_per_second": 29.362,
|
85 |
+
"eval_steps_per_second": 29.362,
|
86 |
+
"step": 800
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.28,
|
90 |
+
"eval_f1": 0.4777863203104454,
|
91 |
+
"eval_loss": 1.174961805343628,
|
92 |
+
"eval_runtime": 3.3391,
|
93 |
+
"eval_samples_per_second": 29.349,
|
94 |
+
"eval_steps_per_second": 29.349,
|
95 |
+
"step": 900
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"epoch": 1.42,
|
99 |
+
"learning_rate": 2.1452991452991456e-05,
|
100 |
+
"loss": 0.9207,
|
101 |
+
"step": 1000
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"epoch": 1.42,
|
105 |
+
"eval_f1": 0.49086129207075757,
|
106 |
+
"eval_loss": 1.3087153434753418,
|
107 |
+
"eval_runtime": 3.4003,
|
108 |
+
"eval_samples_per_second": 28.821,
|
109 |
+
"eval_steps_per_second": 28.821,
|
110 |
+
"step": 1000
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 1.57,
|
114 |
+
"eval_f1": 0.49757882395260544,
|
115 |
+
"eval_loss": 1.2435556650161743,
|
116 |
+
"eval_runtime": 3.4366,
|
117 |
+
"eval_samples_per_second": 28.517,
|
118 |
+
"eval_steps_per_second": 28.517,
|
119 |
+
"step": 1100
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 1.71,
|
123 |
+
"eval_f1": 0.503327058221218,
|
124 |
+
"eval_loss": 1.1465363502502441,
|
125 |
+
"eval_runtime": 3.2944,
|
126 |
+
"eval_samples_per_second": 29.748,
|
127 |
+
"eval_steps_per_second": 29.748,
|
128 |
+
"step": 1200
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.85,
|
132 |
+
"eval_f1": 0.5141589868888157,
|
133 |
+
"eval_loss": 1.113364577293396,
|
134 |
+
"eval_runtime": 3.3642,
|
135 |
+
"eval_samples_per_second": 29.131,
|
136 |
+
"eval_steps_per_second": 29.131,
|
137 |
+
"step": 1300
|
138 |
+
},
|
139 |
+
{
|
140 |
+
"epoch": 1.99,
|
141 |
+
"eval_f1": 0.5383469405673188,
|
142 |
+
"eval_loss": 1.1939647197723389,
|
143 |
+
"eval_runtime": 3.3033,
|
144 |
+
"eval_samples_per_second": 29.668,
|
145 |
+
"eval_steps_per_second": 29.668,
|
146 |
+
"step": 1400
|
147 |
+
},
|
148 |
+
{
|
149 |
+
"epoch": 2.14,
|
150 |
+
"learning_rate": 1.7179487179487178e-05,
|
151 |
+
"loss": 0.8149,
|
152 |
+
"step": 1500
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"epoch": 2.14,
|
156 |
+
"eval_f1": 0.5291030100787034,
|
157 |
+
"eval_loss": 1.2552497386932373,
|
158 |
+
"eval_runtime": 3.7541,
|
159 |
+
"eval_samples_per_second": 26.105,
|
160 |
+
"eval_steps_per_second": 26.105,
|
161 |
+
"step": 1500
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 2.28,
|
165 |
+
"eval_f1": 0.5259736412492381,
|
166 |
+
"eval_loss": 1.3746747970581055,
|
167 |
+
"eval_runtime": 4.9995,
|
168 |
+
"eval_samples_per_second": 19.602,
|
169 |
+
"eval_steps_per_second": 19.602,
|
170 |
+
"step": 1600
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 2.42,
|
174 |
+
"eval_f1": 0.5329388682083431,
|
175 |
+
"eval_loss": 1.3680145740509033,
|
176 |
+
"eval_runtime": 5.1597,
|
177 |
+
"eval_samples_per_second": 18.993,
|
178 |
+
"eval_steps_per_second": 18.993,
|
179 |
+
"step": 1700
|
180 |
+
},
|
181 |
+
{
|
182 |
+
"epoch": 2.56,
|
183 |
+
"eval_f1": 0.5189920532535025,
|
184 |
+
"eval_loss": 1.27865469455719,
|
185 |
+
"eval_runtime": 5.0223,
|
186 |
+
"eval_samples_per_second": 19.513,
|
187 |
+
"eval_steps_per_second": 19.513,
|
188 |
+
"step": 1800
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"epoch": 2.71,
|
192 |
+
"eval_f1": 0.5409205239275264,
|
193 |
+
"eval_loss": 1.3888845443725586,
|
194 |
+
"eval_runtime": 3.3132,
|
195 |
+
"eval_samples_per_second": 29.579,
|
196 |
+
"eval_steps_per_second": 29.579,
|
197 |
+
"step": 1900
|
198 |
+
},
|
199 |
+
{
|
200 |
+
"epoch": 2.85,
|
201 |
+
"learning_rate": 1.2905982905982905e-05,
|
202 |
+
"loss": 0.6152,
|
203 |
+
"step": 2000
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"epoch": 2.85,
|
207 |
+
"eval_f1": 0.543504294934508,
|
208 |
+
"eval_loss": 1.3602004051208496,
|
209 |
+
"eval_runtime": 3.3336,
|
210 |
+
"eval_samples_per_second": 29.398,
|
211 |
+
"eval_steps_per_second": 29.398,
|
212 |
+
"step": 2000
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 2.99,
|
216 |
+
"eval_f1": 0.5467811408362643,
|
217 |
+
"eval_loss": 1.3174574375152588,
|
218 |
+
"eval_runtime": 5.0569,
|
219 |
+
"eval_samples_per_second": 19.379,
|
220 |
+
"eval_steps_per_second": 19.379,
|
221 |
+
"step": 2100
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"epoch": 3.13,
|
225 |
+
"eval_f1": 0.5365057187973831,
|
226 |
+
"eval_loss": 1.5886870622634888,
|
227 |
+
"eval_runtime": 4.5058,
|
228 |
+
"eval_samples_per_second": 21.75,
|
229 |
+
"eval_steps_per_second": 21.75,
|
230 |
+
"step": 2200
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"epoch": 3.28,
|
234 |
+
"eval_f1": 0.5563382534701277,
|
235 |
+
"eval_loss": 1.517231822013855,
|
236 |
+
"eval_runtime": 4.5269,
|
237 |
+
"eval_samples_per_second": 21.648,
|
238 |
+
"eval_steps_per_second": 21.648,
|
239 |
+
"step": 2300
|
240 |
+
},
|
241 |
+
{
|
242 |
+
"epoch": 3.42,
|
243 |
+
"eval_f1": 0.5661390498930103,
|
244 |
+
"eval_loss": 1.5470443964004517,
|
245 |
+
"eval_runtime": 3.6905,
|
246 |
+
"eval_samples_per_second": 26.555,
|
247 |
+
"eval_steps_per_second": 26.555,
|
248 |
+
"step": 2400
|
249 |
+
},
|
250 |
+
{
|
251 |
+
"epoch": 3.56,
|
252 |
+
"learning_rate": 8.632478632478633e-06,
|
253 |
+
"loss": 0.4719,
|
254 |
+
"step": 2500
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 3.56,
|
258 |
+
"eval_f1": 0.521216772952552,
|
259 |
+
"eval_loss": 1.4928430318832397,
|
260 |
+
"eval_runtime": 3.3155,
|
261 |
+
"eval_samples_per_second": 29.558,
|
262 |
+
"eval_steps_per_second": 29.558,
|
263 |
+
"step": 2500
|
264 |
+
},
|
265 |
+
{
|
266 |
+
"epoch": 3.7,
|
267 |
+
"eval_f1": 0.5356457612585566,
|
268 |
+
"eval_loss": 1.6497721672058105,
|
269 |
+
"eval_runtime": 4.8518,
|
270 |
+
"eval_samples_per_second": 20.199,
|
271 |
+
"eval_steps_per_second": 20.199,
|
272 |
+
"step": 2600
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"epoch": 3.85,
|
276 |
+
"eval_f1": 0.5596834952223371,
|
277 |
+
"eval_loss": 1.4976708889007568,
|
278 |
+
"eval_runtime": 4.6972,
|
279 |
+
"eval_samples_per_second": 20.863,
|
280 |
+
"eval_steps_per_second": 20.863,
|
281 |
+
"step": 2700
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"epoch": 3.99,
|
285 |
+
"eval_f1": 0.5470066167039311,
|
286 |
+
"eval_loss": 1.471981167793274,
|
287 |
+
"eval_runtime": 4.5019,
|
288 |
+
"eval_samples_per_second": 21.769,
|
289 |
+
"eval_steps_per_second": 21.769,
|
290 |
+
"step": 2800
|
291 |
+
},
|
292 |
+
{
|
293 |
+
"epoch": 4.13,
|
294 |
+
"eval_f1": 0.5492964393504802,
|
295 |
+
"eval_loss": 1.5796676874160767,
|
296 |
+
"eval_runtime": 4.7041,
|
297 |
+
"eval_samples_per_second": 20.833,
|
298 |
+
"eval_steps_per_second": 20.833,
|
299 |
+
"step": 2900
|
300 |
+
},
|
301 |
+
{
|
302 |
+
"epoch": 4.27,
|
303 |
+
"learning_rate": 4.358974358974359e-06,
|
304 |
+
"loss": 0.372,
|
305 |
+
"step": 3000
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"epoch": 4.27,
|
309 |
+
"eval_f1": 0.5445354826532323,
|
310 |
+
"eval_loss": 1.6874395608901978,
|
311 |
+
"eval_runtime": 3.9793,
|
312 |
+
"eval_samples_per_second": 24.628,
|
313 |
+
"eval_steps_per_second": 24.628,
|
314 |
+
"step": 3000
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"epoch": 4.42,
|
318 |
+
"eval_f1": 0.5544723066439012,
|
319 |
+
"eval_loss": 1.6702477931976318,
|
320 |
+
"eval_runtime": 4.7443,
|
321 |
+
"eval_samples_per_second": 20.656,
|
322 |
+
"eval_steps_per_second": 20.656,
|
323 |
+
"step": 3100
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"epoch": 4.56,
|
327 |
+
"eval_f1": 0.5469058666319371,
|
328 |
+
"eval_loss": 1.7671833038330078,
|
329 |
+
"eval_runtime": 4.6665,
|
330 |
+
"eval_samples_per_second": 21.001,
|
331 |
+
"eval_steps_per_second": 21.001,
|
332 |
+
"step": 3200
|
333 |
+
},
|
334 |
+
{
|
335 |
+
"epoch": 4.7,
|
336 |
+
"eval_f1": 0.5485370297299399,
|
337 |
+
"eval_loss": 1.7351080179214478,
|
338 |
+
"eval_runtime": 4.8851,
|
339 |
+
"eval_samples_per_second": 20.061,
|
340 |
+
"eval_steps_per_second": 20.061,
|
341 |
+
"step": 3300
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"epoch": 4.84,
|
345 |
+
"eval_f1": 0.5497797755164764,
|
346 |
+
"eval_loss": 1.7282612323760986,
|
347 |
+
"eval_runtime": 5.1791,
|
348 |
+
"eval_samples_per_second": 18.922,
|
349 |
+
"eval_steps_per_second": 18.922,
|
350 |
+
"step": 3400
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"epoch": 4.99,
|
354 |
+
"learning_rate": 8.547008547008547e-08,
|
355 |
+
"loss": 0.2944,
|
356 |
+
"step": 3500
|
357 |
+
},
|
358 |
+
{
|
359 |
+
"epoch": 4.99,
|
360 |
+
"eval_f1": 0.5503861750639598,
|
361 |
+
"eval_loss": 1.698703408241272,
|
362 |
+
"eval_runtime": 5.6701,
|
363 |
+
"eval_samples_per_second": 17.284,
|
364 |
+
"eval_steps_per_second": 17.284,
|
365 |
+
"step": 3500
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"epoch": 5.0,
|
369 |
+
"step": 3510,
|
370 |
+
"total_flos": 2890172619430200.0,
|
371 |
+
"train_loss": 0.6695007115008145,
|
372 |
+
"train_runtime": 916.4977,
|
373 |
+
"train_samples_per_second": 3.83,
|
374 |
+
"train_steps_per_second": 3.83
|
375 |
+
}
|
376 |
+
],
|
377 |
+
"max_steps": 3510,
|
378 |
+
"num_train_epochs": 5,
|
379 |
+
"total_flos": 2890172619430200.0,
|
380 |
+
"trial_name": null,
|
381 |
+
"trial_params": null
|
382 |
+
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3899
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbdd39e5dadc71c4520b7ee1b5c990a33eee4d7fdad887960a5ecdb44855941e
|
3 |
size 3899
|