alex2awesome commited on
Commit
836ec68
·
1 Parent(s): 6bbc5c6

Training in progress, step 2500

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_samples": 98,
4
+ "test_f1": 0.5503861750639598,
5
+ "test_loss": 1.6987024545669556,
6
+ "test_runtime": 4.9602,
7
+ "test_samples_per_second": 19.757,
8
+ "test_steps_per_second": 19.757,
9
+ "train_loss": 0.6695007115008145,
10
+ "train_runtime": 916.4977,
11
+ "train_samples": 702,
12
+ "train_samples_per_second": 3.83,
13
+ "train_steps_per_second": 3.83
14
+ }
post-training eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_samples": 98,
3
+ "test_f1": 0.5503861750639598,
4
+ "test_loss": 1.6987024545669556,
5
+ "test_runtime": 4.9602,
6
+ "test_samples_per_second": 19.757,
7
+ "test_steps_per_second": 19.757
8
+ }
prediction_output.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pred": 10.019140243530273, "label": 0.0}, {"pred": 0.6532726287841797, "label": 5.0}, {"pred": -1.5070127248764038, "label": 2.0}, {"pred": 0.24536748230457306, "label": 4.0}, {"pred": -2.25246524810791, "label": 2.0}, {"pred": -2.66007924079895, "label": 8.0}, {"pred": -1.6002302169799805, "label": 8.0}, {"pred": -2.7276155948638916, "label": 2.0}, {"pred": -1.574631690979004, "label": 8.0}, {"pred": -0.33556482195854187, "label": 2.0}, {"pred": -3.09733247756958, "label": 2.0}, {"pred": 0.4764690697193146, "label": 2.0}, {"pred": 6.7093048095703125, "label": 5.0}, {"pred": -2.442629337310791, "label": 5.0}, {"pred": 2.1584315299987793, "label": 5.0}, {"pred": -1.3713687658309937, "label": 5.0}, {"pred": 0.45947328209877014, "label": 5.0}, {"pred": -3.8779995441436768, "label": 6.0}, {"pred": 0.47429022192955017, "label": 6.0}, {"pred": 1.6603984832763672, "label": 5.0}, {"pred": 8.309249877929688, "label": 8.0}, {"pred": 3.2501542568206787, "label": 8.0}, {"pred": -2.717172384262085, "label": 2.0}, {"pred": -0.39201176166534424, "label": 5.0}, {"pred": -4.595953464508057, "label": 5.0}, {"pred": -2.235063076019287, "label": 5.0}, {"pred": -2.44830584526062, "label": 2.0}, {"pred": -0.0069770533591508865, "label": 2.0}, {"pred": 0.969771146774292, "label": 2.0}, {"pred": 7.553024768829346, "label": 3.0}, {"pred": 2.6176557540893555, "label": 3.0}, {"pred": 0.691173255443573, "label": 5.0}, {"pred": 0.06238013133406639, "label": 0.0}, {"pred": -4.164968013763428, "label": 2.0}, {"pred": -3.2204692363739014, "label": 3.0}, {"pred": -3.3176817893981934, "label": 3.0}, {"pred": -2.7758054733276367, "label": 3.0}, {"pred": -0.1663847714662552, "label": 7.0}, {"pred": 9.606531143188477, "label": 8.0}, {"pred": -1.0946831703186035, "label": 7.0}, {"pred": -0.14866292476654053, "label": 7.0}, {"pred": 0.30310508608818054, "label": 2.0}, {"pred": -3.1653261184692383, "label": 2.0}, {"pred": -1.565669298171997, "label": 3.0}, {"pred": -0.18176725506782532, "label": 8.0}, {"pred": -1.1957494020462036, "label": 0.0}, {"pred": -1.6685035228729248, "label": 2.0}, {"pred": -0.043660327792167664, "label": 2.0}, {"pred": -1.7757004499435425, "label": 2.0}, {"pred": -1.4604750871658325, "label": 2.0}, {"pred": 1.0374609231948853, "label": 2.0}, {"pred": -1.8439505100250244, "label": 2.0}, {"pred": -0.7517821788787842, "label": 0.0}, {"pred": 8.289470672607422, "label": 7.0}, {"pred": -1.8440061807632446, "label": 7.0}, {"pred": -1.2976378202438354, "label": 7.0}, {"pred": -0.34481102228164673, "label": 7.0}, {"pred": -2.668887138366699, "label": 8.0}, {"pred": -1.2527223825454712, "label": 8.0}, {"pred": -0.075716033577919, "label": 8.0}, {"pred": -1.3003135919570923, "label": 3.0}, {"pred": -1.1259794235229492, "label": 6.0}, {"pred": 10.510120391845703, "label": 0.0}, {"pred": 2.0237245559692383, "label": 2.0}, {"pred": 1.3917936086654663, "label": 3.0}, {"pred": 6.7284111976623535, "label": 7.0}, {"pred": 4.734694957733154, "label": 0.0}, {"pred": -3.3061137199401855, "label": 5.0}, {"pred": -1.8624444007873535, "label": 2.0}, {"pred": -4.001344203948975, "label": 2.0}, {"pred": -1.8468009233474731, "label": 7.0}, {"pred": -3.2456326484680176, "label": 2.0}, {"pred": 0.39871928095817566, "label": 2.0}, {"pred": -2.057805299758911, "label": 2.0}, {"pred": 0.13218283653259277, "label": 8.0}, {"pred": -0.892180323600769, "label": 8.0}, {"pred": -1.2932769060134888, "label": 3.0}, {"pred": 0.1954576075077057, "label": 0.0}, {"pred": -2.229828357696533, "label": 0.0}, {"pred": -2.0269217491149902, "label": 2.0}, {"pred": 8.243239402770996, "label": 2.0}, {"pred": -2.59175968170166, "label": 2.0}, {"pred": 0.8506417870521545, "label": 8.0}, {"pred": 9.682441711425781, "label": 6.0}, {"pred": -1.4374347925186157, "label": 2.0}, {"pred": 0.009745392948389053, "label": 2.0}, {"pred": -0.4231429994106293, "label": 8.0}, {"pred": -2.4760589599609375, "label": 0.0}, {"pred": -1.4600414037704468, "label": 2.0}, {"pred": -0.09000376611948013, "label": 3.0}, {"pred": -3.15801739692688, "label": 3.0}, {"pred": -1.641605257987976, "label": 2.0}, {"pred": 1.6113959550857544, "label": 6.0}, {"pred": 1.35031259059906, "label": 0.0}, {"pred": 1.4748491048812866, "label": 0.0}, {"pred": 8.973405838012695, "label": 5.0}, {"pred": -0.44036799669265747, "label": 5.0}, {"pred": -2.8449594974517822, "label": 5.0}, {"pred": -2.715689182281494, "label": 5.0}, {"pred": -0.5715784430503845, "label": 5.0}, {"pred": -1.1643404960632324, "label": 2.0}, {"pred": 2.7366137504577637, "label": 2.0}, {"pred": 4.114213466644287, "label": 2.0}, {"pred": -1.6327688694000244, "label": 5.0}, {"pred": 3.206129312515259, "label": 4.0}, {"pred": 1.428622841835022, "label": 8.0}, {"pred": -1.9255108833312988, "label": 4.0}, {"pred": -5.721708297729492, "label": 4.0}, {"pred": -1.8454394340515137, "label": 4.0}, {"pred": -0.906534731388092, "label": 8.0}, {"pred": 0.5136074423789978, "label": 2.0}, {"pred": 0.31771302223205566, "label": 3.0}, {"pred": 6.170832633972168, "label": 2.0}, {"pred": 5.061820983886719, "label": 5.0}, {"pred": -2.271714448928833, "label": 5.0}, {"pred": -1.850223422050476, "label": 5.0}, {"pred": -3.501612901687622, "label": 0.0}, {"pred": -2.0070955753326416, "label": 6.0}, {"pred": -1.9781445264816284, "label": 5.0}, {"pred": -1.6112139225006104, "label": 5.0}, {"pred": -1.830712914466858, "label": 5.0}, {"pred": -0.9544073939323425, "label": 2.0}, {"pred": 3.8555877208709717, "label": 2.0}, {"pred": 4.870663166046143, "label": 2.0}, {"pred": -1.867636799812317, "label": 2.0}, {"pred": 1.9673343896865845, "label": 2.0}, {"pred": -0.6791107058525085, "label": 2.0}, {"pred": -1.7167493104934692, "label": 5.0}, {"pred": -2.843881368637085, "label": 5.0}, {"pred": -1.1429674625396729, "label": 6.0}, {"pred": 2.594322443008423, "label": 6.0}, {"pred": 7.284641265869141, "label": 0.0}, {"pred": 2.655223846435547, "label": 2.0}, {"pred": -2.549745798110962, "label": 2.0}, {"pred": -3.192248821258545, "label": 6.0}, {"pred": 0.08405379951000214, "label": 2.0}, {"pred": -3.116849184036255, "label": 5.0}, {"pred": -0.5237486958503723, "label": 5.0}, {"pred": 2.366180419921875, "label": 6.0}, {"pred": -1.2737070322036743, "label": 2.0}, {"pred": 2.989075183868408, "label": 5.0}, {"pred": 5.9830098152160645, "label": 2.0}, {"pred": -2.1102664470672607, "label": 8.0}, {"pred": -3.5376365184783936, "label": 2.0}, {"pred": -0.9231218099594116, "label": 2.0}, {"pred": -2.3496975898742676, "label": 5.0}, {"pred": -0.7576992511749268, "label": 8.0}, {"pred": 0.5094679594039917, "label": 5.0}, {"pred": -0.6562840342521667, "label": 5.0}, {"pred": 2.5642170906066895, "label": 5.0}, {"pred": 7.409799575805664, "label": 5.0}, {"pred": -2.4087905883789062, "label": 5.0}, {"pred": -2.6217246055603027, "label": 6.0}, {"pred": -0.6317837834358215, "label": 8.0}, {"pred": 0.762174665927887, "label": 6.0}, {"pred": 0.018339872360229492, "label": 7.0}, {"pred": -0.6353716254234314, "label": 6.0}, {"pred": -2.467911958694458, "label": 8.0}, {"pred": -1.4698598384857178, "label": 0.0}, {"pred": 6.675502777099609, "label": 2.0}, {"pred": -1.6566411256790161, "label": 6.0}, {"pred": -1.6938785314559937, "label": 5.0}, {"pred": 2.345128297805786, "label": 5.0}, {"pred": -0.5916361808776855, "label": 2.0}, {"pred": -3.186824083328247, "label": 2.0}, {"pred": -0.9120825529098511, "label": 8.0}, {"pred": -0.8187981247901917, "label": 8.0}, {"pred": -0.8101270198822021, "label": 0.0}, {"pred": 7.5780930519104, "label": 2.0}, {"pred": -2.928427219390869, "label": 0.0}, {"pred": -1.3859901428222656, "label": 7.0}, {"pred": 3.0276236534118652, "label": 2.0}, {"pred": -1.8224083185195923, "label": 2.0}, {"pred": -4.104769706726074, "label": 8.0}, {"pred": 0.03777565062046051, "label": 6.0}, {"pred": -0.014997448772192001, "label": 6.0}, {"pred": 1.0839900970458984, "label": 5.0}, {"pred": 6.335562229156494, "label": 5.0}, {"pred": -3.4330081939697266, "label": 5.0}, {"pred": -2.156536817550659, "label": 5.0}, {"pred": -1.3603081703186035, "label": 5.0}, {"pred": -1.5407929420471191, "label": 5.0}, {"pred": -0.07066483795642853, "label": 5.0}, {"pred": -1.6604976654052734, "label": 6.0}, {"pred": -1.440683364868164, "label": 8.0}, {"pred": 0.9588245153427124, "label": 5.0}, {"pred": -1.7472809553146362, "label": 5.0}, {"pred": -1.087075114250183, "label": 3.0}, {"pred": 8.69987678527832, "label": 3.0}, {"pred": -1.4591203927993774, "label": 6.0}, {"pred": -1.2280433177947998, "label": 2.0}, {"pred": -0.5524106621742249, "label": 3.0}, {"pred": -2.392406463623047, "label": 2.0}, {"pred": -1.3709814548492432, "label": 2.0}, {"pred": -0.1875690072774887, "label": 2.0}, {"pred": -1.5981941223144531, "label": 0.0}, {"pred": -1.1282113790512085, "label": 2.0}, {"pred": 10.509876251220703, "label": 5.0}, {"pred": -0.5587414503097534, "label": 5.0}, {"pred": -0.11731800436973572, "label": 5.0}, {"pred": 8.627680778503418, "label": 2.0}, {"pred": 4.010296821594238, "label": 6.0}, {"pred": -3.5236639976501465, "label": 3.0}, {"pred": -2.157024383544922, "label": 3.0}, {"pred": -1.5002130270004272, "label": 6.0}, {"pred": -1.053414225578308, "label": 3.0}, {"pred": -3.027621030807495, "label": 2.0}, {"pred": 3.2739715576171875, "label": 8.0}, {"pred": -2.5486247539520264, "label": 8.0}, {"pred": -3.9105663299560547, "label": 0.0}, {"pred": 0.9814618825912476, "label": 1.0}, {"pred": -0.46406108140945435, "label": 2.0}, {"pred": 2.880763053894043, "label": 3.0}, {"pred": 5.093158721923828, "label": 6.0}, {"pred": -3.3170182704925537, "label": 8.0}, {"pred": -2.14131498336792, "label": 7.0}, {"pred": -1.5651869773864746, "label": 0.0}, {"pred": 0.6249361038208008, "label": 8.0}, {"pred": 3.9404008388519287, "label": 2.0}, {"pred": 2.718013048171997, "label": 2.0}, {"pred": -2.9450290203094482, "label": 5.0}, {"pred": 6.749814033508301, "label": 5.0}, {"pred": -1.64478600025177, "label": 7.0}, {"pred": -4.171202659606934, "label": 2.0}, {"pred": -3.1550910472869873, "label": 2.0}, {"pred": -2.1020801067352295, "label": 8.0}, {"pred": -1.6797502040863037, "label": 5.0}, {"pred": 6.253066062927246, "label": 5.0}, {"pred": 4.325278282165527, "label": 2.0}, {"pred": -3.1421334743499756, "label": 6.0}, {"pred": 4.086713790893555, "label": 6.0}, {"pred": -1.0735156536102295, "label": 2.0}, {"pred": -3.3857691287994385, "label": 5.0}, {"pred": -2.6780574321746826, "label": 2.0}, {"pred": -2.3147029876708984, "label": 5.0}, {"pred": 0.46940743923187256, "label": 5.0}, {"pred": 9.495539665222168, "label": 8.0}, {"pred": -1.889843463897705, "label": 5.0}, {"pred": 0.36386972665786743, "label": 2.0}, {"pred": -0.4371775984764099, "label": 6.0}, {"pred": -2.8748319149017334, "label": 6.0}, {"pred": -1.1083565950393677, "label": 5.0}, {"pred": 0.13744854927062988, "label": 6.0}, {"pred": -1.4635956287384033, "label": 2.0}, {"pred": -0.5616008043289185, "label": 2.0}, {"pred": 8.173700332641602, "label": 5.0}, {"pred": 4.250307083129883, "label": 5.0}, {"pred": -2.4926605224609375, "label": 0.0}, {"pred": -0.31131792068481445, "label": 5.0}, {"pred": -3.5435919761657715, "label": 2.0}, {"pred": -0.29817143082618713, "label": 2.0}, {"pred": -2.634324073791504, "label": 2.0}, {"pred": -1.6608585119247437, "label": 5.0}, {"pred": -1.39112389087677, "label": 2.0}, {"pred": 7.657371997833252, "label": 6.0}, {"pred": 0.5764560699462891, "label": 6.0}, {"pred": -0.8287283182144165, "label": 6.0}, {"pred": 4.195872783660889, "label": 6.0}, {"pred": -0.7789970636367798, "label": 5.0}, {"pred": -2.123466730117798, "label": 5.0}, {"pred": -3.2755870819091797, "label": 5.0}, {"pred": -2.2995500564575195, "label": 5.0}, {"pred": -4.09137487411499, "label": 5.0}, {"pred": 3.905306577682495, "label": 2.0}, {"pred": 5.917882442474365, "label": 5.0}, {"pred": -2.399942636489868, "label": 8.0}, {"pred": 0.7929872274398804, "label": 8.0}, {"pred": -0.8256539106369019, "label": 0.0}, {"pred": 1.5062816143035889, "label": 3.0}, {"pred": -2.261814594268799, "label": 2.0}, {"pred": -0.10297468304634094, "label": 2.0}, {"pred": -0.8380897641181946, "label": 2.0}, {"pred": 1.7574100494384766, "label": 8.0}, {"pred": -0.9829455614089966, "label": 8.0}, {"pred": -1.2617555856704712, "label": 0.0}, {"pred": 6.876880168914795, "label": 2.0}, {"pred": 1.2606760263442993, "label": 0.0}, {"pred": -3.835230588912964, "label": 7.0}, {"pred": -1.5416922569274902, "label": 0.0}, {"pred": -3.7833173274993896, "label": 0.0}, {"pred": -1.4478707313537598, "label": 0.0}, {"pred": 3.486963987350464, "label": 8.0}, {"pred": 1.1780611276626587, "label": 8.0}, {"pred": -1.6854732036590576, "label": 0.0}, {"pred": 5.843297004699707, "label": 0.0}, {"pred": 3.6902544498443604, "label": 2.0}, {"pred": -2.237409830093384, "label": 2.0}, {"pred": -3.392303943634033, "label": 0.0}]
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b15b7ce8462e7e8dd2cf42599fe22fce9d427edadfbda587f6cac72bd7544650
3
  size 714922721
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb947af03d15814808e3e8d8715807fc9620fa797908c7f3fb658d3cd2b7cdc6
3
  size 714922721
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.6695007115008145,
4
+ "train_runtime": 916.4977,
5
+ "train_samples": 702,
6
+ "train_samples_per_second": 3.83,
7
+ "train_steps_per_second": 3.83
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 3510,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.14,
12
+ "eval_f1": 0.3722687284743791,
13
+ "eval_loss": 1.3361328840255737,
14
+ "eval_runtime": 3.2985,
15
+ "eval_samples_per_second": 29.71,
16
+ "eval_steps_per_second": 29.71,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.28,
21
+ "eval_f1": 0.4344699929794222,
22
+ "eval_loss": 1.1836130619049072,
23
+ "eval_runtime": 3.274,
24
+ "eval_samples_per_second": 29.933,
25
+ "eval_steps_per_second": 29.933,
26
+ "step": 200
27
+ },
28
+ {
29
+ "epoch": 0.43,
30
+ "eval_f1": 0.3996935122704078,
31
+ "eval_loss": 1.1635534763336182,
32
+ "eval_runtime": 3.2772,
33
+ "eval_samples_per_second": 29.904,
34
+ "eval_steps_per_second": 29.904,
35
+ "step": 300
36
+ },
37
+ {
38
+ "epoch": 0.57,
39
+ "eval_f1": 0.5028054395723479,
40
+ "eval_loss": 1.353499174118042,
41
+ "eval_runtime": 3.2926,
42
+ "eval_samples_per_second": 29.764,
43
+ "eval_steps_per_second": 29.764,
44
+ "step": 400
45
+ },
46
+ {
47
+ "epoch": 0.71,
48
+ "learning_rate": 2.572649572649573e-05,
49
+ "loss": 1.2064,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 0.71,
54
+ "eval_f1": 0.47071964928887344,
55
+ "eval_loss": 1.2940737009048462,
56
+ "eval_runtime": 3.3744,
57
+ "eval_samples_per_second": 29.042,
58
+ "eval_steps_per_second": 29.042,
59
+ "step": 500
60
+ },
61
+ {
62
+ "epoch": 0.85,
63
+ "eval_f1": 0.49370416278560797,
64
+ "eval_loss": 1.2891041040420532,
65
+ "eval_runtime": 3.3094,
66
+ "eval_samples_per_second": 29.613,
67
+ "eval_steps_per_second": 29.613,
68
+ "step": 600
69
+ },
70
+ {
71
+ "epoch": 1.0,
72
+ "eval_f1": 0.47736617527595926,
73
+ "eval_loss": 1.2047343254089355,
74
+ "eval_runtime": 3.3145,
75
+ "eval_samples_per_second": 29.567,
76
+ "eval_steps_per_second": 29.567,
77
+ "step": 700
78
+ },
79
+ {
80
+ "epoch": 1.14,
81
+ "eval_f1": 0.4943830125990876,
82
+ "eval_loss": 1.2190661430358887,
83
+ "eval_runtime": 3.3377,
84
+ "eval_samples_per_second": 29.362,
85
+ "eval_steps_per_second": 29.362,
86
+ "step": 800
87
+ },
88
+ {
89
+ "epoch": 1.28,
90
+ "eval_f1": 0.4777863203104454,
91
+ "eval_loss": 1.174961805343628,
92
+ "eval_runtime": 3.3391,
93
+ "eval_samples_per_second": 29.349,
94
+ "eval_steps_per_second": 29.349,
95
+ "step": 900
96
+ },
97
+ {
98
+ "epoch": 1.42,
99
+ "learning_rate": 2.1452991452991456e-05,
100
+ "loss": 0.9207,
101
+ "step": 1000
102
+ },
103
+ {
104
+ "epoch": 1.42,
105
+ "eval_f1": 0.49086129207075757,
106
+ "eval_loss": 1.3087153434753418,
107
+ "eval_runtime": 3.4003,
108
+ "eval_samples_per_second": 28.821,
109
+ "eval_steps_per_second": 28.821,
110
+ "step": 1000
111
+ },
112
+ {
113
+ "epoch": 1.57,
114
+ "eval_f1": 0.49757882395260544,
115
+ "eval_loss": 1.2435556650161743,
116
+ "eval_runtime": 3.4366,
117
+ "eval_samples_per_second": 28.517,
118
+ "eval_steps_per_second": 28.517,
119
+ "step": 1100
120
+ },
121
+ {
122
+ "epoch": 1.71,
123
+ "eval_f1": 0.503327058221218,
124
+ "eval_loss": 1.1465363502502441,
125
+ "eval_runtime": 3.2944,
126
+ "eval_samples_per_second": 29.748,
127
+ "eval_steps_per_second": 29.748,
128
+ "step": 1200
129
+ },
130
+ {
131
+ "epoch": 1.85,
132
+ "eval_f1": 0.5141589868888157,
133
+ "eval_loss": 1.113364577293396,
134
+ "eval_runtime": 3.3642,
135
+ "eval_samples_per_second": 29.131,
136
+ "eval_steps_per_second": 29.131,
137
+ "step": 1300
138
+ },
139
+ {
140
+ "epoch": 1.99,
141
+ "eval_f1": 0.5383469405673188,
142
+ "eval_loss": 1.1939647197723389,
143
+ "eval_runtime": 3.3033,
144
+ "eval_samples_per_second": 29.668,
145
+ "eval_steps_per_second": 29.668,
146
+ "step": 1400
147
+ },
148
+ {
149
+ "epoch": 2.14,
150
+ "learning_rate": 1.7179487179487178e-05,
151
+ "loss": 0.8149,
152
+ "step": 1500
153
+ },
154
+ {
155
+ "epoch": 2.14,
156
+ "eval_f1": 0.5291030100787034,
157
+ "eval_loss": 1.2552497386932373,
158
+ "eval_runtime": 3.7541,
159
+ "eval_samples_per_second": 26.105,
160
+ "eval_steps_per_second": 26.105,
161
+ "step": 1500
162
+ },
163
+ {
164
+ "epoch": 2.28,
165
+ "eval_f1": 0.5259736412492381,
166
+ "eval_loss": 1.3746747970581055,
167
+ "eval_runtime": 4.9995,
168
+ "eval_samples_per_second": 19.602,
169
+ "eval_steps_per_second": 19.602,
170
+ "step": 1600
171
+ },
172
+ {
173
+ "epoch": 2.42,
174
+ "eval_f1": 0.5329388682083431,
175
+ "eval_loss": 1.3680145740509033,
176
+ "eval_runtime": 5.1597,
177
+ "eval_samples_per_second": 18.993,
178
+ "eval_steps_per_second": 18.993,
179
+ "step": 1700
180
+ },
181
+ {
182
+ "epoch": 2.56,
183
+ "eval_f1": 0.5189920532535025,
184
+ "eval_loss": 1.27865469455719,
185
+ "eval_runtime": 5.0223,
186
+ "eval_samples_per_second": 19.513,
187
+ "eval_steps_per_second": 19.513,
188
+ "step": 1800
189
+ },
190
+ {
191
+ "epoch": 2.71,
192
+ "eval_f1": 0.5409205239275264,
193
+ "eval_loss": 1.3888845443725586,
194
+ "eval_runtime": 3.3132,
195
+ "eval_samples_per_second": 29.579,
196
+ "eval_steps_per_second": 29.579,
197
+ "step": 1900
198
+ },
199
+ {
200
+ "epoch": 2.85,
201
+ "learning_rate": 1.2905982905982905e-05,
202
+ "loss": 0.6152,
203
+ "step": 2000
204
+ },
205
+ {
206
+ "epoch": 2.85,
207
+ "eval_f1": 0.543504294934508,
208
+ "eval_loss": 1.3602004051208496,
209
+ "eval_runtime": 3.3336,
210
+ "eval_samples_per_second": 29.398,
211
+ "eval_steps_per_second": 29.398,
212
+ "step": 2000
213
+ },
214
+ {
215
+ "epoch": 2.99,
216
+ "eval_f1": 0.5467811408362643,
217
+ "eval_loss": 1.3174574375152588,
218
+ "eval_runtime": 5.0569,
219
+ "eval_samples_per_second": 19.379,
220
+ "eval_steps_per_second": 19.379,
221
+ "step": 2100
222
+ },
223
+ {
224
+ "epoch": 3.13,
225
+ "eval_f1": 0.5365057187973831,
226
+ "eval_loss": 1.5886870622634888,
227
+ "eval_runtime": 4.5058,
228
+ "eval_samples_per_second": 21.75,
229
+ "eval_steps_per_second": 21.75,
230
+ "step": 2200
231
+ },
232
+ {
233
+ "epoch": 3.28,
234
+ "eval_f1": 0.5563382534701277,
235
+ "eval_loss": 1.517231822013855,
236
+ "eval_runtime": 4.5269,
237
+ "eval_samples_per_second": 21.648,
238
+ "eval_steps_per_second": 21.648,
239
+ "step": 2300
240
+ },
241
+ {
242
+ "epoch": 3.42,
243
+ "eval_f1": 0.5661390498930103,
244
+ "eval_loss": 1.5470443964004517,
245
+ "eval_runtime": 3.6905,
246
+ "eval_samples_per_second": 26.555,
247
+ "eval_steps_per_second": 26.555,
248
+ "step": 2400
249
+ },
250
+ {
251
+ "epoch": 3.56,
252
+ "learning_rate": 8.632478632478633e-06,
253
+ "loss": 0.4719,
254
+ "step": 2500
255
+ },
256
+ {
257
+ "epoch": 3.56,
258
+ "eval_f1": 0.521216772952552,
259
+ "eval_loss": 1.4928430318832397,
260
+ "eval_runtime": 3.3155,
261
+ "eval_samples_per_second": 29.558,
262
+ "eval_steps_per_second": 29.558,
263
+ "step": 2500
264
+ },
265
+ {
266
+ "epoch": 3.7,
267
+ "eval_f1": 0.5356457612585566,
268
+ "eval_loss": 1.6497721672058105,
269
+ "eval_runtime": 4.8518,
270
+ "eval_samples_per_second": 20.199,
271
+ "eval_steps_per_second": 20.199,
272
+ "step": 2600
273
+ },
274
+ {
275
+ "epoch": 3.85,
276
+ "eval_f1": 0.5596834952223371,
277
+ "eval_loss": 1.4976708889007568,
278
+ "eval_runtime": 4.6972,
279
+ "eval_samples_per_second": 20.863,
280
+ "eval_steps_per_second": 20.863,
281
+ "step": 2700
282
+ },
283
+ {
284
+ "epoch": 3.99,
285
+ "eval_f1": 0.5470066167039311,
286
+ "eval_loss": 1.471981167793274,
287
+ "eval_runtime": 4.5019,
288
+ "eval_samples_per_second": 21.769,
289
+ "eval_steps_per_second": 21.769,
290
+ "step": 2800
291
+ },
292
+ {
293
+ "epoch": 4.13,
294
+ "eval_f1": 0.5492964393504802,
295
+ "eval_loss": 1.5796676874160767,
296
+ "eval_runtime": 4.7041,
297
+ "eval_samples_per_second": 20.833,
298
+ "eval_steps_per_second": 20.833,
299
+ "step": 2900
300
+ },
301
+ {
302
+ "epoch": 4.27,
303
+ "learning_rate": 4.358974358974359e-06,
304
+ "loss": 0.372,
305
+ "step": 3000
306
+ },
307
+ {
308
+ "epoch": 4.27,
309
+ "eval_f1": 0.5445354826532323,
310
+ "eval_loss": 1.6874395608901978,
311
+ "eval_runtime": 3.9793,
312
+ "eval_samples_per_second": 24.628,
313
+ "eval_steps_per_second": 24.628,
314
+ "step": 3000
315
+ },
316
+ {
317
+ "epoch": 4.42,
318
+ "eval_f1": 0.5544723066439012,
319
+ "eval_loss": 1.6702477931976318,
320
+ "eval_runtime": 4.7443,
321
+ "eval_samples_per_second": 20.656,
322
+ "eval_steps_per_second": 20.656,
323
+ "step": 3100
324
+ },
325
+ {
326
+ "epoch": 4.56,
327
+ "eval_f1": 0.5469058666319371,
328
+ "eval_loss": 1.7671833038330078,
329
+ "eval_runtime": 4.6665,
330
+ "eval_samples_per_second": 21.001,
331
+ "eval_steps_per_second": 21.001,
332
+ "step": 3200
333
+ },
334
+ {
335
+ "epoch": 4.7,
336
+ "eval_f1": 0.5485370297299399,
337
+ "eval_loss": 1.7351080179214478,
338
+ "eval_runtime": 4.8851,
339
+ "eval_samples_per_second": 20.061,
340
+ "eval_steps_per_second": 20.061,
341
+ "step": 3300
342
+ },
343
+ {
344
+ "epoch": 4.84,
345
+ "eval_f1": 0.5497797755164764,
346
+ "eval_loss": 1.7282612323760986,
347
+ "eval_runtime": 5.1791,
348
+ "eval_samples_per_second": 18.922,
349
+ "eval_steps_per_second": 18.922,
350
+ "step": 3400
351
+ },
352
+ {
353
+ "epoch": 4.99,
354
+ "learning_rate": 8.547008547008547e-08,
355
+ "loss": 0.2944,
356
+ "step": 3500
357
+ },
358
+ {
359
+ "epoch": 4.99,
360
+ "eval_f1": 0.5503861750639598,
361
+ "eval_loss": 1.698703408241272,
362
+ "eval_runtime": 5.6701,
363
+ "eval_samples_per_second": 17.284,
364
+ "eval_steps_per_second": 17.284,
365
+ "step": 3500
366
+ },
367
+ {
368
+ "epoch": 5.0,
369
+ "step": 3510,
370
+ "total_flos": 2890172619430200.0,
371
+ "train_loss": 0.6695007115008145,
372
+ "train_runtime": 916.4977,
373
+ "train_samples_per_second": 3.83,
374
+ "train_steps_per_second": 3.83
375
+ }
376
+ ],
377
+ "max_steps": 3510,
378
+ "num_train_epochs": 5,
379
+ "total_flos": 2890172619430200.0,
380
+ "trial_name": null,
381
+ "trial_params": null
382
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d13453225261ae3322f25fa0c2fd223bb3b539537568b807325c962f1c371b09
3
  size 3899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbdd39e5dadc71c4520b7ee1b5c990a33eee4d7fdad887960a5ecdb44855941e
3
  size 3899