napsternxg commited on
Commit
69c0828
·
1 Parent(s): dcbf272

End of training

Browse files
README.md CHANGED
@@ -16,15 +16,16 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [napsternxg/gte-small-L3-ingredient-v2](https://huggingface.co/napsternxg/gte-small-L3-ingredient-v2) on the nyt_ingredients dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 2.5417
20
- - Comment: {'precision': 0.5882352941176471, 'recall': 0.8333333333333334, 'f1': 0.6896551724137931, 'number': 12}
21
- - Name: {'precision': 0.7692307692307693, 'recall': 0.7692307692307693, 'f1': 0.7692307692307693, 'number': 26}
22
- - Qty: {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 22}
23
- - Unit: {'precision': 0.8947368421052632, 'recall': 0.9444444444444444, 'f1': 0.918918918918919, 'number': 18}
24
- - Overall Precision: 0.8214
25
- - Overall Recall: 0.8846
26
- - Overall F1: 0.8519
27
- - Overall Accuracy: 0.7619
 
28
 
29
  ## Model description
30
 
@@ -53,23 +54,23 @@ The following hyperparameters were used during training:
53
 
54
  ### Training results
55
 
56
- | Training Loss | Epoch | Step | Validation Loss | Comment | Name | Qty | Range End | Unit | Overall Precision | Overall Recall | Overall F1 | Overall Accuracy |
57
- |:-------------:|:-----:|:-----:|:---------------:|:--------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------:|:------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------:|:-----------------:|:--------------:|:----------:|:----------------:|
58
- | 4.2414 | 0.2 | 1000 | 3.7464 | {'precision': 0.45454545454545453, 'recall': 0.4411764705882353, 'f1': 0.4477611940298507, 'number': 34} | {'precision': 0.5806451612903226, 'recall': 0.6923076923076923, 'f1': 0.631578947368421, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.7075 | 0.7009 | 0.7042 | 0.7353 |
59
- | 3.5305 | 0.4 | 2000 | 3.3135 | {'precision': 0.5161290322580645, 'recall': 0.47058823529411764, 'f1': 0.4923076923076923, 'number': 34} | {'precision': 0.5806451612903226, 'recall': 0.6923076923076923, 'f1': 0.631578947368421, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 0.3333333333333333, 'f1': 0.5, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.7333 | 0.7196 | 0.7264 | 0.7892 |
60
- | 3.2255 | 0.59 | 3000 | 3.0933 | {'precision': 0.5555555555555556, 'recall': 0.4411764705882353, 'f1': 0.49180327868852464, 'number': 34} | {'precision': 0.6428571428571429, 'recall': 0.6923076923076923, 'f1': 0.6666666666666666, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.78 | 0.7290 | 0.7536 | 0.7598 |
61
- | 3.073 | 0.79 | 4000 | 2.9998 | {'precision': 0.5, 'recall': 0.4117647058823529, 'f1': 0.45161290322580644, 'number': 34} | {'precision': 0.6206896551724138, 'recall': 0.6923076923076923, 'f1': 0.6545454545454545, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.7549 | 0.7196 | 0.7368 | 0.7402 |
62
- | 2.954 | 0.99 | 5000 | 2.8983 | {'precision': 0.7307692307692307, 'recall': 0.5588235294117647, 'f1': 0.6333333333333334, 'number': 34} | {'precision': 0.6785714285714286, 'recall': 0.7307692307692307, 'f1': 0.7037037037037038, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 0.3333333333333333, 'f1': 0.5, 'number': 3} | {'precision': 1.0, 'recall': 0.9523809523809523, 'f1': 0.975609756097561, 'number': 21} | 0.8367 | 0.7664 | 0.8000 | 0.7941 |
63
- | 2.8463 | 1.19 | 6000 | 2.8088 | {'precision': 0.5714285714285714, 'recall': 0.47058823529411764, 'f1': 0.5161290322580646, 'number': 34} | {'precision': 0.6428571428571429, 'recall': 0.6923076923076923, 'f1': 0.6666666666666666, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.7822 | 0.7383 | 0.7596 | 0.7696 |
64
- | 2.7953 | 1.39 | 7000 | 2.7729 | {'precision': 0.6153846153846154, 'recall': 0.47058823529411764, 'f1': 0.5333333333333333, 'number': 34} | {'precision': 0.6666666666666666, 'recall': 0.6923076923076923, 'f1': 0.6792452830188679, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.8061 | 0.7383 | 0.7707 | 0.7647 |
65
- | 2.7561 | 1.58 | 8000 | 2.7151 | {'precision': 0.6666666666666666, 'recall': 0.5294117647058824, 'f1': 0.5901639344262295, 'number': 34} | {'precision': 0.6206896551724138, 'recall': 0.6923076923076923, 'f1': 0.6545454545454545, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 0.6666666666666666, 'f1': 0.8, 'number': 3} | {'precision': 1.0, 'recall': 0.9523809523809523, 'f1': 0.975609756097561, 'number': 21} | 0.8020 | 0.7570 | 0.7788 | 0.7696 |
66
- | 2.789 | 1.78 | 9000 | 2.6807 | {'precision': 0.6538461538461539, 'recall': 0.5, 'f1': 0.5666666666666668, 'number': 34} | {'precision': 0.5666666666666667, 'recall': 0.6538461538461539, 'f1': 0.6071428571428571, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.7822 | 0.7383 | 0.7596 | 0.7598 |
67
- | 2.6709 | 1.98 | 10000 | 2.6451 | {'precision': 0.6153846153846154, 'recall': 0.47058823529411764, 'f1': 0.5333333333333333, 'number': 34} | {'precision': 0.6, 'recall': 0.6923076923076923, 'f1': 0.6428571428571429, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.7822 | 0.7383 | 0.7596 | 0.7696 |
68
- | 2.5318 | 2.18 | 11000 | 2.6231 | {'precision': 0.5555555555555556, 'recall': 0.4411764705882353, 'f1': 0.49180327868852464, 'number': 34} | {'precision': 0.6333333333333333, 'recall': 0.7307692307692307, 'f1': 0.6785714285714285, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9523809523809523, 'f1': 0.975609756097561, 'number': 21} | 0.7767 | 0.7477 | 0.7619 | 0.7745 |
69
- | 2.6576 | 2.38 | 12000 | 2.6023 | {'precision': 0.6923076923076923, 'recall': 0.5294117647058824, 'f1': 0.5999999999999999, 'number': 34} | {'precision': 0.7037037037037037, 'recall': 0.7307692307692307, 'f1': 0.7169811320754716, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9523809523809523, 'f1': 0.975609756097561, 'number': 21} | 0.8384 | 0.7757 | 0.8058 | 0.7843 |
70
- | 2.5836 | 2.57 | 13000 | 2.5741 | {'precision': 0.68, 'recall': 0.5, 'f1': 0.576271186440678, 'number': 34} | {'precision': 0.6551724137931034, 'recall': 0.7307692307692307, 'f1': 0.6909090909090909, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9523809523809523, 'f1': 0.975609756097561, 'number': 21} | 0.82 | 0.7664 | 0.7923 | 0.7892 |
71
- | 2.6004 | 2.77 | 14000 | 2.5654 | {'precision': 0.6538461538461539, 'recall': 0.5, 'f1': 0.5666666666666668, 'number': 34} | {'precision': 0.6, 'recall': 0.6923076923076923, 'f1': 0.6428571428571429, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.7921 | 0.7477 | 0.7692 | 0.7745 |
72
- | 2.5449 | 2.97 | 15000 | 2.5586 | {'precision': 0.72, 'recall': 0.5294117647058824, 'f1': 0.6101694915254237, 'number': 34} | {'precision': 0.6206896551724138, 'recall': 0.6923076923076923, 'f1': 0.6545454545454545, 'number': 26} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 23} | {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3} | {'precision': 1.0, 'recall': 0.9047619047619048, 'f1': 0.9500000000000001, 'number': 21} | 0.8182 | 0.7570 | 0.7864 | 0.7892 |
73
 
74
 
75
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [napsternxg/gte-small-L3-ingredient-v2](https://huggingface.co/napsternxg/gte-small-L3-ingredient-v2) on the nyt_ingredients dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 2.5723
20
+ - Comment: {'precision': 0.6705310396409873, 'recall': 0.7578191039729502, 'f1': 0.7115079365079365, 'number': 7098}
21
+ - Name: {'precision': 0.8150406504065041, 'recall': 0.8209244693459756, 'f1': 0.8179719791722584, 'number': 9281}
22
+ - Qty: {'precision': 0.9861000794281175, 'recall': 0.9857086145295753, 'f1': 0.9859043081199126, 'number': 7557}
23
+ - Range End: {'precision': 0.5986842105263158, 'recall': 0.9479166666666666, 'f1': 0.7338709677419355, 'number': 96}
24
+ - Unit: {'precision': 0.9225395839801304, 'recall': 0.985735611212473, 'f1': 0.9530911715179216, 'number': 6029}
25
+ - Overall Precision: 0.8402
26
+ - Overall Recall: 0.8809
27
+ - Overall F1: 0.8601
28
+ - Overall Accuracy: 0.8330
29
 
30
  ## Model description
31
 
 
54
 
55
  ### Training results
56
 
57
+ | Training Loss | Epoch | Step | Validation Loss | Comment | Name | Qty | Range End | Unit | Overall Precision | Overall Recall | Overall F1 | Overall Accuracy |
58
+ |:-------------:|:-----:|:-----:|:---------------:|:---------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------:|:-----------------:|:--------------:|:----------:|:----------------:|
59
+ | 4.174 | 0.2 | 1000 | 3.8690 | {'precision': 0.5304157015725954, 'recall': 0.6285755561976307, 'f1': 0.5753388429752067, 'number': 6922} | {'precision': 0.7673592421143288, 'recall': 0.8069738480697385, 'f1': 0.7866681381745945, 'number': 8833} | {'precision': 0.9667778704475952, 'recall': 0.9806824591088551, 'f1': 0.9736805263894722, 'number': 7092} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 88} | {'precision': 0.9121887287024901, 'recall': 0.9756439460311898, 'f1': 0.9428498856997714, 'number': 5707} | 0.7795 | 0.8380 | 0.8077 | 0.7962 |
60
+ | 3.5528 | 0.4 | 2000 | 3.4154 | {'precision': 0.5496301598663803, 'recall': 0.6655590869690841, 'f1': 0.6020648196549921, 'number': 6922} | {'precision': 0.7787928221859707, 'recall': 0.8107098381070984, 'f1': 0.7944308852895496, 'number': 8833} | {'precision': 0.9785673998871969, 'recall': 0.9785673998871969, 'f1': 0.9785673998871969, 'number': 7092} | {'precision': 0.6666666666666666, 'recall': 0.13636363636363635, 'f1': 0.22641509433962262, 'number': 88} | {'precision': 0.9109700520833334, 'recall': 0.9807254249167688, 'f1': 0.9445616403679015, 'number': 5707} | 0.7887 | 0.8490 | 0.8177 | 0.8042 |
61
+ | 3.3333 | 0.59 | 3000 | 3.1915 | {'precision': 0.5850767928407304, 'recall': 0.6989309448136377, 'f1': 0.6369560924231453, 'number': 6922} | {'precision': 0.7759810263044415, 'recall': 0.814898675421714, 'f1': 0.7949638301397095, 'number': 8833} | {'precision': 0.9797125950972105, 'recall': 0.9805414551607445, 'f1': 0.9801268498942918, 'number': 7092} | {'precision': 0.6867469879518072, 'recall': 0.6477272727272727, 'f1': 0.6666666666666667, 'number': 88} | {'precision': 0.9255372313843079, 'recall': 0.9735412651130191, 'f1': 0.948932536293766, 'number': 5707} | 0.8006 | 0.8590 | 0.8288 | 0.8124 |
62
+ | 3.1122 | 0.79 | 4000 | 3.0560 | {'precision': 0.6020151133501259, 'recall': 0.7250794568043918, 'f1': 0.6578412740022282, 'number': 6922} | {'precision': 0.7925502692011867, 'recall': 0.8165968527114231, 'f1': 0.8043938887030222, 'number': 8833} | {'precision': 0.9816358242689646, 'recall': 0.9798364354201917, 'f1': 0.9807353044950956, 'number': 7092} | {'precision': 0.5547445255474452, 'recall': 0.8636363636363636, 'f1': 0.6755555555555556, 'number': 88} | {'precision': 0.9180758496141849, 'recall': 0.9798493078675311, 'f1': 0.9479572808950669, 'number': 5707} | 0.8082 | 0.8676 | 0.8368 | 0.8183 |
63
+ | 3.074 | 0.99 | 5000 | 2.9495 | {'precision': 0.6171328671328671, 'recall': 0.7139555041895406, 'f1': 0.6620227729403884, 'number': 6922} | {'precision': 0.8028043623414199, 'recall': 0.816710064530737, 'f1': 0.8096975138896684, 'number': 8833} | {'precision': 0.9747242005306521, 'recall': 0.9842075578116187, 'f1': 0.9794429242966393, 'number': 7092} | {'precision': 0.6448598130841121, 'recall': 0.7840909090909091, 'f1': 0.7076923076923077, 'number': 88} | {'precision': 0.9168310322156475, 'recall': 0.9773961801296653, 'f1': 0.9461453651089815, 'number': 5707} | 0.8167 | 0.8653 | 0.8403 | 0.8209 |
64
+ | 2.936 | 1.19 | 6000 | 2.8893 | {'precision': 0.6245376865195765, 'recall': 0.7074544929211211, 'f1': 0.6634152949942423, 'number': 6922} | {'precision': 0.8003099402258136, 'recall': 0.81852145363976, 'f1': 0.8093132590809874, 'number': 8833} | {'precision': 0.9721951897678298, 'recall': 0.9860406091370558, 'f1': 0.9790689534476724, 'number': 7092} | {'precision': 0.6220472440944882, 'recall': 0.8977272727272727, 'f1': 0.7348837209302326, 'number': 88} | {'precision': 0.9140714169248328, 'recall': 0.9823024356053969, 'f1': 0.9469594594594595, 'number': 5707} | 0.8179 | 0.8660 | 0.8413 | 0.8217 |
65
+ | 2.7662 | 1.39 | 7000 | 2.8622 | {'precision': 0.6298537569339385, 'recall': 0.7217567177116441, 'f1': 0.6726807593914097, 'number': 6922} | {'precision': 0.7999777753083676, 'recall': 0.8150118872410279, 'f1': 0.8074248541947062, 'number': 8833} | {'precision': 0.9800337457817773, 'recall': 0.9827975183305132, 'f1': 0.9814136862855534, 'number': 7092} | {'precision': 0.6290322580645161, 'recall': 0.8863636363636364, 'f1': 0.7358490566037735, 'number': 88} | {'precision': 0.9191902567478605, 'recall': 0.9786227439985982, 'f1': 0.9479758974794195, 'number': 5707} | 0.8210 | 0.8668 | 0.8433 | 0.8235 |
66
+ | 2.7839 | 1.58 | 8000 | 2.7801 | {'precision': 0.6325475860330266, 'recall': 0.7249349898873158, 'f1': 0.6755974419387412, 'number': 6922} | {'precision': 0.8036190053285968, 'recall': 0.8195403600135854, 'f1': 0.8115015974440895, 'number': 8833} | {'precision': 0.975977653631285, 'recall': 0.9853355893965031, 'f1': 0.9806342969407802, 'number': 7092} | {'precision': 0.6521739130434783, 'recall': 0.8522727272727273, 'f1': 0.7389162561576356, 'number': 88} | {'precision': 0.9188301018731515, 'recall': 0.9798493078675311, 'f1': 0.9483591961332994, 'number': 5707} | 0.8221 | 0.8698 | 0.8453 | 0.8242 |
67
+ | 2.7221 | 1.78 | 9000 | 2.7520 | {'precision': 0.6436781609195402, 'recall': 0.7442935567754985, 'f1': 0.690339005761758, 'number': 6922} | {'precision': 0.8124719605204127, 'recall': 0.8201064191101551, 'f1': 0.8162713392303792, 'number': 8833} | {'precision': 0.9827975183305132, 'recall': 0.9827975183305132, 'f1': 0.9827975183305132, 'number': 7092} | {'precision': 0.6576576576576577, 'recall': 0.8295454545454546, 'f1': 0.7336683417085428, 'number': 88} | {'precision': 0.9227716222920457, 'recall': 0.9777466269493604, 'f1': 0.9494640122511486, 'number': 5707} | 0.8293 | 0.8735 | 0.8508 | 0.8285 |
68
+ | 2.7156 | 1.98 | 10000 | 2.7236 | {'precision': 0.6453828542355635, 'recall': 0.7330251372435712, 'f1': 0.6864177489177489, 'number': 6922} | {'precision': 0.8084821428571428, 'recall': 0.8201064191101551, 'f1': 0.8142527960433878, 'number': 8833} | {'precision': 0.9825204398082887, 'recall': 0.9827975183305132, 'f1': 0.9826589595375722, 'number': 7092} | {'precision': 0.6324786324786325, 'recall': 0.8409090909090909, 'f1': 0.7219512195121951, 'number': 88} | {'precision': 0.9222387320455672, 'recall': 0.9787979674084458, 'f1': 0.9496769806188372, 'number': 5707} | 0.8291 | 0.8710 | 0.8496 | 0.8265 |
69
+ | 2.6804 | 2.18 | 11000 | 2.6929 | {'precision': 0.6422784494578088, 'recall': 0.7444380236925744, 'f1': 0.6895951823352291, 'number': 6922} | {'precision': 0.8120670391061453, 'recall': 0.8228235027736895, 'f1': 0.8174098858460327, 'number': 8833} | {'precision': 0.9837570621468926, 'recall': 0.9820924985899605, 'f1': 0.9829240756421113, 'number': 7092} | {'precision': 0.635593220338983, 'recall': 0.8522727272727273, 'f1': 0.7281553398058253, 'number': 88} | {'precision': 0.9234584228798148, 'recall': 0.9787979674084458, 'f1': 0.9503232391970058, 'number': 5707} | 0.8288 | 0.8745 | 0.8510 | 0.8279 |
70
+ | 2.6121 | 2.38 | 12000 | 2.6691 | {'precision': 0.6490939044481054, 'recall': 0.7399595492632187, 'f1': 0.691554715452643, 'number': 6922} | {'precision': 0.811037725288257, 'recall': 0.820219630929469, 'f1': 0.8156028368794326, 'number': 8833} | {'precision': 0.9798121407542408, 'recall': 0.9854765933446137, 'f1': 0.9826362038664324, 'number': 7092} | {'precision': 0.6328125, 'recall': 0.9204545454545454, 'f1': 0.7499999999999999, 'number': 88} | {'precision': 0.9207106431978944, 'recall': 0.9807254249167688, 'f1': 0.9497709146444936, 'number': 5707} | 0.8299 | 0.8740 | 0.8514 | 0.8277 |
71
+ | 2.553 | 2.57 | 13000 | 2.6652 | {'precision': 0.6478233438485804, 'recall': 0.7416931522681306, 'f1': 0.6915875260995488, 'number': 6922} | {'precision': 0.8128150554497592, 'recall': 0.8214649609419223, 'f1': 0.8171171171171171, 'number': 8833} | {'precision': 0.9826760563380281, 'recall': 0.983784545967287, 'f1': 0.9832299887260428, 'number': 7092} | {'precision': 0.639344262295082, 'recall': 0.8863636363636364, 'f1': 0.742857142857143, 'number': 88} | {'precision': 0.920335085413929, 'recall': 0.9817767653758542, 'f1': 0.9500635862653668, 'number': 5707} | 0.8304 | 0.8745 | 0.8519 | 0.8287 |
72
+ | 2.5781 | 2.77 | 14000 | 2.6431 | {'precision': 0.6512514292974209, 'recall': 0.7405374169315226, 'f1': 0.6930304873926858, 'number': 6922} | {'precision': 0.8114304887596465, 'recall': 0.8213517491226084, 'f1': 0.8163609767075503, 'number': 8833} | {'precision': 0.9832252607837609, 'recall': 0.983502538071066, 'f1': 0.9833638798815735, 'number': 7092} | {'precision': 0.6551724137931034, 'recall': 0.8636363636363636, 'f1': 0.7450980392156864, 'number': 88} | {'precision': 0.9220500988793672, 'recall': 0.9803749780970737, 'f1': 0.9503184713375797, 'number': 5707} | 0.8317 | 0.8738 | 0.8522 | 0.8286 |
73
+ | 2.5928 | 2.97 | 15000 | 2.6394 | {'precision': 0.6551064643631264, 'recall': 0.7422710199364345, 'f1': 0.6959701997968167, 'number': 6922} | {'precision': 0.8126049479458188, 'recall': 0.8218045963998641, 'f1': 0.8171788810086682, 'number': 8833} | {'precision': 0.9825376707505985, 'recall': 0.983784545967287, 'f1': 0.9831607130275488, 'number': 7092} | {'precision': 0.6470588235294118, 'recall': 0.875, 'f1': 0.7439613526570048, 'number': 88} | {'precision': 0.9219729462223688, 'recall': 0.9793236376379885, 'f1': 0.9497833290848839, 'number': 5707} | 0.8331 | 0.8742 | 0.8532 | 0.8289 |
74
 
75
 
76
  ### Framework versions
all_results.json CHANGED
@@ -1,41 +1,41 @@
1
  {
2
  "epoch": 3.0,
3
  "eval_COMMENT": {
4
- "f1": 0.6896551724137931,
5
- "number": 12,
6
- "precision": 0.5882352941176471,
7
- "recall": 0.8333333333333334
8
  },
9
  "eval_NAME": {
10
- "f1": 0.7692307692307693,
11
- "number": 26,
12
- "precision": 0.7692307692307693,
13
- "recall": 0.7692307692307693
14
  },
15
  "eval_QTY": {
16
- "f1": 1.0,
17
- "number": 22,
18
- "precision": 1.0,
19
- "recall": 1.0
20
  },
21
  "eval_RANGE_END": {
22
- "f1": 1.0,
23
- "number": 3,
24
- "precision": 1.0,
25
- "recall": 1.0
26
  },
27
  "eval_UNIT": {
28
- "f1": 0.918918918918919,
29
- "number": 18,
30
- "precision": 0.8947368421052632,
31
- "recall": 0.9444444444444444
32
  },
33
- "eval_loss": 2.54168963432312,
34
- "eval_overall_accuracy": 0.7619047619047619,
35
- "eval_overall_f1": 0.8518518518518519,
36
- "eval_overall_precision": 0.8214285714285714,
37
- "eval_overall_recall": 0.8846153846153846,
38
- "eval_runtime": 11.2254,
39
- "eval_samples_per_second": 797.652,
40
- "eval_steps_per_second": 24.943
41
  }
 
1
  {
2
  "epoch": 3.0,
3
  "eval_COMMENT": {
4
+ "f1": 0.7115079365079365,
5
+ "number": 7098,
6
+ "precision": 0.6705310396409873,
7
+ "recall": 0.7578191039729502
8
  },
9
  "eval_NAME": {
10
+ "f1": 0.8179719791722584,
11
+ "number": 9281,
12
+ "precision": 0.8150406504065041,
13
+ "recall": 0.8209244693459756
14
  },
15
  "eval_QTY": {
16
+ "f1": 0.9859043081199126,
17
+ "number": 7557,
18
+ "precision": 0.9861000794281175,
19
+ "recall": 0.9857086145295753
20
  },
21
  "eval_RANGE_END": {
22
+ "f1": 0.7338709677419355,
23
+ "number": 96,
24
+ "precision": 0.5986842105263158,
25
+ "recall": 0.9479166666666666
26
  },
27
  "eval_UNIT": {
28
+ "f1": 0.9530911715179216,
29
+ "number": 6029,
30
+ "precision": 0.9225395839801304,
31
+ "recall": 0.985735611212473
32
  },
33
+ "eval_loss": 2.572307586669922,
34
+ "eval_overall_accuracy": 0.8329675956404067,
35
+ "eval_overall_f1": 0.8600659315675797,
36
+ "eval_overall_precision": 0.8401865600609175,
37
+ "eval_overall_recall": 0.8809088187352383,
38
+ "eval_runtime": 10.0829,
39
+ "eval_samples_per_second": 888.036,
40
+ "eval_steps_per_second": 27.77
41
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75a1845aa8742790e488f470021895268092cc242fa2e9ad4f5932ef21215ba0
3
- size 69005023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fe51036b0fe145c1633527037e9fd147f46ac6333b510111d93b14646958829
3
+ size 69005087
test_results.json CHANGED
@@ -1,35 +1,41 @@
1
  {
2
  "epoch": 3.0,
3
  "eval_COMMENT": {
4
- "f1": 0.6896551724137931,
5
- "number": 12,
6
- "precision": 0.5882352941176471,
7
- "recall": 0.8333333333333334
8
  },
9
  "eval_NAME": {
10
- "f1": 0.7692307692307693,
11
- "number": 26,
12
- "precision": 0.7692307692307693,
13
- "recall": 0.7692307692307693
14
  },
15
  "eval_QTY": {
16
- "f1": 1.0,
17
- "number": 22,
18
- "precision": 1.0,
19
- "recall": 1.0
 
 
 
 
 
 
20
  },
21
  "eval_UNIT": {
22
- "f1": 0.918918918918919,
23
- "number": 18,
24
- "precision": 0.8947368421052632,
25
- "recall": 0.9444444444444444
26
  },
27
- "eval_loss": 2.54168963432312,
28
- "eval_overall_accuracy": 0.7619047619047619,
29
- "eval_overall_f1": 0.8518518518518519,
30
- "eval_overall_precision": 0.8214285714285714,
31
- "eval_overall_recall": 0.8846153846153846,
32
- "eval_runtime": 11.2254,
33
- "eval_samples_per_second": 797.652,
34
- "eval_steps_per_second": 24.943
35
  }
 
1
  {
2
  "epoch": 3.0,
3
  "eval_COMMENT": {
4
+ "f1": 0.7115079365079365,
5
+ "number": 7098,
6
+ "precision": 0.6705310396409873,
7
+ "recall": 0.7578191039729502
8
  },
9
  "eval_NAME": {
10
+ "f1": 0.8179719791722584,
11
+ "number": 9281,
12
+ "precision": 0.8150406504065041,
13
+ "recall": 0.8209244693459756
14
  },
15
  "eval_QTY": {
16
+ "f1": 0.9859043081199126,
17
+ "number": 7557,
18
+ "precision": 0.9861000794281175,
19
+ "recall": 0.9857086145295753
20
+ },
21
+ "eval_RANGE_END": {
22
+ "f1": 0.7338709677419355,
23
+ "number": 96,
24
+ "precision": 0.5986842105263158,
25
+ "recall": 0.9479166666666666
26
  },
27
  "eval_UNIT": {
28
+ "f1": 0.9530911715179216,
29
+ "number": 6029,
30
+ "precision": 0.9225395839801304,
31
+ "recall": 0.985735611212473
32
  },
33
+ "eval_loss": 2.572307586669922,
34
+ "eval_overall_accuracy": 0.8329675956404067,
35
+ "eval_overall_f1": 0.8600659315675797,
36
+ "eval_overall_precision": 0.8401865600609175,
37
+ "eval_overall_recall": 0.8809088187352383,
38
+ "eval_runtime": 10.0829,
39
+ "eval_samples_per_second": 888.036,
40
+ "eval_steps_per_second": 27.77
41
  }
train_results.json CHANGED
@@ -1,29 +1,41 @@
1
  {
2
  "epoch": 3.0,
 
 
 
 
 
 
3
  "eval_NAME": {
4
- "f1": 0.8571428571428571,
5
- "number": 4,
6
- "precision": 1.0,
7
- "recall": 0.75
8
  },
9
  "eval_QTY": {
10
- "f1": 1.0,
11
- "number": 3,
12
- "precision": 1.0,
13
- "recall": 1.0
 
 
 
 
 
 
14
  },
15
  "eval_UNIT": {
16
- "f1": 1.0,
17
- "number": 3,
18
- "precision": 1.0,
19
- "recall": 1.0
20
  },
21
- "eval_loss": 2.457996129989624,
22
- "eval_overall_accuracy": 0.9629629629629629,
23
- "eval_overall_f1": 0.9473684210526316,
24
- "eval_overall_precision": 1.0,
25
- "eval_overall_recall": 0.9,
26
- "eval_runtime": 183.4069,
27
- "eval_samples_per_second": 881.117,
28
- "eval_steps_per_second": 27.54
29
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_COMMENT": {
4
+ "f1": 0.721799355413838,
5
+ "number": 129768,
6
+ "precision": 0.6788247832786863,
7
+ "recall": 0.7705828863818507
8
+ },
9
  "eval_NAME": {
10
+ "f1": 0.8235496724923245,
11
+ "number": 167686,
12
+ "precision": 0.8201064458900059,
13
+ "recall": 0.8270219338525577
14
  },
15
  "eval_QTY": {
16
+ "f1": 0.984932056268375,
17
+ "number": 135815,
18
+ "precision": 0.9844285567177133,
19
+ "recall": 0.9854360711261643
20
+ },
21
+ "eval_RANGE_END": {
22
+ "f1": 0.7352039613298751,
23
+ "number": 1680,
24
+ "precision": 0.6087465833658727,
25
+ "recall": 0.9279761904761905
26
  },
27
  "eval_UNIT": {
28
+ "f1": 0.9542982760615527,
29
+ "number": 108817,
30
+ "precision": 0.9251343819294052,
31
+ "recall": 0.9853607432662176
32
  },
33
+ "eval_loss": 2.4726200103759766,
34
+ "eval_overall_accuracy": 0.8400964270928377,
35
+ "eval_overall_f1": 0.8636297978361569,
36
+ "eval_overall_precision": 0.8431603381071257,
37
+ "eval_overall_recall": 0.8851178631985082,
38
+ "eval_runtime": 178.5371,
39
+ "eval_samples_per_second": 905.151,
40
+ "eval_steps_per_second": 28.291
41
  }
trainer_state.json CHANGED
@@ -11,828 +11,828 @@
11
  {
12
  "epoch": 0.1,
13
  "learning_rate": 4.835016168415495e-05,
14
- "loss": 6.8417,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.2,
19
  "learning_rate": 4.670032336830991e-05,
20
- "loss": 4.2414,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.2,
25
  "eval_COMMENT": {
26
- "f1": 0.4477611940298507,
27
- "number": 34,
28
- "precision": 0.45454545454545453,
29
- "recall": 0.4411764705882353
30
  },
31
  "eval_NAME": {
32
- "f1": 0.631578947368421,
33
- "number": 26,
34
- "precision": 0.5806451612903226,
35
- "recall": 0.6923076923076923
36
  },
37
  "eval_QTY": {
38
- "f1": 1.0,
39
- "number": 23,
40
- "precision": 1.0,
41
- "recall": 1.0
42
  },
43
  "eval_RANGE_END": {
44
  "f1": 0.0,
45
- "number": 3,
46
  "precision": 0.0,
47
  "recall": 0.0
48
  },
49
  "eval_UNIT": {
50
- "f1": 0.9500000000000001,
51
- "number": 21,
52
- "precision": 1.0,
53
- "recall": 0.9047619047619048
54
- },
55
- "eval_loss": 3.7463533878326416,
56
- "eval_overall_accuracy": 0.7352941176470589,
57
- "eval_overall_f1": 0.704225352112676,
58
- "eval_overall_precision": 0.7075471698113207,
59
- "eval_overall_recall": 0.7009345794392523,
60
- "eval_runtime": 9.4242,
61
- "eval_samples_per_second": 902.571,
62
- "eval_steps_per_second": 28.225,
63
  "step": 1000
64
  },
65
  {
66
  "epoch": 0.3,
67
  "learning_rate": 4.505048505246486e-05,
68
- "loss": 3.6927,
69
  "step": 1500
70
  },
71
  {
72
  "epoch": 0.4,
73
  "learning_rate": 4.3400646736619816e-05,
74
- "loss": 3.5305,
75
  "step": 2000
76
  },
77
  {
78
  "epoch": 0.4,
79
  "eval_COMMENT": {
80
- "f1": 0.4923076923076923,
81
- "number": 34,
82
- "precision": 0.5161290322580645,
83
- "recall": 0.47058823529411764
84
  },
85
  "eval_NAME": {
86
- "f1": 0.631578947368421,
87
- "number": 26,
88
- "precision": 0.5806451612903226,
89
- "recall": 0.6923076923076923
90
  },
91
  "eval_QTY": {
92
- "f1": 1.0,
93
- "number": 23,
94
- "precision": 1.0,
95
- "recall": 1.0
96
  },
97
  "eval_RANGE_END": {
98
- "f1": 0.5,
99
- "number": 3,
100
- "precision": 1.0,
101
- "recall": 0.3333333333333333
102
  },
103
  "eval_UNIT": {
104
- "f1": 0.9500000000000001,
105
- "number": 21,
106
- "precision": 1.0,
107
- "recall": 0.9047619047619048
108
- },
109
- "eval_loss": 3.313516139984131,
110
- "eval_overall_accuracy": 0.7892156862745098,
111
- "eval_overall_f1": 0.7264150943396226,
112
- "eval_overall_precision": 0.7333333333333333,
113
- "eval_overall_recall": 0.719626168224299,
114
- "eval_runtime": 11.1909,
115
- "eval_samples_per_second": 760.084,
116
- "eval_steps_per_second": 23.769,
117
  "step": 2000
118
  },
119
  {
120
  "epoch": 0.49,
121
  "learning_rate": 4.1750808420774766e-05,
122
- "loss": 3.364,
123
  "step": 2500
124
  },
125
  {
126
  "epoch": 0.59,
127
  "learning_rate": 4.010097010492972e-05,
128
- "loss": 3.2255,
129
  "step": 3000
130
  },
131
  {
132
  "epoch": 0.59,
133
  "eval_COMMENT": {
134
- "f1": 0.49180327868852464,
135
- "number": 34,
136
- "precision": 0.5555555555555556,
137
- "recall": 0.4411764705882353
138
  },
139
  "eval_NAME": {
140
- "f1": 0.6666666666666666,
141
- "number": 26,
142
- "precision": 0.6428571428571429,
143
- "recall": 0.6923076923076923
144
  },
145
  "eval_QTY": {
146
- "f1": 1.0,
147
- "number": 23,
148
- "precision": 1.0,
149
- "recall": 1.0
150
  },
151
  "eval_RANGE_END": {
152
- "f1": 1.0,
153
- "number": 3,
154
- "precision": 1.0,
155
- "recall": 1.0
156
  },
157
  "eval_UNIT": {
158
- "f1": 0.9500000000000001,
159
- "number": 21,
160
- "precision": 1.0,
161
- "recall": 0.9047619047619048
162
- },
163
- "eval_loss": 3.0932886600494385,
164
- "eval_overall_accuracy": 0.7598039215686274,
165
- "eval_overall_f1": 0.7536231884057971,
166
- "eval_overall_precision": 0.78,
167
- "eval_overall_recall": 0.7289719626168224,
168
- "eval_runtime": 9.5483,
169
- "eval_samples_per_second": 890.84,
170
- "eval_steps_per_second": 27.858,
171
  "step": 3000
172
  },
173
  {
174
  "epoch": 0.69,
175
  "learning_rate": 3.845113178908467e-05,
176
- "loss": 3.1407,
177
  "step": 3500
178
  },
179
  {
180
  "epoch": 0.79,
181
  "learning_rate": 3.680129347323962e-05,
182
- "loss": 3.073,
183
  "step": 4000
184
  },
185
  {
186
  "epoch": 0.79,
187
  "eval_COMMENT": {
188
- "f1": 0.45161290322580644,
189
- "number": 34,
190
- "precision": 0.5,
191
- "recall": 0.4117647058823529
192
  },
193
  "eval_NAME": {
194
- "f1": 0.6545454545454545,
195
- "number": 26,
196
- "precision": 0.6206896551724138,
197
- "recall": 0.6923076923076923
198
  },
199
  "eval_QTY": {
200
- "f1": 1.0,
201
- "number": 23,
202
- "precision": 1.0,
203
- "recall": 1.0
204
  },
205
  "eval_RANGE_END": {
206
- "f1": 1.0,
207
- "number": 3,
208
- "precision": 1.0,
209
- "recall": 1.0
210
  },
211
  "eval_UNIT": {
212
- "f1": 0.9500000000000001,
213
- "number": 21,
214
- "precision": 1.0,
215
- "recall": 0.9047619047619048
216
- },
217
- "eval_loss": 2.9998066425323486,
218
- "eval_overall_accuracy": 0.7401960784313726,
219
- "eval_overall_f1": 0.736842105263158,
220
- "eval_overall_precision": 0.7549019607843137,
221
- "eval_overall_recall": 0.719626168224299,
222
- "eval_runtime": 8.6908,
223
- "eval_samples_per_second": 978.735,
224
- "eval_steps_per_second": 30.607,
225
  "step": 4000
226
  },
227
  {
228
  "epoch": 0.89,
229
  "learning_rate": 3.515145515739457e-05,
230
- "loss": 3.0814,
231
  "step": 4500
232
  },
233
  {
234
  "epoch": 0.99,
235
  "learning_rate": 3.3501616841549535e-05,
236
- "loss": 2.954,
237
  "step": 5000
238
  },
239
  {
240
  "epoch": 0.99,
241
  "eval_COMMENT": {
242
- "f1": 0.6333333333333334,
243
- "number": 34,
244
- "precision": 0.7307692307692307,
245
- "recall": 0.5588235294117647
246
  },
247
  "eval_NAME": {
248
- "f1": 0.7037037037037038,
249
- "number": 26,
250
- "precision": 0.6785714285714286,
251
- "recall": 0.7307692307692307
252
  },
253
  "eval_QTY": {
254
- "f1": 1.0,
255
- "number": 23,
256
- "precision": 1.0,
257
- "recall": 1.0
258
  },
259
  "eval_RANGE_END": {
260
- "f1": 0.5,
261
- "number": 3,
262
- "precision": 1.0,
263
- "recall": 0.3333333333333333
264
  },
265
  "eval_UNIT": {
266
- "f1": 0.975609756097561,
267
- "number": 21,
268
- "precision": 1.0,
269
- "recall": 0.9523809523809523
270
- },
271
- "eval_loss": 2.8982722759246826,
272
- "eval_overall_accuracy": 0.7941176470588235,
273
- "eval_overall_f1": 0.7999999999999999,
274
- "eval_overall_precision": 0.8367346938775511,
275
- "eval_overall_recall": 0.7663551401869159,
276
- "eval_runtime": 10.4306,
277
- "eval_samples_per_second": 815.482,
278
- "eval_steps_per_second": 25.502,
279
  "step": 5000
280
  },
281
  {
282
  "epoch": 1.09,
283
  "learning_rate": 3.1851778525704485e-05,
284
- "loss": 2.9666,
285
  "step": 5500
286
  },
287
  {
288
  "epoch": 1.19,
289
  "learning_rate": 3.0201940209859435e-05,
290
- "loss": 2.8463,
291
  "step": 6000
292
  },
293
  {
294
  "epoch": 1.19,
295
  "eval_COMMENT": {
296
- "f1": 0.5161290322580646,
297
- "number": 34,
298
- "precision": 0.5714285714285714,
299
- "recall": 0.47058823529411764
300
  },
301
  "eval_NAME": {
302
- "f1": 0.6666666666666666,
303
- "number": 26,
304
- "precision": 0.6428571428571429,
305
- "recall": 0.6923076923076923
306
  },
307
  "eval_QTY": {
308
- "f1": 1.0,
309
- "number": 23,
310
- "precision": 1.0,
311
- "recall": 1.0
312
  },
313
  "eval_RANGE_END": {
314
- "f1": 1.0,
315
- "number": 3,
316
- "precision": 1.0,
317
- "recall": 1.0
318
  },
319
  "eval_UNIT": {
320
- "f1": 0.9500000000000001,
321
- "number": 21,
322
- "precision": 1.0,
323
- "recall": 0.9047619047619048
324
- },
325
- "eval_loss": 2.808798313140869,
326
- "eval_overall_accuracy": 0.7696078431372549,
327
- "eval_overall_f1": 0.7596153846153846,
328
- "eval_overall_precision": 0.7821782178217822,
329
- "eval_overall_recall": 0.7383177570093458,
330
- "eval_runtime": 8.5489,
331
- "eval_samples_per_second": 994.983,
332
- "eval_steps_per_second": 31.115,
333
  "step": 6000
334
  },
335
  {
336
  "epoch": 1.29,
337
  "learning_rate": 2.855210189401439e-05,
338
- "loss": 2.8023,
339
  "step": 6500
340
  },
341
  {
342
  "epoch": 1.39,
343
  "learning_rate": 2.690226357816934e-05,
344
- "loss": 2.7953,
345
  "step": 7000
346
  },
347
  {
348
  "epoch": 1.39,
349
  "eval_COMMENT": {
350
- "f1": 0.5333333333333333,
351
- "number": 34,
352
- "precision": 0.6153846153846154,
353
- "recall": 0.47058823529411764
354
  },
355
  "eval_NAME": {
356
- "f1": 0.6792452830188679,
357
- "number": 26,
358
- "precision": 0.6666666666666666,
359
- "recall": 0.6923076923076923
360
  },
361
  "eval_QTY": {
362
- "f1": 1.0,
363
- "number": 23,
364
- "precision": 1.0,
365
- "recall": 1.0
366
  },
367
  "eval_RANGE_END": {
368
- "f1": 1.0,
369
- "number": 3,
370
- "precision": 1.0,
371
- "recall": 1.0
372
  },
373
  "eval_UNIT": {
374
- "f1": 0.9500000000000001,
375
- "number": 21,
376
- "precision": 1.0,
377
- "recall": 0.9047619047619048
378
- },
379
- "eval_loss": 2.7728922367095947,
380
- "eval_overall_accuracy": 0.7647058823529411,
381
- "eval_overall_f1": 0.7707317073170732,
382
- "eval_overall_precision": 0.8061224489795918,
383
- "eval_overall_recall": 0.7383177570093458,
384
- "eval_runtime": 10.0867,
385
- "eval_samples_per_second": 843.288,
386
- "eval_steps_per_second": 26.371,
387
  "step": 7000
388
  },
389
  {
390
  "epoch": 1.48,
391
  "learning_rate": 2.5252425262324292e-05,
392
- "loss": 2.7846,
393
  "step": 7500
394
  },
395
  {
396
  "epoch": 1.58,
397
  "learning_rate": 2.3602586946479245e-05,
398
- "loss": 2.7561,
399
  "step": 8000
400
  },
401
  {
402
  "epoch": 1.58,
403
  "eval_COMMENT": {
404
- "f1": 0.5901639344262295,
405
- "number": 34,
406
- "precision": 0.6666666666666666,
407
- "recall": 0.5294117647058824
408
  },
409
  "eval_NAME": {
410
- "f1": 0.6545454545454545,
411
- "number": 26,
412
- "precision": 0.6206896551724138,
413
- "recall": 0.6923076923076923
414
  },
415
  "eval_QTY": {
416
- "f1": 1.0,
417
- "number": 23,
418
- "precision": 1.0,
419
- "recall": 1.0
420
  },
421
  "eval_RANGE_END": {
422
- "f1": 0.8,
423
- "number": 3,
424
- "precision": 1.0,
425
- "recall": 0.6666666666666666
426
  },
427
  "eval_UNIT": {
428
- "f1": 0.975609756097561,
429
- "number": 21,
430
- "precision": 1.0,
431
- "recall": 0.9523809523809523
432
- },
433
- "eval_loss": 2.715115785598755,
434
- "eval_overall_accuracy": 0.7696078431372549,
435
- "eval_overall_f1": 0.7788461538461539,
436
- "eval_overall_precision": 0.801980198019802,
437
- "eval_overall_recall": 0.7570093457943925,
438
- "eval_runtime": 10.2877,
439
- "eval_samples_per_second": 826.809,
440
- "eval_steps_per_second": 25.856,
441
  "step": 8000
442
  },
443
  {
444
  "epoch": 1.68,
445
  "learning_rate": 2.19527486306342e-05,
446
- "loss": 2.733,
447
  "step": 8500
448
  },
449
  {
450
  "epoch": 1.78,
451
  "learning_rate": 2.0302910314789152e-05,
452
- "loss": 2.789,
453
  "step": 9000
454
  },
455
  {
456
  "epoch": 1.78,
457
  "eval_COMMENT": {
458
- "f1": 0.5666666666666668,
459
- "number": 34,
460
- "precision": 0.6538461538461539,
461
- "recall": 0.5
462
  },
463
  "eval_NAME": {
464
- "f1": 0.6071428571428571,
465
- "number": 26,
466
- "precision": 0.5666666666666667,
467
- "recall": 0.6538461538461539
468
  },
469
  "eval_QTY": {
470
- "f1": 1.0,
471
- "number": 23,
472
- "precision": 1.0,
473
- "recall": 1.0
474
  },
475
  "eval_RANGE_END": {
476
- "f1": 1.0,
477
- "number": 3,
478
- "precision": 1.0,
479
- "recall": 1.0
480
  },
481
  "eval_UNIT": {
482
- "f1": 0.9500000000000001,
483
- "number": 21,
484
- "precision": 1.0,
485
- "recall": 0.9047619047619048
486
- },
487
- "eval_loss": 2.680718183517456,
488
- "eval_overall_accuracy": 0.7598039215686274,
489
- "eval_overall_f1": 0.7596153846153846,
490
- "eval_overall_precision": 0.7821782178217822,
491
- "eval_overall_recall": 0.7383177570093458,
492
- "eval_runtime": 8.9388,
493
- "eval_samples_per_second": 951.583,
494
- "eval_steps_per_second": 29.758,
495
  "step": 9000
496
  },
497
  {
498
  "epoch": 1.88,
499
  "learning_rate": 1.8653071998944105e-05,
500
- "loss": 2.741,
501
  "step": 9500
502
  },
503
  {
504
  "epoch": 1.98,
505
  "learning_rate": 1.700323368309906e-05,
506
- "loss": 2.6709,
507
  "step": 10000
508
  },
509
  {
510
  "epoch": 1.98,
511
  "eval_COMMENT": {
512
- "f1": 0.5333333333333333,
513
- "number": 34,
514
- "precision": 0.6153846153846154,
515
- "recall": 0.47058823529411764
516
  },
517
  "eval_NAME": {
518
- "f1": 0.6428571428571429,
519
- "number": 26,
520
- "precision": 0.6,
521
- "recall": 0.6923076923076923
522
  },
523
  "eval_QTY": {
524
- "f1": 1.0,
525
- "number": 23,
526
- "precision": 1.0,
527
- "recall": 1.0
528
  },
529
  "eval_RANGE_END": {
530
- "f1": 1.0,
531
- "number": 3,
532
- "precision": 1.0,
533
- "recall": 1.0
534
  },
535
  "eval_UNIT": {
536
- "f1": 0.9500000000000001,
537
- "number": 21,
538
- "precision": 1.0,
539
- "recall": 0.9047619047619048
540
- },
541
- "eval_loss": 2.645069122314453,
542
- "eval_overall_accuracy": 0.7696078431372549,
543
- "eval_overall_f1": 0.7596153846153846,
544
- "eval_overall_precision": 0.7821782178217822,
545
- "eval_overall_recall": 0.7383177570093458,
546
- "eval_runtime": 9.7262,
547
- "eval_samples_per_second": 874.545,
548
- "eval_steps_per_second": 27.349,
549
  "step": 10000
550
  },
551
  {
552
  "epoch": 2.08,
553
  "learning_rate": 1.535339536725401e-05,
554
- "loss": 2.657,
555
  "step": 10500
556
  },
557
  {
558
  "epoch": 2.18,
559
  "learning_rate": 1.3703557051408963e-05,
560
- "loss": 2.5318,
561
  "step": 11000
562
  },
563
  {
564
  "epoch": 2.18,
565
  "eval_COMMENT": {
566
- "f1": 0.49180327868852464,
567
- "number": 34,
568
- "precision": 0.5555555555555556,
569
- "recall": 0.4411764705882353
570
  },
571
  "eval_NAME": {
572
- "f1": 0.6785714285714285,
573
- "number": 26,
574
- "precision": 0.6333333333333333,
575
- "recall": 0.7307692307692307
576
  },
577
  "eval_QTY": {
578
- "f1": 1.0,
579
- "number": 23,
580
- "precision": 1.0,
581
- "recall": 1.0
582
  },
583
  "eval_RANGE_END": {
584
- "f1": 1.0,
585
- "number": 3,
586
- "precision": 1.0,
587
- "recall": 1.0
588
  },
589
  "eval_UNIT": {
590
- "f1": 0.975609756097561,
591
- "number": 21,
592
- "precision": 1.0,
593
- "recall": 0.9523809523809523
594
- },
595
- "eval_loss": 2.623105764389038,
596
- "eval_overall_accuracy": 0.7745098039215687,
597
- "eval_overall_f1": 0.7619047619047619,
598
- "eval_overall_precision": 0.7766990291262136,
599
- "eval_overall_recall": 0.7476635514018691,
600
- "eval_runtime": 10.355,
601
- "eval_samples_per_second": 821.438,
602
- "eval_steps_per_second": 25.688,
603
  "step": 11000
604
  },
605
  {
606
  "epoch": 2.28,
607
  "learning_rate": 1.2053718735563915e-05,
608
- "loss": 2.5907,
609
  "step": 11500
610
  },
611
  {
612
  "epoch": 2.38,
613
  "learning_rate": 1.0403880419718868e-05,
614
- "loss": 2.6576,
615
  "step": 12000
616
  },
617
  {
618
  "epoch": 2.38,
619
  "eval_COMMENT": {
620
- "f1": 0.5999999999999999,
621
- "number": 34,
622
- "precision": 0.6923076923076923,
623
- "recall": 0.5294117647058824
624
  },
625
  "eval_NAME": {
626
- "f1": 0.7169811320754716,
627
- "number": 26,
628
- "precision": 0.7037037037037037,
629
- "recall": 0.7307692307692307
630
  },
631
  "eval_QTY": {
632
- "f1": 1.0,
633
- "number": 23,
634
- "precision": 1.0,
635
- "recall": 1.0
636
  },
637
  "eval_RANGE_END": {
638
- "f1": 1.0,
639
- "number": 3,
640
- "precision": 1.0,
641
- "recall": 1.0
642
  },
643
  "eval_UNIT": {
644
- "f1": 0.975609756097561,
645
- "number": 21,
646
- "precision": 1.0,
647
- "recall": 0.9523809523809523
648
- },
649
- "eval_loss": 2.602302074432373,
650
- "eval_overall_accuracy": 0.7843137254901961,
651
- "eval_overall_f1": 0.8058252427184466,
652
- "eval_overall_precision": 0.8383838383838383,
653
- "eval_overall_recall": 0.7757009345794392,
654
- "eval_runtime": 9.1898,
655
- "eval_samples_per_second": 925.595,
656
- "eval_steps_per_second": 28.945,
657
  "step": 12000
658
  },
659
  {
660
  "epoch": 2.47,
661
  "learning_rate": 8.75404210387382e-06,
662
- "loss": 2.6148,
663
  "step": 12500
664
  },
665
  {
666
  "epoch": 2.57,
667
  "learning_rate": 7.104203788028774e-06,
668
- "loss": 2.5836,
669
  "step": 13000
670
  },
671
  {
672
  "epoch": 2.57,
673
  "eval_COMMENT": {
674
- "f1": 0.576271186440678,
675
- "number": 34,
676
- "precision": 0.68,
677
- "recall": 0.5
678
  },
679
  "eval_NAME": {
680
- "f1": 0.6909090909090909,
681
- "number": 26,
682
- "precision": 0.6551724137931034,
683
- "recall": 0.7307692307692307
684
  },
685
  "eval_QTY": {
686
- "f1": 1.0,
687
- "number": 23,
688
- "precision": 1.0,
689
- "recall": 1.0
690
  },
691
  "eval_RANGE_END": {
692
- "f1": 1.0,
693
- "number": 3,
694
- "precision": 1.0,
695
- "recall": 1.0
696
  },
697
  "eval_UNIT": {
698
- "f1": 0.975609756097561,
699
- "number": 21,
700
- "precision": 1.0,
701
- "recall": 0.9523809523809523
702
- },
703
- "eval_loss": 2.574141025543213,
704
- "eval_overall_accuracy": 0.7892156862745098,
705
- "eval_overall_f1": 0.7922705314009661,
706
- "eval_overall_precision": 0.82,
707
- "eval_overall_recall": 0.7663551401869159,
708
- "eval_runtime": 9.1902,
709
- "eval_samples_per_second": 925.552,
710
- "eval_steps_per_second": 28.944,
711
  "step": 13000
712
  },
713
  {
714
  "epoch": 2.67,
715
  "learning_rate": 5.4543654721837265e-06,
716
- "loss": 2.5413,
717
  "step": 13500
718
  },
719
  {
720
  "epoch": 2.77,
721
  "learning_rate": 3.804527156338679e-06,
722
- "loss": 2.6004,
723
  "step": 14000
724
  },
725
  {
726
  "epoch": 2.77,
727
  "eval_COMMENT": {
728
- "f1": 0.5666666666666668,
729
- "number": 34,
730
- "precision": 0.6538461538461539,
731
- "recall": 0.5
732
  },
733
  "eval_NAME": {
734
- "f1": 0.6428571428571429,
735
- "number": 26,
736
- "precision": 0.6,
737
- "recall": 0.6923076923076923
738
  },
739
  "eval_QTY": {
740
- "f1": 1.0,
741
- "number": 23,
742
- "precision": 1.0,
743
- "recall": 1.0
744
  },
745
  "eval_RANGE_END": {
746
- "f1": 1.0,
747
- "number": 3,
748
- "precision": 1.0,
749
- "recall": 1.0
750
  },
751
  "eval_UNIT": {
752
- "f1": 0.9500000000000001,
753
- "number": 21,
754
- "precision": 1.0,
755
- "recall": 0.9047619047619048
756
- },
757
- "eval_loss": 2.5654454231262207,
758
- "eval_overall_accuracy": 0.7745098039215687,
759
- "eval_overall_f1": 0.7692307692307693,
760
- "eval_overall_precision": 0.7920792079207921,
761
- "eval_overall_recall": 0.7476635514018691,
762
- "eval_runtime": 10.3314,
763
- "eval_samples_per_second": 823.318,
764
- "eval_steps_per_second": 25.747,
765
  "step": 14000
766
  },
767
  {
768
  "epoch": 2.87,
769
  "learning_rate": 2.154688840493632e-06,
770
- "loss": 2.5827,
771
  "step": 14500
772
  },
773
  {
774
  "epoch": 2.97,
775
  "learning_rate": 5.048505246485845e-07,
776
- "loss": 2.5449,
777
  "step": 15000
778
  },
779
  {
780
  "epoch": 2.97,
781
  "eval_COMMENT": {
782
- "f1": 0.6101694915254237,
783
- "number": 34,
784
- "precision": 0.72,
785
- "recall": 0.5294117647058824
786
  },
787
  "eval_NAME": {
788
- "f1": 0.6545454545454545,
789
- "number": 26,
790
- "precision": 0.6206896551724138,
791
- "recall": 0.6923076923076923
792
  },
793
  "eval_QTY": {
794
- "f1": 1.0,
795
- "number": 23,
796
- "precision": 1.0,
797
- "recall": 1.0
798
  },
799
  "eval_RANGE_END": {
800
- "f1": 1.0,
801
- "number": 3,
802
- "precision": 1.0,
803
- "recall": 1.0
804
  },
805
  "eval_UNIT": {
806
- "f1": 0.9500000000000001,
807
- "number": 21,
808
- "precision": 1.0,
809
- "recall": 0.9047619047619048
810
- },
811
- "eval_loss": 2.5585994720458984,
812
- "eval_overall_accuracy": 0.7892156862745098,
813
- "eval_overall_f1": 0.7864077669902914,
814
- "eval_overall_precision": 0.8181818181818182,
815
- "eval_overall_recall": 0.7570093457943925,
816
- "eval_runtime": 9.9625,
817
- "eval_samples_per_second": 853.8,
818
- "eval_steps_per_second": 26.7,
819
  "step": 15000
820
  },
821
  {
822
  "epoch": 3.0,
823
  "step": 15153,
824
- "total_flos": 467272509509796.0,
825
- "train_loss": 3.027484999280129,
826
- "train_runtime": 1025.3838,
827
- "train_samples_per_second": 472.807,
828
- "train_steps_per_second": 14.778
829
  }
830
  ],
831
  "logging_steps": 500,
832
  "max_steps": 15153,
833
  "num_train_epochs": 3,
834
  "save_steps": 500,
835
- "total_flos": 467272509509796.0,
836
  "trial_name": null,
837
  "trial_params": null
838
  }
 
11
  {
12
  "epoch": 0.1,
13
  "learning_rate": 4.835016168415495e-05,
14
+ "loss": 6.7872,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.2,
19
  "learning_rate": 4.670032336830991e-05,
20
+ "loss": 4.174,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.2,
25
  "eval_COMMENT": {
26
+ "f1": 0.5753388429752067,
27
+ "number": 6922,
28
+ "precision": 0.5304157015725954,
29
+ "recall": 0.6285755561976307
30
  },
31
  "eval_NAME": {
32
+ "f1": 0.7866681381745945,
33
+ "number": 8833,
34
+ "precision": 0.7673592421143288,
35
+ "recall": 0.8069738480697385
36
  },
37
  "eval_QTY": {
38
+ "f1": 0.9736805263894722,
39
+ "number": 7092,
40
+ "precision": 0.9667778704475952,
41
+ "recall": 0.9806824591088551
42
  },
43
  "eval_RANGE_END": {
44
  "f1": 0.0,
45
+ "number": 88,
46
  "precision": 0.0,
47
  "recall": 0.0
48
  },
49
  "eval_UNIT": {
50
+ "f1": 0.9428498856997714,
51
+ "number": 5707,
52
+ "precision": 0.9121887287024901,
53
+ "recall": 0.9756439460311898
54
+ },
55
+ "eval_loss": 3.8689863681793213,
56
+ "eval_overall_accuracy": 0.7961931378288045,
57
+ "eval_overall_f1": 0.8077130165567371,
58
+ "eval_overall_precision": 0.7795388113023709,
59
+ "eval_overall_recall": 0.838000139655052,
60
+ "eval_runtime": 10.0032,
61
+ "eval_samples_per_second": 850.33,
62
+ "eval_steps_per_second": 26.592,
63
  "step": 1000
64
  },
65
  {
66
  "epoch": 0.3,
67
  "learning_rate": 4.505048505246486e-05,
68
+ "loss": 3.6785,
69
  "step": 1500
70
  },
71
  {
72
  "epoch": 0.4,
73
  "learning_rate": 4.3400646736619816e-05,
74
+ "loss": 3.5528,
75
  "step": 2000
76
  },
77
  {
78
  "epoch": 0.4,
79
  "eval_COMMENT": {
80
+ "f1": 0.6020648196549921,
81
+ "number": 6922,
82
+ "precision": 0.5496301598663803,
83
+ "recall": 0.6655590869690841
84
  },
85
  "eval_NAME": {
86
+ "f1": 0.7944308852895496,
87
+ "number": 8833,
88
+ "precision": 0.7787928221859707,
89
+ "recall": 0.8107098381070984
90
  },
91
  "eval_QTY": {
92
+ "f1": 0.9785673998871969,
93
+ "number": 7092,
94
+ "precision": 0.9785673998871969,
95
+ "recall": 0.9785673998871969
96
  },
97
  "eval_RANGE_END": {
98
+ "f1": 0.22641509433962262,
99
+ "number": 88,
100
+ "precision": 0.6666666666666666,
101
+ "recall": 0.13636363636363635
102
  },
103
  "eval_UNIT": {
104
+ "f1": 0.9445616403679015,
105
+ "number": 5707,
106
+ "precision": 0.9109700520833334,
107
+ "recall": 0.9807254249167688
108
+ },
109
+ "eval_loss": 3.415388822555542,
110
+ "eval_overall_accuracy": 0.8041807504201522,
111
+ "eval_overall_f1": 0.8177492307433626,
112
+ "eval_overall_precision": 0.788719146313775,
113
+ "eval_overall_recall": 0.8489979750017457,
114
+ "eval_runtime": 10.2481,
115
+ "eval_samples_per_second": 830.011,
116
+ "eval_steps_per_second": 25.956,
117
  "step": 2000
118
  },
119
  {
120
  "epoch": 0.49,
121
  "learning_rate": 4.1750808420774766e-05,
122
+ "loss": 3.3872,
123
  "step": 2500
124
  },
125
  {
126
  "epoch": 0.59,
127
  "learning_rate": 4.010097010492972e-05,
128
+ "loss": 3.3333,
129
  "step": 3000
130
  },
131
  {
132
  "epoch": 0.59,
133
  "eval_COMMENT": {
134
+ "f1": 0.6369560924231453,
135
+ "number": 6922,
136
+ "precision": 0.5850767928407304,
137
+ "recall": 0.6989309448136377
138
  },
139
  "eval_NAME": {
140
+ "f1": 0.7949638301397095,
141
+ "number": 8833,
142
+ "precision": 0.7759810263044415,
143
+ "recall": 0.814898675421714
144
  },
145
  "eval_QTY": {
146
+ "f1": 0.9801268498942918,
147
+ "number": 7092,
148
+ "precision": 0.9797125950972105,
149
+ "recall": 0.9805414551607445
150
  },
151
  "eval_RANGE_END": {
152
+ "f1": 0.6666666666666667,
153
+ "number": 88,
154
+ "precision": 0.6867469879518072,
155
+ "recall": 0.6477272727272727
156
  },
157
  "eval_UNIT": {
158
+ "f1": 0.948932536293766,
159
+ "number": 5707,
160
+ "precision": 0.9255372313843079,
161
+ "recall": 0.9735412651130191
162
+ },
163
+ "eval_loss": 3.1915245056152344,
164
+ "eval_overall_accuracy": 0.8124138451951584,
165
+ "eval_overall_f1": 0.8287884657492717,
166
+ "eval_overall_precision": 0.8006443424777897,
167
+ "eval_overall_recall": 0.8589833112212835,
168
+ "eval_runtime": 11.2414,
169
+ "eval_samples_per_second": 756.666,
170
+ "eval_steps_per_second": 23.662,
171
  "step": 3000
172
  },
173
  {
174
  "epoch": 0.69,
175
  "learning_rate": 3.845113178908467e-05,
176
+ "loss": 3.2244,
177
  "step": 3500
178
  },
179
  {
180
  "epoch": 0.79,
181
  "learning_rate": 3.680129347323962e-05,
182
+ "loss": 3.1122,
183
  "step": 4000
184
  },
185
  {
186
  "epoch": 0.79,
187
  "eval_COMMENT": {
188
+ "f1": 0.6578412740022282,
189
+ "number": 6922,
190
+ "precision": 0.6020151133501259,
191
+ "recall": 0.7250794568043918
192
  },
193
  "eval_NAME": {
194
+ "f1": 0.8043938887030222,
195
+ "number": 8833,
196
+ "precision": 0.7925502692011867,
197
+ "recall": 0.8165968527114231
198
  },
199
  "eval_QTY": {
200
+ "f1": 0.9807353044950956,
201
+ "number": 7092,
202
+ "precision": 0.9816358242689646,
203
+ "recall": 0.9798364354201917
204
  },
205
  "eval_RANGE_END": {
206
+ "f1": 0.6755555555555556,
207
+ "number": 88,
208
+ "precision": 0.5547445255474452,
209
+ "recall": 0.8636363636363636
210
  },
211
  "eval_UNIT": {
212
+ "f1": 0.9479572808950669,
213
+ "number": 5707,
214
+ "precision": 0.9180758496141849,
215
+ "recall": 0.9798493078675311
216
+ },
217
+ "eval_loss": 3.055974245071411,
218
+ "eval_overall_accuracy": 0.8183054176029609,
219
+ "eval_overall_f1": 0.8368498156162123,
220
+ "eval_overall_precision": 0.8082289803220036,
221
+ "eval_overall_recall": 0.8675720969206061,
222
+ "eval_runtime": 13.0303,
223
+ "eval_samples_per_second": 652.785,
224
+ "eval_steps_per_second": 20.414,
225
  "step": 4000
226
  },
227
  {
228
  "epoch": 0.89,
229
  "learning_rate": 3.515145515739457e-05,
230
+ "loss": 3.0919,
231
  "step": 4500
232
  },
233
  {
234
  "epoch": 0.99,
235
  "learning_rate": 3.3501616841549535e-05,
236
+ "loss": 3.074,
237
  "step": 5000
238
  },
239
  {
240
  "epoch": 0.99,
241
  "eval_COMMENT": {
242
+ "f1": 0.6620227729403884,
243
+ "number": 6922,
244
+ "precision": 0.6171328671328671,
245
+ "recall": 0.7139555041895406
246
  },
247
  "eval_NAME": {
248
+ "f1": 0.8096975138896684,
249
+ "number": 8833,
250
+ "precision": 0.8028043623414199,
251
+ "recall": 0.816710064530737
252
  },
253
  "eval_QTY": {
254
+ "f1": 0.9794429242966393,
255
+ "number": 7092,
256
+ "precision": 0.9747242005306521,
257
+ "recall": 0.9842075578116187
258
  },
259
  "eval_RANGE_END": {
260
+ "f1": 0.7076923076923077,
261
+ "number": 88,
262
+ "precision": 0.6448598130841121,
263
+ "recall": 0.7840909090909091
264
  },
265
  "eval_UNIT": {
266
+ "f1": 0.9461453651089815,
267
+ "number": 5707,
268
+ "precision": 0.9168310322156475,
269
+ "recall": 0.9773961801296653
270
+ },
271
+ "eval_loss": 2.949470043182373,
272
+ "eval_overall_accuracy": 0.820892422153823,
273
+ "eval_overall_f1": 0.840272597816505,
274
+ "eval_overall_precision": 0.8166809464179793,
275
+ "eval_overall_recall": 0.8652677885622513,
276
+ "eval_runtime": 11.1611,
277
+ "eval_samples_per_second": 762.109,
278
+ "eval_steps_per_second": 23.833,
279
  "step": 5000
280
  },
281
  {
282
  "epoch": 1.09,
283
  "learning_rate": 3.1851778525704485e-05,
284
+ "loss": 2.9268,
285
  "step": 5500
286
  },
287
  {
288
  "epoch": 1.19,
289
  "learning_rate": 3.0201940209859435e-05,
290
+ "loss": 2.936,
291
  "step": 6000
292
  },
293
  {
294
  "epoch": 1.19,
295
  "eval_COMMENT": {
296
+ "f1": 0.6634152949942423,
297
+ "number": 6922,
298
+ "precision": 0.6245376865195765,
299
+ "recall": 0.7074544929211211
300
  },
301
  "eval_NAME": {
302
+ "f1": 0.8093132590809874,
303
+ "number": 8833,
304
+ "precision": 0.8003099402258136,
305
+ "recall": 0.81852145363976
306
  },
307
  "eval_QTY": {
308
+ "f1": 0.9790689534476724,
309
+ "number": 7092,
310
+ "precision": 0.9721951897678298,
311
+ "recall": 0.9860406091370558
312
  },
313
  "eval_RANGE_END": {
314
+ "f1": 0.7348837209302326,
315
+ "number": 88,
316
+ "precision": 0.6220472440944882,
317
+ "recall": 0.8977272727272727
318
  },
319
  "eval_UNIT": {
320
+ "f1": 0.9469594594594595,
321
+ "number": 5707,
322
+ "precision": 0.9140714169248328,
323
+ "recall": 0.9823024356053969
324
+ },
325
+ "eval_loss": 2.889272451400757,
326
+ "eval_overall_accuracy": 0.8216855184394887,
327
+ "eval_overall_f1": 0.8412752246905207,
328
+ "eval_overall_precision": 0.8178910577683989,
329
+ "eval_overall_recall": 0.8660358913483696,
330
+ "eval_runtime": 12.795,
331
+ "eval_samples_per_second": 664.793,
332
+ "eval_steps_per_second": 20.789,
333
  "step": 6000
334
  },
335
  {
336
  "epoch": 1.29,
337
  "learning_rate": 2.855210189401439e-05,
338
+ "loss": 2.8799,
339
  "step": 6500
340
  },
341
  {
342
  "epoch": 1.39,
343
  "learning_rate": 2.690226357816934e-05,
344
+ "loss": 2.7662,
345
  "step": 7000
346
  },
347
  {
348
  "epoch": 1.39,
349
  "eval_COMMENT": {
350
+ "f1": 0.6726807593914097,
351
+ "number": 6922,
352
+ "precision": 0.6298537569339385,
353
+ "recall": 0.7217567177116441
354
  },
355
  "eval_NAME": {
356
+ "f1": 0.8074248541947062,
357
+ "number": 8833,
358
+ "precision": 0.7999777753083676,
359
+ "recall": 0.8150118872410279
360
  },
361
  "eval_QTY": {
362
+ "f1": 0.9814136862855534,
363
+ "number": 7092,
364
+ "precision": 0.9800337457817773,
365
+ "recall": 0.9827975183305132
366
  },
367
  "eval_RANGE_END": {
368
+ "f1": 0.7358490566037735,
369
+ "number": 88,
370
+ "precision": 0.6290322580645161,
371
+ "recall": 0.8863636363636364
372
  },
373
  "eval_UNIT": {
374
+ "f1": 0.9479758974794195,
375
+ "number": 5707,
376
+ "precision": 0.9191902567478605,
377
+ "recall": 0.9786227439985982
378
+ },
379
+ "eval_loss": 2.862208366394043,
380
+ "eval_overall_accuracy": 0.8235360764393753,
381
+ "eval_overall_f1": 0.8432707820327757,
382
+ "eval_overall_precision": 0.8209503025493503,
383
+ "eval_overall_recall": 0.8668389078974932,
384
+ "eval_runtime": 9.9331,
385
+ "eval_samples_per_second": 856.327,
386
+ "eval_steps_per_second": 26.779,
387
  "step": 7000
388
  },
389
  {
390
  "epoch": 1.48,
391
  "learning_rate": 2.5252425262324292e-05,
392
+ "loss": 2.8259,
393
  "step": 7500
394
  },
395
  {
396
  "epoch": 1.58,
397
  "learning_rate": 2.3602586946479245e-05,
398
+ "loss": 2.7839,
399
  "step": 8000
400
  },
401
  {
402
  "epoch": 1.58,
403
  "eval_COMMENT": {
404
+ "f1": 0.6755974419387412,
405
+ "number": 6922,
406
+ "precision": 0.6325475860330266,
407
+ "recall": 0.7249349898873158
408
  },
409
  "eval_NAME": {
410
+ "f1": 0.8115015974440895,
411
+ "number": 8833,
412
+ "precision": 0.8036190053285968,
413
+ "recall": 0.8195403600135854
414
  },
415
  "eval_QTY": {
416
+ "f1": 0.9806342969407802,
417
+ "number": 7092,
418
+ "precision": 0.975977653631285,
419
+ "recall": 0.9853355893965031
420
  },
421
  "eval_RANGE_END": {
422
+ "f1": 0.7389162561576356,
423
+ "number": 88,
424
+ "precision": 0.6521739130434783,
425
+ "recall": 0.8522727272727273
426
  },
427
  "eval_UNIT": {
428
+ "f1": 0.9483591961332994,
429
+ "number": 5707,
430
+ "precision": 0.9188301018731515,
431
+ "recall": 0.9798493078675311
432
+ },
433
+ "eval_loss": 2.780142307281494,
434
+ "eval_overall_accuracy": 0.8241969900107634,
435
+ "eval_overall_f1": 0.8452768729641694,
436
+ "eval_overall_precision": 0.8221239522143753,
437
+ "eval_overall_recall": 0.8697716639899449,
438
+ "eval_runtime": 15.1544,
439
+ "eval_samples_per_second": 561.289,
440
+ "eval_steps_per_second": 17.553,
441
  "step": 8000
442
  },
443
  {
444
  "epoch": 1.68,
445
  "learning_rate": 2.19527486306342e-05,
446
+ "loss": 2.7185,
447
  "step": 8500
448
  },
449
  {
450
  "epoch": 1.78,
451
  "learning_rate": 2.0302910314789152e-05,
452
+ "loss": 2.7221,
453
  "step": 9000
454
  },
455
  {
456
  "epoch": 1.78,
457
  "eval_COMMENT": {
458
+ "f1": 0.690339005761758,
459
+ "number": 6922,
460
+ "precision": 0.6436781609195402,
461
+ "recall": 0.7442935567754985
462
  },
463
  "eval_NAME": {
464
+ "f1": 0.8162713392303792,
465
+ "number": 8833,
466
+ "precision": 0.8124719605204127,
467
+ "recall": 0.8201064191101551
468
  },
469
  "eval_QTY": {
470
+ "f1": 0.9827975183305132,
471
+ "number": 7092,
472
+ "precision": 0.9827975183305132,
473
+ "recall": 0.9827975183305132
474
  },
475
  "eval_RANGE_END": {
476
+ "f1": 0.7336683417085428,
477
+ "number": 88,
478
+ "precision": 0.6576576576576577,
479
+ "recall": 0.8295454545454546
480
  },
481
  "eval_UNIT": {
482
+ "f1": 0.9494640122511486,
483
+ "number": 5707,
484
+ "precision": 0.9227716222920457,
485
+ "recall": 0.9777466269493604
486
+ },
487
+ "eval_loss": 2.7520177364349365,
488
+ "eval_overall_accuracy": 0.8285401363370282,
489
+ "eval_overall_f1": 0.850812759300823,
490
+ "eval_overall_precision": 0.8292674842558834,
491
+ "eval_overall_recall": 0.8735074366315202,
492
+ "eval_runtime": 11.6589,
493
+ "eval_samples_per_second": 729.574,
494
+ "eval_steps_per_second": 22.815,
495
  "step": 9000
496
  },
497
  {
498
  "epoch": 1.88,
499
  "learning_rate": 1.8653071998944105e-05,
500
+ "loss": 2.8004,
501
  "step": 9500
502
  },
503
  {
504
  "epoch": 1.98,
505
  "learning_rate": 1.700323368309906e-05,
506
+ "loss": 2.7156,
507
  "step": 10000
508
  },
509
  {
510
  "epoch": 1.98,
511
  "eval_COMMENT": {
512
+ "f1": 0.6864177489177489,
513
+ "number": 6922,
514
+ "precision": 0.6453828542355635,
515
+ "recall": 0.7330251372435712
516
  },
517
  "eval_NAME": {
518
+ "f1": 0.8142527960433878,
519
+ "number": 8833,
520
+ "precision": 0.8084821428571428,
521
+ "recall": 0.8201064191101551
522
  },
523
  "eval_QTY": {
524
+ "f1": 0.9826589595375722,
525
+ "number": 7092,
526
+ "precision": 0.9825204398082887,
527
+ "recall": 0.9827975183305132
528
  },
529
  "eval_RANGE_END": {
530
+ "f1": 0.7219512195121951,
531
+ "number": 88,
532
+ "precision": 0.6324786324786325,
533
+ "recall": 0.8409090909090909
534
  },
535
  "eval_UNIT": {
536
+ "f1": 0.9496769806188372,
537
+ "number": 5707,
538
+ "precision": 0.9222387320455672,
539
+ "recall": 0.9787979674084458
540
+ },
541
+ "eval_loss": 2.7235608100891113,
542
+ "eval_overall_accuracy": 0.8264629793983799,
543
+ "eval_overall_f1": 0.8495539058775454,
544
+ "eval_overall_precision": 0.8291126620139582,
545
+ "eval_overall_recall": 0.8710285594581384,
546
+ "eval_runtime": 10.9292,
547
+ "eval_samples_per_second": 778.285,
548
+ "eval_steps_per_second": 24.339,
549
  "step": 10000
550
  },
551
  {
552
  "epoch": 2.08,
553
  "learning_rate": 1.535339536725401e-05,
554
+ "loss": 2.6908,
555
  "step": 10500
556
  },
557
  {
558
  "epoch": 2.18,
559
  "learning_rate": 1.3703557051408963e-05,
560
+ "loss": 2.6804,
561
  "step": 11000
562
  },
563
  {
564
  "epoch": 2.18,
565
  "eval_COMMENT": {
566
+ "f1": 0.6895951823352291,
567
+ "number": 6922,
568
+ "precision": 0.6422784494578088,
569
+ "recall": 0.7444380236925744
570
  },
571
  "eval_NAME": {
572
+ "f1": 0.8174098858460327,
573
+ "number": 8833,
574
+ "precision": 0.8120670391061453,
575
+ "recall": 0.8228235027736895
576
  },
577
  "eval_QTY": {
578
+ "f1": 0.9829240756421113,
579
+ "number": 7092,
580
+ "precision": 0.9837570621468926,
581
+ "recall": 0.9820924985899605
582
  },
583
  "eval_RANGE_END": {
584
+ "f1": 0.7281553398058253,
585
+ "number": 88,
586
+ "precision": 0.635593220338983,
587
+ "recall": 0.8522727272727273
588
  },
589
  "eval_UNIT": {
590
+ "f1": 0.9503232391970058,
591
+ "number": 5707,
592
+ "precision": 0.9234584228798148,
593
+ "recall": 0.9787979674084458
594
+ },
595
+ "eval_loss": 2.692896604537964,
596
+ "eval_overall_accuracy": 0.8278792227656401,
597
+ "eval_overall_f1": 0.8510414189120316,
598
+ "eval_overall_precision": 0.8288219722038385,
599
+ "eval_overall_recall": 0.8744850219956707,
600
+ "eval_runtime": 9.5517,
601
+ "eval_samples_per_second": 890.518,
602
+ "eval_steps_per_second": 27.848,
603
  "step": 11000
604
  },
605
  {
606
  "epoch": 2.28,
607
  "learning_rate": 1.2053718735563915e-05,
608
+ "loss": 2.5859,
609
  "step": 11500
610
  },
611
  {
612
  "epoch": 2.38,
613
  "learning_rate": 1.0403880419718868e-05,
614
+ "loss": 2.6121,
615
  "step": 12000
616
  },
617
  {
618
  "epoch": 2.38,
619
  "eval_COMMENT": {
620
+ "f1": 0.691554715452643,
621
+ "number": 6922,
622
+ "precision": 0.6490939044481054,
623
+ "recall": 0.7399595492632187
624
  },
625
  "eval_NAME": {
626
+ "f1": 0.8156028368794326,
627
+ "number": 8833,
628
+ "precision": 0.811037725288257,
629
+ "recall": 0.820219630929469
630
  },
631
  "eval_QTY": {
632
+ "f1": 0.9826362038664324,
633
+ "number": 7092,
634
+ "precision": 0.9798121407542408,
635
+ "recall": 0.9854765933446137
636
  },
637
  "eval_RANGE_END": {
638
+ "f1": 0.7499999999999999,
639
+ "number": 88,
640
+ "precision": 0.6328125,
641
+ "recall": 0.9204545454545454
642
  },
643
  "eval_UNIT": {
644
+ "f1": 0.9497709146444936,
645
+ "number": 5707,
646
+ "precision": 0.9207106431978944,
647
+ "recall": 0.9807254249167688
648
+ },
649
+ "eval_loss": 2.669058322906494,
650
+ "eval_overall_accuracy": 0.8276526238268784,
651
+ "eval_overall_f1": 0.8514097200965888,
652
+ "eval_overall_precision": 0.8299297175440923,
653
+ "eval_overall_recall": 0.8740311430766008,
654
+ "eval_runtime": 10.4859,
655
+ "eval_samples_per_second": 811.181,
656
+ "eval_steps_per_second": 25.367,
657
  "step": 12000
658
  },
659
  {
660
  "epoch": 2.47,
661
  "learning_rate": 8.75404210387382e-06,
662
+ "loss": 2.6392,
663
  "step": 12500
664
  },
665
  {
666
  "epoch": 2.57,
667
  "learning_rate": 7.104203788028774e-06,
668
+ "loss": 2.553,
669
  "step": 13000
670
  },
671
  {
672
  "epoch": 2.57,
673
  "eval_COMMENT": {
674
+ "f1": 0.6915875260995488,
675
+ "number": 6922,
676
+ "precision": 0.6478233438485804,
677
+ "recall": 0.7416931522681306
678
  },
679
  "eval_NAME": {
680
+ "f1": 0.8171171171171171,
681
+ "number": 8833,
682
+ "precision": 0.8128150554497592,
683
+ "recall": 0.8214649609419223
684
  },
685
  "eval_QTY": {
686
+ "f1": 0.9832299887260428,
687
+ "number": 7092,
688
+ "precision": 0.9826760563380281,
689
+ "recall": 0.983784545967287
690
  },
691
  "eval_RANGE_END": {
692
+ "f1": 0.742857142857143,
693
+ "number": 88,
694
+ "precision": 0.639344262295082,
695
+ "recall": 0.8863636363636364
696
  },
697
  "eval_UNIT": {
698
+ "f1": 0.9500635862653668,
699
+ "number": 5707,
700
+ "precision": 0.920335085413929,
701
+ "recall": 0.9817767653758542
702
+ },
703
+ "eval_loss": 2.6652376651763916,
704
+ "eval_overall_accuracy": 0.8286912022962026,
705
+ "eval_overall_f1": 0.8519148357254608,
706
+ "eval_overall_precision": 0.8304489092235263,
707
+ "eval_overall_recall": 0.874519935758676,
708
+ "eval_runtime": 11.1046,
709
+ "eval_samples_per_second": 765.987,
710
+ "eval_steps_per_second": 23.954,
711
  "step": 13000
712
  },
713
  {
714
  "epoch": 2.67,
715
  "learning_rate": 5.4543654721837265e-06,
716
+ "loss": 2.5782,
717
  "step": 13500
718
  },
719
  {
720
  "epoch": 2.77,
721
  "learning_rate": 3.804527156338679e-06,
722
+ "loss": 2.5781,
723
  "step": 14000
724
  },
725
  {
726
  "epoch": 2.77,
727
  "eval_COMMENT": {
728
+ "f1": 0.6930304873926858,
729
+ "number": 6922,
730
+ "precision": 0.6512514292974209,
731
+ "recall": 0.7405374169315226
732
  },
733
  "eval_NAME": {
734
+ "f1": 0.8163609767075503,
735
+ "number": 8833,
736
+ "precision": 0.8114304887596465,
737
+ "recall": 0.8213517491226084
738
  },
739
  "eval_QTY": {
740
+ "f1": 0.9833638798815735,
741
+ "number": 7092,
742
+ "precision": 0.9832252607837609,
743
+ "recall": 0.983502538071066
744
  },
745
  "eval_RANGE_END": {
746
+ "f1": 0.7450980392156864,
747
+ "number": 88,
748
+ "precision": 0.6551724137931034,
749
+ "recall": 0.8636363636363636
750
  },
751
  "eval_UNIT": {
752
+ "f1": 0.9503184713375797,
753
+ "number": 5707,
754
+ "precision": 0.9220500988793672,
755
+ "recall": 0.9803749780970737
756
+ },
757
+ "eval_loss": 2.6431026458740234,
758
+ "eval_overall_accuracy": 0.8285967860717186,
759
+ "eval_overall_f1": 0.8522440918068515,
760
+ "eval_overall_precision": 0.8317381189764042,
761
+ "eval_overall_recall": 0.8737867467355631,
762
+ "eval_runtime": 10.2487,
763
+ "eval_samples_per_second": 829.962,
764
+ "eval_steps_per_second": 25.955,
765
  "step": 14000
766
  },
767
  {
768
  "epoch": 2.87,
769
  "learning_rate": 2.154688840493632e-06,
770
+ "loss": 2.5714,
771
  "step": 14500
772
  },
773
  {
774
  "epoch": 2.97,
775
  "learning_rate": 5.048505246485845e-07,
776
+ "loss": 2.5928,
777
  "step": 15000
778
  },
779
  {
780
  "epoch": 2.97,
781
  "eval_COMMENT": {
782
+ "f1": 0.6959701997968167,
783
+ "number": 6922,
784
+ "precision": 0.6551064643631264,
785
+ "recall": 0.7422710199364345
786
  },
787
  "eval_NAME": {
788
+ "f1": 0.8171788810086682,
789
+ "number": 8833,
790
+ "precision": 0.8126049479458188,
791
+ "recall": 0.8218045963998641
792
  },
793
  "eval_QTY": {
794
+ "f1": 0.9831607130275488,
795
+ "number": 7092,
796
+ "precision": 0.9825376707505985,
797
+ "recall": 0.983784545967287
798
  },
799
  "eval_RANGE_END": {
800
+ "f1": 0.7439613526570048,
801
+ "number": 88,
802
+ "precision": 0.6470588235294118,
803
+ "recall": 0.875
804
  },
805
  "eval_UNIT": {
806
+ "f1": 0.9497833290848839,
807
+ "number": 5707,
808
+ "precision": 0.9219729462223688,
809
+ "recall": 0.9793236376379885
810
+ },
811
+ "eval_loss": 2.639442205429077,
812
+ "eval_overall_accuracy": 0.8289178012349642,
813
+ "eval_overall_f1": 0.8531516183986372,
814
+ "eval_overall_precision": 0.8330560915563244,
815
+ "eval_overall_recall": 0.874240625654633,
816
+ "eval_runtime": 9.6239,
817
+ "eval_samples_per_second": 883.843,
818
+ "eval_steps_per_second": 27.64,
819
  "step": 15000
820
  },
821
  {
822
  "epoch": 3.0,
823
  "step": 15153,
824
+ "total_flos": 466306211114724.0,
825
+ "train_loss": 3.047607653480721,
826
+ "train_runtime": 1056.0535,
827
+ "train_samples_per_second": 459.076,
828
+ "train_steps_per_second": 14.349
829
  }
830
  ],
831
  "logging_steps": 500,
832
  "max_steps": 15153,
833
  "num_train_epochs": 3,
834
  "save_steps": 500,
835
+ "total_flos": 466306211114724.0,
836
  "trial_name": null,
837
  "trial_params": null
838
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:823747ab7556f15a23f379495025c4b71556b054ec3c8b4024c477c4081cb53c
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:445f885f648d22fc4ab9f3caa1ac96f712e52c6cede27f3855cfdc76d2db8dfa
3
  size 4155
validation_results.json CHANGED
@@ -1,41 +1,41 @@
1
  {
2
  "epoch": 3.0,
3
  "eval_COMMENT": {
4
- "f1": 0.6101694915254237,
5
- "number": 34,
6
- "precision": 0.72,
7
- "recall": 0.5294117647058824
8
  },
9
  "eval_NAME": {
10
- "f1": 0.6545454545454545,
11
- "number": 26,
12
- "precision": 0.6206896551724138,
13
- "recall": 0.6923076923076923
14
  },
15
  "eval_QTY": {
16
- "f1": 1.0,
17
- "number": 23,
18
- "precision": 1.0,
19
- "recall": 1.0
20
  },
21
  "eval_RANGE_END": {
22
- "f1": 1.0,
23
- "number": 3,
24
- "precision": 1.0,
25
- "recall": 1.0
26
  },
27
  "eval_UNIT": {
28
- "f1": 0.9500000000000001,
29
- "number": 21,
30
- "precision": 1.0,
31
- "recall": 0.9047619047619048
32
  },
33
- "eval_loss": 2.5587010383605957,
34
- "eval_overall_accuracy": 0.7892156862745098,
35
- "eval_overall_f1": 0.7864077669902914,
36
- "eval_overall_precision": 0.8181818181818182,
37
- "eval_overall_recall": 0.7570093457943925,
38
- "eval_runtime": 10.3156,
39
- "eval_samples_per_second": 824.575,
40
- "eval_steps_per_second": 25.786
41
  }
 
1
  {
2
  "epoch": 3.0,
3
  "eval_COMMENT": {
4
+ "f1": 0.6959761549925485,
5
+ "number": 6922,
6
+ "precision": 0.6552295918367347,
7
+ "recall": 0.7421265530193586
8
  },
9
  "eval_NAME": {
10
+ "f1": 0.8171377097173741,
11
+ "number": 8833,
12
+ "precision": 0.8127449882405644,
13
+ "recall": 0.8215781727612362
14
  },
15
  "eval_QTY": {
16
+ "f1": 0.9830914470903199,
17
+ "number": 7092,
18
+ "precision": 0.9823993241340467,
19
+ "recall": 0.983784545967287
20
  },
21
  "eval_RANGE_END": {
22
+ "f1": 0.75,
23
+ "number": 88,
24
+ "precision": 0.65,
25
+ "recall": 0.8863636363636364
26
  },
27
  "eval_UNIT": {
28
+ "f1": 0.9497026338147834,
29
+ "number": 5707,
30
+ "precision": 0.9218208807521029,
31
+ "recall": 0.9793236376379885
32
  },
33
+ "eval_loss": 2.639784336090088,
34
+ "eval_overall_accuracy": 0.8287289687859962,
35
+ "eval_overall_f1": 0.8531416110126756,
36
+ "eval_overall_precision": 0.8331004192453584,
37
+ "eval_overall_recall": 0.8741707981286223,
38
+ "eval_runtime": 13.2958,
39
+ "eval_samples_per_second": 639.753,
40
+ "eval_steps_per_second": 20.006
41
  }