blogpost-fineweb-v1 / data /plots /wet_comparison /winogrande_acc_norm.json
hynky's picture
hynky HF staff
new plotting code (JIT)
0932e7b
raw
history blame
1.27 kB
{"data":{"ind_minhash-CC-MAIN-2019-18":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.4960000067949295,0.5010000020265579,0.5055000185966492,0.5119999945163727,0.5230000019073486,0.5149999856948853,0.5180000066757202,0.5145000219345093,0.5139999985694885,0.5200000107288361,0.5140000283718109,0.5220000147819519,0.5195000171661377,0.5194999873638153],"label":"Extracted from WARC"},"wet-extraction-2019-18":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.4970000088214874,0.49299998581409454,0.494499996304512,0.4989999979734421,0.502000018954277,0.5115000009536743,0.5119999945163727,0.5185000002384186,0.5119999945163727,0.5090000033378601,0.5175000131130219,0.5139999985694885,0.5074999928474426,0.5164999961853027,0.5115000009536743],"label":"WET data"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"title":{"text":"WET data is worse than data extracted from WARC"}}}