{"data":{"ind_minhash-CC-MAIN-2019-18":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2509999871253967,0.2957500070333481,0.32750000059604645,0.3479999899864197,0.3422500044107437,0.3535000085830688,0.35199999809265137,0.3564999997615814,0.36150000989437103,0.36275000870227814,0.36924999952316284,0.3685000091791153,0.37325000762939453,0.3764999955892563,0.3779999911785126],"label":"Extracted from WARC"},"wet-extraction-2019-18":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.2512499988079071,0.2689999938011169,0.304749995470047,0.32025000452995295,0.33400000631809235,0.3375000059604645,0.3384999930858612,0.346000000834465,0.34949998557567596,0.3512499928474426,0.3535000085830688,0.3577500134706497,0.35724999010562897,0.35950000584125513,0.35875000059604645],"label":"WET data"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"title":{"text":"WET data is worse than data extracted from WARC"}}} |