{"data":{"ind_minhash-CC-MAIN-2019-18":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.2915000021457672,0.33500000834465027,0.35800001025199885,0.37450000643730164,0.3859999924898147,0.3959999978542328,0.4035000056028366,0.4220000058412552,0.4294999986886978,0.43400000035762787,0.44099999964237213,0.4424999952316284,0.44449999928474426,0.4494999945163727],"label":"Extracted from WARC"},"wet-extraction-2019-18":{"x":[0.0,2.0971520000000003,4.194304000000001,6.291456,8.388608000000001,10.48576,12.582912,14.680064000000002,16.777216000000003,18.874368,20.97152,23.068672000000003,25.165824,27.262976000000002,28.311552000000002],"y":[0.257999986410141,0.28650000691413874,0.31299999356269836,0.3305000066757202,0.3569999933242798,0.3710000067949295,0.3879999965429306,0.3854999989271164,0.39199998974800104,0.4055000096559524,0.4064999967813492,0.4065000116825104,0.4120000004768371,0.41700001060962677,0.4175000041723251],"label":"WET data"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"title":{"text":"WET data is worse than data extracted from WARC"}}} |