{"data":{"big-run-fineweb-cross-dedup-fixed":{"x":[0.0,10.48576,20.97152,31.45728,41.94304,52.4288,62.91456,73.40032000000001,83.88608,94.37184,104.8576,115.34336,125.82912,136.31488000000002,146.80064000000002,157.28640000000001,167.77216,178.25792,188.74368,199.22944,209.7152,220.20096,230.68672,241.17248,251.65824,262.144,272.62976000000003,283.11552,293.60128000000003,304.08704,314.57280000000003,325.05856,335.54432,346.03008],"y":[null,null,null,null,0.40171657912433145,0.42239717617630956,0.43069435879588125,0.4351756565272808,0.43896834924817085,0.4424236983060837,0.4452380746603012,0.44781614691019056,0.45025914907455444,0.4521562337875366,0.4531575210392475,0.45397180542349813,0.4548915736377239,0.4563755728304386,0.45696389451622965,0.458776044100523,0.4609984554350376,0.4624955080449581,0.4629682660102844,0.4638278633356093,0.4645016059279441,0.4646032989025116,0.46489162668585776,0.4657001614570618,0.46593172624707224,0.4667894795536995,0.4675446107983589,0.46748293563723564,0.4683080866932869,0.46885923445224764],"label":"FineWeb full \nMinHash"},"big-run-refinedweb":{"x":[0.0,10.48576,20.97152,28.311552000000002,31.45728,41.94304,52.4288,62.91456,73.40032000000001,83.88608,94.37184,104.8576,115.34336,125.82912,136.31488000000002,146.80064000000002,157.28640000000001,167.77216,178.25792,188.74368,199.22944,209.7152,220.20096,230.68672,241.17248,251.65824,262.144,272.62976000000003,283.11552,293.60128000000003,304.08704,314.57280000000003,325.05856,335.54432,346.03008],"y":[null,null,null,null,0.40424661971628667,0.42596163749694826,0.43559565395116806,0.4419388733804226,0.4472432412207127,0.4522114463150501,0.45583397448062896,0.45813767313957215,0.460252707451582,0.4618991769850254,0.46210767328739166,0.46468816623091697,0.46640462651848785,0.46798615977168073,0.4687947325408458,0.4707141913473606,0.47183807417750356,0.4731586426496506,0.474202574789524,0.47580953985452645,0.4768182456493378,0.47721000015735626,0.477897260338068,0.47868331149220467,0.4798942424356937,0.48083210438489904,0.48233432918786995,0.4825453333556652,0.48372062146663664,0.48404486328363416,0.48417936712503434],"label":"RefinedWeb"},"big-run-sampled_full_filtered_no_dedup":{"x":[0.0,10.48576,20.97152,28.311552000000002,31.45728,41.94304,52.4288,62.91456,73.40032000000001,83.88608,94.37184,104.8576,115.34336,125.82912,136.31488000000002,146.80064000000002,157.28640000000001,167.77216,178.25792,188.74368,199.22944,209.7152,220.20096,230.68672,241.17248,251.65824,262.144,272.62976000000003,283.11552,293.60128000000003,304.08704,314.57280000000003,325.05856,335.54432,346.03008],"y":[null,null,null,null,0.39733172245323656,0.4170659720897675,0.42569294571876515,0.42934197112917893,0.4318342722952365,0.43489449843764305,0.43767731785774233,0.43933030366897585,0.4432003878057003,0.44580490812659257,0.44852474182844154,0.4508663788437842,0.45200284123420714,0.45270049944519997,0.45411895886063575,0.45437362268567083,0.4551906920969486,0.45563211515545843,0.4572733923792839,0.45865254402160643,0.4608928956091404,0.46221072375774386,
0.464424304664135,0.4650039754807949,0.465817741304636,0.46602572202682496,0.4663869492709637,0.466600227355957,0.4675856366753578,0.4670651629567145,0.46774301379919053],"label":"FineWeb filtered only"}},"layout":{"xaxis":{"title":{"text":"Training tokens (billions)"}},"yaxis":{"title":{"text":"Agg Score"},"range":[0.35,0.5]},"title":{"text":"Dataset Ablations"}}}