blogpost-fineweb-v1
/
assets
/data
/plots
/dededup_difference
/big-run-fineweb-cross-dedup-fixed.json
{ | |
"data": { | |
"x": [ | |
0.0, | |
10.48576, | |
20.97152, | |
31.45728, | |
41.94304, | |
52.4288, | |
62.91456, | |
73.40032000000001, | |
83.88608, | |
94.37184, | |
104.8576, | |
115.34336, | |
125.82912, | |
136.31488000000002, | |
146.80064000000002, | |
157.28640000000001, | |
167.77216, | |
178.25792, | |
188.74368, | |
199.22944, | |
209.7152, | |
220.20096, | |
230.68672, | |
241.17248, | |
251.65824, | |
262.144, | |
272.62976000000003, | |
283.11552, | |
293.60128000000003, | |
304.08704, | |
314.57280000000003, | |
325.05856, | |
335.54432, | |
346.03008 | |
], | |
"y": [ | |
null, | |
null, | |
null, | |
null, | |
0.40171657912433145, | |
0.42239717617630956, | |
0.43069435879588125, | |
0.4351756565272808, | |
0.43896834924817085, | |
0.4424236983060837, | |
0.4452380746603012, | |
0.44781614691019056, | |
0.45025914907455444, | |
0.4521562337875366, | |
0.4531575210392475, | |
0.45397180542349813, | |
0.4548915736377239, | |
0.4563755728304386, | |
0.45696389451622965, | |
0.458776044100523, | |
0.4609984554350376, | |
0.4624955080449581, | |
0.4629682660102844, | |
0.4638278633356093, | |
0.4645016059279441, | |
0.4646032989025116, | |
0.46489162668585776, | |
0.4657001614570618, | |
0.46593172624707224, | |
0.4667894795536995, | |
0.4675446107983589, | |
0.46748293563723564, | |
0.4683080866932869, | |
0.46885923445224764 | |
], | |
"label": "FineWeb full MinHash" | |
}, | |
"layout": { | |
"xaxis": { | |
"title": { | |
"text": "Training tokens (billions)" | |
} | |
}, | |
"yaxis": { | |
"title": { | |
"text": "Agg Score" | |
}, | |
"range": [ | |
0.35, | |
0.5 | |
] | |
}, | |
"title": { | |
"text": "Dataset Ablations" | |
} | |
} | |
} |