File size: 2,376 Bytes
43ec909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
{
    "data": {
        "x": [
            0.0,
            10.48576,
            20.97152,
            31.45728,
            41.94304,
            52.4288,
            62.91456,
            73.40032000000001,
            83.88608,
            94.37184,
            104.8576,
            115.34336,
            125.82912,
            136.31488000000002,
            146.80064000000002,
            157.28640000000001,
            167.77216,
            178.25792,
            188.74368,
            199.22944,
            209.7152,
            220.20096,
            230.68672,
            241.17248,
            251.65824,
            262.144,
            272.62976000000003,
            283.11552,
            293.60128000000003,
            304.08704,
            314.57280000000003,
            325.05856,
            335.54432,
            346.03008
        ],
        "y": [
            null,
            null,
            null,
            null,
            0.40171657912433145,
            0.42239717617630956,
            0.43069435879588125,
            0.4351756565272808,
            0.43896834924817085,
            0.4424236983060837,
            0.4452380746603012,
            0.44781614691019056,
            0.45025914907455444,
            0.4521562337875366,
            0.4531575210392475,
            0.45397180542349813,
            0.4548915736377239,
            0.4563755728304386,
            0.45696389451622965,
            0.458776044100523,
            0.4609984554350376,
            0.4624955080449581,
            0.4629682660102844,
            0.4638278633356093,
            0.4645016059279441,
            0.4646032989025116,
            0.46489162668585776,
            0.4657001614570618,
            0.46593172624707224,
            0.4667894795536995,
            0.4675446107983589,
            0.46748293563723564,
            0.4683080866932869,
            0.46885923445224764
        ],
        "label": "FineWeb full MinHash"
    },
    "layout": {
        "xaxis": {
            "title": {
                "text": "Training tokens (billions)"
            }
        },
        "yaxis": {
            "title": {
                "text": "Agg Score"
            },
            "range": [
                0.35,
                0.5
            ]
        },
        "title": {
            "text": "Dataset Ablations"
        }
    }
}