{"data": {"1": {"x": ["1B", "10B", "100B", "350B", "1T"], "y": [0.994974, 0.9515081, 0.60887281, 0.1741474885714285, 0.006232416], "label": "1"}, "2": {"x": ["1B", "10B", "100B", "350B", "1T"], "y": [0.005008, 0.047331, 0.30282154, 0.3071204342857143, 0.032470074], "label": "2"}, "3": {"x": ["1B", "10B", "100B", "350B", "1T"], "y": [1.8e-05, 0.0011439, 0.0745482, 0.2680183371428571, 0.083742993], "label": "3"}, "4-8": {"x": ["1B", "10B", "100B", "350B", "1T"], "y": [0.0, 1.7e-05, 0.01375745, 0.25064894285714273, 0.8176358810000001], "label": "4-8"}, "8-16": {"x": ["1B", "10B", "100B", "350B", "1T"], "y": [0.0, 0.0, 0.0, 6.479714285714286e-05, 0.05991048400000001], "label": "8-16"}, "16-32": {"x": ["1B", "10B", "100B", "350B", "1T"], "y": [0.0, 0.0, 0.0, 0.0, 8.152000000000001e-06], "label": "16-32"}}, "layout": {"title": {"text": "Sampling from 1000 identical buckets with 200B tokens each"}, "xaxis": {"title": {"text": "Sample size"}}, "yaxis": {"title": {"text": "Dataset fraction"}}, "barmode": "stack", "legend": {"title": {"text": "# duplicates", "font": {"size": 14, "weight": "bold"}}, "font": {"size": 14}, "bgcolor": "white 0.9", "orientation": "v", "xanchor": "left", "yanchor": "bottom", "x": 0.01, "y": 0}}}