hynky HF staff committed on
Commit
3905983
·
1 Parent(s): 0932e7b

add cluster

Browse files
data/clustering/data.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/clustering/info.csv ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,cluster_id,cluster_summaries,cluster_position_x,cluster_position_y
2
+ 0,-1,None,9.926462,4.7121987
3
+ 1,0,Philosophical/Spiritual Introspection,10.312462,1.2666532
4
+ 2,1,"Scholarships",8.167274,4.8995786
5
+ 3,2,Politics,8.81142,2.4859838
6
+ 4,3,Theology,9.615214,0.3783942
7
+ 5,4,Dating,4.985182,1.8439052
8
+ 6,5,Accommodation,11.457769,5.080919
9
+ 7,6,Football,6.6154537,-1.6859366
10
+ 8,7,Film Festival,6.9734483,1.4548192
11
+ 9,8,Culinary,13.426296,4.5412893
12
+ 10,9,Music,6.0653744,0.7536916
13
+ 11,10,Gambling,3.124241,3.2533677
14
+ 12,11,Baseball,7.133596,-2.4256644
15
+ 13,12,Technology,6.4929094,6.768577
16
+ 14,13,Website Policies,4.873843,5.771508
17
+ 15,14,Weddings,11.815845,3.7894728
18
+ 16,15,Gaming,5.529167,2.9530518
19
+ 17,16,Commodities/Services Provision,10.453564,5.8489122
20
+ 18,17,Crafts,13.287651,6.4237967
21
+ 19,18,Automobiles,9.9531145,8.840178
22
+ 20,19,Watches,13.893139,9.859185
23
+ 21,20,Dogs,12.595798,3.5351615
24
+ 22,21,Photography,10.7942295,3.5504062
25
+ 23,22,Legalities,8.942016,4.72733
26
+ 24,23,Consumer Electronics,7.078649,8.338984
27
+ 25,24,Insulation,10.520957,7.914946
28
+ 26,25,Cannabis,14.317424,3.2114828
29
+ 27,26,Footwear,15.052116,7.6956415
30
+ 28,27,Real Estate,9.536316,6.103533
31
+ 29,28,Relocation,10.205071,7.1883316
32
+ 30,29,Sports betting,3.2779586,2.443366
33
+ 31,30,Narratives,7.613535,1.8300554
34
+ 32,31,Dating,4.788838,2.1900373
35
+ 33,32,Apparel/Clothing,14.394226,7.3073387
36
+ 34,33,User Authentication,5.265638,6.4014487
37
+ 35,34,Academic writing,6.9187264,3.4357684
38
+ 36,35,Sports,7.4969172,-2.086585
39
+ 37,36,Fashion/Lifestyle Products,13.821669,7.7150764
40
+ 38,37,Diverse events,9.437052,2.2438836
41
+ 39,38,Blockchain/Cryptocurrency,7.7586045,6.9439344
42
+ 40,39,Online Businesses/Marketing,6.522259,5.219268
43
+ 41,40,Healthcare,11.425277,2.3801014
44
+ 42,41,Home Decor,12.878046,7.2632184
45
+ 43,42,Biomedicine,12.789575,2.3376262
46
+ 44,43,Jewelry,14.259997,8.653363
47
+ 45,44,Addiction,11.561383,1.3774762
48
+ 46,45,Products,11.711758,8.423251
49
+ 47,46,Multi-purposefulness,11.080702,7.4574013
50
+ 48,47,"Mass transit",9.910158,5.4402313
51
+ 49,48,Ethernet,6.9763823,7.7909245
52
+ 50,49,Legal,9.516912,4.636553
53
+ 51,50,E-commerce,13.263438,8.6548195
54
+ 52,51,Audio,7.717162,8.903019
55
+ 53,52,Infrastructure,10.52904,5.369669
56
+ 54,53,Firearms,11.062812,9.268473
57
+ 55,54,Freight/Logistics,9.551044,7.0336204
58
+ 56,55,Products,12.073747,7.645973
59
+ 57,56,Vaccinations,11.9387045,2.7824683
60
+ 58,57,Artwork,11.019163,4.1677165
61
+ 59,58,Viticulture,14.223523,5.0761614
62
+ 60,59,WordPress,5.9597983,5.824579
63
+ 61,60,Cosmetics/Dermatology,15.093273,3.4669027
64
+ 62,61,Software,6.375921,6.4298844
65
+ 63,62,Dentistry,14.76626,1.1620314
66
+ 64,63,Pest Control,13.201735,3.6806118
67
+ 65,64,SEO,5.720493,5.238112
68
+ 66,65,Lottery,1.7142816,2.9782674
69
+ 67,66,Narratives,8.460977,1.0804662
70
+ 68,67,Waste Reduction & Recycling,10.634534,6.959523
71
+ 69,68,Communication,6.438943,5.9467845
72
+ 70,69,Orthopedics,13.005415,1.1908791
73
+ 71,70,Home Decor & Furniture,12.732457,7.876862
74
+ 72,71,Education,7.6568975,3.4944353
75
+ 73,72,Sports,7.295141,-0.7343214
76
+ 74,73,Social Media Advertising,6.133886,4.8547883
77
+ 75,74,Privacy,4.756733,6.3598356
78
+ 76,75,Website design,6.1168823,5.465095
79
+ 77,76,Roofing,11.389448,8.080609
80
+ 78,77,Nutrition/Supplements,13.631578,2.5334294
81
+ 79,78,Haircare/Hairstyling,15.544645,4.54254
82
+ 80,79,Cookies,4.341592,6.819268
83
+ 81,80,International Trade,8.993828,6.4757586
84
+ 82,81,Entrepreneurial Resources,9.435777,5.3340797
85
+ 83,82,Cricket,6.5171986,-1.245905
86
+ 84,83,Crafts,13.852216,7.049825
87
+ 85,84,Floristry,13.407425,5.8741536
88
+ 86,85,Genealogy,9.530803,1.6548243
89
+ 87,86,Mental Health,11.074349,1.6069281
90
+ 88,87,Volunteerism,10.145443,3.6734574
91
+ 89,88,Lighting,11.385381,8.93693
92
+ 90,89,Artificial Intelligence,6.5306387,6.2178063
93
+ 91,90,Business,7.471462,6.4142885
94
+ 92,91,E-commerce,13.638669,6.5098934
95
+ 93,92,Urbanization/Over-tourism,10.221115,6.100654
96
+ 94,93,Events,10.8449,3.9822264
97
+ 95,94,Pharmaceuticals/Biotechnology,12.318266,2.4331784
98
+ 96,95,Professional Wrestling,6.856304,-0.65598303
99
+ 97,96,Various,9.3211975,3.4894605
100
+ 98,97,Medicine,13.17882,2.1281319
101
+ 99,98,Community Engagement,9.848856,3.5187004
102
+ 100,99,Fitness,12.504849,0.9134393
103
+ 101,100,Bathroom Design & Toilet Engineering,11.779076,7.2920136
104
+ 102,101,Business Development,7.328447,5.659843
105
+ 103,102,Sports,7.6370654,-1.0701839
106
+ 104,103,Sexuality,13.817207,1.6510898
index.html CHANGED
@@ -5,7 +5,9 @@
5
  <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjs/12.4.2/math.min.js" charset="utf-8"></script>
6
  <script src="https://cdn.plot.ly/plotly-2.32.0.min.js" charset="utf-8"></script>
7
  <script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.21/lodash.min.js" charset="utf-8"></script>
 
8
  <script type="module" src="src/plotting.js"></script>
 
9
  <link rel="stylesheet" href="style.css">
10
  <meta name="viewport" content="width=device-width, initial-scale=1">
11
  <meta charset="utf8">
@@ -159,9 +161,12 @@
159
  </script>
160
  </d-front-matter>
161
  <d-title>
162
- <figure class="l-page">
163
- <img src="banner.png" alt="FineWeb">
164
- </figure>
 
 
 
165
  </d-title>
166
  <d-byline></d-byline>
167
  <d-article>
 
5
  <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjs/12.4.2/math.min.js" charset="utf-8"></script>
6
  <script src="https://cdn.plot.ly/plotly-2.32.0.min.js" charset="utf-8"></script>
7
  <script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.21/lodash.min.js" charset="utf-8"></script>
8
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/PapaParse/5.4.1/papaparse.js" integrity="sha512-M0cjXJTonbWEdLI3HJIoJSQBb9980RWmOCk+tvWkhgFrAZqSSIg1+1Db/vDu7Qk9W3L90gBynve17PYvarjfQA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
9
  <script type="module" src="src/plotting.js"></script>
10
+ <script type="module" src="src/clusters.js"></script>
11
  <link rel="stylesheet" href="style.css">
12
  <meta name="viewport" content="width=device-width, initial-scale=1">
13
  <meta charset="utf8">
 
161
  </script>
162
  </d-front-matter>
163
  <d-title>
164
+ <div class="main-plot-container l-page">
165
+ <figure>
166
+ <img src="banner.png" alt="FineWeb">
167
+ </figure>
168
+ <div id="clusters-plot"></div>
169
+ </div>
170
  </d-title>
171
  <d-byline></d-byline>
172
  <d-article>
src/clusters.js ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Palette taken from Plotly's "Dark24" qualitative colour sequence.
// Every entry is an [r, g, b] triple of decimal strings so that it can be
// joined straight into an `rgba(...)` CSS colour.
// Two swatches from the upstream sequence are deliberately left out because
// dark annotation text is unreadable on top of them:
//   34,42,42 (near black) and 13,42,99 (very dark, labelled "Black" upstream).
const COLORS = [
    '46,145,229',
    '225,95,153',
    '28,167,28',
    '251,13,13',
    '218,22,255',
    '182,129,0',
    '117,13,134',
    '235,102,59',
    '81,28,251',
    '0,160,139',
    '251,0,209',
    '252,0,128',
    '178,130,141',
    '108,124,50',
    '119,138,174',
    '134,42,22',
    '167,119,241',
    '98,0,66',
    '22,22,167',
    '218,96,202',
    '108,69,22',
    '175,0,56',
].map((triple) => triple.split(','));
29
+
30
/**
 * Builds the hover-tooltip markup shown for a single data point.
 *
 * @param {{text: *, eduScore: *}} row - Point whose `text` and `eduScore`
 *   fields are interpolated into the tooltip.
 * @returns {string} Two labelled lines separated by `<br>`.
 */
const getLabelHoverFormat = (row) => {
    const { text, eduScore } = row;
    const textLine = `<b>Text</b>: ${text}`;
    const scoreLine = `<b>Edu label</b>: ${eduScore}`;
    return [textLine, scoreLine].join('<br>');
};
33
+
34
// Upper bound on how many cluster annotations are drawn at any one time.
const K = 15;
36
+
37
+
38
/**
 * Ranks labels by how often they occur.
 *
 * @param {Array} labels - One label per data point (used as object keys).
 * @returns {Object<string, number>} Map from label to its rank, where rank 0
 *   is the most frequent label. Ties keep the key order of the counting
 *   object because the sort is stable.
 */
function createLabelOrderMapping(labels) {
    // Tally occurrences per label.
    const frequency = {};
    labels.forEach((label) => {
        frequency[label] = (frequency[label] || 0) + 1;
    });

    // Most frequent label first.
    const ranked = Object.keys(frequency).sort(
        (left, right) => frequency[right] - frequency[left]
    );

    const order = {};
    for (const [rank, label] of ranked.entries()) {
        order[label] = rank;
    }
    return order;
}
52
+
53
+
54
+
55
+
56
/**
 * Loads the cluster summary CSV and converts it into annotation records.
 *
 * Rows are dropped when their `cluster_id` is -1 (the row whose summary is
 * "None" in the bundled data) or when it cannot be parsed as an integer at
 * all; the latter typically happens for a blank trailing CSV line, which
 * would otherwise become an annotation with NaN coordinates and no text.
 *
 * @param {string} file - URL/path of the CSV, forwarded to `readCSV`.
 * @returns {Promise<Array<{x: number, y: number, label: number, text: string}>>}
 */
const parseAnnotations = async (file) => {
    const rows = await readCSV(file);
    const annotations = [];
    for (const row of rows) {
        // Explicit radix: never let parseInt guess the base.
        const label = Number.parseInt(row.cluster_id, 10);
        if (!Number.isInteger(label) || label === -1) {
            continue;
        }
        annotations.push({
            x: Number.parseFloat(row.cluster_position_x),
            y: Number.parseFloat(row.cluster_position_y),
            label,
            text: row.cluster_summaries,
        });
    }
    return annotations;
};
68
+
69
/**
 * Decorates plain annotation records with the presentation attributes used
 * for the on-plot cluster labels.
 *
 * @param {Array<Object>} annotations - Records carrying at least a `label`.
 * @returns {Array<Object>} New objects: the original fields plus `showarrow`,
 *   `font`, `bgcolor` (from `getColor(label, 0.9)`) and `borderpad`.
 */
const addStylingToAnnotations = (annotations) => {
    const decorate = (annotation) => {
        const styled = { ...annotation };
        styled.showarrow = false;
        // A fresh font object per annotation, never shared between entries.
        styled.font = { size: 14, color: 'black', weight: 'bold' };
        styled.bgcolor = getColor(annotation.label, 0.9);
        // Padding between the text and the coloured background box.
        styled.borderpad = 2;
        return styled;
    };
    return annotations.map((annotation) => decorate(annotation));
};
84
+
85
/**
 * Picks the annotations that fall inside a rectangular view and returns the
 * `k` highest-priority ones.
 *
 * Priority is the numeric `ord` field, lowest first. Annotations without a
 * finite `ord` are sorted last instead of producing a NaN comparator result,
 * which would leave the sort order unspecified. The rectangle corners may be
 * given in either order.
 *
 * @param {Array<{x: number, y: number, ord?: number}>} annotations - Candidates; not mutated.
 * @param {number} x0 - One horizontal bound of the view.
 * @param {number} x1 - The other horizontal bound of the view.
 * @param {number} y0 - One vertical bound of the view.
 * @param {number} y1 - The other vertical bound of the view.
 * @param {number} [k=K] - Maximum number of annotations to return.
 * @returns {Array<Object>} At most `k` annotations, ordered by ascending `ord`.
 */
const getRelevantAnnotations = (annotations, x0, x1, y0, y1, k = K) => {
    const [left, right] = x0 <= x1 ? [x0, x1] : [x1, x0];
    const [bottom, top] = y0 <= y1 ? [y0, y1] : [y1, y0];

    const rankOf = ({ ord }) => (Number.isFinite(ord) ? ord : Number.POSITIVE_INFINITY);

    return annotations
        // filter() returns a new array, so the in-place sort below cannot
        // reorder the caller's list.
        .filter(({ x, y }) => x >= left && x <= right && y >= bottom && y <= top)
        .sort((a, b) => {
            const rankA = rankOf(a);
            const rankB = rankOf(b);
            // Equal ranks (including two unranked entries) keep input order.
            return rankA === rankB ? 0 : rankA - rankB;
        })
        .slice(0, k);
};
91
+
92
+
93
+ const getMinMaxTracesArea = (traces) => {
94
+ const x0 = Math.min(...traces.map(trace => trace.x));
95
+ const x1 = Math.max(...traces.map(trace => trace.x));
96
+ const y0 = Math.min(...traces.map(trace => trace.y));
97
+ const y1 = Math.max(...traces.map(trace => trace.y));
98
+ return {x0, x1, y0, y1};
99
+ }
100
+
101
/**
 * Loads the per-document clustering CSV and converts each row into a point.
 *
 * Rows whose `X` or `Y` cannot be parsed as a finite number are skipped, so
 * that a blank or malformed line does not end up as a NaN point downstream.
 *
 * @returns {Promise<Array<{x: number, y: number, eduScore: number, label: number, text: string}>>}
 *   One entry per usable row, with fields read from the `X`, `Y`,
 *   `edu_labels`, `cluster_labels` and `content_display` columns.
 */
const readData = async () => {
    const rows = await readCSV('data/clustering/data.csv');
    const points = [];
    for (const row of rows) {
        const x = Number.parseFloat(row.X);
        const y = Number.parseFloat(row.Y);
        if (!Number.isFinite(x) || !Number.isFinite(y)) {
            continue;
        }
        points.push({
            x,
            y,
            eduScore: Number.parseFloat(row.edu_labels),
            // Explicit radix: never let parseInt guess the base.
            label: Number.parseInt(row.cluster_labels, 10),
            text: row.content_display,
        });
    }
    return points;
};
110
+
111
/**
 * Renders the cluster scatter plot into the `#clusters-plot` element.
 *
 * Loads the points and the cluster summaries, draws one marker per point
 * coloured by cluster label, and overlays up to `K` cluster-name annotations
 * for the most populated clusters inside the current view. Annotations are
 * recomputed whenever the user zooms, pans or resets the axes, and the plot
 * width follows the container on window resize.
 *
 * Does nothing if the container element is not present in the document.
 *
 * @returns {Promise<void>}
 */
async function plotClusters() {
    const parent = document.getElementById('clusters-plot');
    if (parent === null) {
        return;
    }

    // The two CSV files are independent, so fetch them concurrently.
    const [data, rawAnnotations] = await Promise.all([
        readData(),
        parseAnnotations('data/clustering/info.csv'),
    ]);

    const traces = [{
        type: 'scatter',
        mode: 'markers',
        x: data.map((row) => row.x),
        y: data.map((row) => row.y),
        marker: {
            color: data.map((row) => getColor(row.label, 1.0)),
            size: 5,
            // Plotly only accepts opacities in [0, 1]; the previous value of 8
            // was out of range and looks like a typo for 0.8.
            opacity: 0.8,
        },
        hoverinfo: 'text',
        hovertext: data.map((row) => getLabelHoverFormat(row)),
        hoverlabel: {
            bgcolor: 'white',
        },
    }];

    // Rank clusters by size so the biggest ones get labelled first.
    const labelOrder = createLabelOrderMapping(data.map((row) => row.label));
    const annotations = rawAnnotations.map((annot) => ({
        ...annot,
        ord: labelOrder[annot.label],
    }));

    const bounds = getMinMaxTracesArea(data);
    const annotationsFor = ({ x0, x1, y0, y1 }) =>
        addStylingToAnnotations(getRelevantAnnotations(annotations, x0, x1, y0, y1));

    const layout = {
        height: 550,
        width: parent.clientWidth,
        xaxis: {
            showticklabels: false,
            showgrid: false,
            zeroline: false,
            title: {
                text: "Fineweb dataset (clustered using TODO and labeled using TODO),<br> zoom in to see more",
                font: {
                    size: 16,
                    style: 'italic',
                },
            },
        },
        yaxis: {
            showticklabels: false,
            showgrid: false,
            zeroline: false,
        },
        annotations: annotationsFor(bounds),
        font: {
            family: "apple-system, Arial, sans-serif",
        },
        margin: {
            t: 0,
            b: 30,
        },
    };

    await Plotly.newPlot(parent, traces, layout);

    // Last known visible rectangle. Relayout events may report only one axis
    // (e.g. a horizontal-only zoom), so the other axis is remembered here.
    const view = { ...bounds };

    parent.on('plotly_relayout', (eventdata) => {
        let viewChanged = false;
        for (const axis of ['x', 'y']) {
            const low = eventdata[`${axis}axis.range[0]`];
            const high = eventdata[`${axis}axis.range[1]`];
            if (low !== undefined && high !== undefined) {
                // Zoom / pan. Compared against undefined rather than by
                // truthiness, because a bound of exactly 0 is valid.
                view[`${axis}0`] = low;
                view[`${axis}1`] = high;
                viewChanged = true;
            } else if (eventdata[`${axis}axis.autorange`]) {
                // Zoom reset: fall back to the extent of the data.
                view[`${axis}0`] = bounds[`${axis}0`];
                view[`${axis}1`] = bounds[`${axis}1`];
                viewChanged = true;
            }
        }
        // Any other relayout (including the one issued below) is ignored.
        if (!viewChanged) {
            return;
        }
        // Idk maybe we can even recompute the ordering, but I think it's fine to use the global one
        // Only the annotations are sent: re-applying the whole initial layout
        // would also restore its stale width and undo any window resize.
        Plotly.relayout(parent, { annotations: annotationsFor(view) });
    });

    window.addEventListener("resize", () => {
        // If the window size is smaller than 768, we don't care as it's not shown
        if (window.innerWidth < 768) {
            return;
        }
        Plotly.relayout(parent, {
            width: parent.offsetWidth,
        });
    });
}
202
+
203
// Draw the cluster plot once the document has been parsed. The promise
// returned by plotClusters() is handled explicitly so that a failed fetch or
// render is reported with context instead of surfacing as an unhandled
// rejection.
document.addEventListener("DOMContentLoaded", () => {
    plotClusters().catch((error) => {
        console.error("Failed to render the clusters plot", error);
    });
});
206
+
207
+
208
/**
 * Downloads a CSV file and parses it with PapaParse, using the first line as
 * the header.
 *
 * @param {string} file - URL/path handed to `fetch`.
 * @returns {Promise<Array<Object<string, string>>>} One object per data row,
 *   keyed by column name. Blank lines are skipped, so a trailing newline does
 *   not produce an extra, mostly-empty row.
 * @throws {Error} If the HTTP response status is not in the 2xx range.
 */
const readCSV = async (file) => {
    const response = await fetch(file);
    if (!response.ok) {
        // Without this check an HTML error page would be parsed as CSV.
        throw new Error(`Failed to fetch ${file}: ${response.status} ${response.statusText}`);
    }
    const text = await response.text();
    const { data } = Papa.parse(text, { header: true, skipEmptyLines: true });
    return data;
};
214
+
215
+
216
+
217
/**
 * Maps a cluster label to an `rgba(...)` colour string from `COLORS`.
 *
 * The label's absolute value is used, wrapping around the palette, so a
 * negative label such as -1 shares its colour with label 1. A label that is
 * not a finite number (for example NaN from an unparsable CSV cell) yields a
 * neutral grey instead of throwing.
 *
 * @param {number} i - Cluster label.
 * @param {number} opacity - Alpha component, written verbatim into the result.
 * @returns {string} CSS colour of the form `rgba(r,g,b, opacity)`.
 */
const getColor = (i, opacity) => {
    const index = Math.abs(Math.trunc(i));
    if (!Number.isFinite(index)) {
        return `rgba(128,128,128, ${opacity})`;
    }
    // No logging here: this runs once per plotted point.
    return `rgba(${COLORS[index % COLORS.length].join(',')}, ${opacity})`;
};
src/plotting.js CHANGED
@@ -1,6 +1,6 @@
1
  const TASK_ID_TO_NAME = {
2
  // Ablations
3
- "agg_score": "Aggregate Score",
4
  "commonsense_qa/acc_norm": "Commonsense QA Norm",
5
  "hellaswag/acc_norm": "HellaSwag",
6
  "openbookqa/acc_norm": "OpenBook QA Norm",
@@ -11,27 +11,26 @@ const TASK_ID_TO_NAME = {
11
  "mmlu/acc_norm": "MMLU",
12
 
13
  // Stats
14
- "ccnet": "CCNet",
15
  };
16
 
17
  const DATASET_ID_TO_NAME = {
18
- "pii_removed": "Fineweb",
19
- "allenai_c4_en": "C4",
20
  "tiiuae_falcon-refinedweb_data": "RefinedWeb",
21
  "red-pajama-v2_jsonl-deduplicated-extract": "RedPajamaV2",
22
  "dolma-sample": "Dolma1.6",
23
- "dedup_minhash_independent_output": "Individual Dedup MinHash",
24
  };
25
 
26
  const DEFAULT_SETTINGS = {
27
- "slider": {
28
- "max": 30,
29
- "min": 0,
30
- "default": 0
31
  },
32
- "defaultMetric": "agg_score",
33
- }
34
-
35
 
36
  const DEFAULT_LAYOUT = {
37
  title: {
@@ -99,44 +98,43 @@ const getAutoRange = (traces) => {
99
  let minX = Math.min(...traces.flatMap((trace) => trace.x));
100
  let maxX = Math.max(...traces.flatMap((trace) => trace.x));
101
  return [minX * 0.95, maxX * 1.05];
102
- }
103
 
104
  const init_ablation_plot = function () {
105
  const plotElements = document.querySelectorAll('[id^="plot-"]');
106
-
107
-
108
-
109
- plotElements.forEach(async (plotElement) => {
110
- const plotName = plotElement.id.replace('plot-', '');
111
- const indexData = await fetch(`data/plots/${plotName}/index.json`).then(
112
- (response) => response.json()
113
- );
114
- const settings = _.merge(DEFAULT_SETTINGS, indexData.settings);
115
- const indexMapping = indexData.files;
116
- const { dropdown, slider, plot } = createAblationPlottingElements(
117
- plotElement,
118
- indexMapping,
119
- settings
120
- );
121
- plot.id = `graph-${plotName}`;
122
- dropdown.addEventListener("change", () => updatePlot(dropdown, slider));
123
- let timeoutId;
124
- // Debounce the slider
125
- if (slider !== undefined) {
126
- slider.addEventListener("input", () => {
127
- clearTimeout(timeoutId);
128
  timeoutId = setTimeout(() => {
129
- updatePlot(dropdown, slider);
130
- }, 500);
131
- });
132
- }
133
  // Shared plot
134
- Plotly.newPlot(plot, DEFAULT_LAYOUT);
135
 
136
  async function updatePlot(dropdown, slider) {
137
  const metricName = dropdown.value;
138
  const sliderValue = parseInt(slider?.value ?? 0);
139
- const metricData = await fetch(`data/plots/${plotName}/${indexMapping[metricName]["file"]}`).then((response) => response.json());
 
 
140
  const traces = metricData?.traces?.[metricName] ?? [];
141
  for (const key in metricData?.data ?? []) {
142
  const traceData = metricData.data[key];
@@ -161,7 +159,11 @@ const init_ablation_plot = function () {
161
  {
162
  width: width,
163
  yaxis: { title: { text: TASK_ID_TO_NAME[metricName] } },
164
- xaxis: { range: settings.autoSetXRange ? getAutoRange(traces) : undefined },
 
 
 
 
165
  },
166
  metricData.layout
167
  );
@@ -197,7 +199,9 @@ const createAblationPlottingElements = (
197
  plotElement.appendChild(plot);
198
  plotElement.appendChild(controls);
199
 
200
- const metricOptions = Object.keys(indexMapping).filter(metric => metric in TASK_ID_TO_NAME);
 
 
201
  // Dropdown
202
  const dropdownLabel = document.createElement("label");
203
  dropdownLabel.textContent = "Metric:";
@@ -216,7 +220,7 @@ const createAblationPlottingElements = (
216
  dropdownContainer.appendChild(dropdown);
217
  controls.appendChild(dropdownContainer);
218
 
219
- let slider = undefined
220
  if (settings.slider !== null) {
221
  const sliderLabel = document.createElement("label");
222
  sliderLabel.textContent = "Rolling window:";
 
1
  const TASK_ID_TO_NAME = {
2
  // Ablations
3
+ agg_score: "Aggregate Score",
4
  "commonsense_qa/acc_norm": "Commonsense QA Norm",
5
  "hellaswag/acc_norm": "HellaSwag",
6
  "openbookqa/acc_norm": "OpenBook QA Norm",
 
11
  "mmlu/acc_norm": "MMLU",
12
 
13
  // Stats
14
+ ccnet: "CCNet",
15
  };
16
 
17
  const DATASET_ID_TO_NAME = {
18
+ pii_removed: "Fineweb",
19
+ allenai_c4_en: "C4",
20
  "tiiuae_falcon-refinedweb_data": "RefinedWeb",
21
  "red-pajama-v2_jsonl-deduplicated-extract": "RedPajamaV2",
22
  "dolma-sample": "Dolma1.6",
23
+ dedup_minhash_independent_output: "Individual Dedup MinHash",
24
  };
25
 
26
  const DEFAULT_SETTINGS = {
27
+ slider: {
28
+ max: 30,
29
+ min: 0,
30
+ default: 0,
31
  },
32
+ defaultMetric: "agg_score",
33
+ };
 
34
 
35
  const DEFAULT_LAYOUT = {
36
  title: {
 
98
  let minX = Math.min(...traces.flatMap((trace) => trace.x));
99
  let maxX = Math.max(...traces.flatMap((trace) => trace.x));
100
  return [minX * 0.95, maxX * 1.05];
101
+ };
102
 
103
  const init_ablation_plot = function () {
104
  const plotElements = document.querySelectorAll('[id^="plot-"]');
105
+ plotElements.forEach(async (plotElement) => {
106
+ const plotName = plotElement.id.replace("plot-", "");
107
+ const indexData = await fetch(`data/plots/${plotName}/index.json`).then(
108
+ (response) => response.json()
109
+ );
110
+ const settings = _.merge({}, DEFAULT_SETTINGS, indexData.settings);
111
+ const indexMapping = indexData.files;
112
+ const { dropdown, slider, plot } = createAblationPlottingElements(
113
+ plotElement,
114
+ indexMapping,
115
+ settings
116
+ );
117
+ plot.id = `graph-${plotName}`;
118
+ dropdown.addEventListener("change", () => updatePlot(dropdown, slider));
119
+ let timeoutId;
120
+ // Debounce the slider
121
+ if (slider !== undefined) {
122
+ slider.addEventListener("input", () => {
123
+ clearTimeout(timeoutId);
 
 
 
124
  timeoutId = setTimeout(() => {
125
+ updatePlot(dropdown, slider);
126
+ }, 500);
127
+ });
128
+ }
129
  // Shared plot
130
+ Plotly.newPlot(plot, []);
131
 
132
  async function updatePlot(dropdown, slider) {
133
  const metricName = dropdown.value;
134
  const sliderValue = parseInt(slider?.value ?? 0);
135
+ const metricData = await fetch(
136
+ `data/plots/${plotName}/${indexMapping[metricName]["file"]}`
137
+ ).then((response) => response.json());
138
  const traces = metricData?.traces?.[metricName] ?? [];
139
  for (const key in metricData?.data ?? []) {
140
  const traceData = metricData.data[key];
 
159
  {
160
  width: width,
161
  yaxis: { title: { text: TASK_ID_TO_NAME[metricName] } },
162
+ xaxis: {
163
+ range: settings.autoSetXRange
164
+ ? getAutoRange(traces)
165
+ : undefined,
166
+ },
167
  },
168
  metricData.layout
169
  );
 
199
  plotElement.appendChild(plot);
200
  plotElement.appendChild(controls);
201
 
202
+ const metricOptions = Object.keys(indexMapping).filter(
203
+ (metric) => metric in TASK_ID_TO_NAME
204
+ );
205
  // Dropdown
206
  const dropdownLabel = document.createElement("label");
207
  dropdownLabel.textContent = "Metric:";
 
220
  dropdownContainer.appendChild(dropdown);
221
  controls.appendChild(dropdownContainer);
222
 
223
+ let slider = undefined;
224
  if (settings.slider !== null) {
225
  const sliderLabel = document.createElement("label");
226
  sliderLabel.textContent = "Rolling window:";