{ "cells": [ { "cell_type": "code", "execution_count": 6, "id": "138889b92720ce2e", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T15:30:52.864251Z", "start_time": "2024-05-13T15:30:52.316016Z" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>runname</th>\n", " <th>seed</th>\n", " <th>steps</th>\n", " <th>agg_score</th>\n", " <th>commonsense_qa/acc</th>\n", " <th>commonsense_qa/acc_norm</th>\n", " <th>hellaswag/acc</th>\n", " <th>hellaswag/acc_norm</th>\n", " <th>openbookqa/acc</th>\n", " <th>openbookqa/acc_norm</th>\n", " <th>...</th>\n", " <th>siqa/acc</th>\n", " <th>siqa/acc_norm</th>\n", " <th>winogrande/acc</th>\n", " <th>winogrande/acc_norm</th>\n", " <th>sciq/acc</th>\n", " <th>sciq/acc_norm</th>\n", " <th>arc/acc</th>\n", " <th>arc/acc_norm</th>\n", " <th>mmlu/acc</th>\n", " <th>mmlu/acc_norm</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>filtering-baseline-2019-18-40gt</td>\n", " <td>5</td>\n", " <td>0</td>\n", " <td>0.330953</td>\n", " <td>0.186</td>\n", " <td>0.233</td>\n", " <td>0.272</td>\n", " <td>0.258</td>\n", " <td>0.166</td>\n", " <td>0.286</td>\n", " <td>...</td>\n", " <td>0.367</td>\n", " <td>0.362</td>\n", " <td>0.516</td>\n", " <td>0.497</td>\n", " <td>0.210</td>\n", " <td>0.202</td>\n", " <td>0.2190</td>\n", " <td>0.2515</td>\n", " <td>0.230285</td>\n", " <td>0.250127</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>filtering-baseline-2019-18-40gt</td>\n", " <td>5</td>\n", " <td>1000</td>\n", " <td>0.357474</td>\n", " <td>0.239</td>\n", " <td>0.271</td>\n", " <td>0.297</td>\n", " 
<td>0.287</td>\n", " <td>0.146</td>\n", " <td>0.260</td>\n", " <td>...</td>\n", " <td>0.365</td>\n", " <td>0.396</td>\n", " <td>0.503</td>\n", " <td>0.486</td>\n", " <td>0.568</td>\n", " <td>0.502</td>\n", " <td>0.2665</td>\n", " <td>0.2855</td>\n", " <td>0.242526</td>\n", " <td>0.253291</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>filtering-baseline-2019-18-40gt</td>\n", " <td>5</td>\n", " <td>2000</td>\n", " <td>0.377436</td>\n", " <td>0.280</td>\n", " <td>0.284</td>\n", " <td>0.321</td>\n", " <td>0.332</td>\n", " <td>0.134</td>\n", " <td>0.268</td>\n", " <td>...</td>\n", " <td>0.368</td>\n", " <td>0.399</td>\n", " <td>0.519</td>\n", " <td>0.502</td>\n", " <td>0.686</td>\n", " <td>0.590</td>\n", " <td>0.3030</td>\n", " <td>0.3215</td>\n", " <td>0.245745</td>\n", " <td>0.260988</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>filtering-baseline-2019-18-40gt</td>\n", " <td>5</td>\n", " <td>3000</td>\n", " <td>0.387994</td>\n", " <td>0.277</td>\n", " <td>0.291</td>\n", " <td>0.339</td>\n", " <td>0.359</td>\n", " <td>0.132</td>\n", " <td>0.280</td>\n", " <td>...</td>\n", " <td>0.394</td>\n", " <td>0.404</td>\n", " <td>0.520</td>\n", " <td>0.503</td>\n", " <td>0.721</td>\n", " <td>0.622</td>\n", " <td>0.3210</td>\n", " <td>0.3385</td>\n", " <td>0.250427</td>\n", " <td>0.264451</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>filtering-baseline-2019-18-40gt</td>\n", " <td>5</td>\n", " <td>4000</td>\n", " <td>0.396110</td>\n", " <td>0.299</td>\n", " <td>0.315</td>\n", " <td>0.340</td>\n", " <td>0.366</td>\n", " <td>0.158</td>\n", " <td>0.286</td>\n", " <td>...</td>\n", " <td>0.376</td>\n", " <td>0.399</td>\n", " <td>0.515</td>\n", " <td>0.500</td>\n", " <td>0.739</td>\n", " <td>0.620</td>\n", " <td>0.3320</td>\n", " <td>0.3445</td>\n", " <td>0.256134</td>\n", " <td>0.270382</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " 
<td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>115</th>\n", " <td>wet-extraction-2019-18</td>\n", " <td>6</td>\n", " <td>10000</td>\n", " <td>0.408977</td>\n", " <td>0.326</td>\n", " <td>0.312</td>\n", " <td>0.362</td>\n", " <td>0.412</td>\n", " <td>0.166</td>\n", " <td>0.312</td>\n", " <td>...</td>\n", " <td>0.379</td>\n", " <td>0.396</td>\n", " <td>0.525</td>\n", " <td>0.517</td>\n", " <td>0.767</td>\n", " <td>0.654</td>\n", " <td>0.3480</td>\n", " <td>0.3560</td>\n", " <td>0.262357</td>\n", " <td>0.276813</td>\n", " </tr>\n", " <tr>\n", " <th>116</th>\n", " <td>wet-extraction-2019-18</td>\n", " <td>6</td>\n", " <td>11000</td>\n", " <td>0.408771</td>\n", " <td>0.325</td>\n", " <td>0.315</td>\n", " <td>0.363</td>\n", " <td>0.409</td>\n", " <td>0.162</td>\n", " <td>0.312</td>\n", " <td>...</td>\n", " <td>0.388</td>\n", " <td>0.399</td>\n", " <td>0.529</td>\n", " <td>0.520</td>\n", " <td>0.777</td>\n", " <td>0.664</td>\n", " <td>0.3465</td>\n", " <td>0.3555</td>\n", " <td>0.261599</td>\n", " <td>0.276664</td>\n", " </tr>\n", " <tr>\n", " <th>117</th>\n", " <td>wet-extraction-2019-18</td>\n", " <td>6</td>\n", " <td>12000</td>\n", " <td>0.408239</td>\n", " <td>0.329</td>\n", " <td>0.308</td>\n", " <td>0.364</td>\n", " <td>0.416</td>\n", " <td>0.178</td>\n", " <td>0.308</td>\n", " <td>...</td>\n", " <td>0.382</td>\n", " <td>0.398</td>\n", " <td>0.521</td>\n", " <td>0.510</td>\n", " <td>0.770</td>\n", " <td>0.656</td>\n", " <td>0.3555</td>\n", " <td>0.3595</td>\n", " <td>0.260928</td>\n", " <td>0.278411</td>\n", " </tr>\n", " <tr>\n", " <th>118</th>\n", " <td>wet-extraction-2019-18</td>\n", " <td>6</td>\n", " <td>13000</td>\n", " <td>0.413263</td>\n", " <td>0.325</td>\n", " <td>0.308</td>\n", " 
<td>0.367</td>\n", " <td>0.425</td>\n", " <td>0.174</td>\n", " <td>0.312</td>\n", " <td>...</td>\n", " <td>0.387</td>\n", " <td>0.411</td>\n", " <td>0.523</td>\n", " <td>0.524</td>\n", " <td>0.774</td>\n", " <td>0.662</td>\n", " <td>0.3570</td>\n", " <td>0.3600</td>\n", " <td>0.263067</td>\n", " <td>0.281104</td>\n", " </tr>\n", " <tr>\n", " <th>119</th>\n", " <td>wet-extraction-2019-18</td>\n", " <td>6</td>\n", " <td>13500</td>\n", " <td>0.410754</td>\n", " <td>0.335</td>\n", " <td>0.310</td>\n", " <td>0.366</td>\n", " <td>0.424</td>\n", " <td>0.164</td>\n", " <td>0.300</td>\n", " <td>...</td>\n", " <td>0.392</td>\n", " <td>0.407</td>\n", " <td>0.515</td>\n", " <td>0.519</td>\n", " <td>0.779</td>\n", " <td>0.668</td>\n", " <td>0.3590</td>\n", " <td>0.3565</td>\n", " <td>0.261681</td>\n", " <td>0.279534</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>120 rows × 22 columns</p>\n", "</div>" ], "text/plain": [ " runname seed steps agg_score \\\n", "0 filtering-baseline-2019-18-40gt 5 0 0.330953 \n", "1 filtering-baseline-2019-18-40gt 5 1000 0.357474 \n", "2 filtering-baseline-2019-18-40gt 5 2000 0.377436 \n", "3 filtering-baseline-2019-18-40gt 5 3000 0.387994 \n", "4 filtering-baseline-2019-18-40gt 5 4000 0.396110 \n", ".. ... ... ... ... \n", "115 wet-extraction-2019-18 6 10000 0.408977 \n", "116 wet-extraction-2019-18 6 11000 0.408771 \n", "117 wet-extraction-2019-18 6 12000 0.408239 \n", "118 wet-extraction-2019-18 6 13000 0.413263 \n", "119 wet-extraction-2019-18 6 13500 0.410754 \n", "\n", " commonsense_qa/acc commonsense_qa/acc_norm hellaswag/acc \\\n", "0 0.186 0.233 0.272 \n", "1 0.239 0.271 0.297 \n", "2 0.280 0.284 0.321 \n", "3 0.277 0.291 0.339 \n", "4 0.299 0.315 0.340 \n", ".. ... ... ... \n", "115 0.326 0.312 0.362 \n", "116 0.325 0.315 0.363 \n", "117 0.329 0.308 0.364 \n", "118 0.325 0.308 0.367 \n", "119 0.335 0.310 0.366 \n", "\n", " hellaswag/acc_norm openbookqa/acc openbookqa/acc_norm ... siqa/acc \\\n", "0 0.258 0.166 0.286 ... 
0.367 \n", "1 0.287 0.146 0.260 ... 0.365 \n", "2 0.332 0.134 0.268 ... 0.368 \n", "3 0.359 0.132 0.280 ... 0.394 \n", "4 0.366 0.158 0.286 ... 0.376 \n", ".. ... ... ... ... ... \n", "115 0.412 0.166 0.312 ... 0.379 \n", "116 0.409 0.162 0.312 ... 0.388 \n", "117 0.416 0.178 0.308 ... 0.382 \n", "118 0.425 0.174 0.312 ... 0.387 \n", "119 0.424 0.164 0.300 ... 0.392 \n", "\n", " siqa/acc_norm winogrande/acc winogrande/acc_norm sciq/acc \\\n", "0 0.362 0.516 0.497 0.210 \n", "1 0.396 0.503 0.486 0.568 \n", "2 0.399 0.519 0.502 0.686 \n", "3 0.404 0.520 0.503 0.721 \n", "4 0.399 0.515 0.500 0.739 \n", ".. ... ... ... ... \n", "115 0.396 0.525 0.517 0.767 \n", "116 0.399 0.529 0.520 0.777 \n", "117 0.398 0.521 0.510 0.770 \n", "118 0.411 0.523 0.524 0.774 \n", "119 0.407 0.515 0.519 0.779 \n", "\n", " sciq/acc_norm arc/acc arc/acc_norm mmlu/acc mmlu/acc_norm \n", "0 0.202 0.2190 0.2515 0.230285 0.250127 \n", "1 0.502 0.2665 0.2855 0.242526 0.253291 \n", "2 0.590 0.3030 0.3215 0.245745 0.260988 \n", "3 0.622 0.3210 0.3385 0.250427 0.264451 \n", "4 0.620 0.3320 0.3445 0.256134 0.270382 \n", ".. ... ... ... ... ... 
\n", "115 0.654 0.3480 0.3560 0.262357 0.276813 \n", "116 0.664 0.3465 0.3555 0.261599 0.276664 \n", "117 0.656 0.3555 0.3595 0.260928 0.278411 \n", "118 0.662 0.3570 0.3600 0.263067 0.281104 \n", "119 0.668 0.3590 0.3565 0.261681 0.279534 \n", "\n", "[120 rows x 22 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "import json\n",
 "import os\n",
 "import warnings\n",
 "\n",
 "import pandas as pd\n",
 "from matplotlib import pyplot as plt\n",
 "from matplotlib.figure import Figure\n",
 "\n",
 "# Evaluation scores: identifying columns runname / seed / steps plus one\n",
 "# column per benchmark metric (see the rendered frame below).\n",
 "df = pd.read_csv(\"../src_data/wet_comparison.csv\")\n",
 "df"
 ] }, { "cell_type": "code", "execution_count": 7, "id": "b610f43caefdf01", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T15:30:52.866635Z", "start_time": "2024-05-13T15:30:52.865068Z" }, "collapsed": false }, "outputs": [], "source": [
 "# Runs to export, keyed by their `runname` value in `df`; each value is the\n",
 "# label stored next to that run's curve. Runs not listed here are skipped.\n",
 "runs_mapping = {\n",
 "    \"wet-extraction-2019-18\": \"WET data\",\n",
 "    \"ind_minhash-CC-MAIN-2019-18\": \"Extracted from WARC\",\n",
 "}"
 ] }, { "cell_type": "code", "execution_count": 9, "id": "initial_id", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T15:30:53.034617Z", "start_time": "2024-05-13T15:30:52.867342Z" }, "collapsed": true }, "outputs": [], "source": [
 "# Score columns to export; one JSON file is written per entry.\n",
 "metrics = [\n",
 "    \"agg_score\",\n",
 "    \"commonsense_qa/acc_norm\",\n",
 "    \"hellaswag/acc_norm\",\n",
 "    \"openbookqa/acc_norm\",\n",
 "    \"piqa/acc_norm\",\n",
 "    \"siqa/acc_norm\",\n",
 "    \"winogrande/acc_norm\",\n",
 "    \"arc/acc_norm\",\n",
 "    \"mmlu/acc_norm\",\n",
 "]\n",
 "\n",
 "# Factors applied to `steps` to obtain the x values of every curve.\n",
 "# NOTE(review): presumably 2048 * 1024 is the number of tokens per training step\n",
 "# and 1e-9 rescales to billions of tokens -- confirm against the training setup.\n",
 "TOKENS_PER_STEP = 2048 * 1024\n",
 "X_SCALE = 1e-9\n",
 "\n",
 "PLOT_TITLE = \"WET data is worse than data extracted from WARC\"\n",
 "\n",
 "# Output directory: receives one JSON file per metric plus index.json.\n",
 "file_id = \"../assets/data/plots/wet_comparison\"\n",
 "\n",
 "\n",
 "def normalize_runname(runname):\n",
 "    \"\"\"Return `runname` with every \"/\" replaced by \"_\".\n",
 "\n",
 "    Despite the name, this notebook applies it to metric names, so that e.g.\n",
 "    \"arc/acc_norm\" can be used as a file stem.\n",
 "    \"\"\"\n",
 "    return runname.replace(\"/\", \"_\")\n",
 "\n",
 "\n",
 "def build_metric_traces(scores, metric):\n",
 "    \"\"\"Collect the curve of `metric` for every run listed in `runs_mapping`.\n",
 "\n",
 "    Args:\n",
 "        scores: frame holding \"runname\", \"steps\" and `metric` columns.\n",
 "        metric: name of the score column to export.\n",
 "\n",
 "    Returns:\n",
 "        dict mapping run name to {\"x\": list, \"y\": list, \"label\": str}; the run\n",
 "        whose last \"y\" value is highest comes first.\n",
 "    \"\"\"\n",
 "    traces = {}\n",
 "    for name, group in scores.groupby(\"runname\"):\n",
 "        if name not in runs_mapping:\n",
 "            continue\n",
 "        curve = group.sort_values(by=\"steps\").set_index(\"steps\")[metric]\n",
 "        traces[name] = {\n",
 "            \"x\": (curve.index * TOKENS_PER_STEP * X_SCALE).tolist(),\n",
 "            \"y\": curve.tolist(),\n",
 "            \"label\": runs_mapping[name],\n",
 "        }\n",
 "    # Order the runs by their final score, best first (not by step).\n",
 "    return dict(sorted(traces.items(), key=lambda item: item[1][\"y\"][-1], reverse=True))\n",
 "\n",
 "\n",
 "# A mapped run that is absent from the data would silently vanish from every plot.\n",
 "missing_runs = sorted(set(runs_mapping) - set(df[\"runname\"]))\n",
 "if missing_runs:\n",
 "    warnings.warn(f\"runs in runs_mapping but not in the data: {missing_runs}\")\n",
 "\n",
 "# Average every metric over all rows that share the same (runname, steps).\n",
 "grouped = df.groupby([\"runname\", \"steps\"])[metrics].mean().reset_index()\n",
 "\n",
 "# Create the output directory once, before any file is written.\n",
 "os.makedirs(file_id, exist_ok=True)\n",
 "\n",
 "files = {}\n",
 "for metric in metrics:\n",
 "    metric_file = f\"{normalize_runname(metric)}.json\"\n",
 "    payload = {\n",
 "        \"data\": build_metric_traces(grouped, metric),\n",
 "        \"layout\": {\"title\": {\"text\": PLOT_TITLE}},\n",
 "    }\n",
 "    with open(f\"{file_id}/{metric_file}\", \"w\", encoding=\"utf-8\") as f:\n",
 "        json.dump(payload, f)\n",
 "    files[metric] = {\"file\": metric_file}\n",
 "\n",
 "# index.json lists the per-metric files together with a settings block.\n",
 "# NOTE(review): the consumer of these settings is not part of this notebook.\n",
 "with open(f\"{file_id}/index.json\", \"w\", encoding=\"utf-8\") as f:\n",
 "    json.dump(\n",
 "        {\n",
 "            \"files\": files,\n",
 "            \"settings\": {\n",
 "                \"defaultMetric\": \"agg_score\",\n",
 "                \"slider\": {\"min\": 0, \"max\": 10, \"default\": 0},\n",
 "            },\n",
 "        },\n",
 "        f,\n",
 "    )"
 ] }, { "cell_type": "code", "execution_count": 3, "id": "af28ebbd054cdc33", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T15:30:53.036912Z", "start_time": "2024-05-13T15:30:53.035519Z" }, "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }