{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "138889b92720ce2e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-13T15:30:52.864251Z",
     "start_time": "2024-05-13T15:30:52.316016Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>runname</th>\n",
       "      <th>seed</th>\n",
       "      <th>steps</th>\n",
       "      <th>agg_score</th>\n",
       "      <th>commonsense_qa/acc</th>\n",
       "      <th>commonsense_qa/acc_norm</th>\n",
       "      <th>hellaswag/acc</th>\n",
       "      <th>hellaswag/acc_norm</th>\n",
       "      <th>openbookqa/acc</th>\n",
       "      <th>openbookqa/acc_norm</th>\n",
       "      <th>...</th>\n",
       "      <th>siqa/acc</th>\n",
       "      <th>siqa/acc_norm</th>\n",
       "      <th>winogrande/acc</th>\n",
       "      <th>winogrande/acc_norm</th>\n",
       "      <th>sciq/acc</th>\n",
       "      <th>sciq/acc_norm</th>\n",
       "      <th>arc/acc</th>\n",
       "      <th>arc/acc_norm</th>\n",
       "      <th>mmlu/acc</th>\n",
       "      <th>mmlu/acc_norm</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>filtering-baseline-2019-18-40gt</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0.330953</td>\n",
       "      <td>0.186</td>\n",
       "      <td>0.233</td>\n",
       "      <td>0.272</td>\n",
       "      <td>0.258</td>\n",
       "      <td>0.166</td>\n",
       "      <td>0.286</td>\n",
       "      <td>...</td>\n",
       "      <td>0.367</td>\n",
       "      <td>0.362</td>\n",
       "      <td>0.516</td>\n",
       "      <td>0.497</td>\n",
       "      <td>0.210</td>\n",
       "      <td>0.202</td>\n",
       "      <td>0.2190</td>\n",
       "      <td>0.2515</td>\n",
       "      <td>0.230285</td>\n",
       "      <td>0.250127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>filtering-baseline-2019-18-40gt</td>\n",
       "      <td>5</td>\n",
       "      <td>1000</td>\n",
       "      <td>0.357474</td>\n",
       "      <td>0.239</td>\n",
       "      <td>0.271</td>\n",
       "      <td>0.297</td>\n",
       "      <td>0.287</td>\n",
       "      <td>0.146</td>\n",
       "      <td>0.260</td>\n",
       "      <td>...</td>\n",
       "      <td>0.365</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.503</td>\n",
       "      <td>0.486</td>\n",
       "      <td>0.568</td>\n",
       "      <td>0.502</td>\n",
       "      <td>0.2665</td>\n",
       "      <td>0.2855</td>\n",
       "      <td>0.242526</td>\n",
       "      <td>0.253291</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>filtering-baseline-2019-18-40gt</td>\n",
       "      <td>5</td>\n",
       "      <td>2000</td>\n",
       "      <td>0.377436</td>\n",
       "      <td>0.280</td>\n",
       "      <td>0.284</td>\n",
       "      <td>0.321</td>\n",
       "      <td>0.332</td>\n",
       "      <td>0.134</td>\n",
       "      <td>0.268</td>\n",
       "      <td>...</td>\n",
       "      <td>0.368</td>\n",
       "      <td>0.399</td>\n",
       "      <td>0.519</td>\n",
       "      <td>0.502</td>\n",
       "      <td>0.686</td>\n",
       "      <td>0.590</td>\n",
       "      <td>0.3030</td>\n",
       "      <td>0.3215</td>\n",
       "      <td>0.245745</td>\n",
       "      <td>0.260988</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>filtering-baseline-2019-18-40gt</td>\n",
       "      <td>5</td>\n",
       "      <td>3000</td>\n",
       "      <td>0.387994</td>\n",
       "      <td>0.277</td>\n",
       "      <td>0.291</td>\n",
       "      <td>0.339</td>\n",
       "      <td>0.359</td>\n",
       "      <td>0.132</td>\n",
       "      <td>0.280</td>\n",
       "      <td>...</td>\n",
       "      <td>0.394</td>\n",
       "      <td>0.404</td>\n",
       "      <td>0.520</td>\n",
       "      <td>0.503</td>\n",
       "      <td>0.721</td>\n",
       "      <td>0.622</td>\n",
       "      <td>0.3210</td>\n",
       "      <td>0.3385</td>\n",
       "      <td>0.250427</td>\n",
       "      <td>0.264451</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>filtering-baseline-2019-18-40gt</td>\n",
       "      <td>5</td>\n",
       "      <td>4000</td>\n",
       "      <td>0.396110</td>\n",
       "      <td>0.299</td>\n",
       "      <td>0.315</td>\n",
       "      <td>0.340</td>\n",
       "      <td>0.366</td>\n",
       "      <td>0.158</td>\n",
       "      <td>0.286</td>\n",
       "      <td>...</td>\n",
       "      <td>0.376</td>\n",
       "      <td>0.399</td>\n",
       "      <td>0.515</td>\n",
       "      <td>0.500</td>\n",
       "      <td>0.739</td>\n",
       "      <td>0.620</td>\n",
       "      <td>0.3320</td>\n",
       "      <td>0.3445</td>\n",
       "      <td>0.256134</td>\n",
       "      <td>0.270382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115</th>\n",
       "      <td>wet-extraction-2019-18</td>\n",
       "      <td>6</td>\n",
       "      <td>10000</td>\n",
       "      <td>0.408977</td>\n",
       "      <td>0.326</td>\n",
       "      <td>0.312</td>\n",
       "      <td>0.362</td>\n",
       "      <td>0.412</td>\n",
       "      <td>0.166</td>\n",
       "      <td>0.312</td>\n",
       "      <td>...</td>\n",
       "      <td>0.379</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.525</td>\n",
       "      <td>0.517</td>\n",
       "      <td>0.767</td>\n",
       "      <td>0.654</td>\n",
       "      <td>0.3480</td>\n",
       "      <td>0.3560</td>\n",
       "      <td>0.262357</td>\n",
       "      <td>0.276813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>wet-extraction-2019-18</td>\n",
       "      <td>6</td>\n",
       "      <td>11000</td>\n",
       "      <td>0.408771</td>\n",
       "      <td>0.325</td>\n",
       "      <td>0.315</td>\n",
       "      <td>0.363</td>\n",
       "      <td>0.409</td>\n",
       "      <td>0.162</td>\n",
       "      <td>0.312</td>\n",
       "      <td>...</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.399</td>\n",
       "      <td>0.529</td>\n",
       "      <td>0.520</td>\n",
       "      <td>0.777</td>\n",
       "      <td>0.664</td>\n",
       "      <td>0.3465</td>\n",
       "      <td>0.3555</td>\n",
       "      <td>0.261599</td>\n",
       "      <td>0.276664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117</th>\n",
       "      <td>wet-extraction-2019-18</td>\n",
       "      <td>6</td>\n",
       "      <td>12000</td>\n",
       "      <td>0.408239</td>\n",
       "      <td>0.329</td>\n",
       "      <td>0.308</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.416</td>\n",
       "      <td>0.178</td>\n",
       "      <td>0.308</td>\n",
       "      <td>...</td>\n",
       "      <td>0.382</td>\n",
       "      <td>0.398</td>\n",
       "      <td>0.521</td>\n",
       "      <td>0.510</td>\n",
       "      <td>0.770</td>\n",
       "      <td>0.656</td>\n",
       "      <td>0.3555</td>\n",
       "      <td>0.3595</td>\n",
       "      <td>0.260928</td>\n",
       "      <td>0.278411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118</th>\n",
       "      <td>wet-extraction-2019-18</td>\n",
       "      <td>6</td>\n",
       "      <td>13000</td>\n",
       "      <td>0.413263</td>\n",
       "      <td>0.325</td>\n",
       "      <td>0.308</td>\n",
       "      <td>0.367</td>\n",
       "      <td>0.425</td>\n",
       "      <td>0.174</td>\n",
       "      <td>0.312</td>\n",
       "      <td>...</td>\n",
       "      <td>0.387</td>\n",
       "      <td>0.411</td>\n",
       "      <td>0.523</td>\n",
       "      <td>0.524</td>\n",
       "      <td>0.774</td>\n",
       "      <td>0.662</td>\n",
       "      <td>0.3570</td>\n",
       "      <td>0.3600</td>\n",
       "      <td>0.263067</td>\n",
       "      <td>0.281104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119</th>\n",
       "      <td>wet-extraction-2019-18</td>\n",
       "      <td>6</td>\n",
       "      <td>13500</td>\n",
       "      <td>0.410754</td>\n",
       "      <td>0.335</td>\n",
       "      <td>0.310</td>\n",
       "      <td>0.366</td>\n",
       "      <td>0.424</td>\n",
       "      <td>0.164</td>\n",
       "      <td>0.300</td>\n",
       "      <td>...</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.407</td>\n",
       "      <td>0.515</td>\n",
       "      <td>0.519</td>\n",
       "      <td>0.779</td>\n",
       "      <td>0.668</td>\n",
       "      <td>0.3590</td>\n",
       "      <td>0.3565</td>\n",
       "      <td>0.261681</td>\n",
       "      <td>0.279534</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>120 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                             runname  seed  steps  agg_score  \\\n",
       "0    filtering-baseline-2019-18-40gt     5      0   0.330953   \n",
       "1    filtering-baseline-2019-18-40gt     5   1000   0.357474   \n",
       "2    filtering-baseline-2019-18-40gt     5   2000   0.377436   \n",
       "3    filtering-baseline-2019-18-40gt     5   3000   0.387994   \n",
       "4    filtering-baseline-2019-18-40gt     5   4000   0.396110   \n",
       "..                               ...   ...    ...        ...   \n",
       "115           wet-extraction-2019-18     6  10000   0.408977   \n",
       "116           wet-extraction-2019-18     6  11000   0.408771   \n",
       "117           wet-extraction-2019-18     6  12000   0.408239   \n",
       "118           wet-extraction-2019-18     6  13000   0.413263   \n",
       "119           wet-extraction-2019-18     6  13500   0.410754   \n",
       "\n",
       "     commonsense_qa/acc  commonsense_qa/acc_norm  hellaswag/acc  \\\n",
       "0                 0.186                    0.233          0.272   \n",
       "1                 0.239                    0.271          0.297   \n",
       "2                 0.280                    0.284          0.321   \n",
       "3                 0.277                    0.291          0.339   \n",
       "4                 0.299                    0.315          0.340   \n",
       "..                  ...                      ...            ...   \n",
       "115               0.326                    0.312          0.362   \n",
       "116               0.325                    0.315          0.363   \n",
       "117               0.329                    0.308          0.364   \n",
       "118               0.325                    0.308          0.367   \n",
       "119               0.335                    0.310          0.366   \n",
       "\n",
       "     hellaswag/acc_norm  openbookqa/acc  openbookqa/acc_norm  ...  siqa/acc  \\\n",
       "0                 0.258           0.166                0.286  ...     0.367   \n",
       "1                 0.287           0.146                0.260  ...     0.365   \n",
       "2                 0.332           0.134                0.268  ...     0.368   \n",
       "3                 0.359           0.132                0.280  ...     0.394   \n",
       "4                 0.366           0.158                0.286  ...     0.376   \n",
       "..                  ...             ...                  ...  ...       ...   \n",
       "115               0.412           0.166                0.312  ...     0.379   \n",
       "116               0.409           0.162                0.312  ...     0.388   \n",
       "117               0.416           0.178                0.308  ...     0.382   \n",
       "118               0.425           0.174                0.312  ...     0.387   \n",
       "119               0.424           0.164                0.300  ...     0.392   \n",
       "\n",
       "     siqa/acc_norm  winogrande/acc  winogrande/acc_norm  sciq/acc  \\\n",
       "0            0.362           0.516                0.497     0.210   \n",
       "1            0.396           0.503                0.486     0.568   \n",
       "2            0.399           0.519                0.502     0.686   \n",
       "3            0.404           0.520                0.503     0.721   \n",
       "4            0.399           0.515                0.500     0.739   \n",
       "..             ...             ...                  ...       ...   \n",
       "115          0.396           0.525                0.517     0.767   \n",
       "116          0.399           0.529                0.520     0.777   \n",
       "117          0.398           0.521                0.510     0.770   \n",
       "118          0.411           0.523                0.524     0.774   \n",
       "119          0.407           0.515                0.519     0.779   \n",
       "\n",
       "     sciq/acc_norm  arc/acc  arc/acc_norm  mmlu/acc  mmlu/acc_norm  \n",
       "0            0.202   0.2190        0.2515  0.230285       0.250127  \n",
       "1            0.502   0.2665        0.2855  0.242526       0.253291  \n",
       "2            0.590   0.3030        0.3215  0.245745       0.260988  \n",
       "3            0.622   0.3210        0.3385  0.250427       0.264451  \n",
       "4            0.620   0.3320        0.3445  0.256134       0.270382  \n",
       "..             ...      ...           ...       ...            ...  \n",
       "115          0.654   0.3480        0.3560  0.262357       0.276813  \n",
       "116          0.664   0.3465        0.3555  0.261599       0.276664  \n",
       "117          0.656   0.3555        0.3595  0.260928       0.278411  \n",
       "118          0.662   0.3570        0.3600  0.263067       0.281104  \n",
       "119          0.668   0.3590        0.3565  0.261681       0.279534  \n",
       "\n",
       "[120 rows x 22 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from matplotlib.figure import Figure\n",
    "\n",
    "df = pd.read_csv(\"../src_data/wet_comparison.csv\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b610f43caefdf01",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-13T15:30:52.866635Z",
     "start_time": "2024-05-13T15:30:52.865068Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "runs_mapping = {\n",
    "    \"wet-extraction-2019-18\": \"WET data\",\n",
    "    \"ind_minhash-CC-MAIN-2019-18\": \"Extracted from WARC\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "initial_id",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-13T15:30:53.034617Z",
     "start_time": "2024-05-13T15:30:52.867342Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from matplotlib import pyplot as plt\n",
    "metrics = ['agg_score', 'commonsense_qa/acc_norm', 'hellaswag/acc_norm', 'openbookqa/acc_norm', 'piqa/acc_norm',\n",
    "                   'siqa/acc_norm', 'winogrande/acc_norm', 'arc/acc_norm', 'mmlu/acc_norm']\n",
    "\n",
    "def normalize_runname(runname):\n",
    "    return runname.replace(\"/\", \"_\")\n",
    "\n",
    "grouped = (\n",
    "    df.groupby([\"runname\", \"steps\"])\n",
    "    .agg(\n",
    "        {\n",
    "            key: \"mean\" for key in metrics\n",
    "        }\n",
    "    )\n",
    "    .reset_index()\n",
    ")\n",
    "\n",
    "file_id=\"../assets/data/plots/wet_comparison\"\n",
    "files = {}\n",
    "for metric in metrics:\n",
    "    datas = {}\n",
    "    for name, group in grouped.groupby(\"runname\"):\n",
    "        if name not in runs_mapping:\n",
    "            continue\n",
    "        group = group[[\"steps\", metric]].sort_values(by=\"steps\")\n",
    "        group = group.set_index(\"steps\")\n",
    "        rolling_avg = group\n",
    "        # rolling_avg = group.rolling(window=5).mean()\n",
    "        datas[name] = {\n",
    "            \"x\": (rolling_avg.index * 2048 * 1024 * 1e-9).tolist(),\n",
    "            \"y\": rolling_avg[metric].tolist(),\n",
    "            \"label\": runs_mapping[name],\n",
    "        }\n",
    "    # Sort the datata based on the steps\n",
    "    datas = {k: v for k, v in sorted(datas.items(), key=lambda x: -x[1][\"y\"][-1])}\n",
    "    # Create a folder\n",
    "    os.makedirs(f\"{file_id}\", exist_ok=True)\n",
    "    with open(f\"{file_id}/{normalize_runname(metric)}.json\", \"w\") as f:\n",
    "        json.dump({\n",
    "            \"data\": datas,\n",
    "            \"layout\": {\n",
    "                \"title\": {\n",
    "                    \"text\": \"WET data is worse than data extracted from WARC\"\n",
    "                },\n",
    "            }\n",
    "        }, f)\n",
    "    files[metric] = {\"file\": f\"{normalize_runname(metric)}.json\"}\n",
    "# Create index\n",
    "with open(f\"{file_id}/index.json\", \"w\") as f:\n",
    "    json.dump({\n",
    "        \"files\": files,\n",
    "        \"settings\": {\n",
    "            \"defaultMetric\": \"agg_score\",\n",
    "            \"slider\":{\"min\":0,\"max\":10,\"default\":0}\n",
    "        }\n",
    "    }, f)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "af28ebbd054cdc33",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-13T15:30:53.036912Z",
     "start_time": "2024-05-13T15:30:53.035519Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}