{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 82,
      "metadata": {
        "cellView": "form",
        "id": "AXI2uCSkxx7m"
      },
      "outputs": [],
      "source": [
        "#@title Setup\n",
        "\n",
        "# Install the third-party packages imported by the cells below.\n",
        "# `-q` keeps the output short. A `%%capture` cell magic is only recognised on the\n",
        "# first line of a cell, so it cannot be used after the `#@title` line.\n",
        "!pip install -q networkx pulp numpy pandas requests beautifulsoup4\n",
        "\n",
        "# Start from an empty ./data/ directory so re-running this cell is repeatable.\n",
        "!rm -rf ./data/\n",
        "!mkdir -p ./data/\n",
        "!wget -q -c -O ./data/lastfm_asia.zip \"https://snap.stanford.edu/data/lastfm_asia.zip\"\n",
        "!unzip -q ./data/lastfm_asia.zip -d ./data/"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Problem 3: Linear Programming\n",
        "\n",
        "\n",
        "from pulp import *\n",
        "from IPython.display import HTML, display\n",
        "\n",
        "def display_table(table):\n",
        "    \"\"\"Display `table` as an HTML table in the cell output.\n",
        "\n",
        "    `table` is an iterable of rows and each row an iterable of cell values.\n",
        "    Every cell is converted with `str()` and inserted as-is (no HTML escaping).\n",
        "    \"\"\"\n",
        "    row_markup = []\n",
        "    for row in table:\n",
        "        cells = '</td><td>'.join(str(cell) for cell in row)\n",
        "        row_markup.append('<td>{}</td>'.format(cells))\n",
        "    body = '</tr><tr>'.join(row_markup)\n",
        "    display(HTML('<table><tr>{}</tr></table>'.format(body)))\n",
        "\n",
        "# Maximise 5X + 3Y over integer X and Y subject to the three constraints below.\n",
        "problem = LpProblem(\"MSML_602_Midterm_Q3\", LpMaximize)\n",
        "\n",
        "# Integer decision variables; no explicit lower or upper bounds are set.\n",
        "X = LpVariable(\"X\", cat=\"Integer\")\n",
        "Y = LpVariable(\"Y\", cat=\"Integer\")\n",
        "\n",
        "problem += (5 * X) + (3 * Y), \"Objective\"\n",
        "problem += X + (2 * Y) <= 14, \"Constraint 1\"\n",
        "problem += (3 * X) - Y >= 0, \"Constraint 2\"\n",
        "problem += X - Y <= 2, \"Constraint 3\"\n",
        "\n",
        "problem.solve()\n",
        "\n",
        "# Only report values when the solver found an optimum; for any other status\n",
        "# the variable values are not a solution of the model above.\n",
        "solver_status = LpStatus[problem.status]\n",
        "if solver_status != \"Optimal\":\n",
        "    raise RuntimeError(f\"LP solver finished with status {solver_status!r}; no optimal solution to report\")\n",
        "\n",
        "print(\"Solution:\\n\")\n",
        "\n",
        "# Header row, one row per decision variable, then the objective value.\n",
        "data = [[\"Variable\", \"Value\"]] + [[v.name, v.varValue] for v in problem.variables()]\n",
        "data += [[\"Max value for objective function: \", problem.objective.value()]]\n",
        "display_table(data)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 140
        },
        "cellView": "form",
        "id": "ALmlZnbcx-9e",
        "outputId": "45e2c507-3265-4b22-e21a-2d6dbd72f05f"
      },
      "execution_count": 83,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Solution:\n",
            "\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "<table><tr><td>Variable</td><td>Value</td></tr><tr><td>X</td><td>6.0</td></tr><tr><td>Y</td><td>4.0</td></tr><tr><td>Max value for objective function: </td><td>42.0</td></tr></table>"
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Problem 5: Graph Metrics\n",
        "\n",
        "import pandas as pd \n",
        "import networkx as nx\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "# Read the edge list from the same relative ./data/ directory the Setup cell\n",
        "# extracts into, instead of an absolute path that only exists on one machine.\n",
        "# NOTE(review): the folder spelling \"lasftm_asia\" is kept from the original path,\n",
        "# presumably it is the folder name inside the archive; verify before changing it.\n",
        "df = pd.read_csv(\"./data/lasftm_asia/lastfm_asia_edges.csv\")\n",
        "G = nx.from_pandas_edgelist(df, source=\"node_1\", target=\"node_2\")\n",
        "\n",
        "# Shortest-path lengths from node 0, keyed by target node.\n",
        "shortest_path = nx.shortest_path_length(G, 0)\n",
        "# Drop the entry for node 0 itself so it does not count towards the average.\n",
        "del shortest_path[0]\n",
        "num = len(shortest_path)\n",
        "total_length = sum(shortest_path.values())\n",
        "avg_shortest_path = total_length / num\n",
        "print(f\"The average shortest path length from node 0 to all other nodes is: {avg_shortest_path}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "cellView": "form",
        "id": "87mMC-B1yJoq",
        "outputId": "83ff28ec-7d51-4f6a-ced6-358538a58f83"
      },
      "execution_count": 84,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "The average shortest path length from node 0 to all other nodes is: 5.651974288337925\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Problem 6: Extracting Webpage Data"
      ],
      "metadata": {
        "id": "mAiJRhb5iW5O"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Scraping result\n",
        "\n",
        "import requests\n",
        "from bs4 import BeautifulSoup\n",
        "import pandas as pd \n",
        "import numpy as np\n",
        "\n",
        "# Download the page. The timeout stops the cell from hanging on a stalled\n",
        "# connection, and raise_for_status() fails here on an HTTP error instead of\n",
        "# letting an error page be parsed below.\n",
        "page = requests.get(\"https://www.worldometers.info/coronavirus/#countries\", timeout=30)\n",
        "page.raise_for_status()\n",
        "html = page.content\n",
        "\n",
        "soup = BeautifulSoup(html, 'html.parser')\n",
        "table = soup.find(\"table\", {\"id\": \"main_table_countries_today\"})\n",
        "if table is None:\n",
        "    raise RuntimeError(\"Table 'main_table_countries_today' was not found in the downloaded page\")\n",
        "\n",
        "# Column names applied, in order, to the <td> cells of each table row.\n",
        "cols = [\n",
        "    '#', 'Country', 'TotalCases', 'NewCases', 'TotalDeaths', 'NewDeaths', 'TotalRecovered',\n",
        "    'NewRecovered', 'ActiveCases', 'Serious,Critical', 'TotalCases/1M pop', 'Deaths/1M pop',\n",
        "    'TotalTests', 'Tests/1M pop', 'Population', 'Continent', '1 Case every X ppl', '1 Death every X ppl',\n",
        "    '1 Test every X ppl', 'New Cases/1M pop', 'New Deaths/1M pop', 'Active Cases/1M pop'\n",
        "]\n",
        "\n",
        "tbody = table.find(\"tbody\")\n",
        "rows = tbody.find_all(\"tr\")\n",
        "\n",
        "# One list of raw cell texts per table row.\n",
        "data = [[cell.text for cell in row.find_all(\"td\")] for row in rows]\n",
        "\n",
        "\n",
        "def sanitize_country_number(row):\n",
        "  \"\"\"Return the row's \"#\" value, or NaN when it is empty or whitespace-only.\n",
        "\n",
        "  Args:\n",
        "    row: Mapping-like record (e.g. a DataFrame row) holding a string under \"#\".\n",
        "\n",
        "  Returns:\n",
        "    The original \"#\" string, unmodified, or `np.nan` when it is blank.\n",
        "  \"\"\"\n",
        "  val = row[\"#\"]\n",
        "  # `np.nan` is the canonical name; the `np.NaN` alias was removed in NumPy 2.0.\n",
        "  return val if val.strip() else np.nan\n",
        "\n",
        "\n",
        "def fill_active_cases(row):\n",
        "  \"\"\"Return the row's active-case count, derived from the per-million rate when missing.\n",
        "\n",
        "  Reads \"ActiveCases\", \"Active Cases/1M pop\" and \"Population\" from `row`; the\n",
        "  values are expected to be numeric already, with NaN marking a missing value.\n",
        "\n",
        "  Returns:\n",
        "    \"ActiveCases\" when it is not NaN; otherwise (rate / 1,000,000) * population,\n",
        "    or NaN when the per-million rate is NaN as well.\n",
        "  \"\"\"\n",
        "  active = row[\"ActiveCases\"]\n",
        "  if np.isnan(active):\n",
        "    rate_per_million = row[\"Active Cases/1M pop\"]\n",
        "    if np.isnan(rate_per_million):\n",
        "      return np.nan\n",
        "    return (rate_per_million / 1000000) * row[\"Population\"]\n",
        "  return active\n",
        "\n",
        "\n",
        "def to_float(col):\n",
        "  \"\"\"Build a row mapper that parses the text stored under `col` as a float.\n",
        "\n",
        "  Args:\n",
        "    col: Key (column name) of the string value to parse.\n",
        "\n",
        "  Returns:\n",
        "    A function taking a mapping-like row. It returns `np.nan` when the value is\n",
        "    \"N/A\" or blank (surrounding whitespace ignored); otherwise the value with\n",
        "    thousands separators (\",\") removed, converted with `float()`.\n",
        "\n",
        "  Raises:\n",
        "    ValueError: from the mapper, when the cleaned text is not a valid number.\n",
        "  \"\"\"\n",
        "  def mapper(row):\n",
        "    # Strip before comparing so padded markers such as \" N/A \" are recognised.\n",
        "    val = row[col].strip()\n",
        "    if val == \"N/A\":\n",
        "      return np.nan\n",
        "    val = val.replace(\",\", \"\").strip()\n",
        "    if not val:\n",
        "      return np.nan\n",
        "    return float(val)\n",
        "  return mapper\n",
        "\n",
        "# Build the frame from the scraped rows and remove newline characters from every text cell.\n",
        "df = pd.DataFrame(data, columns=cols).replace(r\"\\n\", \"\", regex=True)\n",
        "\n",
        "df.head()\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 386
        },
        "cellView": "form",
        "id": "Ay-ceRkkzcVg",
        "outputId": "3b1e8535-f211-45ba-9b90-85c279e522ec"
      },
      "execution_count": 85,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "  #        Country   TotalCases  NewCases TotalDeaths NewDeaths  \\\n",
              "0    North America  118,308,960   +16,354   1,557,219       +76   \n",
              "1             Asia  195,343,819  +168,618   1,491,630      +230   \n",
              "2           Europe  235,496,414   +41,389   1,948,669      +165   \n",
              "3    South America   64,557,158   +10,126   1,333,737       +79   \n",
              "4          Oceania   12,691,699    +3,057      21,779        +9   \n",
              "\n",
              "  TotalRecovered NewRecovered ActiveCases Serious,Critical  ... TotalTests  \\\n",
              "0    113,762,872      +16,362   2,988,869            7,881  ...              \n",
              "1    188,186,652      +78,736   5,665,537            9,159  ...              \n",
              "2    229,427,346     +175,758   4,120,399            7,685  ...              \n",
              "3     62,884,992       +7,699     338,429           10,119  ...              \n",
              "4     12,512,305                  157,615               97  ...              \n",
              "\n",
              "  Tests/1M pop Population          Continent 1 Case every X ppl  \\\n",
              "0                              North America                      \n",
              "1                                       Asia                      \n",
              "2                                     Europe                      \n",
              "3                              South America                      \n",
              "4                          Australia/Oceania                      \n",
              "\n",
              "  1 Death every X ppl 1 Test every X ppl New Cases/1M pop New Deaths/1M pop  \\\n",
              "0                                                                             \n",
              "1                                                                             \n",
              "2                                                                             \n",
              "3                                                                             \n",
              "4                                                                             \n",
              "\n",
              "  Active Cases/1M pop  \n",
              "0                      \n",
              "1                      \n",
              "2                      \n",
              "3                      \n",
              "4                      \n",
              "\n",
              "[5 rows x 22 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-ba4843e7-ab30-4f7d-ab50-7ddf8afb1bbb\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>#</th>\n",
              "      <th>Country</th>\n",
              "      <th>TotalCases</th>\n",
              "      <th>NewCases</th>\n",
              "      <th>TotalDeaths</th>\n",
              "      <th>NewDeaths</th>\n",
              "      <th>TotalRecovered</th>\n",
              "      <th>NewRecovered</th>\n",
              "      <th>ActiveCases</th>\n",
              "      <th>Serious,Critical</th>\n",
              "      <th>...</th>\n",
              "      <th>TotalTests</th>\n",
              "      <th>Tests/1M pop</th>\n",
              "      <th>Population</th>\n",
              "      <th>Continent</th>\n",
              "      <th>1 Case every X ppl</th>\n",
              "      <th>1 Death every X ppl</th>\n",
              "      <th>1 Test every X ppl</th>\n",
              "      <th>New Cases/1M pop</th>\n",
              "      <th>New Deaths/1M pop</th>\n",
              "      <th>Active Cases/1M pop</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td></td>\n",
              "      <td>North America</td>\n",
              "      <td>118,308,960</td>\n",
              "      <td>+16,354</td>\n",
              "      <td>1,557,219</td>\n",
              "      <td>+76</td>\n",
              "      <td>113,762,872</td>\n",
              "      <td>+16,362</td>\n",
              "      <td>2,988,869</td>\n",
              "      <td>7,881</td>\n",
              "      <td>...</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td>North America</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td></td>\n",
              "      <td>Asia</td>\n",
              "      <td>195,343,819</td>\n",
              "      <td>+168,618</td>\n",
              "      <td>1,491,630</td>\n",
              "      <td>+230</td>\n",
              "      <td>188,186,652</td>\n",
              "      <td>+78,736</td>\n",
              "      <td>5,665,537</td>\n",
              "      <td>9,159</td>\n",
              "      <td>...</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td>Asia</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td></td>\n",
              "      <td>Europe</td>\n",
              "      <td>235,496,414</td>\n",
              "      <td>+41,389</td>\n",
              "      <td>1,948,669</td>\n",
              "      <td>+165</td>\n",
              "      <td>229,427,346</td>\n",
              "      <td>+175,758</td>\n",
              "      <td>4,120,399</td>\n",
              "      <td>7,685</td>\n",
              "      <td>...</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td>Europe</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td></td>\n",
              "      <td>South America</td>\n",
              "      <td>64,557,158</td>\n",
              "      <td>+10,126</td>\n",
              "      <td>1,333,737</td>\n",
              "      <td>+79</td>\n",
              "      <td>62,884,992</td>\n",
              "      <td>+7,699</td>\n",
              "      <td>338,429</td>\n",
              "      <td>10,119</td>\n",
              "      <td>...</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td>South America</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td></td>\n",
              "      <td>Oceania</td>\n",
              "      <td>12,691,699</td>\n",
              "      <td>+3,057</td>\n",
              "      <td>21,779</td>\n",
              "      <td>+9</td>\n",
              "      <td>12,512,305</td>\n",
              "      <td></td>\n",
              "      <td>157,615</td>\n",
              "      <td>97</td>\n",
              "      <td>...</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td>Australia/Oceania</td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 22 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ba4843e7-ab30-4f7d-ab50-7ddf8afb1bbb')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-ba4843e7-ab30-4f7d-ab50-7ddf8afb1bbb button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-ba4843e7-ab30-4f7d-ab50-7ddf8afb1bbb');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 85
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Data sanitization / generation\n",
        "\n",
        "#@markdown Some of the countries (actually ships, in this case) did not have any population data, so I excluded those records from the dataset.\n",
        "\n",
        "#@markdown Some countries didn't have data for exact active cases, but had data for **active cases per 1 million population**. \n",
        "#@markdown For these countries, I calculated their active cases by using the active cases per 1 million population data as follows: \n",
        "\n",
        "#@markdown ```Active Cases = (Active cases per 1 million population / 1,000,000) * Population```\n",
        "\n",
        "df[\"country_number\"] = df.apply(sanitize_country_number, axis=1)\n",
        "\n",
        "# Keep only rows that carry a number in \"#\"; rows with a blank \"#\" (such as the\n",
        "# continent summary rows at the top of the scraped table) are excluded.\n",
        "data_by_country = df[df[\"country_number\"].notna()].copy()\n",
        "\n",
        "# Parse the text columns needed below into floats (NaN for \"N/A\" or blank).\n",
        "for numeric_col in (\"ActiveCases\", \"Active Cases/1M pop\", \"Population\"):\n",
        "  data_by_country[numeric_col] = data_by_country.apply(to_float(numeric_col), axis=1)\n",
        "\n",
        "# Fill missing active-case counts from the per-million rate where possible.\n",
        "data_by_country[\"ActiveCases\"] = data_by_country.apply(fill_active_cases, axis=1)\n",
        "\n",
        "# `as_index` is a groupby() option (not an agg() one): it keeps \"Country\" as a\n",
        "# regular column, so no reset_index() is needed afterwards.\n",
        "aggregated = data_by_country.groupby(\"Country\", as_index=False).agg(\n",
        "    {\"ActiveCases\": \"mean\", \"Population\": \"sum\"}\n",
        ")\n",
        "\n",
        "# A population sum of 0 means no population figure was available for that entry.\n",
        "dropped_countries = aggregated[aggregated[\"Population\"] == 0]\n",
        "# .copy() so the column added below is written to an independent frame.\n",
        "aggregated = aggregated[aggregated[\"Population\"] != 0].copy()\n",
        "# Scaled by 100 so the column really holds a percentage, like the overall figure.\n",
        "aggregated[\"PercentageInfected\"] = aggregated[\"ActiveCases\"] / aggregated[\"Population\"] * 100\n",
        "aggregated = aggregated.sort_values(\"PercentageInfected\", ascending=False)\n",
        "\n",
        "print(\"These were the countries(ships) that didn't have population data:\\n\")\n",
        "print(dropped_countries)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "cellView": "form",
        "id": "Roitzj22-VO5",
        "outputId": "5dc61fa2-31c9-4fa4-828c-c9f88fb449e2"
      },
      "execution_count": 86,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "These were the countries(ships) that didn't have population data:\n",
            "\n",
            "              Country  ActiveCases  Population\n",
            "56   Diamond Princess          0.0         0.0\n",
            "120        MS Zaandam          0.0         0.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Average active cases & the proportion of the total population affected\n",
        "\n",
        "from IPython.display import HTML, display\n",
        "\n",
        "def display_table(table):\n",
        "    \"\"\"Display `table` as an HTML table in the cell output.\n",
        "\n",
        "    `table` is an iterable of rows and each row an iterable of cell values.\n",
        "    Every cell is converted with `str()` and inserted as-is (no HTML escaping).\n",
        "    \"\"\"\n",
        "    row_markup = []\n",
        "    for row in table:\n",
        "        cells = '</td><td>'.join(str(cell) for cell in row)\n",
        "        row_markup.append('<td>{}</td>'.format(cells))\n",
        "    body = '</tr><tr>'.join(row_markup)\n",
        "    display(HTML('<table><tr>{}</tr></table>'.format(body)))\n",
        "\n",
        "# Mean of the per-country active-case figures.\n",
        "avg_active_cases = aggregated[\"ActiveCases\"].mean()\n",
        "\n",
        "# Overall share: total active cases over total population, as a percentage.\n",
        "# Plain column sums are used; `as_index` is a groupby() option and has no\n",
        "# meaning for DataFrame.agg().\n",
        "total_active_cases = aggregated[\"ActiveCases\"].sum()\n",
        "total_population = aggregated[\"Population\"].sum()\n",
        "percentage_infected = (total_active_cases / total_population) * 100\n",
        "\n",
        "display(HTML(\n",
        "    \"\"\"\n",
        "      <h3>Result:</h3>\n",
        "      <br>\n",
        "      <table border=\"1\">\n",
        "        <tr>\n",
        "          <th align=\"left\">Average active cases:</th>\n",
        "          <td>{0:.2f}</td>\n",
        "        </tr>\n",
        "        <tr>\n",
        "          <th align=\"left\">Proportion of total <br>population currently infected:</th>\n",
        "          <td>{1:.2f}%</td>\n",
        "        </tr>\n",
        "      </table>\n",
        "      <br>\n",
        "    \"\"\".format(avg_active_cases, percentage_infected))\n",
        ")\n",
        "\n",
        "\n",
        "print(\"\"\"\n",
        "I was unsure whether the problem wanted the percentage of the population\n",
        "affected for each country, so I have included the percentage for each country \n",
        "as well, just in case:\n",
        "\"\"\")\n",
        "aggregated.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 439
        },
        "cellView": "form",
        "id": "lRtwSfqSAPAY",
        "outputId": "a7037d5d-fbd6-48b3-e47b-32090720dfd1"
      },
      "execution_count": 87,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "      <h3>Result:</h3>\n",
              "      <br>\n",
              "      <table border=\"1\">\n",
              "        <tr>\n",
              "          <th align=\"left\">Average active cases:</th>\n",
              "          <td>60038.20</td>\n",
              "        </tr>\n",
              "        <tr>\n",
              "          <th align=\"left\">Proportion of total <br>population currently infected:</th>\n",
              "          <td>0.17%</td>\n",
              "        </tr>\n",
              "      </table>\n",
              "      <br>\n",
              "    "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "I was unsure whether the problem wanted the percentage of the population\n",
            "affected for each country, so I have included the percentage for each country \n",
            "as well, just in case:\n",
            "\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "            Country    ActiveCases  Population  PercentageInfected\n",
              "129      Martinique  222576.901869    374087.0            0.594987\n",
              "68   Faeroe Islands   26936.998989     49233.0            0.547133\n",
              "195       St. Barth    4854.999825      9945.0            0.488185\n",
              "84       Guadeloupe  193026.939904    399794.0            0.482816\n",
              "93          Iceland  130899.111498    345393.0            0.378986"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-5b3415ae-443b-4212-be42-19efc6bff309\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Country</th>\n",
              "      <th>ActiveCases</th>\n",
              "      <th>Population</th>\n",
              "      <th>PercentageInfected</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>129</th>\n",
              "      <td>Martinique</td>\n",
              "      <td>222576.901869</td>\n",
              "      <td>374087.0</td>\n",
              "      <td>0.594987</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>68</th>\n",
              "      <td>Faeroe Islands</td>\n",
              "      <td>26936.998989</td>\n",
              "      <td>49233.0</td>\n",
              "      <td>0.547133</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>195</th>\n",
              "      <td>St. Barth</td>\n",
              "      <td>4854.999825</td>\n",
              "      <td>9945.0</td>\n",
              "      <td>0.488185</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>84</th>\n",
              "      <td>Guadeloupe</td>\n",
              "      <td>193026.939904</td>\n",
              "      <td>399794.0</td>\n",
              "      <td>0.482816</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>93</th>\n",
              "      <td>Iceland</td>\n",
              "      <td>130899.111498</td>\n",
              "      <td>345393.0</td>\n",
              "      <td>0.378986</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5b3415ae-443b-4212-be42-19efc6bff309')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-5b3415ae-443b-4212-be42-19efc6bff309 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-5b3415ae-443b-4212-be42-19efc6bff309');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 87
        }
      ]
    }
  ]
}