File size: 19,120 Bytes
b72ca1e
1
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"mount_file_id":"13dQOLnAYp359h5ch3EVdhQIyFIcj7qdb","authorship_tag":"ABX9TyM/2/3dzTogx+WXTgtoOH/4"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"id":"Cdg5QVtB_0LG","executionInfo":{"status":"ok","timestamp":1672199427921,"user_tz":-330,"elapsed":1564,"user":{"displayName":"Putturu kusumakar Reddy","userId":"07552141107752951949"}}},"outputs":[],"source":["import pandas as pd\n","import numpy as np"]},{"cell_type":"code","source":["df1 = pd.read_csv(\"/content/AllStationsData_PHD.txt\",sep = \"|\")"],"metadata":{"id":"gbr09RAXDtin","executionInfo":{"status":"ok","timestamp":1672199432507,"user_tz":-330,"elapsed":3,"user":{"displayName":"Putturu kusumakar Reddy","userId":"07552141107752951949"}}},"execution_count":2,"outputs":[]},{"cell_type":"code","source":["df1.head(5)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":270},"id":"oHHIoo5fECv-","executionInfo":{"status":"ok","timestamp":1672199445277,"user_tz":-330,"elapsed":1002,"user":{"displayName":"Putturu kusumakar Reddy","userId":"07552141107752951949"}},"outputId":"766d3d98-3ed3-4bd3-da72-85aa7b4479eb"},"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":["   WeatherStationID AirportID  GroundHeight  StationHeight  BarometerHeight  \\\n","0              3011       TEX             0              0                0   \n","1              3012       SKX             0              0                0   \n","2              3013       LAA          3683           3703             3675   \n","3              3014       4SL             0           6909                0   \n","4              3016       RIL          5503           5544             5506   \n","\n","   Latitude  Longitude TimeZone  \n","0     37.57    -107.55       +7  \n","1     36.28    -105.40       +7  \n","2     38.04    
-102.41       +7  \n","3     35.47    -107.14       +7  \n","4     39.32    -107.44       +7  "],"text/html":["\n","  <div id=\"df-9ecefe5e-895f-413f-b613-538fd6769849\">\n","    <div class=\"colab-df-container\">\n","      <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>WeatherStationID</th>\n","      <th>AirportID</th>\n","      <th>GroundHeight</th>\n","      <th>StationHeight</th>\n","      <th>BarometerHeight</th>\n","      <th>Latitude</th>\n","      <th>Longitude</th>\n","      <th>TimeZone</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>3011</td>\n","      <td>TEX</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>37.57</td>\n","      <td>-107.55</td>\n","      <td>+7</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>3012</td>\n","      <td>SKX</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>36.28</td>\n","      <td>-105.40</td>\n","      <td>+7</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>3013</td>\n","      <td>LAA</td>\n","      <td>3683</td>\n","      <td>3703</td>\n","      <td>3675</td>\n","      <td>38.04</td>\n","      <td>-102.41</td>\n","      <td>+7</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>3014</td>\n","      <td>4SL</td>\n","      <td>0</td>\n","      <td>6909</td>\n","      <td>0</td>\n","      <td>35.47</td>\n","      <td>-107.14</td>\n","      <td>+7</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>3016</td>\n","      <td>RIL</td>\n","      <td>5503</td>\n","      <td>5544</td>\n","  
    <td>5506</td>\n","      <td>39.32</td>\n","      <td>-107.44</td>\n","      <td>+7</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9ecefe5e-895f-413f-b613-538fd6769849')\"\n","              title=\"Convert this dataframe to an interactive table.\"\n","              style=\"display:none;\">\n","        \n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n","    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n","  </svg>\n","      </button>\n","      \n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      flex-wrap:wrap;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      
filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","      <script>\n","        const buttonEl =\n","          document.querySelector('#df-9ecefe5e-895f-413f-b613-538fd6769849 button.colab-df-convert');\n","        buttonEl.style.display =\n","          google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","        async function convertToInteractive(key) {\n","          const element = document.querySelector('#df-9ecefe5e-895f-413f-b613-538fd6769849');\n","          const dataTable =\n","            await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                     [key], {});\n","          if (!dataTable) return;\n","\n","          const docLinkHtml = 'Like what you see? Visit the ' +\n","            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","            + ' to learn more about interactive tables.';\n","          element.innerHTML = '';\n","          dataTable['output_type'] = 'display_data';\n","          await google.colab.output.renderOutput(dataTable, element);\n","          const docLink = document.createElement('div');\n","          docLink.innerHTML = docLinkHtml;\n","          element.appendChild(docLink);\n","        }\n","      </script>\n","    </div>\n","  </div>\n","  "]},"metadata":{},"execution_count":3}]},{"cell_type":"code","source":["df1.dtypes"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"kLGk5BGpEFXs","executionInfo":{"status":"ok","timestamp":1672199475075,"user_tz":-330,"elapsed":1227,"user":{"displayName":"Putturu kusumakar Reddy","userId":"07552141107752951949"}},"outputId":"d23e3805-9d53-4782-f8ea-bfc14c0b879f"},"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":["WeatherStationID      int64\n","AirportID            object\n","GroundHeight          int64\n","StationHeight         
int64\n","BarometerHeight       int64\n","Latitude            float64\n","Longitude           float64\n","TimeZone             object\n","dtype: object"]},"metadata":{},"execution_count":4}]},{"cell_type":"code","source":["df1.shape"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"LLH4Im5-EJ9i","executionInfo":{"status":"ok","timestamp":1672199480960,"user_tz":-330,"elapsed":1663,"user":{"displayName":"Putturu kusumakar Reddy","userId":"07552141107752951949"}},"outputId":"b17aec45-6dbe-43bb-e4da-ac10938ad0f3"},"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(1744, 8)"]},"metadata":{},"execution_count":5}]},{"cell_type":"code","source":["df1[\"WeatherStationID\"].mode()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"5ciWNzybFlIJ","executionInfo":{"status":"ok","timestamp":1672199494296,"user_tz":-330,"elapsed":1585,"user":{"displayName":"Putturu kusumakar Reddy","userId":"07552141107752951949"}},"outputId":"97d58348-e33e-4208-e187-4a40a990eafd"},"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0        3011\n","1        3012\n","2        3013\n","3        3014\n","4        3016\n","        ...  
\n","1739    94997\n","1740    94998\n","1741    94999\n","1742    96401\n","1743    96402\n","Length: 1744, dtype: int64"]},"metadata":{},"execution_count":6}]},{"cell_type":"markdown","source":["So all station IDs are unique: `mode()` returned all 1744 values (one per row), so no ID occurs more than once."],"metadata":{"id":"8dukebWQ22eM"}},{"cell_type":"code","source":["df1.isna().mean()"],"metadata":{"id":"tuLCOcf8F5by","executionInfo":{"status":"ok","timestamp":1672199517117,"user_tz":-330,"elapsed":19,"user":{"displayName":"Putturu kusumakar Reddy","userId":"07552141107752951949"}},"outputId":"33f075d5-a3b5-4e77-9217-c8c7d93663b9","colab":{"base_uri":"https://localhost:8080/"}},"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["WeatherStationID    0.0\n","AirportID           0.0\n","GroundHeight        0.0\n","StationHeight       0.0\n","BarometerHeight     0.0\n","Latitude            0.0\n","Longitude           0.0\n","TimeZone            0.0\n","dtype: float64"]},"metadata":{},"execution_count":7}]},{"cell_type":"markdown","source":["So there are no NaN values in any column."],"metadata":{"id":"Eeqn54Re276q"}},{"cell_type":"code","source":["df2 = pd.read_csv(\"/content/200401hourly.txt\")"],"metadata":{"id":"M7Rnv2dHXTCl"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["df2.head(5)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":305},"id":"DL5_g9ylXoFk","executionInfo":{"status":"ok","timestamp":1672157787704,"user_tz":-330,"elapsed":712,"user":{"displayName":"Putturu kusumakar Reddy","userId":"07552141107752951949"}},"outputId":"24c3ac56-3595-4259-8ba1-7b856cbcc83e"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["   WeatherStationID  YearMonthDay  Time         SkyConditions Visibility  \\\n","0              3034      20040101    10                OVC095       10SM   \n","1              3035      20040101    10                SCT110       10SM   \n","2              3039      20040101    10                OVC002     0.25SM   \n","3       
       3040      20040101    10                   CLR       10SM   \n","4              4128      20040101    10  BKN001 BKN010 OVC024        4SM   \n","\n","    DBT  DewPointTemp  RelativeHumidityPercent  WindSpeed WindDirection  \\\n","0  30.0           7.0                     38.0        0.0           000   \n","1  43.0          18.0                     37.0        0.0           000   \n","2  14.0          12.0                     92.0        0.0           000   \n","3  30.0          21.0                     69.0        9.0           250   \n","4   NaN           NaN                      NaN        0.0           000   \n","\n","   WindGustValue  StationPressure  \n","0            0.0            30.10  \n","1            0.0            30.07  \n","2            0.0            30.01  \n","3            0.0            29.82  \n","4            0.0            29.80  "],"text/html":["\n","  <div id=\"df-b3d1c0f2-d424-4ac5-9bf2-cb1a7f208eb6\">\n","    <div class=\"colab-df-container\">\n","      <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>WeatherStationID</th>\n","      <th>YearMonthDay</th>\n","      <th>Time</th>\n","      <th>SkyConditions</th>\n","      <th>Visibility</th>\n","      <th>DBT</th>\n","      <th>DewPointTemp</th>\n","      <th>RelativeHumidityPercent</th>\n","      <th>WindSpeed</th>\n","      <th>WindDirection</th>\n","      <th>WindGustValue</th>\n","      <th>StationPressure</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>3034</td>\n","      <td>20040101</td>\n","      <td>10</td>\n","      <td>OVC095</td>\n","      <td>10SM</td>\n","  
    <td>30.0</td>\n","      <td>7.0</td>\n","      <td>38.0</td>\n","      <td>0.0</td>\n","      <td>000</td>\n","      <td>0.0</td>\n","      <td>30.10</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>3035</td>\n","      <td>20040101</td>\n","      <td>10</td>\n","      <td>SCT110</td>\n","      <td>10SM</td>\n","      <td>43.0</td>\n","      <td>18.0</td>\n","      <td>37.0</td>\n","      <td>0.0</td>\n","      <td>000</td>\n","      <td>0.0</td>\n","      <td>30.07</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>3039</td>\n","      <td>20040101</td>\n","      <td>10</td>\n","      <td>OVC002</td>\n","      <td>0.25SM</td>\n","      <td>14.0</td>\n","      <td>12.0</td>\n","      <td>92.0</td>\n","      <td>0.0</td>\n","      <td>000</td>\n","      <td>0.0</td>\n","      <td>30.01</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>3040</td>\n","      <td>20040101</td>\n","      <td>10</td>\n","      <td>CLR</td>\n","      <td>10SM</td>\n","      <td>30.0</td>\n","      <td>21.0</td>\n","      <td>69.0</td>\n","      <td>9.0</td>\n","      <td>250</td>\n","      <td>0.0</td>\n","      <td>29.82</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4128</td>\n","      <td>20040101</td>\n","      <td>10</td>\n","      <td>BKN001 BKN010 OVC024</td>\n","      <td>4SM</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>0.0</td>\n","      <td>000</td>\n","      <td>0.0</td>\n","      <td>29.80</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b3d1c0f2-d424-4ac5-9bf2-cb1a7f208eb6')\"\n","              title=\"Convert this dataframe to an interactive table.\"\n","              style=\"display:none;\">\n","        \n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n","    
<path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n","  </svg>\n","      </button>\n","      \n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      flex-wrap:wrap;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","      <script>\n","        const buttonEl =\n","          document.querySelector('#df-b3d1c0f2-d424-4ac5-9bf2-cb1a7f208eb6 button.colab-df-convert');\n","        buttonEl.style.display =\n","          google.colab.kernel.accessAllowed ? 
'block' : 'none';\n","\n","        async function convertToInteractive(key) {\n","          const element = document.querySelector('#df-b3d1c0f2-d424-4ac5-9bf2-cb1a7f208eb6');\n","          const dataTable =\n","            await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                     [key], {});\n","          if (!dataTable) return;\n","\n","          const docLinkHtml = 'Like what you see? Visit the ' +\n","            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","            + ' to learn more about interactive tables.';\n","          element.innerHTML = '';\n","          dataTable['output_type'] = 'display_data';\n","          await google.colab.output.renderOutput(dataTable, element);\n","          const docLink = document.createElement('div');\n","          docLink.innerHTML = docLinkHtml;\n","          element.appendChild(docLink);\n","        }\n","      </script>\n","    </div>\n","  </div>\n","  "]},"metadata":{},"execution_count":4}]},{"cell_type":"code","source":["df2.dtypes"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IAauKOEnXvDF","executionInfo":{"status":"ok","timestamp":1672157805700,"user_tz":-330,"elapsed":604,"user":{"displayName":"Putturu kusumakar Reddy","userId":"07552141107752951949"}},"outputId":"8bd3f3a1-ed7e-49fe-e3d2-d21bb50d6484"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["WeatherStationID             int64\n","YearMonthDay                 int64\n","Time                         int64\n","SkyConditions               object\n","Visibility                  object\n","DBT                        float64\n","DewPointTemp               float64\n","RelativeHumidityPercent    float64\n","WindSpeed                  float64\n","WindDirection               object\n","WindGustValue              float64\n","StationPressure            float64\n","dtype: 
object"]},"metadata":{},"execution_count":5}]},{"cell_type":"code","source":[],"metadata":{"id":"m3C4mUI0XzY0"},"execution_count":null,"outputs":[]}]}