diff --git "a/analysis.ipynb" "b/analysis.ipynb" new file mode 100644--- /dev/null +++ "b/analysis.ipynb" @@ -0,0 +1,2572 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import datetime as dt\n", + "from sklearn import metrics\n", + "from sklearn.model_selection import train_test_split, RandomizedSearchCV\n", + "from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor\n", + "import pickle" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Airline | \n", + "Date_of_Journey | \n", + "Source | \n", + "Destination | \n", + "Route | \n", + "Dep_Time | \n", + "Arrival_Time | \n", + "Duration | \n", + "Total_Stops | \n", + "Additional_Info | \n", + "Price | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "IndiGo | \n", + "24/03/2019 | \n", + "Banglore | \n", + "New Delhi | \n", + "BLR → DEL | \n", + "22:20 | \n", + "01:10 22 Mar | \n", + "2h 50m | \n", + "non-stop | \n", + "No info | \n", + "3897 | \n", + "
1 | \n", + "Air India | \n", + "1/05/2019 | \n", + "Kolkata | \n", + "Banglore | \n", + "CCU → IXR → BBI → BLR | \n", + "05:50 | \n", + "13:15 | \n", + "7h 25m | \n", + "2 stops | \n", + "No info | \n", + "7662 | \n", + "
2 | \n", + "Jet Airways | \n", + "9/06/2019 | \n", + "Delhi | \n", + "Cochin | \n", + "DEL → LKO → BOM → COK | \n", + "09:25 | \n", + "04:25 10 Jun | \n", + "19h | \n", + "2 stops | \n", + "No info | \n", + "13882 | \n", + "
3 | \n", + "IndiGo | \n", + "12/05/2019 | \n", + "Kolkata | \n", + "Banglore | \n", + "CCU → NAG → BLR | \n", + "18:05 | \n", + "23:30 | \n", + "5h 25m | \n", + "1 stop | \n", + "No info | \n", + "6218 | \n", + "
4 | \n", + "IndiGo | \n", + "01/03/2019 | \n", + "Banglore | \n", + "New Delhi | \n", + "BLR → NAG → DEL | \n", + "16:50 | \n", + "21:35 | \n", + "4h 45m | \n", + "1 stop | \n", + "No info | \n", + "13302 | \n", + "
\n", + " | Airline | \n", + "Source | \n", + "Destination | \n", + "Route | \n", + "Dep_Time | \n", + "Arrival_Time | \n", + "Duration | \n", + "Total_Stops | \n", + "Additional_Info | \n", + "Price | \n", + "Journey_day | \n", + "Journey_month | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "IndiGo | \n", + "Banglore | \n", + "Delhi | \n", + "BLR → DEL | \n", + "22:20 | \n", + "01:10 22 Mar | \n", + "2h 50m | \n", + "non-stop | \n", + "No info | \n", + "3897 | \n", + "24 | \n", + "3 | \n", + "
1 | \n", + "Air India | \n", + "Kolkata | \n", + "Banglore | \n", + "CCU → IXR → BBI → BLR | \n", + "05:50 | \n", + "13:15 | \n", + "7h 25m | \n", + "2 stops | \n", + "No info | \n", + "7662 | \n", + "1 | \n", + "5 | \n", + "
2 | \n", + "Jet Airways | \n", + "Delhi | \n", + "Cochin | \n", + "DEL → LKO → BOM → COK | \n", + "09:25 | \n", + "04:25 10 Jun | \n", + "19h | \n", + "2 stops | \n", + "No info | \n", + "13882 | \n", + "9 | \n", + "6 | \n", + "
3 | \n", + "IndiGo | \n", + "Kolkata | \n", + "Banglore | \n", + "CCU → NAG → BLR | \n", + "18:05 | \n", + "23:30 | \n", + "5h 25m | \n", + "1 stop | \n", + "No info | \n", + "6218 | \n", + "12 | \n", + "5 | \n", + "
4 | \n", + "IndiGo | \n", + "Banglore | \n", + "Delhi | \n", + "BLR → NAG → DEL | \n", + "16:50 | \n", + "21:35 | \n", + "4h 45m | \n", + "1 stop | \n", + "No info | \n", + "13302 | \n", + "1 | \n", + "3 | \n", + "
\n", + " | Airline | \n", + "Source | \n", + "Destination | \n", + "Route | \n", + "Duration | \n", + "Total_Stops | \n", + "Additional_Info | \n", + "Price | \n", + "Journey_day | \n", + "Journey_month | \n", + "Dep_hour | \n", + "Dep_min | \n", + "Arrival_hour | \n", + "Arrival_min | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "IndiGo | \n", + "Banglore | \n", + "Delhi | \n", + "BLR → DEL | \n", + "2h 50m | \n", + "non-stop | \n", + "No info | \n", + "3897 | \n", + "24 | \n", + "3 | \n", + "22 | \n", + "20 | \n", + "1 | \n", + "10 | \n", + "
1 | \n", + "Air India | \n", + "Kolkata | \n", + "Banglore | \n", + "CCU → IXR → BBI → BLR | \n", + "7h 25m | \n", + "2 stops | \n", + "No info | \n", + "7662 | \n", + "1 | \n", + "5 | \n", + "5 | \n", + "50 | \n", + "13 | \n", + "15 | \n", + "
2 | \n", + "Jet Airways | \n", + "Delhi | \n", + "Cochin | \n", + "DEL → LKO → BOM → COK | \n", + "19h | \n", + "2 stops | \n", + "No info | \n", + "13882 | \n", + "9 | \n", + "6 | \n", + "9 | \n", + "25 | \n", + "4 | \n", + "25 | \n", + "
3 | \n", + "IndiGo | \n", + "Kolkata | \n", + "Banglore | \n", + "CCU → NAG → BLR | \n", + "5h 25m | \n", + "1 stop | \n", + "No info | \n", + "6218 | \n", + "12 | \n", + "5 | \n", + "18 | \n", + "5 | \n", + "23 | \n", + "30 | \n", + "
4 | \n", + "IndiGo | \n", + "Banglore | \n", + "Delhi | \n", + "BLR → NAG → DEL | \n", + "4h 45m | \n", + "1 stop | \n", + "No info | \n", + "13302 | \n", + "1 | \n", + "3 | \n", + "16 | \n", + "50 | \n", + "21 | \n", + "35 | \n", + "
\n", + " | Airline | \n", + "Source | \n", + "Destination | \n", + "Route | \n", + "Total_Stops | \n", + "Additional_Info | \n", + "Price | \n", + "Journey_day | \n", + "Journey_month | \n", + "Dep_hour | \n", + "Dep_min | \n", + "Arrival_hour | \n", + "Arrival_min | \n", + "Duration_hours | \n", + "Duration_mins | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "IndiGo | \n", + "Banglore | \n", + "Delhi | \n", + "BLR → DEL | \n", + "non-stop | \n", + "No info | \n", + "3897 | \n", + "24 | \n", + "3 | \n", + "22 | \n", + "20 | \n", + "1 | \n", + "10 | \n", + "2 | \n", + "50 | \n", + "
1 | \n", + "Air India | \n", + "Kolkata | \n", + "Banglore | \n", + "CCU → IXR → BBI → BLR | \n", + "2 stops | \n", + "No info | \n", + "7662 | \n", + "1 | \n", + "5 | \n", + "5 | \n", + "50 | \n", + "13 | \n", + "15 | \n", + "7 | \n", + "25 | \n", + "
2 | \n", + "Jet Airways | \n", + "Delhi | \n", + "Cochin | \n", + "DEL → LKO → BOM → COK | \n", + "2 stops | \n", + "No info | \n", + "13882 | \n", + "9 | \n", + "6 | \n", + "9 | \n", + "25 | \n", + "4 | \n", + "25 | \n", + "19 | \n", + "0 | \n", + "
3 | \n", + "IndiGo | \n", + "Kolkata | \n", + "Banglore | \n", + "CCU → NAG → BLR | \n", + "1 stop | \n", + "No info | \n", + "6218 | \n", + "12 | \n", + "5 | \n", + "18 | \n", + "5 | \n", + "23 | \n", + "30 | \n", + "5 | \n", + "25 | \n", + "
4 | \n", + "IndiGo | \n", + "Banglore | \n", + "Delhi | \n", + "BLR → NAG → DEL | \n", + "1 stop | \n", + "No info | \n", + "13302 | \n", + "1 | \n", + "3 | \n", + "16 | \n", + "50 | \n", + "21 | \n", + "35 | \n", + "4 | \n", + "45 | \n", + "
\n", + " | Source_Chennai | \n", + "Source_Delhi | \n", + "Source_Kolkata | \n", + "Source_Mumbai | \n", + "
---|---|---|---|---|
0 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
1 | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "
2 | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "
3 | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "
4 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
\n", + " | Destination_Cochin | \n", + "Destination_Delhi | \n", + "Destination_Hyderabad | \n", + "Destination_Kolkata | \n", + "
---|---|---|---|---|
0 | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "
1 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
2 | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "
3 | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
4 | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "
\n", + " | Airline | \n", + "Source | \n", + "Destination | \n", + "Total_Stops | \n", + "Price | \n", + "Journey_day | \n", + "Journey_month | \n", + "Dep_hour | \n", + "Dep_min | \n", + "Arrival_hour | \n", + "Arrival_min | \n", + "Duration_hours | \n", + "Duration_mins | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "IndiGo | \n", + "Banglore | \n", + "Delhi | \n", + "0 | \n", + "3897 | \n", + "24 | \n", + "3 | \n", + "22 | \n", + "20 | \n", + "1 | \n", + "10 | \n", + "2 | \n", + "50 | \n", + "
1 | \n", + "Air India | \n", + "Kolkata | \n", + "Banglore | \n", + "2 | \n", + "7662 | \n", + "1 | \n", + "5 | \n", + "5 | \n", + "50 | \n", + "13 | \n", + "15 | \n", + "7 | \n", + "25 | \n", + "
2 | \n", + "Jet Airways | \n", + "Delhi | \n", + "Cochin | \n", + "2 | \n", + "13882 | \n", + "9 | \n", + "6 | \n", + "9 | \n", + "25 | \n", + "4 | \n", + "25 | \n", + "19 | \n", + "0 | \n", + "
3 | \n", + "IndiGo | \n", + "Kolkata | \n", + "Banglore | \n", + "1 | \n", + "6218 | \n", + "12 | \n", + "5 | \n", + "18 | \n", + "5 | \n", + "23 | \n", + "30 | \n", + "5 | \n", + "25 | \n", + "
4 | \n", + "IndiGo | \n", + "Banglore | \n", + "Delhi | \n", + "1 | \n", + "13302 | \n", + "1 | \n", + "3 | \n", + "16 | \n", + "50 | \n", + "21 | \n", + "35 | \n", + "4 | \n", + "45 | \n", + "
\n", + " | Total_Stops | \n", + "Price | \n", + "Journey_day | \n", + "Journey_month | \n", + "Dep_hour | \n", + "Dep_min | \n", + "Arrival_hour | \n", + "Arrival_min | \n", + "Duration_hours | \n", + "Duration_mins | \n", + "... | \n", + "Airline_Vistara | \n", + "Airline_Vistara Premium economy | \n", + "Source_Chennai | \n", + "Source_Delhi | \n", + "Source_Kolkata | \n", + "Source_Mumbai | \n", + "Destination_Cochin | \n", + "Destination_Delhi | \n", + "Destination_Hyderabad | \n", + "Destination_Kolkata | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "3897 | \n", + "24 | \n", + "3 | \n", + "22 | \n", + "20 | \n", + "1 | \n", + "10 | \n", + "2 | \n", + "50 | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "
1 | \n", + "2 | \n", + "7662 | \n", + "1 | \n", + "5 | \n", + "5 | \n", + "50 | \n", + "13 | \n", + "15 | \n", + "7 | \n", + "25 | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
2 | \n", + "2 | \n", + "13882 | \n", + "9 | \n", + "6 | \n", + "9 | \n", + "25 | \n", + "4 | \n", + "25 | \n", + "19 | \n", + "0 | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "
3 | \n", + "1 | \n", + "6218 | \n", + "12 | \n", + "5 | \n", + "18 | \n", + "5 | \n", + "23 | \n", + "30 | \n", + "5 | \n", + "25 | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
4 | \n", + "1 | \n", + "13302 | \n", + "1 | \n", + "3 | \n", + "16 | \n", + "50 | \n", + "21 | \n", + "35 | \n", + "4 | \n", + "45 | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "
5 rows × 29 columns
\n", + "\n", + " | Total_Stops | \n", + "Journey_day | \n", + "Journey_month | \n", + "Dep_hour | \n", + "Dep_min | \n", + "Arrival_hour | \n", + "Arrival_min | \n", + "Duration_hours | \n", + "Duration_mins | \n", + "Airline_Air India | \n", + "... | \n", + "Airline_Vistara | \n", + "Airline_Vistara Premium economy | \n", + "Source_Chennai | \n", + "Source_Delhi | \n", + "Source_Kolkata | \n", + "Source_Mumbai | \n", + "Destination_Cochin | \n", + "Destination_Delhi | \n", + "Destination_Hyderabad | \n", + "Destination_Kolkata | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "24 | \n", + "3 | \n", + "22 | \n", + "20 | \n", + "1 | \n", + "10 | \n", + "2 | \n", + "50 | \n", + "False | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "
1 | \n", + "2 | \n", + "1 | \n", + "5 | \n", + "5 | \n", + "50 | \n", + "13 | \n", + "15 | \n", + "7 | \n", + "25 | \n", + "True | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
2 | \n", + "2 | \n", + "9 | \n", + "6 | \n", + "9 | \n", + "25 | \n", + "4 | \n", + "25 | \n", + "19 | \n", + "0 | \n", + "False | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "
3 | \n", + "1 | \n", + "12 | \n", + "5 | \n", + "18 | \n", + "5 | \n", + "23 | \n", + "30 | \n", + "5 | \n", + "25 | \n", + "False | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "
4 | \n", + "1 | \n", + "1 | \n", + "3 | \n", + "16 | \n", + "50 | \n", + "21 | \n", + "35 | \n", + "4 | \n", + "45 | \n", + "False | \n", + "... | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "False | \n", + "True | \n", + "False | \n", + "False | \n", + "
5 rows × 28 columns
\n", + "RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(), n_jobs=1,\n", + " param_distributions={'max_depth': [5, 10, 15, 20, 25, 30],\n", + " 'max_features': ['auto', 'sqrt'],\n", + " 'min_samples_leaf': [1, 2, 5, 10],\n", + " 'min_samples_split': [2, 5, 10, 15,\n", + " 100],\n", + " 'n_estimators': [100, 200, 300, 400,\n", + " 500, 600, 700, 800,\n", + " 900, 1000, 1100,\n", + " 1200]},\n", + " random_state=42, scoring='neg_mean_squared_error',\n", + " verbose=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(), n_jobs=1,\n", + " param_distributions={'max_depth': [5, 10, 15, 20, 25, 30],\n", + " 'max_features': ['auto', 'sqrt'],\n", + " 'min_samples_leaf': [1, 2, 5, 10],\n", + " 'min_samples_split': [2, 5, 10, 15,\n", + " 100],\n", + " 'n_estimators': [100, 200, 300, 400,\n", + " 500, 600, 700, 800,\n", + " 900, 1000, 1100,\n", + " 1200]},\n", + " random_state=42, scoring='neg_mean_squared_error',\n", + " verbose=1)
RandomForestRegressor()
RandomForestRegressor()