Spaces:
Runtime error
Runtime error
Upload 8 files
Browse files- README.md +5 -5
- app.py +12 -0
- best_model_dt.pkl +3 -0
- eda.py +90 -0
- flight_price_prediction.csv +0 -0
- prediction.py +54 -0
- requirements.txt +7 -0
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
-
title: Flight Price
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
7 |
-
sdk_version: 1.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
1 |
---
|
2 |
+
title: Flight Price Predictor
|
3 |
+
emoji: 🐠
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: red
|
6 |
sdk: streamlit
|
7 |
+
sdk_version: 1.27.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
# Sidebar page picker: the first option is the EDA page, the second the predictor.
page_options = ('Exploratory Data Analysis', 'Predict Flight Price')
navigation = st.sidebar.selectbox('Select Page :', page_options)

# Anything that is not the EDA page is routed to the prediction form.
if navigation != 'Exploratory Data Analysis':
    prediction.runPredictor()
else:
    eda.runEDA()
|
11 |
+
|
12 |
+
#streamlit run app.py
|
best_model_dt.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ed1707153eb931b109bc4f68d04d866bdc4b0761788e1990112da472f7fe2a0
|
3 |
+
size 9769
|
eda.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
from PIL import Image
|
7 |
+
|
8 |
+
# Page-wide Streamlit settings. This is a top-level statement, so it runs as
# soon as this module is executed or imported.
st.set_page_config(
    layout='wide',
    initial_sidebar_state='expanded',
    page_title='Flight Price Prediction - EDA',
)
|
13 |
+
|
14 |
+
def _show_figure(fig):
    """Render *fig* on the Streamlit page, then release it."""
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed, and this
    # page is rebuilt on each Streamlit rerun, so close the figure explicitly.
    plt.close(fig)


def runEDA():
    """Render the exploratory-data-analysis page for the flight price dataset.

    Reads ``flight_price_prediction.csv`` from the working directory, shows the
    raw dataframe, and then draws a series of charts (price histogram, airline
    and stop-count pies, average price per departure time, price versus
    duration per class, price versus days left), each followed by a short
    written observation.
    """
    # Title
    st.title('Flight Price Prediction')

    # Sub header
    st.subheader('EDA for Flight Price Prediction')

    # Description
    st.write('Page Created by Gilang Wiradhyaksa (SBY-001)')

    st.markdown('---')

    # Written with an explicit st.write instead of a bare string literal:
    # Streamlit "magic" only applies to the main app file, so a bare string
    # here is not shown when this module is imported by another script.
    st.write(
        'On this Page we will do a simple exploration, '
        'Database Used is Flight Price Prediction. '
        'Dataset Source is from Kaggle'
    )

    # Show dataframe
    st.title('Dataset')
    df = pd.read_csv('flight_price_prediction.csv')
    st.dataframe(df)

    # Reset the style on every run; a later section switches to a dark theme.
    plt.style.use('default')
    st.write('## Histogram Price')
    fig = plt.figure(figsize=(15, 5))
    sns.histplot(df['price'], bins=20, kde=True).set(title='Price')
    _show_figure(fig)
    st.write('Based on the histogram plot, we can see that most of the flight having price less than 10k INR (Indian Rupee). But for few flight price is goes up to 120k INR, this probably the price of business class.')
    st.markdown('---')

    st.write('## Airlines Flights Count')
    df_airlines = (
        df.groupby(['airline'])
        .agg(counts=('flight', 'count'))
        .sort_values(by=['counts'], ascending=False)
    )
    fig, ax = plt.subplots(ncols=1, figsize=(15, 5))
    ax.pie(df_airlines['counts'], labels=df_airlines['counts'].index, autopct='%.2f%%')
    ax.set_title("Airlines Flight Count")
    _show_figure(fig)
    st.write('The flight is dominated by `Vistara` Airlines with more than 127k flights. Their biggest competitor is `Air India` with 80k flights.')
    st.markdown('---')

    st.write('## Flight Stops/Transit Count')
    df_stops = df.groupby(['stops']).agg(counts=('airline', 'count'))
    fig, ax = plt.subplots(ncols=1, figsize=(15, 5))
    ax.pie(df_stops['counts'], labels=df_stops['counts'].index, autopct='%.2f%%')
    ax.set_title("Flight Stops (Transit) Count")
    _show_figure(fig)
    st.write('Most of flight on this dataset is having one transit. Only 12% of the data that is a direct flight.')
    st.markdown('---')

    # Mean price per departure slot, cheapest first.
    df_departure = (
        df.groupby(['departure_time'])
        .agg({'price': 'mean'})
        .sort_values(by=['price'], ascending=True)
    )
    fig = plt.figure(figsize=(7, 5))
    sns.barplot(data=df_departure, x=df_departure.index.to_list(), y='price', orient='v').set(title='Average price per Departure Time')
    _show_figure(fig)
    st.write('From the bar plot above we can see that Late Night ticket average price is cheapest compared to other time. Meanwhile night and morning flight have the most expensive average price.')
    st.markdown('---')

    # The two line charts below are drawn with the dark theme.
    plt.style.use('dark_background')
    fig = plt.figure(figsize=(20, 8))
    sns.lineplot(data=df, x='duration', y='price', hue='class', palette='hls')
    plt.title('Ticket Price Versus Flight Duration Based on Class', fontsize=20)
    plt.xlabel('Duration', fontsize=15)
    plt.ylabel('Price', fontsize=15)
    _show_figure(fig)
    st.write('Based on the line graph above, we can see that as the flight duration increase the ticket price is also increases in both the Economy and Business classes')
    st.markdown('---')

    fig = plt.figure(figsize=(20, 8))
    sns.lineplot(data=df, x='days_left', y='price', color='blue')
    plt.title('Days Left For Departure Versus Ticket Price', fontsize=20)
    plt.xlabel('Days Left for Departure', fontsize=15)
    plt.ylabel('Price', fontsize=15)
    _show_figure(fig)
    # The original caption here was a copy of the duration/class caption and
    # did not describe this chart; keep the wording neutral about the trend.
    st.write('Based on the line graph above, we can see how the ticket price changes with the number of days left before departure.')
    st.markdown('---')


if __name__ == '__main__':
    runEDA()
|
flight_price_prediction.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
prediction.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
import json
|
6 |
+
|
7 |
+
def runPredictor():
    """Render the flight-price prediction form and display the model's estimate.

    Loads the pickled model from ``best_model_dt.pkl`` in the working
    directory, collects the flight parameters through a Streamlit form, shows
    them as a one-row dataframe and, once the form is submitted, writes the
    predicted ticket price (truncated to a whole number) in INR.
    """
    # NOTE: pickle.load can execute arbitrary code from the file. That is
    # acceptable for the model file shipped with the app, but this path must
    # never point at user-supplied data. The pickle also has to be loaded with
    # a scikit-learn version compatible with the one pinned in requirements.txt.
    with open('best_model_dt.pkl', 'rb') as file_1:
        best_model_dt = pickle.load(file_1)

    # Build the form
    with st.form(key='Form Parameters'):
        airline = st.selectbox('Airlines', ('SpiceJet', 'AirAsia', 'Vistara', 'GO_FIRST', 'Indigo', 'Air_India'), index=0)
        source_city = st.selectbox('Departure City', ('Delhi', 'Mumbai', 'Bangalore', 'Kolkata', 'Hyderabad', 'Chennai'), index=0)
        destination_city = st.selectbox('Destination City', ('Mumbai', 'Bangalore', 'Kolkata', 'Hyderabad', 'Chennai', 'Delhi'), index=0)
        st.markdown('---')
        departure_time = st.selectbox('Departure Time', ('Evening', 'Early_Morning', 'Morning', 'Afternoon', 'Night', 'Late_Night'), index=0)
        arrival_time = st.selectbox('Arrival Time', ('Night', 'Morning', 'Early_Morning', 'Afternoon', 'Evening', 'Late_Night'), index=0)
        st.markdown('---')
        stops = st.selectbox('Transit', ('Direct', 'One', 'Two or more'), index=0)
        flight_class = st.selectbox('Class', ('Economy', 'Business'), index=0)
        st.markdown('---')
        duration = st.number_input('Flight Duration', min_value=0, max_value=50, step=1)
        days_left = st.number_input('Days Until Flight', min_value=1, max_value=90, step=1)
        st.markdown('---')

        submitted = st.form_submit_button('Predict')

    # Translate the display label into the value passed to the model; any
    # label that is not 'Direct' or 'One' maps to 'two_or_more'.
    stops_codes = {'Direct': 'zero', 'One': 'one'}
    stops = stops_codes.get(stops, 'two_or_more')

    data_inf = {
        'airline': airline,
        'source_city': source_city,
        'destination_city': destination_city,
        'departure_time': departure_time,
        'arrival_time': arrival_time,
        'stops': stops,
        'class': flight_class,
        'duration': duration,
        'days_left': int(days_left),
    }

    df = pd.DataFrame([data_inf])
    st.dataframe(df)

    if submitted:
        y_predict_new_price = best_model_dt.predict(df)
        # Take the first prediction explicitly instead of calling int() on the
        # whole array: converting an ndim > 0 array to a scalar is deprecated
        # in NumPy 1.25+. Presumably predict() returns one value per input
        # row, and exactly one row is passed here.
        predicted_price = int(np.ravel(y_predict_new_price)[0])
        st.write(f'# Ticket Price Prediction : {predicted_price} INR')


if __name__ == '__main__':
    runPredictor()
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
plotly
|
6 |
+
numpy
|
7 |
+
scikit-learn==1.3.1
|