viruthik committed on
Commit
a4d99c6
·
1 Parent(s): 68d0edf

Upload 8 files

Browse files
Files changed (7) hide show
  1. README.md +5 -5
  2. app.py +12 -0
  3. best_model_dt.pkl +3 -0
  4. eda.py +90 -0
  5. flight_price_prediction.csv +0 -0
  6. prediction.py +54 -0
  7. requirements.txt +7 -0
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: Flight Price Analysis
3
- emoji:
4
- colorFrom: pink
5
- colorTo: blue
6
  sdk: streamlit
7
- sdk_version: 1.28.2
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
+ title: Flight Price Predictor
3
+ emoji: 🐠
4
+ colorFrom: green
5
+ colorTo: red
6
  sdk: streamlit
7
+ sdk_version: 1.27.2
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
# Sidebar page switcher: the selected label decides which page module renders.
navigation = st.sidebar.selectbox(
    'Select Page :',
    ('Exploratory Data Analysis', 'Predict Flight Price'),
)

# Only two pages are offered, so anything that is not the EDA page
# is routed to the predictor.
if navigation != 'Exploratory Data Analysis':
    prediction.runPredictor()
else:
    eda.runEDA()
11
+
12
+ #streamlit run app.py
best_model_dt.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ed1707153eb931b109bc4f68d04d866bdc4b0761788e1990112da472f7fe2a0
3
+ size 9769
eda.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import plotly.express as px
6
+ from PIL import Image
7
+
8
# Page-wide Streamlit settings. This is module-level code, so it runs once
# when this module is imported (or executed directly), not on each call
# to runEDA().
st.set_page_config(
    initial_sidebar_state='expanded',
    layout='wide',
    page_title='Flight Price Prediction - EDA',
)
13
+
14
def _render_figure(fig):
    """Send *fig* to the Streamlit page, then close it.

    Figures created through pyplot stay registered until they are closed.
    This page builds five figures on every run, so leaving them open would
    make them accumulate in the server process.
    """
    st.pyplot(fig)
    plt.close(fig)


def runEDA():
    """Render the exploratory-data-analysis page.

    Reads ``flight_price_prediction.csv`` from the current working
    directory, shows it as a table, and draws five charts (price histogram,
    flights per airline, flights per stop count, mean price per departure
    time, price vs. duration by class, price vs. days left), each followed
    by a short written observation. Returns nothing.
    """
    # Title
    st.title('Flight Price Prediction')

    # Sub header
    st.subheader('EDA for Flight Price Prediction')

    # Description
    st.write('Page Created by Gilang Wiradhyaksa (SBY-001)')

    st.markdown('---')

    # Written out with st.write on purpose: a bare string literal is only
    # rendered by Streamlit "magic", which does not run for imported modules,
    # so the text would be silently dropped when this page is opened via
    # app.py.
    st.write(
        'On this Page we will do a simple exploration,\n'
        'Database Used is Flight Price Prediction.\n'
        'Dataset Source is from Kaggle'
    )

    # Show dataframe
    st.title('Dataset')
    df = pd.read_csv('flight_price_prediction.csv')
    st.dataframe(df)

    # Reset the plotting style; the last charts below switch to a dark theme
    # and pyplot style changes are global.
    plt.style.use('default')
    st.write('## Histogram Price')
    fig = plt.figure(figsize=(15, 5))
    sns.histplot(df['price'], bins=20, kde=True).set(title='Price')
    _render_figure(fig)
    st.write('Based on the histogram plot, we can see that most of the flight having price less than 10k INR (Indian Rupee). But for few flight price is goes up to 120k INR, this probably the price of business class.')
    st.markdown('---')

    st.write('## Airlines Flights Count')
    df_airlines = (
        df.groupby(['airline'])
        .agg(counts=('flight', 'count'))
        .sort_values(by=['counts'], ascending=False)
    )
    fig, ax = plt.subplots(ncols=1, figsize=(15, 5))
    ax.pie(df_airlines['counts'], labels=df_airlines['counts'].index, autopct='%.2f%%')
    ax.set_title("Airlines Flight Count")
    _render_figure(fig)
    st.write('The flight is dominated by `Vistara` Airlines with more than 127k flights. Their biggest competitor is `Air India` with 80k flights.')
    st.markdown('---')

    st.write('## Flight Stops/Transit Count')
    df_stops = df.groupby(['stops']).agg(counts=('airline', 'count'))
    fig, ax = plt.subplots(ncols=1, figsize=(15, 5))
    ax.pie(df_stops['counts'], labels=df_stops['counts'].index, autopct='%.2f%%')
    ax.set_title("Flight Stops (Transit) Count")
    _render_figure(fig)
    st.write('Most of flight on this dataset is having one transit. Only 12% of the data that is a direct flight.')
    st.markdown('---')

    # Mean price per departure-time bucket, cheapest first.
    df_departure = (
        df.groupby(['departure_time'])
        .agg({'price': 'mean'})
        .sort_values(by=['price'], ascending=True)
    )
    fig = plt.figure(figsize=(7, 5))
    sns.barplot(data=df_departure, x=df_departure.index.to_list(), y='price', orient='v').set(title='Average price per Departure Time')
    _render_figure(fig)
    st.write('From the bar plot above we can see that Late Night ticket average price is cheapest compared to other time. Meanwhile night and morning flight have the most expensive average price.')
    st.markdown('---')

    # The remaining two line charts use the dark theme.
    plt.style.use('dark_background')
    fig = plt.figure(figsize=(20, 8))
    sns.lineplot(data=df, x='duration', y='price', hue='class', palette='hls')
    plt.title('Ticket Price Versus Flight Duration Based on Class', fontsize=20)
    plt.xlabel('Duration', fontsize=15)
    plt.ylabel('Price', fontsize=15)
    _render_figure(fig)
    st.write('Based on the line graph above, we can see that as the flight duration increase the ticket price is also increases in both the Economy and Business classes')
    st.markdown('---')

    fig = plt.figure(figsize=(20, 8))
    sns.lineplot(data=df, x='days_left', y='price', color='blue')
    plt.title('Days Left For Departure Versus Ticket Price', fontsize=20)
    plt.xlabel('Days Left for Departure', fontsize=15)
    plt.ylabel('Price', fontsize=15)
    _render_figure(fig)
    # The original caption here was a copy of the duration chart's caption
    # and described the wrong plot.
    # TODO(review): replace with the concrete trend once confirmed on the data.
    st.write('The line graph above shows how the ticket price changes with the number of days left before departure.')
    st.markdown('---')


if __name__ == '__main__':
    runEDA()
flight_price_prediction.csv ADDED
The diff for this file is too large to render. See raw diff
 
prediction.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ import json
6
+
7
# Form label shown to the user -> value placed in the ``stops`` field that is
# handed to the model.
_STOPS_BY_LABEL = {
    'Direct': 'zero',
    'One': 'one',
    'Two or more': 'two_or_more',
}


def runPredictor():
    """Render the flight-price prediction form and show the model's estimate.

    Loads the pickled model from ``best_model_dt.pkl`` in the current working
    directory, collects the flight parameters through a Streamlit form,
    echoes them back as a one-row table and, once the form is submitted,
    writes the predicted ticket price to the page. Returns nothing.
    """
    # MODEL
    # NOTE: pickle.load can execute arbitrary code from the file. That is
    # acceptable only because this file is shipped with the app; never point
    # this at a user-supplied path.
    with open('best_model_dt.pkl', 'rb') as model_file:
        best_model_dt = pickle.load(model_file)

    # Build the form
    with st.form(key='Form Parameters'):
        airline = st.selectbox('Airlines', ('SpiceJet', 'AirAsia', 'Vistara', 'GO_FIRST', 'Indigo', 'Air_India'), index=0)
        source_city = st.selectbox('Departure City', ('Delhi', 'Mumbai', 'Bangalore', 'Kolkata', 'Hyderabad', 'Chennai'), index=0)
        destination_city = st.selectbox('Destination City', ('Mumbai', 'Bangalore', 'Kolkata', 'Hyderabad', 'Chennai', 'Delhi'), index=0)
        st.markdown('---')
        departure_time = st.selectbox('Departure Time', ('Evening', 'Early_Morning', 'Morning', 'Afternoon', 'Night', 'Late_Night'), index=0)
        arrival_time = st.selectbox('Arrival Time', ('Night', 'Morning', 'Early_Morning', 'Afternoon', 'Evening', 'Late_Night'), index=0)
        st.markdown('---')
        stops_label = st.selectbox('Transit', ('Direct', 'One', 'Two or more'), index=0)
        flight_class = st.selectbox('Class', ('Economy', 'Business'), index=0)
        st.markdown('---')
        duration = st.number_input('Flight Duration', min_value=0, max_value=50, step=1)
        days_left = st.number_input('Days Until Flight', min_value=1, max_value=90, step=1)
        st.markdown('---')

        submitted = st.form_submit_button('Predict')

    # Translate the human-readable transit label; anything unexpected falls
    # back to 'two_or_more', as the original if/elif/else chain did.
    stops = _STOPS_BY_LABEL.get(stops_label, 'two_or_more')

    data_inf = {
        'airline': airline,
        'source_city': source_city,
        'destination_city': destination_city,
        'departure_time': departure_time,
        'arrival_time': arrival_time,
        'stops': stops,
        'class': flight_class,
        'duration': duration,
        'days_left': int(days_left),
    }

    df = pd.DataFrame([data_inf])
    st.dataframe(df)

    if not submitted:
        return

    # A route from a city to itself is not a flight; refuse it rather than
    # show a meaningless number.
    if source_city == destination_city:
        st.error('Departure City and Destination City must be different.')
        return

    prediction = best_model_dt.predict(df)
    # The model is given exactly one row, so take that row's value explicitly.
    # Calling int() on the whole array is deprecated since NumPy 1.25.
    predicted_price = int(np.ravel(prediction)[0])
    st.write(f'# Ticket Price Prediction : {predicted_price} INR')


if __name__ == '__main__':
    runPredictor()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ plotly
6
+ numpy
7
+ scikit-learn==1.3.1