flight_delay / app.py
SebastianoMeneghin's picture
Update app.py
96ce51a verified
import gradio as gr
import pandas as pd
import hopsworks
from datetime import datetime
import requests
import json
import math
import os
# Paths of the prediction CSV files that get_dataframe() downloads through the
# Hopsworks dataset API
hopsworks_today_path = "Resources/today_timetable_prediction/today_timetable_prediction.csv"
hopsworks_tomorrow_path = "Resources/tomorrow_timetable_prediction/tomorrow_timetable_prediction.csv"
def one_day_forward(year, month, day):
    '''
    Return the "year", "month" and "day" numbers of the day after the given date.

    The computation is delegated to the standard library calendar, so every
    month length and every leap year (including the century rule: 1900 and
    2100 are not leap years, 2000 is) is handled for all the dates supported
    by datetime.date.

    Raises ValueError if the given numbers do not form a valid calendar date
    and OverflowError if the following day is past the last supported date.
    '''
    # Imported locally because the file only imports the "datetime" class
    from datetime import date, timedelta

    next_day = date(year, month, day) + timedelta(days=1)
    return next_day.year, next_day.month, next_day.day
def get_today_date():
    '''
    Return today's year, month and day numbers for the Europe/Stockholm timezone

    The date is requested from the WorldTimeAPI service. A requests exception
    is raised if the service does not answer within the timeout or answers
    with an HTTP error status.
    '''
    # Get today's date through TimeAPI
    time_url = "https://worldtimeapi.org/api/timezone/Europe/Stockholm"
    # Without a timeout an unresponsive service would block the caller forever
    time_response = requests.get(time_url, timeout=10)
    # Fail with an explicit HTTP error instead of a confusing JSON/KeyError later on
    time_response.raise_for_status()

    # Extract datetime, e.g. "2024-01-05T12:34:56.123456+01:00"
    datetime_str = time_response.json()["datetime"]
    # Remove the trailing 6 characters (the timezone offset) so that the local wall-clock time is parsed
    datetime_object = datetime.fromisoformat(datetime_str[:-6])

    return datetime_object.year, datetime_object.month, datetime_object.day
def get_year_month_label(year, month, mode):
    '''
    Build the year_month label used by the different APIs file structure.

    The month is left-padded with a 0 unless it is 10, 11 or 12. The "mode"
    selects the divider placed between year and month: "hyphen" (2024-01),
    "underscore" (2024_01) or "empty" (202401). Any other mode gives back an
    empty string.
    '''
    dividers = {'hyphen': '-', 'underscore': '_', 'empty': ''}

    padded_month = str(month) if month in {10, 11, 12} else '0' + str(month)

    if mode not in dividers:
        return ''
    return str(year) + dividers[mode] + padded_month
def get_date_label(year, month, day, mode):
    '''
    Build the full date label used by the different APIs file structure.

    The year_month part comes from get_year_month_label(); the day is
    left-padded with a 0 when it is lower than 10. The "mode" selects the
    divider placed between the parts: "hyphen" (2024-01-05), "underscore"
    (2024_01_05) or "empty" (20240105). Any other mode gives back an empty
    string.
    '''
    dividers = {'hyphen': '-', 'underscore': '_', 'empty': ''}

    prefix = get_year_month_label(year, month, mode)
    padded_day = '0' + str(day) if day < 10 else str(day)

    if mode not in dividers:
        return ''
    return prefix + dividers[mode] + padded_day
def _fetch_departure_airports(date_label, headers):
    '''
    Return a dataframe with the columns "ArrivalAirportIata" and "ArrivalAirportEnglish",
    one row per flight returned by the Swedavia ARN departures endpoint for "date_label"
    '''
    swedavia_url = 'https://api.swedavia.se/flightinfo/v2/ARN/departures/' + date_label
    # Without a timeout an unresponsive API would block the caller forever
    response = requests.get(swedavia_url, headers = headers, timeout = 30)
    # Fail with an explicit HTTP error (e.g. wrong subscription key) instead of silently getting no flights
    response.raise_for_status()

    arrival_airports_info = [{
        'ArrivalAirportIata': flight.get('flightLegIdentifier', {}).get('arrivalAirportIata'),
        'ArrivalAirportEnglish': flight.get('arrivalAirportEnglish')}
        for flight in response.json().get('flights', [])]

    # Naming the columns explicitly keeps them in place even when no flight is returned
    return pd.DataFrame(arrival_airports_info, columns = ['ArrivalAirportIata', 'ArrivalAirportEnglish'])


def get_name_of_cities():
    '''
    Return a dataframe with the columns "ArrivalAirportEnglish" and "ArrivalAirportIata"
    holding every distinct (name, IATA code) pair found among today's and tomorrow's
    departures of the Swedavia API, sorted by "ArrivalAirportEnglish".

    The subscription key is read from the SWEDAVIA_API_KEY environment variable.
    '''
    yyyy, mm, dd = get_today_date()
    yyyy1, mm1, dd1 = one_day_forward(yyyy, mm, dd)

    # Get the subscription key for the Swedavia API and set it in the header
    subscription_key = os.environ['SWEDAVIA_API_KEY']
    headers = {
        "Ocp-Apim-Subscription-Key": subscription_key,
        "Accept": "application/json",
        "Content-Type": 'application/json',
    }

    df = _fetch_departure_airports(get_date_label(yyyy, mm, dd, 'hyphen'), headers)
    df1 = _fetch_departure_airports(get_date_label(yyyy1, mm1, dd1, 'hyphen'), headers)

    # De-duplicate whole (name, code) rows: de-duplicating each column on its own and
    # gluing them back together by position can pair a name with another airport's code
    total_df = pd.concat([df1, df])[['ArrivalAirportEnglish', 'ArrivalAirportIata']]
    total_df = total_df.drop_duplicates().reset_index(drop=True)
    total_df.sort_values('ArrivalAirportEnglish', inplace=True)
    return total_df
def create_single_dataframe_from(dataframe):
    '''
    Return "dataframe" enriched with the English destination names.

    The airports returned by get_name_of_cities() are inner-joined with
    "dataframe" by matching their lower-cased "ArrivalAirportIata" with the
    "airport" column; the join key coming from the airports side is then dropped.
    '''
    airports = get_name_of_cities()
    # Lower-case the IATA codes before comparing them with the "airport" column
    airports['ArrivalAirportIata'] = airports['ArrivalAirportIata'].str.lower()

    joined = airports.merge(dataframe, how='inner', left_on='ArrivalAirportIata', right_on='airport')

    # 'ArrivalAirportIata' duplicates the 'airport' column after the join
    return joined.drop(columns=['ArrivalAirportIata'])
def get_dataframe(online_dataframe_path):
    '''
    Download the CSV stored at "online_dataframe_path" in the Hopsworks File
    System and return it, without duplicated rows, after passing it through
    create_single_dataframe_from().

    The Hopsworks API key is read from the HOPSWORKS_API_KEY environment variable.
    '''
    # Connect to Hopsworks File System
    project = hopsworks.login(api_key_value = os.environ['HOPSWORKS_API_KEY'])

    # Download the online file (replacing any previous local copy) and resolve its local path
    downloaded_file = project.get_dataset_api().download(online_dataframe_path, overwrite = True)
    local_path = os.path.abspath(downloaded_file)

    # Read the local copy and discard the duplicated rows
    predictions = pd.read_csv(local_path).drop_duplicates()

    return create_single_dataframe_from(predictions)
def get_tomorrow_dataframe():
    '''
    Return the dataframe built by get_dataframe() from the tomorrow prediction file
    '''
    # NOTE(review): this getter used to load hopsworks_today_path (and the today getter
    # the tomorrow path); the paths are now matched with the function names - confirm
    # that the files on Hopsworks are named after the day they describe
    return get_dataframe(hopsworks_tomorrow_path)


def get_today_dataframe():
    '''
    Return the dataframe built by get_dataframe() from the today prediction file
    '''
    return get_dataframe(hopsworks_today_path)
def get_metrics():
    '''
    Return the content of the "model_performance" feature group (version 1) as a
    pandas dataframe with the columns "Date", "Mean Absolute Error" and
    "Dataset Size", sorted by "Date" in descending order.

    The Hopsworks API key is read from the HOPSWORKS_API_KEY environment variable.
    '''
    # Connect to Hopsworks and reach the feature group
    project = hopsworks.login(api_key_value = os.environ['HOPSWORKS_API_KEY'])
    feature_store = project.get_feature_store()
    performance_group = feature_store.get_feature_group(name = 'model_performance', version = 1)

    history = performance_group.read(dataframe_type = 'pandas')
    history = history.sort_values('timestamp')

    # Keep only the displayed columns and give them readable names
    readable_names = {'dateset_size':'Dataset Size', 'mae':'Mean Absolute Error', 'timestamp':'Date'}
    history = history[['timestamp', 'mae', 'dateset_size']].rename(columns=readable_names)

    return history.sort_values(['Date'], ascending = False)
# Columns kept in the dataframes served by the interface
selected_columns = ['destination', 'airport code', 'flight number', 'ontime', 'delayed']

# Everything below is loaded once, when the module is executed
cities_datafram = get_name_of_cities()

# Timetable obtained through get_today_dataframe(), with the columns renamed for display
ciccio = get_today_dataframe().rename(
    columns={'airport':'airport code', 'ArrivalAirportEnglish':'destination', 'flight_number':'flight number'}
)
today_dataframe = ciccio[selected_columns]

# Timetable obtained through get_tomorrow_dataframe(), with the columns renamed for display
pasticcio = get_tomorrow_dataframe().rename(
    columns={'airport':'airport code','ArrivalAirportEnglish':'destination', 'flight_number':'flight number'}
)
tomorrow_dataframe = pasticcio[selected_columns]

# Model performance history shown in the metrics tab
performance_metric = get_metrics()
def get_possible_destinations():
    '''
    Return the sorted list of the distinct destinations found in the
    module-level today_dataframe and tomorrow_dataframe
    '''
    both_days = pd.concat([today_dataframe['destination'], tomorrow_dataframe['destination']])
    distinct_sorted = both_days.drop_duplicates().reset_index(drop=True).sort_values()

    destinations_df = pd.DataFrame({'destination': distinct_sorted})
    return destinations_df['destination'].tolist()
def get_dataframe_of(day):
    '''
    Return the module-level today_dataframe when "day" is "today" and
    tomorrow_dataframe when it is "tomorrow" (the comparison ignores the case).
    Any other value gives back None.
    '''
    dataframe_by_day = {'today': today_dataframe, 'tomorrow': tomorrow_dataframe}
    return dataframe_by_day.get(day.lower())
def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinations, yes):
    '''
    Return the flights of "day" ("today" or "tomorrow") whose scheduled departure
    hour is "departure_hour", whose predicted delay in minutes is lower than
    "max_delay" and whose destination is among "destinations".

    "destinations" may contain 'Select all', which stands for every possible
    destination; the other placeholder entries of the dropdown are ignored.
    "ampm", "weather" and "yes" are accepted because the interface passes them,
    but they are not used.

    The returned dataframe has the columns 'destination', 'flight number',
    'ontime' and 'delayed', with both times formatted as HH:MM. The shared
    module-level dataframes are never modified.
    '''
    flights = get_dataframe_of(day)

    if 'Select all' in destinations:
        destinations = get_possible_destinations()

    # Remove unwanted destinations
    placeholders = ["That's a reason why I travel alone...", "I prefer not to say", 'Select all']
    destinations = [dest for dest in destinations if dest not in placeholders]

    # Select only the flights of the requested departure hour. The hour is kept in a
    # standalone series: storing it as a column would alter the shared dataframe
    # returned by get_dataframe_of() and leak the column in the other views
    scheduled_hour = flights['ontime'].str.split(':').str[0].astype(int)
    flights = flights[scheduled_hour == departure_hour]

    # Convert the time columns to datetime objects and get the delay in minutes
    ontime = pd.to_datetime(flights['ontime'], format='%H:%M')
    delayed = pd.to_datetime(flights['delayed'], format='%H:%M')
    delay_minutes = (delayed - ontime).dt.total_seconds() / 60

    # Get the flights with less delay than the given one and towards the selected destinations
    wanted = (delay_minutes < max_delay) & flights['destination'].isin(destinations)
    filtered_df = flights.loc[wanted, ['destination', 'flight number']].copy()

    # Give the times back in the HH:MM format
    filtered_df['ontime'] = ontime[wanted].dt.strftime('%H:%M')
    filtered_df['delayed'] = delayed[wanted].dt.strftime('%H:%M')
    return filtered_df
def full_day_departure(day):
    '''
    Return the whole timetable of "day" ("today" or "tomorrow") without the
    'airport code' column, sorted by 'ontime' in descending order
    '''
    timetable = get_dataframe_of(day)
    without_code = timetable.drop(columns=['airport code'])
    return without_code.sort_values(['ontime'], ascending = False)
def get_performance():
    '''
    Return the module-level performance_metric dataframe
    '''
    return performance_metric
'''
print(get_specific_flights('today', 100, 8, 'am', 'weather', ['Select all'], 'yes'))
print(full_day_departure('tomorrow'))
print(get_performance())
'''
# Tab that filters the flights through get_specific_flights(); the inputs are
# listed in the same order as the parameters of that function
specific_flights = gr.Interface(
    fn=get_specific_flights,
    inputs=[
        gr.Radio(
            ["today", "tomorrow"],
            type="value",
            label="Day",
            info="When do you have the plane?",
        ),
        gr.Slider(
            0,
            50,
            value=20,
            label="Possible Delay",
            info="How unfortunate do you wanna be?",
        ),
        gr.Number(precision=0, minimum=0, maximum=23, label="Departure Time"),
        gr.Radio(
            ["am", "pm"],
            type="index",
            info="It's the same, no worries!",
            label = "Am or Pm?",
        ),
        gr.CheckboxGroup(
            ["Yes, it's cloudy", "I am not in Stockholm"],
            label="Weather",
            info="Is it a typical Stockholm day?",
        ),
        # The real destinations come first, followed by the placeholder entries
        gr.Dropdown(
            get_possible_destinations() + ["That's a reason why I travel alone...", "I prefer not to say", "Select all"],
            type = "value",
            multiselect=True,
            label="Destination",
            value=["That's a reason why I travel alone..."],
            info="Are you just curious or you are actually going somewhere? Where? With who?",
        ),
        gr.Radio(
            ["Yes", "Yes", "Yes"],
            type="index",
            label="Let's guess?",
            info="We know that you'll say yes!",
        ),
    ],
    outputs="dataframe",
)
# Tab that shows the whole timetable of the chosen day through full_day_departure()
total_departure = gr.Interface(
    fn=full_day_departure,
    inputs=[
        gr.Radio(
            ["Today", "Tomorrow"],
            type="value",
            label="Departure",
            info="When are you departing?",
        ),
    ],
    outputs="dataframe",
)
# Input-less tab that shows the dataframe returned by get_performance()
metrics = gr.Interface(
    fn=get_performance,
    inputs=None,
    outputs='dataframe',
    allow_flagging="never",
)
#flights.launch()
# The tab names are a list (a set has no defined order, so the labels could end up on
# the wrong tabs) and follow the order of the interfaces they label
interface = gr.TabbedInterface(
    [specific_flights, total_departure, metrics],
    ["Specific Flights", "Full Day Departure", "Model Performances"],
)
interface.launch()