Spaces:

SebastianoMeneghin
/

flight_delay

Runtime error

App Files Files Community

flight_delay / app.py

SebastianoMeneghin

Update app.py

96ce51a verified about 1 year ago

raw

history blame contribute delete

12.1 kB

	import gradio as gr
	import pandas as pd
	import hopsworks
	from datetime import datetime
	import requests
	import json
	import math
	import os

	# Paths of the prediction CSV files passed to the Hopsworks dataset API download
	# call in get_dataframe (one file for today's timetable, one for tomorrow's).
	hopsworks_today_path = "Resources/today_timetable_prediction/today_timetable_prediction.csv"
	hopsworks_tomorrow_path = "Resources/tomorrow_timetable_prediction/tomorrow_timetable_prediction.csv"

	def one_day_forward(year, month, day):
	    '''
	    Return the "year", "month" and "day" numbers of the day after the given date.

	    Month lengths and leap years are taken from calendar.monthrange, so every
	    31-day month rolls over correctly (e.g. 31 January -> 1 February) and
	    century years follow the Gregorian rule (1900 and 2100 are not leap years,
	    2000 is).

	    Raises calendar.IllegalMonthError (a ValueError) if "month" is not in 1..12.
	    '''
	    # Function-scope import keeps this block self-contained.
	    import calendar

	    days_in_month = calendar.monthrange(year, month)[1]

	    # Common case: still inside the current month.
	    if day < days_in_month:
	        return year, month, day + 1

	    # Last day of the month: move to the first day of the next month,
	    # wrapping into January of the following year after December.
	    if month == 12:
	        return year + 1, 1, 1
	    return year, month + 1, 1


	def get_today_date():
	    '''
	    Return today's year, month and day numbers.

	    The current date is read from the WorldTimeAPI endpoint for the
	    Europe/Stockholm timezone.

	    Raises requests.RequestException on network failure, timeout or an HTTP
	    error status, and KeyError if the response has no "datetime" field.
	    '''
	    time_url = "https://worldtimeapi.org/api/timezone/Europe/Stockholm"

	    # Without a timeout a stalled connection would block forever; the status
	    # check turns an HTTP error page into an explicit exception instead of a
	    # confusing JSON/Key error further down.
	    time_response = requests.get(time_url, timeout=10)
	    time_response.raise_for_status()

	    datetime_str = time_response.json()["datetime"]

	    # Drop the last six characters (the timezone offset) before parsing, so the
	    # parsed value is the wall-clock time reported by the API.
	    datetime_object = datetime.fromisoformat(datetime_str[:-6])

	    return datetime_object.year, datetime_object.month, datetime_object.day


	def get_year_month_label(year, month, mode):
	    '''
	    Build the "year + month" label used by the different APIs and file layouts.

	    The month is zero-padded unless it is 10, 11 or 12. "mode" selects the
	    divider placed between year and month: "hyphen" (2024-01), "underscore"
	    (2024_01) or "empty" (202401). Any other mode yields an empty string.
	    '''
	    year_text = str(year)
	    month_text = str(month) if month in {10, 11, 12} else '0' + str(month)

	    # Walk the supported modes and join with the matching divider.
	    for known_mode, divider in (('hyphen', '-'), ('underscore', '_'), ('empty', '')):
	        if mode == known_mode:
	            return year_text + divider + month_text

	    return ''


	def get_date_label(year, month, day, mode):
	    '''
	    Build the full date label used by the different APIs and file layouts.

	    The year/month part comes from get_year_month_label; the day is zero-padded
	    when it is below 10. "mode" selects the divider: "hyphen" (2024-01-05),
	    "underscore" (2024_01_05) or "empty" (20240105). Any other mode yields an
	    empty string.
	    '''
	    prefix = get_year_month_label(year, month, mode)
	    day_text = '0' + str(day) if day < 10 else str(day)

	    # Walk the supported modes and join with the matching divider.
	    for known_mode, divider in (('hyphen', '-'), ('underscore', '_'), ('empty', '')):
	        if mode == known_mode:
	            return prefix + divider + day_text

	    return ''


	def _fetch_arrival_airports(date_label, headers):
	    '''
	    Query the Swedavia departures endpoint for ARN on "date_label" and return a
	    dataframe with one row per flight and the columns 'ArrivalAirportEnglish'
	    and 'ArrivalAirportIata'.
	    '''
	    swedavia_url = 'https://api.swedavia.se/flightinfo/v2/ARN/departures/' + date_label

	    # Fail fast and explicitly on a stalled connection or an HTTP error status.
	    response = requests.get(swedavia_url, headers=headers, timeout=30)
	    response.raise_for_status()

	    flights = response.json().get('flights') or []
	    rows = [{
	        'ArrivalAirportEnglish': flight.get('arrivalAirportEnglish'),
	        'ArrivalAirportIata': (flight.get('flightLegIdentifier') or {}).get('arrivalAirportIata')}
	        for flight in flights]

	    # Explicit columns keep the frame well-formed even when there are no flights.
	    return pd.DataFrame(rows, columns=['ArrivalAirportEnglish', 'ArrivalAirportIata'])


	def get_name_of_cities():
	    '''
	    Return a dataframe mapping destination names to IATA codes for the flights
	    departing from ARN today and tomorrow, according to the Swedavia API.

	    The result has the columns 'ArrivalAirportEnglish' and 'ArrivalAirportIata',
	    holds one row per IATA code and is sorted by 'ArrivalAirportEnglish'.
	    The subscription key is read from the SWEDAVIA_API_KEY environment variable.
	    '''
	    yyyy, mm, dd = get_today_date()
	    yyyy1, mm1, dd1 = one_day_forward(yyyy, mm, dd)

	    headers = {
	        "Ocp-Apim-Subscription-Key": os.environ['SWEDAVIA_API_KEY'],
	        "Accept": "application/json",
	        "Content-Type": 'application/json',
	    }

	    today_airports = _fetch_arrival_airports(get_date_label(yyyy, mm, dd, 'hyphen'), headers)
	    tomorrow_airports = _fetch_arrival_airports(get_date_label(yyyy1, mm1, dd1, 'hyphen'), headers)

	    # De-duplicate whole rows keyed on the IATA code. De-duplicating the name
	    # and code columns independently and re-zipping them by position pairs
	    # names with the wrong codes as soon as the mapping is not one-to-one.
	    total_df = pd.concat([tomorrow_airports, today_airports], ignore_index=True)
	    total_df = total_df.drop_duplicates(subset='ArrivalAirportIata').reset_index(drop=True)
	    total_df.sort_values('ArrivalAirportEnglish', inplace=True)

	    return total_df


	def create_single_dataframe_from(dataframe):
	    '''
	    Attach destination names to "dataframe".

	    The airports returned by get_name_of_cities are inner-joined to
	    "dataframe" by matching their lower-cased 'ArrivalAirportIata' against the
	    'airport' column; the join key coming from the airports side is then dropped.
	    '''
	    airports = get_name_of_cities()
	    airports['ArrivalAirportIata'] = airports['ArrivalAirportIata'].str.lower()

	    joined = pd.merge(
	        airports,
	        dataframe,
	        how='inner',
	        left_on='ArrivalAirportIata',
	        right_on='airport',
	    )

	    # 'airport' already carries the same code, so the left-hand key is redundant.
	    return joined.drop('ArrivalAirportIata', axis=1)

	def get_dataframe(online_dataframe_path):
	    '''
	    Download the CSV stored at "online_dataframe_path" in the Hopsworks file
	    system, remove duplicated rows and return it enriched through
	    create_single_dataframe_from.

	    The Hopsworks API key is read from the HOPSWORKS_API_KEY environment variable.
	    '''
	    # Log in and obtain the dataset API of the project
	    hopsworks_project = hopsworks.login(api_key_value = os.environ['HOPSWORKS_API_KEY'])
	    datasets = hopsworks_project.get_dataset_api()

	    # Fetch the remote file (replacing any previous local copy) and resolve its path
	    downloaded_file = datasets.download(online_dataframe_path, overwrite = True)
	    local_csv_path = os.path.abspath(downloaded_file)

	    # Load the CSV without duplicated rows and add the destination names
	    predictions = pd.read_csv(local_csv_path).drop_duplicates()
	    return create_single_dataframe_from(predictions)

	def get_tomorrow_dataframe():
	    '''
	    Return the prediction dataframe read from the "tomorrow" Hopsworks file.
	    '''
	    # Previously this read hopsworks_today_path: the two loaders were swapped.
	    return get_dataframe(hopsworks_tomorrow_path)

	def get_today_dataframe():
	    '''
	    Return the prediction dataframe read from the "today" Hopsworks file.
	    '''
	    # Previously this read hopsworks_tomorrow_path: the two loaders were swapped.
	    return get_dataframe(hopsworks_today_path)

	def get_metrics():
	    '''
	    Read the 'model_performance' feature group (version 1) from the Hopsworks
	    feature store and return its 'timestamp', 'mae' and 'dateset_size' columns,
	    renamed to 'Date', 'Mean Absolute Error' and 'Dataset Size', with the rows
	    sorted by 'Date' in descending order.
	    '''
	    # Connect to Hopsworks and walk down to the feature group
	    hopsworks_project = hopsworks.login(api_key_value = os.environ['HOPSWORKS_API_KEY'])
	    feature_store = hopsworks_project.get_feature_store()
	    performance_group = feature_store.get_feature_group(name = 'model_performance', version = 1)

	    # Load the group as a pandas dataframe, ordered by timestamp
	    history = performance_group.read(dataframe_type = 'pandas')
	    history = history.sort_values('timestamp')

	    # Keep and relabel only the columns that are shown
	    display_names = {'dateset_size':'Dataset Size', 'mae':'Mean Absolute Error', 'timestamp':'Date'}
	    history = history[['timestamp', 'mae', 'dateset_size']].rename(columns=display_names)

	    return history.sort_values(['Date'], ascending = False)

	# Data loaded once when the module is imported and shared by the callbacks below.
	selected_columns = ['destination', 'airport code', 'flight number', 'ontime', 'delayed']
	cities_datafram = get_name_of_cities()

	# Today's timetable, relabelled with the column names used in the tables
	ciccio = get_today_dataframe().rename(
	    columns={'airport':'airport code', 'ArrivalAirportEnglish':'destination', 'flight_number':'flight number'})
	today_dataframe = ciccio[selected_columns]

	# Tomorrow's timetable, relabelled the same way
	pasticcio = get_tomorrow_dataframe().rename(
	    columns={'airport':'airport code','ArrivalAirportEnglish':'destination', 'flight_number':'flight number'})
	tomorrow_dataframe = pasticcio[selected_columns]

	# Model performance history
	performance_metric = get_metrics()


	def get_possible_destinations():
	    '''
	    Return the sorted list of distinct 'destination' values found in the
	    module-level today_dataframe and tomorrow_dataframe.
	    '''
	    every_destination = pd.concat([today_dataframe['destination'], tomorrow_dataframe['destination']])
	    distinct_sorted = every_destination.drop_duplicates().reset_index(drop=True).sort_values()

	    destinations_frame = pd.DataFrame({'destination': distinct_sorted})
	    return destinations_frame['destination'].tolist()

	def get_dataframe_of(day):
	    '''
	    Return the module-level timetable for "day": today_dataframe for 'today',
	    tomorrow_dataframe for 'tomorrow' (case-insensitive). Any other value
	    yields None.
	    '''
	    today_df, tomorrow_df = today_dataframe, tomorrow_dataframe

	    requested = day.lower()
	    if requested == 'today':
	        return today_df
	    if requested == 'tomorrow':
	        return tomorrow_df
	    return None

	def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinations, yes):
	    '''
	    Return the flights of "day" whose scheduled departure falls in
	    "departure_hour", whose predicted delay is below "max_delay" minutes and
	    whose destination is among "destinations".

	    day            -- 'today' or 'tomorrow' (case-insensitive)
	    max_delay      -- exclusive upper bound on the delay, in minutes
	    departure_hour -- hour part (0-23) of the scheduled 'ontime' value
	    destinations   -- selected destination names; 'Select all' expands to every
	                      known destination, the placeholder choices are ignored
	    ampm, weather, yes -- accepted for the interface but not used for filtering

	    The result has the columns 'destination', 'flight number', 'ontime' and
	    'delayed', with both times formatted as HH:MM. An empty dataframe with
	    those columns is returned when no valid day is given.
	    '''
	    output_columns = ['destination', 'flight number', 'ontime', 'delayed']

	    # Nothing selected in the UI arrives as None; answer with an empty table
	    # instead of failing.
	    timetable = get_dataframe_of(day) if isinstance(day, str) else None
	    if timetable is None:
	        return pd.DataFrame(columns=output_columns)

	    destinations = list(destinations or [])
	    if 'Select all' in destinations:
	        destinations = get_possible_destinations()

	    # Remove unwanted destinations
	    placeholders = ["That's a reason why I travel alone...", "I prefer not to say", 'Select all']
	    destinations = [dest for dest in destinations if dest not in placeholders]

	    # Select only flights in the requested departure hour. The hour is kept in
	    # a separate Series so the shared module-level dataframe is never modified.
	    scheduled_hour = timetable['ontime'].str.split(':').str[0].astype(int)
	    df = timetable[scheduled_hour == departure_hour]

	    # Parse both time columns once to compute the delay in minutes
	    ontime = pd.to_datetime(df['ontime'], format='%H:%M')
	    delayed = pd.to_datetime(df['delayed'], format='%H:%M')
	    delay_minutes = (delayed - ontime).dt.total_seconds() / 60

	    # Keep flights with less delay than the given one, to the selected destinations
	    wanted = (delay_minutes < max_delay) & (df['destination'].isin(destinations))
	    filtered_df = df.loc[wanted, output_columns].copy()

	    # Present the times as HH:MM
	    filtered_df['ontime'] = ontime[wanted].dt.strftime('%H:%M')
	    filtered_df['delayed'] = delayed[wanted].dt.strftime('%H:%M')

	    return filtered_df

	def full_day_departure(day):
	    '''
	    Return the whole timetable of "day" without the 'airport code' column,
	    sorted by 'ontime' in descending order.
	    '''
	    timetable = get_dataframe_of(day)
	    without_airport_code = timetable.drop(columns=['airport code'])
	    return without_airport_code.sort_values(by=['ontime'], ascending = False)

	def get_performance():
	    '''
	    Return the module-level performance_metric dataframe.
	    '''
	    return performance_metric


	'''
	print(get_specific_flights('today', 100, 8, 'am', 'weather', ['Select all'], 'yes'))
	print(full_day_departure('tomorrow'))
	print(get_performance())
	'''

	# Form that filters the flights through get_specific_flights; the inputs are
	# listed in the same order as that function's parameters.
	specific_flights = gr.Interface(
	    fn=get_specific_flights,
	    inputs=[
	        gr.Radio(["today", "tomorrow"], type="value", label="Day", info="When do you have the plane?"),
	        gr.Slider(0, 50, value=20, label="Possible Delay", info="How unfortunate do you wanna be?"),
	        gr.Number(precision=0, minimum=0, maximum=23, label="Departure Time"),
	        gr.Radio(["am", "pm"], type="index", info="It's the same, no worries!", label = "Am or Pm?"),
	        gr.CheckboxGroup(["Yes, it's cloudy", "I am not in Stockholm"], label="Weather", info="Is it a typical Stockholm day?"),
	        gr.Dropdown(
	            get_possible_destinations() + ["That's a reason why I travel alone...", "I prefer not to say", "Select all"],
	            type = "value",
	            multiselect=True,
	            label="Destination",
	            value=["That's a reason why I travel alone..."],
	            info="Are you just curious or you are actually going somewhere? Where? With who?",
	        ),
	        gr.Radio(["Yes", "Yes", "Yes"], type="index", label="Let's guess?", info="We know that you'll say yes!"),
	    ],
	    outputs="dataframe",
	)

	# Form that shows the whole timetable of the chosen day
	total_departure = gr.Interface(
	    fn=full_day_departure,
	    inputs=[
	        gr.Radio(["Today", "Tomorrow"], type="value", label="Departure", info="When are you departing?"),
	    ],
	    outputs="dataframe",
	)

	# Input-less view of the model performance history
	metrics = gr.Interface(
	    fn=get_performance,
	    inputs=None,
	    outputs='dataframe',
	    allow_flagging="never",
	)

	# Tab names are matched to the interfaces by position, so they must be an
	# ordered list written in the same order as the interfaces. A set literal has
	# no defined order and would label the tabs arbitrarily.
	interface = gr.TabbedInterface(
	    [specific_flights, total_departure, metrics],
	    ["Specific Flights", "Full Day Departure", "Model Performances"],
	)
	interface.launch()