Spaces:
Runtime error
Runtime error
import gradio as gr | |
import pandas as pd | |
import hopsworks | |
from datetime import datetime | |
import requests | |
import json | |
import math | |
import os | |
# Locations, inside the Hopsworks project file system, of the CSV files
# holding the timetable predictions for today and for tomorrow
hopsworks_today_path = (
    "Resources/today_timetable_prediction/"
    "today_timetable_prediction.csv"
)
hopsworks_tomorrow_path = (
    "Resources/tomorrow_timetable_prediction/"
    "tomorrow_timetable_prediction.csv"
)
def one_day_forward(year, month, day):
    '''
    Return "year", "month" and "day" numbers of the day after the inserted day.

    The calendar arithmetic is delegated to the standard library, so every
    month length (including the 31-day months) and the complete Gregorian
    leap-year rule (century years are leap years only when divisible by 400)
    are handled correctly.

    Raises ValueError when the inserted date is not a valid calendar date and
    OverflowError when the following day is outside the range supported by
    the standard library.
    '''
    # Function-scope import: the file only imports the `datetime` class at top level
    from datetime import date, timedelta

    next_day = date(year, month, day) + timedelta(days=1)
    return next_day.year, next_day.month, next_day.day
def get_today_date():
    '''
    Return today's year, month and day numbers

    The date is read from the "datetime" field returned by WorldTimeAPI for
    the Europe/Stockholm timezone.

    Raises requests.exceptions.RequestException when the service cannot be
    reached in time or answers with an HTTP error status.
    '''
    # Get today's date through TimeAPI
    time_url = "https://worldtimeapi.org/api/timezone/Europe/Stockholm"
    # Without a timeout a stalled connection would block the app start forever
    time_response = requests.get(time_url, timeout=10)
    time_response.raise_for_status()
    time_responseJson = time_response.json()

    # Only the leading "YYYY-MM-DD" part is needed, so the parsing does not
    # depend on how the time and the timezone offset are formatted
    datetime_str = time_responseJson["datetime"]
    today = datetime.strptime(datetime_str[:10], "%Y-%m-%d")

    return today.year, today.month, today.day
def get_year_month_label(year, month, mode):
    '''
    Return the year_month in the format wanted by the different APIs file structure, by passing
    the year, month and the mode. The month is padded with 0 to two digits. The "mode" can be
    specified between "hyphen", "underscore" and "empty" and it determines which divider you
    will find in the year_month_label between the different input passed (e.g. 2024-01 or 202401)

    Raises ValueError when "mode" is not one of the supported values, instead of
    silently returning an empty label.
    '''
    dividers = {'hyphen': '-', 'underscore': '_', 'empty': ''}
    if mode not in dividers:
        raise ValueError(
            "mode must be 'hyphen', 'underscore' or 'empty', got " + repr(mode)
        )

    return f"{year}{dividers[mode]}{month:02d}"
def get_date_label(year, month, day, mode):
    '''
    Build the full date label (year, month and day) used by the different APIs file structure.
    The year_month part comes from get_year_month_label; the day is prefixed with a 0 when it
    is lower than 10. The "mode" chooses the divider placed before the day: "hyphen",
    "underscore" or "empty" (e.g. 2024-01-05 or 20240105). Any other mode gives back
    an empty string.
    '''
    prefix = get_year_month_label(year, month, mode)
    padded_day = '0' + str(day) if day < 10 else str(day)

    # Walk the supported modes and stop at the first one matching the request
    for mode_name, divider in (('hyphen', '-'), ('underscore', '_'), ('empty', '')):
        if mode == mode_name:
            return prefix + divider + padded_day

    return ''
def _fetch_arrival_airports(date_label, headers):
    '''
    Return a dataframe with the columns "ArrivalAirportEnglish" and "ArrivalAirportIata",
    one row per flight departing from ARN on the day given by date_label (YYYY-MM-DD)
    '''
    swedavia_url = 'https://api.swedavia.se/flightinfo/v2/ARN/departures/' + date_label
    # A timeout avoids hanging forever; an HTTP error (e.g. a wrong key) is raised explicitly
    response = requests.get(swedavia_url, headers = headers, timeout = 30)
    response.raise_for_status()

    arrival_airports_info = [{
        'ArrivalAirportEnglish': flight.get('arrivalAirportEnglish'),
        'ArrivalAirportIata': (flight.get('flightLegIdentifier') or {}).get('arrivalAirportIata')}
        for flight in response.json().get('flights', [])]

    # Naming the columns keeps them available even when the day has no flights
    return pd.DataFrame(arrival_airports_info, columns = ['ArrivalAirportEnglish', 'ArrivalAirportIata'])


def get_name_of_cities():
    '''
    Return a dataframe with the English name ("ArrivalAirportEnglish") and the IATA code
    ("ArrivalAirportIata") of every arrival airport found among the flights departing from
    ARN today and tomorrow, without repeated rows and sorted by name.

    The Swedavia subscription key is read from the SWEDAVIA_API_KEY environment variable.
    Raises KeyError when that variable is missing and requests.exceptions.RequestException
    when a request fails.
    '''
    yyyy, mm, dd = get_today_date()
    yyyy1, mm1, dd1 = one_day_forward(yyyy, mm, dd)

    # Get the subscription key for Swedavia API and set it in the header
    subscription_key = os.environ['SWEDAVIA_API_KEY']
    headers = {
        "Ocp-Apim-Subscription-Key": subscription_key,
        "Accept": "application/json",
        "Content-Type": 'application/json',
    }

    today_df = _fetch_arrival_airports(get_date_label(yyyy, mm, dd, 'hyphen'), headers)
    tomorrow_df = _fetch_arrival_airports(get_date_label(yyyy1, mm1, dd1, 'hyphen'), headers)

    # De-duplicate whole (name, code) rows: treating the two columns separately
    # could pair a name with the code of a different airport
    total_df = pd.concat([tomorrow_df, today_df]).drop_duplicates().reset_index(drop=True)
    total_df.sort_values('ArrivalAirportEnglish', inplace=True)

    return total_df
def create_single_dataframe_from(dataframe):
    '''
    Attach the English airport name to every row of the given dataframe.
    The airport list from get_name_of_cities is inner-joined with the dataframe,
    matching the lower-cased IATA code against the dataframe's "airport" column;
    the helper "ArrivalAirportIata" column is removed from the result.
    '''
    cities = get_name_of_cities()
    cities['ArrivalAirportIata'] = cities['ArrivalAirportIata'].str.lower()

    joined = cities.merge(
        dataframe,
        how='inner',
        left_on='ArrivalAirportIata',
        right_on='airport',
    )

    # The code is already available in the 'airport' column
    return joined.drop(columns=['ArrivalAirportIata'])
def get_dataframe(online_dataframe_path):
    '''
    Download the CSV stored at online_dataframe_path in the Hopsworks project, load it,
    remove the repeated rows and return it enriched by create_single_dataframe_from.
    The Hopsworks key is read from the HOPSWORKS_API_KEY environment variable.
    '''
    # Log in to Hopsworks and reach the dataset API of the project
    hopsworks_project = hopsworks.login(api_key_value = os.environ['HOPSWORKS_API_KEY'])
    datasets = hopsworks_project.get_dataset_api()

    # Fetch the file (replacing any previous local copy) and resolve its local path
    local_csv = os.path.abspath(datasets.download(online_dataframe_path, overwrite = True))

    predictions = pd.read_csv(local_csv).drop_duplicates()
    return create_single_dataframe_from(predictions)
def get_tomorrow_dataframe():
    '''
    Return the timetable prediction dataframe for tomorrow
    (previously this read the file of today by mistake)
    '''
    return get_dataframe(hopsworks_tomorrow_path)


def get_today_dataframe():
    '''
    Return the timetable prediction dataframe for today
    (previously this read the file of tomorrow by mistake)
    '''
    return get_dataframe(hopsworks_today_path)
def get_metrics():
    '''
    Read version 1 of the "model_performance" feature group from the Hopsworks feature store
    and return its date, mean absolute error and dataset size with readable column names,
    most recent date first.
    '''
    project = hopsworks.login(api_key_value = os.environ['HOPSWORKS_API_KEY'])
    feature_store = project.get_feature_store()
    performance_group = feature_store.get_feature_group(name = 'model_performance', version = 1)

    history = performance_group.read(dataframe_type = 'pandas').sort_values('timestamp')

    # 'dateset_size' is the column name as spelled in the feature group
    readable_names = {'dateset_size':'Dataset Size', 'mae':'Mean Absolute Error', 'timestamp':'Date'}
    metrics_table = history[['timestamp', 'mae', 'dateset_size']].rename(columns=readable_names)

    return metrics_table.sort_values(['Date'], ascending = False)
# Columns kept for the tables of the interface, in display order
selected_columns = ['destination', 'airport code', 'flight number', 'ontime', 'delayed']

# Names shown to the user in place of the raw column names
_display_names = {'airport':'airport code', 'ArrivalAirportEnglish':'destination', 'flight_number':'flight number'}

# Everything below is loaded once, when the module starts
cities_datafram = get_name_of_cities()

ciccio = get_today_dataframe().rename(columns=_display_names)
today_dataframe = ciccio[selected_columns]

pasticcio = get_tomorrow_dataframe().rename(columns=_display_names)
tomorrow_dataframe = pasticcio[selected_columns]

performance_metric = get_metrics()
def get_possible_destinations():
    '''
    Return the sorted list of the distinct destinations found in the
    today and tomorrow dataframes loaded at module level
    '''
    every_destination = pd.concat([today_dataframe['destination'], tomorrow_dataframe['destination']])
    return every_destination.drop_duplicates().sort_values().tolist()
def get_dataframe_of(day):
    '''
    Return the module-level dataframe of the requested day: "today" or "tomorrow",
    compared without looking at upper/lower case. Any other value gives back None.
    '''
    frames_by_day = {
        'today': today_dataframe,
        'tomorrow': tomorrow_dataframe,
    }
    return frames_by_day.get(day.lower())
def get_specific_flights(day, max_delay, departure_hour, ampm, weather, destinations, yes):
    '''
    Return the flights of the given day ("today" or "tomorrow") that are scheduled during
    departure_hour, go to one of the selected destinations and have a predicted delay,
    in minutes, lower than max_delay. The result has the columns "destination",
    "flight number", "ontime" and "delayed", with the two times formatted as HH:MM.

    Picking "Select all" among the destinations selects every known destination.
    The "ampm", "weather" and "yes" inputs are accepted only because the interface
    sends them: they do not influence the result.

    The dataframe shared at module level is only read, never modified.
    '''
    placeholders = {"That's a reason why I travel alone...", "I prefer not to say", 'Select all'}

    destinations = destinations or []
    if 'Select all' in destinations:
        destinations = get_possible_destinations()
    # Remove unwanted destinations
    destinations = [dest for dest in destinations if dest not in placeholders]

    all_flights = get_dataframe_of(day)

    # Select only flights during the same departure hour. The hour is kept in a
    # separate Series so that no helper column is written into the shared dataframe
    scheduled_hour = all_flights['ontime'].str.split(':').str[0].astype(int)
    flights = all_flights.loc[scheduled_hour == departure_hour]

    # Convert the HH:MM strings to datetime objects to compute the delay in minutes
    ontime = pd.to_datetime(flights['ontime'], format='%H:%M')
    delayed = pd.to_datetime(flights['delayed'], format='%H:%M')
    delay = (delayed - ontime).dt.total_seconds() / 60
    # NOTE(review): a delay crossing midnight comes out negative here and so always
    # passes the filter - confirm whether that case must be handled

    wanted = (delay < max_delay) & flights['destination'].isin(destinations)
    filtered_df = flights.loc[wanted, ['destination', 'flight number', 'ontime', 'delayed']].copy()

    # Give the times back in HH:MM format
    filtered_df['ontime'] = ontime[wanted].dt.strftime('%H:%M')
    filtered_df['delayed'] = delayed[wanted].dt.strftime('%H:%M')

    return filtered_df
def full_day_departure(day):
    '''
    Return every flight of the given day ("today" or "tomorrow") without the
    "airport code" column, ordered by "ontime" from the latest to the earliest
    '''
    timetable = get_dataframe_of(day)
    without_code = timetable.drop(columns=['airport code'])
    return without_code.sort_values(['ontime'], ascending = False)
def get_performance():
    '''
    Return the model performance dataframe loaded at module level
    '''
    # Read-only access to the module-level variable: no "global" statement needed
    return performance_metric
# Entries of the destination dropdown that are not real destinations
# (get_specific_flights removes them from the selection)
extra_destination_choices = ["That's a reason why I travel alone...", "I prefer not to say", "Select all"]

# Tab 1: flights of one day filtered by departure hour, delay and destination
specific_flights = gr.Interface(
    get_specific_flights,
    [
        # A default value is set so that the function never receives None as day
        gr.Radio(["today", "tomorrow"], type="value", value="today", label="Day", info="When do you have the plane?"),
        gr.Slider(0, 50, value=20, label="Possible Delay", info="How unfortunate do you wanna be?"),
        gr.Number(precision=0, minimum=0, maximum=23, label="Departure Time"),
        gr.Radio(["am", "pm"], type="index", info="It's the same, no worries!", label = "Am or Pm?"),
        gr.CheckboxGroup(["Yes, it's cloudy", "I am not in Stockholm"], label="Weather", info="Is it a typical Stockholm day?"),
        gr.Dropdown(get_possible_destinations() + extra_destination_choices,
                    type = "value", multiselect=True, label="Destination", value=["That's a reason why I travel alone..."],
                    info="Are you just curious or you are actually going somewhere? Where? With who?"),
        gr.Radio(["Yes", "Yes", "Yes"], type="index", label="Let's guess?", info="We know that you'll say yes!"),
    ],
    "dataframe",
)

# Tab 2: the complete timetable of one day
total_departure = gr.Interface(
    full_day_departure,
    [
        # A default value is set so that the function never receives None as day
        gr.Radio(["Today", "Tomorrow"], type="value", value="Today", label="Departure", info="When are you departing?"),
    ],
    "dataframe",
)

# Tab 3: the history of the model performance
metrics = gr.Interface(fn = get_performance, inputs=None, outputs='dataframe', allow_flagging="never")

# The tab names are an ordered list, given in the same order as the interfaces:
# a set has no defined order, so the tabs could end up with the wrong titles
interface = gr.TabbedInterface(
    [specific_flights, total_departure, metrics],
    ["Specific Flights", "Full Day Departure", "Model Performances"],
)
interface.launch()