yassir-eta-shiny / utils /dashboard.py
gabcares's picture
Upload 28 files
8f44d6d verified
from .data import test_df, train_df, weather_df, time_sec_hms, full_time_sec_hms, get_country_geojson
from .config import BRANDCOLORS, BRANDTHEMES, ALL_MODELS
import time
import requests
import requests_cache
from pathlib import Path
from faicons import icon_svg
import pandas as pd
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly_calplot import calplot
from plotly.subplots import make_subplots
from shiny import reactive, render, Inputs, Outputs, Session
from shiny.express import input, output, module, ui
from shinywidgets import render_plotly, render_widget
# Show every column when a DataFrame is printed.
pd.set_option("display.max_columns", None)
# Print floats with 16 decimals so longitudes and latitudes keep their precision.
pd.set_option('display.float_format', '{:.16f}'.format)

# Yassir plotly theme.
# Start from a *copy* of "plotly_dark": looking the name up in pio.templates
# hands back the registered template object, so updating that object in place
# would also change what the stock "plotly_dark" template looks like.
yassir_theme = go.layout.Template(pio.templates["plotly_dark"])
yassir_theme.layout.update(
    plot_bgcolor=BRANDCOLORS["purple-dark"],
    paper_bgcolor=BRANDCOLORS["purple-dark"],
    colorway=[
        BRANDCOLORS["red"],
        BRANDCOLORS["purple-light"],
        BRANDCOLORS["purple-dark"],
    ],  # Brand colors
)
# Register the theme under its own name and make it the default for new figures.
pio.templates["yassir_theme"] = yassir_theme
pio.templates.default = yassir_theme
@module
def dashboard_page(input: Inputs, output: Outputs, session: Session):
    # Shiny Express module that builds the dashboard page: title, sidebar
    # filters, KPI value boxes, data tables and the plot accordions.
    # NOTE(review): documented with `#` comments instead of a docstring on
    # purpose. Bare string expressions in this module are rendered as UI
    # (e.g. the value-box titles), so a leading docstring would likely be
    # displayed on the page; confirm against `expressify(has_docstring=...)`.

    # Disable loading spinners, use elegant pulse
    ui.busy_indicators.use(spinners=False)
    # "\U0001F4C8" is the chart-increasing emoji. The previous literal held the
    # same character as mojibake (its UTF-8 bytes read with the wrong code
    # page); the escape keeps the title safe from re-encoding accidents.
    ui.panel_title(title=ui.h1(ui.strong("Dashboard \U0001F4C8")),
                   window_title="Yassir Dashboard")
    # Link to the external CSS file
    ui.tags.link(rel="stylesheet", href="styles.css")
    # Add main content
    ICONS = {
        "eta": icon_svg("clock"),
        "distance": icon_svg("route"),
        "ellipsis": icon_svg("ellipsis"),
    }
    # Define the target column
    target = 'eta'
    # Columns
    columns = train_df.columns.to_list()
    # Weather columns
    w_columns = weather_df.columns.to_list()
    # Numericals
    numericals = train_df.select_dtypes(include=['number']).columns.tolist()
    # Input range: full (min, max) span of each filterable column. These are
    # used as slider bounds, as initial slider values and as the reset target.
    # Series.min()/.max() are vectorised and skip NaN, unlike the builtins.
    eta_rng = (train_df["eta"].min(), train_df["eta"].max())
    trip_distance_rng = (
        train_df["trip_distance"].min(),
        train_df["trip_distance"].max(),
    )
with ui.layout_sidebar():
    with ui.sidebar(open="desktop"):
        # Inline CSS for the range inputs. The stray trailing comma that used
        # to follow this call has been dropped; it turned the statement into
        # a one-element tuple instead of a plain tag.
        ui.tags.style("""
.shiny-input-container input[type="range"] {
background: linear-gradient(to right, red, #4CAF50) no-repeat;
height: 8px;
}
""")
        # Range slider over trip distance; starts at the full data range.
        ui.input_slider(
            "trip_distance",
            "Trip Distance",
            min=trip_distance_rng[0],
            max=trip_distance_rng[1],
            value=trip_distance_rng,
            post=" m",
        )
        # Range slider over ETA; starts at the full data range.
        ui.input_slider(
            "eta",
            "ETA",
            min=eta_rng[0],
            max=eta_rng[1],
            value=eta_rng,
            post=" sec",
        )
        # Handled by the reset effect, which restores both sliders.
        ui.input_action_button("reset", "Reset filter")
# KPIs
# Four value boxes computed from the filtered train data. The bare string at
# the top of each box is its title. The median boxes show "N/A" when the
# current filter matches no rows instead of raising inside the renderer.
with ui.layout_column_wrap(fill=False):
    with ui.value_box(showcase=ICONS["eta"], theme=BRANDTHEMES['purple-dark']):
        "ETA (Total)"

        @reactive.calc
        def total_eta():
            # Sum of the eta column over the filtered rows (0.0 when empty).
            return float(train_data().eta.sum())

        @render.text
        def ts():
            # Total rounded to a whole number with thousands separators.
            return f"{round(total_eta()):,} s"

        @render.text
        def thms():
            # Same total, formatted by the project helper full_time_sec_hms.
            return full_time_sec_hms(total_eta())

    with ui.value_box(showcase=ICONS["eta"], theme=BRANDTHEMES['purple-dark']):
        "ETA (Median)"

        @reactive.calc
        def median_eta():
            # The median of an empty selection is NaN; expose that as None so
            # the renderers below have a single "no data" case to check.
            m_eta = train_data().eta.median()
            return None if pd.isna(m_eta) else m_eta

        @render.text
        def ms():
            m_eta = median_eta()
            if m_eta is None:
                # round(None) would raise TypeError.
                return "N/A"
            return f"{round(m_eta):,} s"

        @render.text
        def mhms():
            m_eta = median_eta()
            if m_eta is None:
                return "N/A"
            return time_sec_hms(m_eta)

    with ui.value_box(showcase=ICONS["distance"], theme=BRANDTHEMES['purple-light']):
        "TRIP DISTANCE (Total)"

        @reactive.calc
        def total_trip():
            # Sum of trip_distance over the filtered rows (0.0 when empty).
            return float(train_data().trip_distance.sum())

        @render.text
        def tdkm():
            # The raw value is shown with an " m" suffix elsewhere, so /1000 gives km.
            return f"{total_trip()/1000:,.1f} km"

        @render.text
        def tdm():
            return f"{total_trip():,.1f} m"

    with ui.value_box(showcase=ICONS["distance"], theme=BRANDTHEMES['purple-light']):
        "TRIP DISTANCE (Median)"

        @reactive.calc
        def median_trip():
            # None (rather than NaN) when the filter matches no rows.
            m_trip = train_data().trip_distance.median()
            return None if pd.isna(m_trip) else m_trip

        @render.text
        def mtdkm():
            m_trip = median_trip()
            if m_trip is None:
                # None / 1000 would raise TypeError.
                return "N/A"
            return f"{m_trip/1000:,.1f} km"

        @render.text
        def mtdm():
            m_trip = median_trip()
            if m_trip is None:
                return "N/A"
            return f"{m_trip:,.1f} m"
# Dataset view
# Three pill tabs, one data grid each. Only the train grid follows the sidebar
# filters (and enables per-column filters); test and weather are shown as-is.
with ui.layout_column_wrap(fill=False):
    with ui.navset_card_pill(id="data_tab"):
        with ui.nav_panel("Train data"):
            with ui.card(full_screen=True):
                ui.card_header("Train data")

                @render.data_frame
                def train_table():
                    filtered_rows = train_data()
                    return render.DataGrid(filtered_rows, filters=True)

        with ui.nav_panel("Test data"):
            with ui.card(full_screen=True):
                ui.card_header("Test data")

                @render.data_frame
                def test_table():
                    grid = render.DataGrid(test_df)
                    return grid

        with ui.nav_panel("Weather data"):
            with ui.card(full_screen=True):
                ui.card_header("Weather data")

                @render.data_frame
                def weather_table():
                    grid = render.DataGrid(weather_df)
                    return grid
# `value` doubles as the accordion panel's title and its id, and is passed to
# `open=` so this panel starts expanded.
value = "Explore the visualizations"
with ui.accordion(id="plot_acc", open=value):
    with ui.accordion_panel(title=ui.strong(value), value=value):
        with ui.navset_card_pill(id="eda_tab"):
            with ui.nav_panel("Train features"):
                with ui.card(full_screen=True):
                    ui.card_header(
                        "Correlation in the Train features")

                    @render_plotly
                    def train_features():
                        # Pairwise correlation of the numeric train columns,
                        # computed on the filtered rows.
                        corr = train_data()[numericals].corr()
                        # One heatmap cell per column pair, annotated with
                        # the coefficient to three decimals.
                        return go.Figure(
                            data=[
                                go.Heatmap(
                                    z=corr.values,
                                    x=corr.columns,
                                    y=corr.index,
                                    colorbar={"title": 'coefficient'},
                                    colorscale="Agsunset",
                                    texttemplate='%{z:.3f}',
                                )
                            ]
                        )
with ui.nav_panel("Trip distance vs Eta"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Relationship between Trip Distance and ETA")

        @render_plotly
        def scatterplot():
            # Scatter of the filtered trips with an OLS trendline overlaid in
            # the light brand purple.
            axis_labels = {
                'eta': 'Eta (seconds)',
                'trip_distance': 'Trip Distance (meters)',
            }
            fig = px.scatter(
                train_data(),
                x='trip_distance',
                y='eta',
                trendline='ols',
                trendline_color_override=BRANDCOLORS["purple-light"],
                labels=axis_labels,
            )
            return fig
with ui.nav_panel("Distribution"):
    with ui.card(full_screen=True):
        ui.card_header("Distribution per train column")
        # Popover (opened from the ellipsis icon) to pick the numeric column.
        with ui.popover(title="Choose a train column"):
            ICONS["ellipsis"]
            ui.input_radio_buttons(
                "train_col",
                "Select:",
                numericals,
                selected="eta",
                inline=True,
            )

        @render_plotly
        def distribution():
            column = input.train_col()
            data = train_data()
            # Left cell: violin with an inner box plot. Right cell: histogram.
            panels = (
                px.violin(data, x=column, box=True),
                px.histogram(data, x=column),
            )
            fig = make_subplots(rows=1, cols=2)
            # Copy each express figure's traces into its subplot cell.
            for col_idx, panel in enumerate(panels, start=1):
                for trace in panel.data:
                    fig.add_trace(trace, row=1, col=col_idx)
            fig.update_layout(
                title_text=f"Distribution in the {column} column",
                showlegend=True,
                legend_title_text=target,
            )
            return fig
with ui.nav_panel("Weather features vs Eta"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Relationship between weather features and Median Eta in seconds")
        # Popover to pick the weather feature; "date" is excluded from the choices.
        with ui.popover(title="Choose a feature column"):
            ICONS["ellipsis"]
            weather_features = [col for col in w_columns if col != "date"]
            ui.input_radio_buttons(
                "weather_col",
                "Select:",
                weather_features,
                selected="dewpoint_2m_temperature",
                inline=True,
            )

        @render_plotly
        def weather_eta():
            column = input.weather_col()
            # Daily median ETA joined with that day's weather readings.
            daily = daily_weather_eta_df()
            fig = px.scatter(x=daily[column], y=daily[target])
            fig.update_layout(
                title_text=f"Distribution in the {column} column",
                showlegend=False,
            )
            # Raw series were passed above, so name the axes explicitly.
            fig.update_xaxes(title_text=column)
            fig.update_yaxes(title_text=target)
            return fig
# Second accordion panel; `value` is reused as its title and id.
value = "More Visualizations..."
with ui.accordion_panel(title=ui.strong(value), value=value):
    with ui.navset_card_pill(id="more_visualizations"):
        with ui.nav_panel("Weather vs Eta"):
            with ui.card(full_screen=True):
                ui.card_header(
                    "Weather vs Eta Features summary")

                @render_plotly
                def eta_weather_summary():
                    # Correlation matrix of daily median ETA and weather,
                    # with rows ordered by their correlation to 'eta'.
                    corr_sorted = daily_weather_eta_df().corr().sort_values(by='eta')
                    # Keep only the 'eta' column: a one-column heatmap.
                    eta_corr = corr_sorted[['eta']]
                    heatmap = go.Heatmap(
                        z=eta_corr.values,
                        x=eta_corr.columns,
                        y=eta_corr.index,
                        colorbar={"title": 'Coefficient'},
                        colorscale="Agsunset",
                        texttemplate='%{z:.3f}',
                    )
                    return go.Figure(data=[heatmap])
with ui.nav_panel("Top locations"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Top 10 Most Common Origin and Destination Locations")
        # Popover to switch between origin and destination coordinates.
        with ui.popover(title="Origin or Destination?"):
            ICONS["ellipsis"]
            ui.input_radio_buttons(
                "location_type",
                "Select:",
                ["origin", "destination"],
                selected="origin",
                inline=True,
            )

        @render_plotly
        def top_locations():
            # Horizontal bar chart of the ten most frequent coordinates.
            location_type = input.location_type()
            # First element of the calc's result is the (origin, destination)
            # pair of top-10 frames.
            top_10_origin, top_10_dest = top_bottom_location()[0]
            top_10 = top_10_origin if location_type == "origin" else top_10_dest
            # sort_values returns a new frame. The calc's cached result is
            # shared with other readers, so it must not be sorted or extended
            # in place. Ascending order puts the largest bar at the top.
            data = top_10.sort_values(by='count')
            lat_col = f'{location_type}_lat'
            lon_col = f'{location_type}_lon'
            # "(lat, lon)" tick labels; built with zip so an empty frame
            # simply yields no labels.
            locations = [
                f"({lat}, {lon})"
                for lat, lon in zip(data[lat_col], data[lon_col])
            ]
            is_origin = location_type == "origin"
            bar_color = BRANDCOLORS['purple-light'] if is_origin else BRANDCOLORS['red']
            fig = go.Figure()
            fig.add_trace(
                go.Bar(
                    x=data['count'],
                    y=locations,
                    orientation='h',
                    marker=dict(color=bar_color),
                    name=f'{location_type.title()} Locations'
                )
            )
            # The bars are horizontal: counts run along x, locations along y.
            # (The two axis titles used to be swapped.)
            fig.update_layout(
                xaxis_title='Number of Rides',
                yaxis_title=f'{location_type.title()} Locations',
                showlegend=False
            )
            return fig
with ui.nav_panel("Trips by hour"):
    with ui.card(full_screen=True):
        ui.card_header(
            "No of trips by Hour of the day")
        # Popover to choose how the per-day counts are summarised.
        with ui.popover(title="Average or Median?"):
            ICONS["ellipsis"]
            ui.input_radio_buttons(
                "trip_agg",
                "Select:",
                ["average", "median"],
                selected="median",
                inline=True,
            )

        @render_plotly
        def trips_by_hour():
            # Line chart of the typical number of trips per hour of the day.
            #
            # Previously the trips were counted once per hour over the whole
            # dataset and then "aggregated" per hour. With a single row per
            # hour, mean and median both returned that same total, so the
            # selector had no effect. The counts are now taken per
            # (day, hour) and summarised across days.
            trip_agg = input.trip_agg()
            y_label = f'{trip_agg.title()} number of Trips'
            timestamps = train_data()['timestamp']
            if timestamps.empty:
                # Nothing matches the filters: draw an empty chart.
                agg_trips_per_hour = pd.DataFrame({'Hour': [], y_label: []})
            else:
                # Rows = calendar days, columns = hours, cells = trip counts.
                # Hours with no trips on a given day count as 0 for that day.
                trips_per_day_hour = (
                    timestamps
                    .groupby([
                        timestamps.dt.date.rename('date'),
                        timestamps.dt.hour.rename('Hour'),
                    ])
                    .size()
                    .unstack(fill_value=0)
                )
                if trip_agg == "median":
                    per_hour = trips_per_day_hour.median()
                else:
                    per_hour = trips_per_day_hour.mean()
                agg_trips_per_hour = per_hour.rename(y_label).reset_index()
            fig = px.line(agg_trips_per_hour, x='Hour', y=y_label)
            return fig
with ui.nav_panel("Mapping dataset locations"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Map of locations in the train dataset")

        @render_plotly
        def location_map():
            # NOTE(review): get_country_geojson is a project helper. From its
            # use here it returns a country name, a GeoJSON object and a
            # frame with 'longitude'/'latitude' columns; confirm in .data.
            country, geojson, data = get_country_geojson()
            # Tag every row with the country so it can act as the location key.
            data['country'] = country
            # Base map fitted to the bounds of the supplied GeoJSON.
            fig = px.scatter_geo(
                data,
                locations='country',
                hover_name='country',
                geojson=geojson,
                fitbounds='geojson',
            )
            # Overlay the individual coordinates as red markers.
            marker_style = {"color": BRANDCOLORS["red"]}
            fig.add_scattergeo(
                lon=data['longitude'],
                lat=data['latitude'],
                mode='markers',
                marker=marker_style,
                name='Locations in dataset'
            )
            # Country name as a plain text annotation (no arrow).
            fig.add_annotation(
                text=f"{country}",
                showarrow=False,
                font={"size": 18},
                align="center"
            )
            fig.update_layout(
                title=f'Dataset locations in {country}',
                geo_scope='africa'
            )
            return fig
# Third accordion panel: placeholder for the model explainer. For now it only
# lists the entries of ALL_MODELS.
value = "Model Explainer"
with ui.accordion_panel(title=ui.strong(value), value=value):
    with ui.navset_card_pill(id="model_explainer"):
        with ui.nav_panel("Model Explainer..."):
            with ui.card(full_screen=True):
                ui.card_header("Coming Soon...")
                ui.h3("Models")

                @render.ui
                def all_models():
                    # One <li> per model, wrapped in a <ul>.
                    model_items = [ui.tags.li(item) for item in ALL_MODELS]
                    return ui.tags.ul(model_items)
# ui.include_css("styles.css")
# --------------------------------------------------------
# Reactive calculations and effects
# --------------------------------------------------------
@reactive.calc
def train_data():
    # Rows of train_df whose trip_distance and eta both fall inside the
    # ranges currently selected on the two sidebar sliders.
    dist_lo, dist_hi = input.trip_distance()
    eta_lo, eta_hi = input.eta()
    in_distance_range = train_df.trip_distance.between(dist_lo, dist_hi)
    in_eta_range = train_df.eta.between(eta_lo, eta_hi)
    return train_df[in_distance_range & in_eta_range]
@reactive.calc
def daily_weather_eta_df():
    # Daily median ETA of the filtered trips joined with weather_df on 'date'.
    # The 'date' key is dropped from the result, leaving 'eta' plus the
    # weather columns.
    trips = train_data()[['timestamp', 'eta']]
    # Calendar day of each trip, converted back to a datetime for resampling.
    trip_day = pd.to_datetime(trips['timestamp'].dt.date)
    daily_eta_df = (
        trips
        .assign(date=trip_day)
        # The timestamp is no longer needed once the day is extracted.
        .drop(columns=['timestamp'])
        .set_index('date')
        # One row per calendar day holding that day's median ETA.
        .resample('D')
        .median()
        .reset_index()
    )
    # Keep only days present in both frames (default inner join).
    merged = pd.merge(daily_eta_df, weather_df, left_on='date', right_on='date')
    return merged.drop(columns=['date'])
@reactive.calc
def top_bottom_location():
    # Returns [(top_origin, top_dest), (bottom_origin, bottom_dest)]: the 10
    # most and 10 least frequent coordinate pairs among the filtered trips,
    # each as a frame with the lat/lon columns plus a 'count' column.
    def _extremes(lat_col, lon_col):
        # Occurrences of each (lat, lon) pair, then both ends of the ranking.
        counts = (
            train_data()
            .groupby([lat_col, lon_col])[lon_col]
            .count()
            .reset_index(name='count')
        )
        most = counts.nlargest(10, columns=['count'])
        least = counts.nsmallest(10, columns=['count'])
        return most, least

    top_origin, bottom_origin = _extremes('origin_lat', 'origin_lon')
    top_dest, bottom_dest = _extremes('destination_lat', 'destination_lon')
    return [(top_origin, top_dest), (bottom_origin, bottom_dest)]
@reactive.effect
@reactive.event(input.reset)
def _():
    # When "Reset filter" is clicked, put both sliders back to the full
    # data range.
    full_ranges = (
        ("trip_distance", trip_distance_rng),
        ("eta", eta_rng),
    )
    for slider_id, full_range in full_ranges:
        ui.update_slider(slider_id, value=full_range)