Spaces:
Sleeping
Sleeping
from .data import test_df, train_df, weather_df, time_sec_hms, full_time_sec_hms, get_country_geojson | |
from .config import BRANDCOLORS, BRANDTHEMES, ALL_MODELS | |
import time | |
import requests | |
import requests_cache | |
from pathlib import Path | |
from faicons import icon_svg | |
import pandas as pd | |
import plotly.io as pio | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from plotly_calplot import calplot | |
from plotly.subplots import make_subplots | |
from shiny import reactive, render, Inputs, Outputs, Session | |
from shiny.express import input, output, module, ui | |
from shinywidgets import render_plotly, render_widget | |
# Show every column when a DataFrame is printed.
pd.set_option("display.max_columns", None)
# Print floats with 16 decimals (high-precision longitudes and latitudes).
pd.set_option('display.float_format', '{:.16f}'.format)

# Yassir plotly theme.
# Build it from a *copy* of "plotly_dark": pio.templates[...] hands back the
# registered template object itself, so updating it in place would also
# change the built-in "plotly_dark" template.
yassir_theme = go.layout.Template(pio.templates["plotly_dark"])
yassir_theme.layout.update(
    plot_bgcolor=BRANDCOLORS["purple-dark"],
    paper_bgcolor=BRANDCOLORS["purple-dark"],
    colorway=[BRANDCOLORS["red"], BRANDCOLORS["purple-light"],
              BRANDCOLORS["purple-dark"]],  # Brand colors
)
# Register the theme under its own name and make it the default for all figures.
pio.templates["yassir_theme"] = yassir_theme
pio.templates.default = yassir_theme
def dashboard_page(input: Inputs, output: Outputs, session: Session): | |
# Disable loading spinners, use elegant pulse
ui.busy_indicators.use(spinners=False)
ui.panel_title(title=ui.h1(ui.strong("Dashboard π")),
               window_title="Yassir Dashboard")
# Link to the external CSS file
ui.tags.link(rel="stylesheet", href="styles.css")

# Icons shown in the value boxes and as popover triggers.
ICONS = {
    "eta": icon_svg("clock"),
    "distance": icon_svg("route"),
    "ellipsis": icon_svg("ellipsis"),
}
# Define the target column
target = 'eta'
# Columns
columns = train_df.columns.to_list()
# Weather columns
w_columns = weather_df.columns.to_list()
# Numericals
numericals = train_df.select_dtypes(include=['number']).columns.tolist()
# Input ranges for the sidebar sliders.
# Series.min()/max() are vectorised and skip NaN, whereas the builtin
# min()/max() iterate element by element and give order-dependent results
# when a NaN is present.
eta_rng = (train_df["eta"].min(), train_df["eta"].max())
trip_distance_rng = (train_df["trip_distance"].min(),
                     train_df["trip_distance"].max())
with ui.layout_sidebar(): | |
with ui.sidebar(open="desktop"):
    # Inline CSS for the range inputs.
    # No trailing comma after the call: with one, the statement evaluates to
    # a one-element tuple rather than the <style> tag itself.
    ui.tags.style("""
        .shiny-input-container input[type="range"] {
            background: linear-gradient(to right, red, #4CAF50) no-repeat;
            height: 8px;
        }
    """)
    # Both sliders get a (min, max) tuple as value, i.e. they select a range
    # that initially spans all of the train data.
    ui.input_slider(
        "trip_distance",
        "Trip Distance",
        min=trip_distance_rng[0],
        max=trip_distance_rng[1],
        value=trip_distance_rng,
        post=" m",
    )
    ui.input_slider(
        "eta",
        "ETA",
        min=eta_rng[0],
        max=eta_rng[1],
        value=eta_rng,
        post=" sec",
    )
    ui.input_action_button("reset", "Reset filter")
# KPIs | |
with ui.layout_column_wrap(fill=False):
    # Each value box holds a title string, one reactive calculation and two
    # text outputs derived from it. The text functions are registered with
    # @render.text so their returned strings are displayed; the medians are
    # None when no trip matches the filters, in which case "N/A" is shown
    # instead of failing on round(None) / None / 1000.
    with ui.value_box(showcase=ICONS["eta"], theme=BRANDTHEMES['purple-dark']):
        "ETA (Total)"

        @reactive.calc
        def total_eta():
            # Sum of the eta column over the filtered trips.
            return float(train_data().eta.sum())

        @render.text
        def ts():
            return f"{round(total_eta()):,} s"

        @render.text
        def thms():
            return full_time_sec_hms(total_eta())

    with ui.value_box(showcase=ICONS["eta"], theme=BRANDTHEMES['purple-dark']):
        "ETA (Median)"

        @reactive.calc
        def median_eta():
            # Median of the eta column, or None when the filter is empty.
            d = train_data()
            m_eta = None
            if d.shape[0] > 0:
                m_eta = d.eta.median()
            return m_eta

        @render.text
        def ms():
            m_eta = median_eta()
            if m_eta is None:
                return "N/A"
            return f"{round(m_eta):,} s"

        @render.text
        def mhms():
            m_eta = median_eta()
            # time_sec_hms is defined elsewhere; do not rely on how it
            # treats None.
            if m_eta is None:
                return "N/A"
            return time_sec_hms(m_eta)

    with ui.value_box(showcase=ICONS["distance"], theme=BRANDTHEMES['purple-light']):
        "TRIP DISTANCE (Total)"

        @reactive.calc
        def total_trip():
            # Sum of the trip_distance column over the filtered trips.
            return float(train_data().trip_distance.sum())

        @render.text
        def tdkm():
            return f"{total_trip()/1000:,.1f} km"

        @render.text
        def tdm():
            return f"{total_trip():,.1f} m"

    with ui.value_box(showcase=ICONS["distance"], theme=BRANDTHEMES['purple-light']):
        "TRIP DISTANCE (Median)"

        @reactive.calc
        def median_trip():
            # Median of the trip_distance column, or None when the filter
            # is empty.
            d = train_data()
            m_trip = None
            if d.shape[0] > 0:
                m_trip = d.trip_distance.median()
            return m_trip

        @render.text
        def mtdkm():
            m_trip = median_trip()
            if m_trip is None:
                return "N/A"
            return f"{m_trip/1000:,.1f} km"

        @render.text
        def mtdm():
            m_trip = median_trip()
            if m_trip is None:
                return "N/A"
            return f"{m_trip:,.1f} m"
# Dataset view | |
with ui.layout_column_wrap(fill=False):
    # One tab per dataset. Each table function is registered with
    # @render.data_frame so the DataGrid it returns is displayed.
    with ui.navset_card_pill(id="data_tab"):
        with ui.nav_panel("Train data"):
            with ui.card(full_screen=True):
                ui.card_header("Train data")

                @render.data_frame
                def train_table():
                    # Only this table follows the sidebar filters.
                    return render.DataGrid(train_data(), filters=True)

        with ui.nav_panel("Test data"):
            with ui.card(full_screen=True):
                ui.card_header("Test data")

                @render.data_frame
                def test_table():
                    return render.DataGrid(test_df)

        with ui.nav_panel("Weather data"):
            with ui.card(full_screen=True):
                ui.card_header("Weather data")

                @render.data_frame
                def weather_table():
                    return render.DataGrid(weather_df)
value = "Explore the visualizations" | |
with ui.accordion(id="plot_acc", open=value): | |
with ui.accordion_panel(title=ui.strong(value), value=value): | |
with ui.navset_card_pill(id="eda_tab"): | |
with ui.nav_panel("Train features"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Correlation in the Train features")

        # Registered as a Plotly output so the returned figure is shown.
        @render_plotly
        def train_features():
            # Pairwise correlation of the numeric train columns, computed
            # on the trips selected by the sidebar filters.
            numeric_correlation_matrix = train_data()[numericals].corr()
            # Heatmap with the coefficient printed in every cell.
            heatmap_trace = go.Heatmap(
                z=numeric_correlation_matrix.values,
                x=numeric_correlation_matrix.columns,
                y=numeric_correlation_matrix.index,
                colorbar=dict(title='coefficient'),
                colorscale="Agsunset",
                texttemplate='%{z:.3f}',
            )
            return go.Figure(data=[heatmap_trace])
with ui.nav_panel("Trip distance vs Eta"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Relationship between Trip Distance and ETA")

        # Registered as a Plotly output so the returned figure is shown.
        @render_plotly
        def scatterplot():
            # Scatter of the filtered trips with an OLS trendline drawn in
            # the light-purple brand colour.
            # NOTE(review): trendline='ols' presumably needs statsmodels at
            # runtime - confirm it is in the app's requirements.
            return px.scatter(
                train_data(),
                x='trip_distance',
                y='eta',
                trendline='ols',
                trendline_color_override=BRANDCOLORS["purple-light"],
                labels={
                    'eta': 'Eta (seconds)',
                    'trip_distance': 'Trip Distance (meters)',
                },
            )
with ui.nav_panel("Distribution"):
    with ui.card(full_screen=True):
        ui.card_header("Distribution per train column")
        # Popover holding the column selector.
        with ui.popover(title="Choose a train column"):
            ICONS["ellipsis"]
            ui.input_radio_buttons(
                "train_col",
                "Select:",
                numericals,
                selected="eta",
                inline=True,
            )

        # Registered as a Plotly output so the returned figure is shown.
        @render_plotly
        def distribution():
            # Violin (with box) and histogram of the selected column, side
            # by side, for the filtered trips.
            column = input.train_col()
            data = train_data()
            violin_fig = px.violin(data, x=column, box=True)
            histogram_fig = px.histogram(data, x=column)
            # One row, two columns: violin on the left, histogram on the right.
            fig = make_subplots(rows=1, cols=2)
            for trace in violin_fig.data:
                fig.add_trace(trace, row=1, col=1)
            for trace in histogram_fig.data:
                fig.add_trace(trace, row=1, col=2)
            fig.update_layout(
                title_text=f"Distribution in the {column} column",
                showlegend=True,
                legend_title_text=target,
            )
            return fig
with ui.nav_panel("Weather features vs Eta"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Relationship between weather features and Median Eta in seconds")
        # Popover holding the weather-feature selector ("date" is excluded).
        with ui.popover(title="Choose a feature column"):
            ICONS["ellipsis"]
            ui.input_radio_buttons(
                "weather_col",
                "Select:",
                [col for col in w_columns if col != "date"],
                selected="dewpoint_2m_temperature",
                inline=True,
            )

        # Registered as a Plotly output so the returned figure is shown.
        @render_plotly
        def weather_eta():
            # Scatter of the selected weather feature against the daily
            # median eta.
            column = input.weather_col()
            # Fetch the joined frame once and reuse it for both axes.
            daily = daily_weather_eta_df()
            fig = px.scatter(
                x=daily[column],
                y=daily[target],
            )
            fig.update_layout(
                title_text=f"Distribution in the {column} column",
                showlegend=False,
            )
            fig.update_xaxes(title_text=column)  # Set x-axis title
            fig.update_yaxes(title_text=target)  # Set y-axis title
            return fig
value = "More Visualizations..." | |
with ui.accordion_panel(title=ui.strong(value), value=value): | |
with ui.navset_card_pill(id="more_visualizations"): | |
with ui.nav_panel("Weather vs Eta"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Weather vs Eta Features summary")

        # Registered as a Plotly output so the returned figure is shown.
        @render_plotly
        def eta_weather_summary():
            # Correlation of every column of the daily weather/eta frame
            # with eta, sorted by that coefficient and kept as a
            # one-column frame so it can be drawn as a heatmap strip.
            eta_correlation = (
                daily_weather_eta_df()
                .corr()
                .sort_values(by='eta')[['eta']]
            )
            heatmap_trace = go.Heatmap(
                z=eta_correlation.values,
                x=eta_correlation.columns,
                y=eta_correlation.index,
                colorbar=dict(title='Coefficient'),
                colorscale="Agsunset",
                texttemplate='%{z:.3f}',
            )
            return go.Figure(data=[heatmap_trace])
with ui.nav_panel("Top locations"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Top 10 Most Common Origin and Destination Locations")
        # Popover holding the origin/destination selector.
        with ui.popover(title="Origin or Destination?"):
            ICONS["ellipsis"]
            ui.input_radio_buttons(
                "location_type",
                "Select:",
                ["origin", "destination"],
                selected="origin",
                inline=True,
            )

        # Registered as a Plotly output so the returned figure is shown.
        @render_plotly
        def top_locations():
            # Horizontal bar chart of the ten most frequent origin or
            # destination coordinates among the filtered trips.
            location_type = input.location_type()
            top_10_origin, top_10_dest = top_bottom_location()[0]
            source = top_10_origin if location_type == "origin" else top_10_dest
            # sort_values returns a new frame, so the frame handed out by
            # top_bottom_location() is not modified.
            data = source.sort_values(by='count')
            # "(lat, lon)" label for every bar.
            locations = [
                f"({lat}, {lon})"
                for lat, lon in zip(data[f'{location_type}_lat'],
                                    data[f'{location_type}_lon'])
            ]
            fig = go.Figure()
            fig.add_trace(
                go.Bar(
                    x=data['count'],
                    y=locations,
                    orientation='h',
                    marker=dict(
                        color=BRANDCOLORS['purple-light'] if location_type == "origin" else BRANDCOLORS['red']),
                    name=f'{location_type.title()} Locations',
                )
            )
            # The bars are horizontal: counts run along x, locations along y.
            fig.update_layout(
                xaxis_title='Number of Rides',
                yaxis_title=f'{location_type.title()} Locations',
                showlegend=False,
            )
            return fig
with ui.nav_panel("Trips by hour"):
    with ui.card(full_screen=True):
        ui.card_header(
            "No of trips by Hour of the day")
        # Popover holding the aggregation selector.
        with ui.popover(title="Average or Median?"):
            ICONS["ellipsis"]
            ui.input_radio_buttons(
                "trip_agg",
                "Select:",
                ["average", "median"],
                selected="median",
                inline=True,
            )

        # Registered as a Plotly output so the returned figure is shown.
        @render_plotly
        def trips_by_hour():
            # Line chart of the average/median number of trips per day for
            # each hour of the day, over the filtered trips.
            trip_agg = input.trip_agg()
            y_label = f'{trip_agg.title()} number of Trips'
            timestamps = train_data()['timestamp']
            if timestamps.empty:
                # Nothing to aggregate: draw an empty chart with the same axes.
                return px.line(
                    pd.DataFrame({'Hour': [], y_label: []}),
                    x='Hour', y=y_label)
            # Trips per (calendar day, hour); rows are days, columns are
            # hours, and day/hour pairs without trips count as 0.
            # Aggregating plain per-hour totals would make "average" and
            # "median" identical, because each hour then has a single value.
            daily_hourly_counts = (
                timestamps
                .groupby([timestamps.dt.date.rename('date'),
                          timestamps.dt.hour.rename('Hour')])
                .size()
                .unstack('Hour', fill_value=0)
            )
            if trip_agg == "median":
                per_hour = daily_hourly_counts.median()
            else:
                per_hour = daily_hourly_counts.mean()
            agg_trips_per_hour = per_hour.rename(y_label).reset_index()
            return px.line(agg_trips_per_hour, x='Hour', y=y_label)
with ui.nav_panel("Mapping dataset locations"):
    with ui.card(full_screen=True):
        ui.card_header(
            "Map of locations in the train dataset")

        # Registered as a Plotly output so the returned figure is shown.
        @render_plotly
        def location_map():
            # Geo scatter of the dataset coordinates on top of the country
            # outline returned by get_country_geojson().
            country, geojson, data = get_country_geojson()
            # assign() adds the column on a copy, leaving the frame returned
            # by get_country_geojson() unchanged.
            data = data.assign(country=country)
            fig = px.scatter_geo(
                data,
                locations='country',
                hover_name='country',
                geojson=geojson,
                fitbounds='geojson',
            )
            # Add longitude and latitude points
            fig.add_scattergeo(
                lon=data['longitude'],
                lat=data['latitude'],
                mode='markers',
                marker=dict(
                    color=BRANDCOLORS["red"],
                ),
                name='Locations in dataset',
            )
            # Add annotation to the map
            fig.add_annotation(
                text=f"{country}",
                showarrow=False,
                font=dict(size=18),
                align="center",
            )
            fig.update_layout(
                title=f'Dataset locations in {country}',
                geo_scope='africa',
            )
            return fig
value = "Model Explainer"
with ui.accordion_panel(title=ui.strong(value), value=value):
    with ui.navset_card_pill(id="model_explainer"):
        with ui.nav_panel("Model Explainer..."):
            with ui.card(full_screen=True):
                ui.card_header("Coming Soon...")
                ui.h3("Models")

                # Registered as a UI output so the returned <ul> is shown.
                @render.ui
                def all_models():
                    # One list item per entry of ALL_MODELS.
                    return ui.tags.ul(
                        [ui.tags.li(item) for item in ALL_MODELS]
                    )
# ui.include_css("styles.css") | |
# -------------------------------------------------------- | |
# Reactive calculations and effects | |
# -------------------------------------------------------- | |
# Rows of train_df whose trip_distance and eta both fall inside the ranges
# selected with the sidebar sliders (bounds inclusive, per Series.between).
# Declared as a reactive calculation so the filter runs once per slider
# change and the result is shared by every output that calls train_data().
@reactive.calc
def train_data():
    distance_low, distance_high = input.trip_distance()
    eta_low, eta_high = input.eta()
    in_distance_range = train_df.trip_distance.between(
        distance_low, distance_high)
    in_eta_range = train_df.eta.between(eta_low, eta_high)
    return train_df[in_distance_range & in_eta_range]
# Daily median eta of the filtered trips joined with the weather data.
# Returns the merged frame without the 'date' key, i.e. the eta column plus
# the remaining weather_df columns.
# Declared as a reactive calculation so the resample/merge is done once and
# shared by the outputs that use it.
@reactive.calc
def daily_weather_eta_df():
    # Select relevant columns from the training DataFrame
    time_eta_df = train_data()[['timestamp', 'eta']]
    # Calendar date of every trip, converted back to a datetime column so it
    # can serve as the resampling index.
    time_eta_df = time_eta_df.assign(
        date=pd.to_datetime(time_eta_df['timestamp'].dt.date))
    # Prepare daily aggregated ETA data
    daily_eta_df = (
        time_eta_df
        # Remove the 'timestamp' column as it's no longer needed
        .drop(columns=['timestamp'])
        # Set 'date' as the index for resampling
        .set_index('date')
        # Resample the data on a daily frequency
        .resample('D')
        .median()  # Compute the median ETA for each day
        .reset_index()  # Reset the index to include 'date' as a column
    )
    # Merge the daily ETA data with the weather data on 'date' (pd.merge
    # default join), then drop the key.
    return (
        pd.merge(daily_eta_df, weather_df, left_on='date', right_on='date')
        .drop(columns=['date'])
    )
# Ten most and ten least frequent origin and destination coordinates among
# the filtered trips.
# Returns [(top_origin, top_dest), (bottom_origin, bottom_dest)]; every
# element is a DataFrame with the lat/lon columns and a 'count' column.
# Declared as a reactive calculation so the grouping is done once per
# filter change.
@reactive.calc
def top_bottom_location():
    trips = train_data()

    def ranked_counts(prefix):
        # Count trips per (lat, lon) pair for 'origin' or 'destination' and
        # return the 10 largest and 10 smallest groups.
        counts = (
            trips
            .groupby([f'{prefix}_lat', f'{prefix}_lon'])[f'{prefix}_lon']
            .count()
            .reset_index(name='count')
        )
        return (counts.nlargest(10, columns=['count']),
                counts.nsmallest(10, columns=['count']))

    top_origin, bottom_origin = ranked_counts('origin')
    top_dest, bottom_dest = ranked_counts('destination')
    return [(top_origin, top_dest), (bottom_origin, bottom_dest)]
# Put both sidebar sliders back to their full range.
# Bound to the "Reset filter" action button (input id "reset"): the effect
# runs only when that button is clicked.
@reactive.effect
@reactive.event(input.reset)
def _():
    ui.update_slider("trip_distance", value=trip_distance_rng)
    ui.update_slider("eta", value=eta_rng)