from .data import test_df, train_df, weather_df, time_sec_hms, full_time_sec_hms, get_country_geojson from .config import BRANDCOLORS, BRANDTHEMES, ALL_MODELS import time import requests import requests_cache from pathlib import Path from faicons import icon_svg import pandas as pd import plotly.io as pio import plotly.express as px import plotly.graph_objects as go from plotly_calplot import calplot from plotly.subplots import make_subplots from shiny import reactive, render, Inputs, Outputs, Session from shiny.express import input, output, module, ui from shinywidgets import render_plotly, render_widget # Set pandas to display all columns pd.set_option("display.max_columns", None) # High precision longitudes and Latitudes pd.set_option('display.float_format', '{:.16f}'.format) # Yassir plotly theme yassir_theme = pio.templates["plotly_dark"] yassir_theme.layout.update( plot_bgcolor=BRANDCOLORS["purple-dark"], paper_bgcolor=BRANDCOLORS["purple-dark"], colorway=[BRANDCOLORS["red"], BRANDCOLORS["purple-light"], BRANDCOLORS["purple-dark"]], # Brand colors ) pio.templates["yassir_theme"] = yassir_theme pio.templates.default = yassir_theme @module def dashboard_page(input: Inputs, output: Outputs, session: Session): # Disable loading spinners, use elegant pulse ui.busy_indicators.use(spinners=False) ui.panel_title(title=ui.h1(ui.strong("Dashboard 📈")), window_title="Yassir Dashboard") # Link to the external CSS file ui.tags.link(rel="stylesheet", href="styles.css") # Add main content ICONS = { "eta": icon_svg("clock"), "distance": icon_svg("route"), "ellipsis": icon_svg("ellipsis"), } # Define the target column target = 'eta' # Columns columns = train_df.columns.to_list() # Weather columns w_columns = weather_df.columns.to_list() # Numericals numericals = train_df.select_dtypes(include=['number']).columns.tolist() # Input range eta_rng = (min(train_df["eta"]), max(train_df["eta"])) trip_distance_rng = (min(train_df["trip_distance"]), max(train_df["trip_distance"])) with ui.layout_sidebar(): with ui.sidebar(open="desktop"): ui.tags.style(""" .shiny-input-container input[type="range"] { background: linear-gradient(to right, red, #4CAF50) no-repeat; height: 8px; } """), ui.input_slider( "trip_distance", "Trip Distance", min=trip_distance_rng[0], max=trip_distance_rng[1], value=trip_distance_rng, post=" m", ) ui.input_slider( "eta", "ETA", min=eta_rng[0], max=eta_rng[1], value=eta_rng, post=" sec", ) ui.input_action_button("reset", "Reset filter") # KPIs with ui.layout_column_wrap(fill=False): with ui.value_box(showcase=ICONS["eta"], theme=BRANDTHEMES['purple-dark']): "ETA (Total)" @reactive.calc def total_eta(): return float(train_data().eta.sum()) @render.text def ts(): return f"{round(total_eta()):,} s" @render.text def thms(): return full_time_sec_hms(total_eta()) with ui.value_box(showcase=ICONS["eta"], theme=BRANDTHEMES['purple-dark']): "ETA (Median)" @reactive.calc def median_eta(): d = train_data() m_eta = None if d.shape[0] > 0: m_eta = d.eta.median() return m_eta @render.text def ms(): return f"{round(median_eta()):,} s" @render.text def mhms(): return time_sec_hms(median_eta()) with ui.value_box(showcase=ICONS["distance"], theme=BRANDTHEMES['purple-light']): "TRIP DISTANCE (Total)" @reactive.calc def total_trip(): return float(train_data().trip_distance.sum()) @render.text def tdkm(): return f"{total_trip()/1000:,.1f} km" @render.text def tdm(): return f"{total_trip():,.1f} m" with ui.value_box(showcase=ICONS["distance"], theme=BRANDTHEMES['purple-light']): "TRIP DISTANCE (Median)" @reactive.calc def median_trip(): d = train_data() m_trip = None if d.shape[0] > 0: m_trip = d.trip_distance.median() return m_trip @render.text def mtdkm(): return f"{median_trip()/1000:,.1f} km" @render.text def mtdm(): return f"{median_trip():,.1f} m" # Dataset view with ui.layout_column_wrap(fill=False): with ui.navset_card_pill(id="data_tab"): with ui.nav_panel("Train data"): with ui.card(full_screen=True): ui.card_header("Train data") @render.data_frame def train_table(): return render.DataGrid(train_data(), filters=True) with ui.nav_panel("Test data"): with ui.card(full_screen=True): ui.card_header("Test data") @render.data_frame def test_table(): return render.DataGrid(test_df) with ui.nav_panel("Weather data"): with ui.card(full_screen=True): ui.card_header("Weather data") @render.data_frame def weather_table(): return render.DataGrid(weather_df) value = "Explore the visualizations" with ui.accordion(id="plot_acc", open=value): with ui.accordion_panel(title=ui.strong(value), value=value): with ui.navset_card_pill(id="eda_tab"): with ui.nav_panel("Train features"): with ui.card(full_screen=True): ui.card_header( "Correlation in the Train features") @render_plotly def train_features(): numeric_correlation_matrix = train_data()[ numericals].corr() # Create heatmap trace heatmap_trace = go.Heatmap( z=numeric_correlation_matrix.values, x=numeric_correlation_matrix.columns, y=numeric_correlation_matrix.index, colorbar=dict(title='coefficient'), colorscale="Agsunset", texttemplate='%{z:.3f}', ) # Create figure fig = go.Figure(data=[heatmap_trace]) return fig with ui.nav_panel("Trip distance vs Eta"): with ui.card(full_screen=True): ui.card_header( "Relationship between Trip Distance and ETA") @render_plotly def scatterplot(): return px.scatter( train_data(), x='trip_distance', y='eta', trendline='ols', trendline_color_override=BRANDCOLORS["purple-light"], labels={ 'eta': 'Eta (seconds)', 'trip_distance': 'Trip Distance (meters)'}, ) with ui.nav_panel("Distribution"): with ui.card(full_screen=True): ui.card_header("Distribution per train column") with ui.popover(title="Choose a train column"): ICONS["ellipsis"] ui.input_radio_buttons( "train_col", "Select:", numericals, selected="eta", inline=True, ) @render_plotly def distribution(): column = input.train_col() data = train_data() fig1 = px.violin(data, x=column, box=True) fig2 = px.histogram(data, x=column) # Create a subplot layout with 1 row and 2 columns fig = make_subplots(rows=1, cols=2) # Add traces from fig1 to the subplot for trace in fig1.data: fig.add_trace(trace, row=1, col=1) # Add traces from fig2 to the subplot for trace in fig2.data: fig.add_trace(trace, row=1, col=2) # Update layout fig.update_layout(title_text=f"Distribution in the {column} column", showlegend=True, legend_title_text=target ) return fig with ui.nav_panel("Weather features vs Eta"): with ui.card(full_screen=True): ui.card_header( "Relationship between weather features and Median Eta in seconds") with ui.popover(title="Choose a feature column"): ICONS["ellipsis"] ui.input_radio_buttons( "weather_col", "Select:", [col for col in w_columns if col != "date"], selected="dewpoint_2m_temperature", inline=True, ) @render_plotly def weather_eta(): column = input.weather_col() fig = px.scatter( x=daily_weather_eta_df()[column], y=daily_weather_eta_df()[target], ) # Update layout fig.update_layout( title_text=f"Distribution in the {column} column", showlegend=False ) fig.update_xaxes(title_text=column) # Set x-axis title fig.update_yaxes(title_text=target) # Set y-axis title return fig value = "More Visualizations..." with ui.accordion_panel(title=ui.strong(value), value=value): with ui.navset_card_pill(id="more_visualizations"): with ui.nav_panel("Weather vs Eta"): with ui.card(full_screen=True): ui.card_header( "Weather vs Eta Features summary") @render_plotly def eta_weather_summary(): daily_weather_eta_correlation_matrix = daily_weather_eta_df().corr().sort_values(by='eta') # Create heatmap trace heatmap_trace = go.Heatmap( z=daily_weather_eta_correlation_matrix[[ 'eta']].values, x=daily_weather_eta_correlation_matrix[[ 'eta']].columns, y=daily_weather_eta_correlation_matrix[[ 'eta']].index, colorbar=dict(title='Coefficient'), colorscale="Agsunset", texttemplate='%{z:.3f}', ) # Create figure fig = go.Figure(data=[heatmap_trace]) return fig with ui.nav_panel("Top locations"): with ui.card(full_screen=True): ui.card_header( "Top 10 Most Common Origin and Destination Locations") with ui.popover(title="Origin or Destination?"): ICONS["ellipsis"] ui.input_radio_buttons( "location_type", "Select:", ["origin", "destination"], selected="origin", inline=True, ) @render_plotly def top_locations(): location_type = input.location_type() top_10_origin, top_10_dest = top_bottom_location()[ 0] data = top_10_origin if location_type == "origin" else top_10_dest # Prepare data for origin locations data.sort_values(by='count', inplace=True) data['location'] = data.sort_values(by='count').apply( lambda row: f"({row[f'{location_type}_lat']}, {row[f'{location_type}_lon']})", axis=1) fig = go.Figure() fig.add_trace( go.Bar( x=data['count'], y=data['location'], orientation='h', marker=dict( color=BRANDCOLORS['purple-light'] if location_type == "origin" else BRANDCOLORS['red']), name=f'{location_type.title()} Locations' ) ) # Update layout fig.update_layout( xaxis_title=f'{location_type.title()} Locations', yaxis_title='Number of Rides', showlegend=False ) return fig with ui.nav_panel("Trips by hour"): with ui.card(full_screen=True): ui.card_header( "No of trips by Hour of the day") with ui.popover(title="Average or Median?"): ICONS["ellipsis"] ui.input_radio_buttons( "trip_agg", "Select:", ["average", "median"], selected="median", inline=True, ) @render_plotly def trips_by_hour(): trip_agg = input.trip_agg() # Create a DataFrame with only the Timestamp column time_df = train_data()[['timestamp']] # Extract the hour from the timestamp and add it as a new column time_df = time_df.assign(hour=time_df['timestamp'].dt.hour) # Count the number of trips for each hour of the day tps = time_df['hour'].value_counts().sort_index().reset_index().rename( columns={'hour': 'Hour', 'count': f'{trip_agg.title()} number of Trips'}) # Calculate the average number of trips per hour (Note: This line might not be necessary as 'trips_per_hour' already represents counts per hour) if trip_agg == "median": agg_trips_per_hour = tps.groupby( 'Hour')[f'{trip_agg.title()} number of Trips'].median().reset_index() else: agg_trips_per_hour = tps.groupby( 'Hour')[f'{trip_agg.title()} number of Trips'].mean().reset_index() # Plot count ETA by hour fig = px.line( agg_trips_per_hour, x='Hour', y=f'{trip_agg.title()} number of Trips') return fig with ui.nav_panel("Mapping dataset locations"): with ui.card(full_screen=True): ui.card_header( "Map of locations in the train dataset") @render_plotly def location_map(): country, geojson, data = get_country_geojson() data['country'] = country fig = px.scatter_geo( data, locations='country', hover_name='country', geojson=geojson, fitbounds='geojson', ) # Add longitude and latitude points fig.add_scattergeo( lon=data['longitude'], lat=data['latitude'], mode='markers', marker=dict( color=BRANDCOLORS["red"], ), name='Locations in dataset' ) # Add annotation to the map fig.add_annotation( text=f"{country}", showarrow=False, font=dict(size=18), align="center" ) fig.update_layout( title=f'Dataset locations in {country}', geo_scope='africa' ) return fig value = "Model Explainer" with ui.accordion_panel(title=ui.strong(value), value=value): with ui.navset_card_pill(id="model_explainer"): with ui.nav_panel("Model Explainer..."): with ui.card(full_screen=True): ui.card_header("Coming Soon...") ui.h3("Models") @render.ui def all_models(): return ui.tags.ul( [ui.tags.li(item) for item in ALL_MODELS] ) # ui.include_css("styles.css") # -------------------------------------------------------- # Reactive calculations and effects # -------------------------------------------------------- @reactive.calc def train_data(): trip_distances = input.trip_distance() idx1 = train_df.trip_distance.between( trip_distances[0], trip_distances[1]) eta = input.eta() idx2 = train_df.eta.between(eta[0], eta[1]) return train_df[idx1 & idx2] @reactive.calc def daily_weather_eta_df(): # Select relevant columns from the training DataFrame time_eta_df = train_data()[['timestamp', 'eta']] # Extract the date from the timestamp time_eta_df = time_eta_df.assign( date=pd.to_datetime(time_eta_df['timestamp'].dt.date)) # Prepare daily aggregated ETA data daily_eta_df = ( time_eta_df # Remove the 'timestamp' column as it's no longer needed .drop(columns=['timestamp']) # Set 'date' as the index for resampling .set_index('date') # Resample the data on a daily frequency .resample('D') .median() # Compute the median ETA for each day .reset_index() # Reset the index to include 'date' as a column ) # Merge the daily ETA data with the weather data return ( pd.merge(daily_eta_df, weather_df, left_on='date', right_on='date') .drop(columns=['date']) ) @reactive.calc def top_bottom_location(): # Group by origin locations and count occurrences origin_counts = train_data().groupby(['origin_lat', 'origin_lon'])[ 'origin_lon'].count().reset_index(name='count') # Sort by count in descending order top_origin = origin_counts.nlargest(10, columns=['count']) bottom_origin = origin_counts.nsmallest(10, columns=['count']) # Group by destination locations and count occurrences destination_counts = train_data().groupby(['destination_lat', 'destination_lon'])[ 'destination_lon'].count().reset_index(name='count') # Sort by count in descending order top_dest = destination_counts.nlargest(10, columns=['count']) bottom_dest = destination_counts.nsmallest(10, columns=['count']) return [(top_origin, top_dest), (bottom_origin, bottom_dest)] @reactive.effect @reactive.event(input.reset) def _(): ui.update_slider("trip_distance", value=trip_distance_rng) ui.update_slider("eta", value=eta_rng)