# li-nguyen's picture
# Add final app
# f9305b4
"""Contains helper functions to tidy/format data and refactor bigger components on screen."""
import pandas as pd
import vizro.models as vm
from vizro.figures import kpi_card_reference
from .config import NULL_VALUE, REGION_MAPPING, SELECTED_CUSTOMER_COLUMNS
def shorten_product_name(name, n_words=4):
    """Return only the first ``n_words`` whitespace-separated words of a product label.

    The value is passed through ``str()`` first, so non-string inputs are accepted
    and shortened based on their string representation.
    """
    leading_words = str(name).split()[:n_words]
    return " ".join(leading_words)
def tidy_orders_data(orders: pd.DataFrame) -> pd.DataFrame:
    """Tidy and filter the orders data frame and add derived columns.

    Rows without a ``Category`` or without a usable ``Order Date`` are removed.
    The caller's data frame is not modified; a new data frame is returned.

    Args:
        orders: Raw orders with at least the columns ``Category``, ``Order Date``,
            ``Shipping Address State``, ``Purchase Price Per Unit``, ``Quantity``
            and ``Title``.

    Returns:
        The filtered data frame with ``Order Date`` converted to datetime, the
        extra columns ``Region``, ``Order_Value``, ``Short_Title``, ``Year``,
        ``Month_Day`` and ``Month``, and missing ``Shipping Address State``
        values replaced by ``NULL_VALUE``.
    """
    # Remove rows where 'Category' is missing
    orders = orders[orders["Category"].notna()]

    # Convert to correct data types. `assign` returns a new frame, so nothing is
    # written into the filtered slice above (avoids SettingWithCopyWarning).
    orders = orders.assign(
        **{"Order Date": pd.to_datetime(orders["Order Date"], format="%Y-%m-%d", errors="coerce")}
    )

    # Drop missing dates only AFTER parsing: errors="coerce" turns unparseable
    # values into NaT, and a single NaT would make `.dt.year` a float column,
    # so `Year` would read "2021.0"/"nan" instead of "2021".
    orders = orders.dropna(subset=["Order Date"])

    # Create new columns
    order_date = orders["Order Date"]
    orders = orders.assign(
        Region=orders["Shipping Address State"].map(REGION_MAPPING).fillna(NULL_VALUE),
        Order_Value=orders["Purchase Price Per Unit"] * orders["Quantity"],
        Short_Title=orders["Title"].apply(shorten_product_name),
        Year=order_date.dt.year.astype(str),
        Month_Day=order_date.dt.strftime("%b-%d"),
        Month=order_date.dt.month,
    )

    # Fill null values (done after `Region` is derived so unmapped states still
    # go through REGION_MAPPING with their original missing value)
    orders["Shipping Address State"] = orders["Shipping Address State"].fillna(NULL_VALUE)
    return orders
def create_kpi_data(orders):
    """Aggregate yearly KPIs and reshape them into a single wide row.

    Per ``Year`` the function computes the summed ``Order_Value``, the number of
    distinct ``Survey ResponseID`` values and the summed ``Quantity``, then derives
    the average unit price and the order value in millions.

    Returns:
        A one-row data frame (index value ``0``) whose columns are named
        ``"<KPI>_<Year>"``, e.g. ``"Total units ordered_2021"``.
    """
    kpi_labels = {
        "Order_Value": "Total order value",
        "Survey ResponseID": "Number of customers",
        "Quantity": "Total units ordered",
    }
    reported_kpis = [
        "Total order value mil",
        "Number of customers",
        "Total units ordered",
        "Avg product unit price",
    ]

    yearly = (
        orders.groupby("Year")
        .agg({"Order_Value": "sum", "Survey ResponseID": pd.Series.nunique, "Quantity": "sum"})
        .reset_index()
        .rename(columns=kpi_labels)
        .assign(
            **{
                "Avg product unit price": lambda df: df["Total order value"] / df["Total units ordered"],
                "Total order value mil": lambda df: df["Total order value"] / 1000000,
                # Constant key so that the pivot collapses all years into one row
                "index": 0,
            }
        )
    )

    wide = yearly.pivot(index="index", columns="Year", values=reported_kpis)
    # Flatten the (KPI, year) column pairs into plain "<KPI>_<year>" names
    wide.columns = [f"{metric}_{period}" for metric, period in wide.columns]
    return wide
def create_kpi_container(data_frame: pd.DataFrame, id: str, layout: vm.Layout):
    """Build the container that holds the four year-over-year KPI cards.

    Every card is a ``kpi_card_reference`` figure showing a ``*_2021`` column of
    ``data_frame`` as the value and the matching ``*_2020`` column as reference.

    Args:
        data_frame: Data frame providing the ``"<KPI>_2021"`` and ``"<KPI>_2020"``
            columns listed in the card settings below.
        id: Suffix of the container id (``kpi-container-<id>``). The name shadows
            the builtin but is part of the signature callers use.
        layout: Layout for the container; a falsy value is passed on as ``None``.

    Returns:
        The configured ``vm.Container``.
    """
    # One entry per card, in display order; keys are kpi_card_reference keyword arguments.
    card_settings = [
        {
            "value_column": "Total order value mil_2021",
            "reference_column": "Total order value mil_2020",
            "title": "Total order value",
            "value_format": "${value:.2f}M",
            "reference_format": "{delta_relative:+.1%} vs. LY (${reference:.2f}M)",
            "icon": "payments",
        },
        {
            "value_column": "Number of customers_2021",
            "reference_column": "Number of customers_2020",
            "title": "No. of customers",
            "value_format": "{value:,.0f}",
            "reference_format": "{delta_relative:+.1%} vs. LY ({reference:,.0f})",
            "icon": "groups",
        },
        {
            "value_column": "Total units ordered_2021",
            "reference_column": "Total units ordered_2020",
            "title": "Total units ordered",
            "value_format": "{value:,.0f}",
            "reference_format": "{delta_relative:+.1%} vs. LY ({reference:,.0f})",
            "icon": "production_quantity_limits",
        },
        {
            "value_column": "Avg product unit price_2021",
            "reference_column": "Avg product unit price_2020",
            "title": "Avg. unit price",
            "value_format": "${value:.2f}",
            "reference_format": "{delta_relative:+.1%} vs. LY (${reference:.2f})",
            "icon": "price_change",
        },
    ]
    kpi_cards = [vm.Figure(figure=kpi_card_reference(data_frame, **settings)) for settings in card_settings]

    return vm.Container(
        id=f"kpi-container-{id}",
        title="",
        layout=layout or None,
        components=kpi_cards,
    )
def create_customer_df(orders_cy: pd.DataFrame, survey: pd.DataFrame):
    """Aggregate orders per customer and attach the survey columns.

    Orders are grouped by year, survey response id, region and shipping state.
    The survey data is left-joined on ``Survey ResponseID`` and the result is
    reduced to ``SELECTED_CUSTOMER_COLUMNS``.

    Args:
        orders_cy: Tidied orders containing the grouping and aggregated columns.
        survey: Survey data with one ``Survey ResponseID`` column to join on.

    Returns:
        One row per group with renamed aggregate columns plus ``Avg unit price``,
        ``Avg order value`` and ``Quintiles`` (integer bin index from ``pd.qcut``
        over ``Total order value``).
    """
    group_keys = ["Year", "Survey ResponseID", "Region", "Shipping Address State"]
    aggregations = {
        "Order_Value": "sum",
        "Category": pd.Series.nunique,
        "ASIN/ISBN (Product Code)": pd.Series.nunique,
        "Order Date": pd.Series.nunique,
        "Quantity": "sum",
    }
    # Filter and rename columns for better understanding
    readable_names = {
        "Order_Value": "Total order value",
        "Category": "Number of unique categories",
        "ASIN/ISBN (Product Code)": "Number of unique products",
        "Order Date": "Number of unique order dates",
        "Quantity": "Total units ordered",
    }

    per_customer = orders_cy.groupby(group_keys).agg(aggregations).reset_index()
    with_survey = per_customer.merge(survey, on="Survey ResponseID", how="left")
    customers = with_survey[SELECTED_CUSTOMER_COLUMNS].rename(columns=readable_names)

    # Create new metrics
    total_value = customers["Total order value"]
    customers["Avg unit price"] = total_value / customers["Total units ordered"]
    customers["Avg order value"] = total_value / customers["Number of unique order dates"]
    customers["Quintiles"] = pd.qcut(total_value, 5, labels=False)
    return customers