Spaces:
Running
Running
File size: 6,198 Bytes
f9305b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
"""Contains helper functions to tidy/format data and refactor bigger components on screen."""
import pandas as pd
import vizro.models as vm
from vizro.figures import kpi_card_reference
from .config import NULL_VALUE, REGION_MAPPING, SELECTED_CUSTOMER_COLUMNS
def shorten_product_name(name, n_words=4):
    """Return a product label truncated to its first ``n_words`` whitespace-separated words.

    ``name`` is converted with ``str()`` first, so non-string values are accepted.
    Runs of whitespace are collapsed to single spaces in the result.
    """
    leading_tokens = str(name).split()[:n_words]
    return " ".join(leading_tokens)
def tidy_orders_data(orders: pd.DataFrame) -> pd.DataFrame:
    """Tidy and filter the orders data frame and derive additional columns.

    Rows without a ``Category`` and rows whose ``Order Date`` is missing or cannot be
    parsed as ``%Y-%m-%d`` are dropped. The input frame is not modified.

    Args:
        orders: Raw orders data. Reads the columns ``Category``, ``Order Date``,
            ``Shipping Address State``, ``Purchase Price Per Unit``, ``Quantity``
            and ``Title``.

    Returns:
        A new data frame with ``Order Date`` converted to datetime, missing
        ``Shipping Address State`` values replaced by ``NULL_VALUE``, and the added
        columns ``Region``, ``Order_Value``, ``Short_Title``, ``Year``, ``Month_Day``
        and ``Month``.
    """
    # Remove rows where 'Category' is missing
    orders = orders[orders["Category"].notna()]

    # Convert to correct data types. Parsing happens *before* the null filter:
    # errors="coerce" turns unparseable dates into NaT, and a single NaT would make
    # `.dt.year` a float column, so `Year` would read "2021.0"/"nan" instead of "2021".
    parsed_dates = pd.to_datetime(orders["Order Date"], format="%Y-%m-%d", errors="coerce")
    # `assign` returns a new frame, so the filtered slice above is never written to in place.
    orders = orders.assign(**{"Order Date": parsed_dates}).dropna(subset=["Order Date"])

    # Create new columns
    order_dates = orders["Order Date"]
    orders = orders.assign(
        Region=orders["Shipping Address State"].map(REGION_MAPPING).fillna(NULL_VALUE),
        Order_Value=orders["Purchase Price Per Unit"] * orders["Quantity"],
        Short_Title=orders["Title"].apply(shorten_product_name),
        Year=order_dates.dt.year.astype(str),
        Month_Day=order_dates.dt.strftime("%b-%d"),
        Month=order_dates.dt.month,
    )

    # Fill null values
    orders["Shipping Address State"] = orders["Shipping Address State"].fillna(NULL_VALUE)
    return orders
def create_kpi_data(orders):
    """Aggregate orders per year and reshape the KPIs into a single wide row.

    Reads the columns ``Year``, ``Order_Value``, ``Survey ResponseID`` and ``Quantity``.

    Returns:
        A one-row data frame (index value ``0``) whose columns are named
        ``"<KPI>_<Year>"`` for the KPIs "Total order value mil",
        "Number of customers", "Total units ordered" and "Avg product unit price".
    """
    # One row per year holding the three base totals, already under their KPI names.
    yearly = (
        orders.groupby("Year")
        .agg(
            **{
                "Total order value": ("Order_Value", "sum"),
                "Number of customers": ("Survey ResponseID", pd.Series.nunique),
                "Total units ordered": ("Quantity", "sum"),
            }
        )
        .reset_index()
    )

    # Derived KPIs, plus a constant "index" column so the pivot collapses to one row.
    yearly = yearly.assign(
        **{
            "Avg product unit price": yearly["Total order value"] / yearly["Total units ordered"],
            "Total order value mil": yearly["Total order value"] / 1000000,
            "index": 0,
        }
    )

    kpi_names = [
        "Total order value mil",
        "Number of customers",
        "Total units ordered",
        "Avg product unit price",
    ]
    wide = yearly.pivot(index="index", columns="Year", values=kpi_names)

    # Flatten the (KPI, year) column pairs into plain "<KPI>_<year>" labels.
    wide.columns = [f"{kpi_name}_{yr}" for kpi_name, yr in wide.columns]
    return wide
def create_kpi_container(data_frame: pd.DataFrame, id: str, layout: vm.Layout):
    """Build a container holding four KPI reference cards.

    Each card shows a ``..._2021`` column of ``data_frame`` as its value and the
    matching ``..._2020`` column as its reference.

    Args:
        data_frame: Frame passed to every ``kpi_card_reference`` call; the column
            names used below must be present in it.
        id: Suffix for the container id (``"kpi-container-<id>"``).
        layout: Layout for the container; a falsy value is passed on as ``None``.

    Returns:
        The configured ``vm.Container``.
    """
    # Keyword arguments for each card, in display order.
    card_specs = [
        {
            "value_column": "Total order value mil_2021",
            "reference_column": "Total order value mil_2020",
            "title": "Total order value",
            "value_format": "${value:.2f}M",
            "reference_format": "{delta_relative:+.1%} vs. LY (${reference:.2f}M)",
            "icon": "payments",
        },
        {
            "value_column": "Number of customers_2021",
            "reference_column": "Number of customers_2020",
            "title": "No. of customers",
            "value_format": "{value:,.0f}",
            "reference_format": "{delta_relative:+.1%} vs. LY ({reference:,.0f})",
            "icon": "groups",
        },
        {
            "value_column": "Total units ordered_2021",
            "reference_column": "Total units ordered_2020",
            "title": "Total units ordered",
            "value_format": "{value:,.0f}",
            "reference_format": "{delta_relative:+.1%} vs. LY ({reference:,.0f})",
            "icon": "production_quantity_limits",
        },
        {
            "value_column": "Avg product unit price_2021",
            "reference_column": "Avg product unit price_2020",
            "title": "Avg. unit price",
            "value_format": "${value:.2f}",
            "reference_format": "{delta_relative:+.1%} vs. LY (${reference:.2f})",
            "icon": "price_change",
        },
    ]
    kpi_cards = [vm.Figure(figure=kpi_card_reference(data_frame, **spec)) for spec in card_specs]

    return vm.Container(
        id=f"kpi-container-{id}",
        title="",
        layout=layout or None,
        components=kpi_cards,
    )
def create_customer_df(orders_cy: pd.DataFrame, survey: pd.DataFrame):
    """Aggregate orders per customer and attach the survey columns.

    Orders are grouped by ``Year``, ``Survey ResponseID``, ``Region`` and
    ``Shipping Address State``, left-joined with ``survey`` on ``Survey ResponseID``,
    reduced to ``SELECTED_CUSTOMER_COLUMNS`` and given readable column names.

    Returns:
        The customer data frame with the extra columns ``Avg unit price``,
        ``Avg order value`` and ``Quintiles`` (integer bin codes from a five-way
        ``pd.qcut`` on ``Total order value``).
    """
    group_keys = ["Year", "Survey ResponseID", "Region", "Shipping Address State"]
    aggregations = {
        "Order_Value": "sum",
        "Category": pd.Series.nunique,
        "ASIN/ISBN (Product Code)": pd.Series.nunique,
        "Order Date": pd.Series.nunique,
        "Quantity": "sum",
    }
    per_customer = orders_cy.groupby(group_keys).agg(aggregations).reset_index()
    with_survey = per_customer.merge(survey, how="left", on="Survey ResponseID")

    # Filter and rename columns for better understanding
    readable_names = {
        "Order_Value": "Total order value",
        "Category": "Number of unique categories",
        "ASIN/ISBN (Product Code)": "Number of unique products",
        "Order Date": "Number of unique order dates",
        "Quantity": "Total units ordered",
    }
    customers = with_survey[SELECTED_CUSTOMER_COLUMNS].rename(columns=readable_names)

    # Create new metrics
    total_value = customers["Total order value"]
    return customers.assign(
        **{
            "Avg unit price": total_value / customers["Total units ordered"],
            "Avg order value": total_value / customers["Number of unique order dates"],
            "Quintiles": pd.qcut(total_value, 5, labels=False),
        }
    )
|