File size: 6,198 Bytes
f9305b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""Contains helper functions to tidy/format data and refactor bigger components on screen."""

import pandas as pd
import vizro.models as vm
from vizro.figures import kpi_card_reference

from .config import NULL_VALUE, REGION_MAPPING, SELECTED_CUSTOMER_COLUMNS


def shorten_product_name(name, n_words=4):
    """Truncate a product label to at most its first ``n_words`` words.

    The value is converted with ``str`` first, split on runs of whitespace, and
    the kept words are re-joined with single spaces.
    """
    leading_words = str(name).split()[:n_words]
    return " ".join(leading_words)


def tidy_orders_data(orders: pd.DataFrame) -> pd.DataFrame:
    """Tidy and filter the orders data frame and create additional columns.

    Rows with a missing ``Category`` are removed, as are rows whose ``Order Date``
    is missing or cannot be parsed with the ``%Y-%m-%d`` format.

    Args:
        orders: Raw orders data. The code reads the columns ``Category``,
            ``Order Date``, ``Shipping Address State``, ``Purchase Price Per Unit``,
            ``Quantity`` and ``Title``.

    Returns:
        A new data frame with ``Order Date`` converted to datetime, missing
        ``Shipping Address State`` values replaced by ``NULL_VALUE`` and the derived
        columns ``Region``, ``Order_Value``, ``Short_Title``, ``Year``, ``Month_Day``
        and ``Month`` appended.
    """
    # Remove rows where 'Category' is missing
    orders = orders[orders["Category"].notna()]

    # Convert to correct data types *before* dropping missing dates: values that fail
    # to parse are coerced to NaT and must be removed as well. A leftover NaT would turn
    # `.dt.year` into floats, so `Year` would read "2021.0"/"nan" instead of "2021".
    # `assign` returns a new frame, so the filtered selection above is never written to.
    orders = orders.assign(
        **{"Order Date": pd.to_datetime(orders["Order Date"], format="%Y-%m-%d", errors="coerce")}
    ).dropna(subset=["Order Date"])

    # Create new columns
    order_date = orders["Order Date"]
    orders = orders.assign(
        Region=orders["Shipping Address State"].map(REGION_MAPPING).fillna(NULL_VALUE),
        Order_Value=orders["Purchase Price Per Unit"] * orders["Quantity"],
        Short_Title=orders["Title"].apply(shorten_product_name),
        Year=order_date.dt.year.astype(str),
        Month_Day=order_date.dt.strftime("%b-%d"),
        Month=order_date.dt.month,
    )

    # Fill null values (done after `Region` is derived so unmapped states still map to NULL_VALUE there)
    orders["Shipping Address State"] = orders["Shipping Address State"].fillna(NULL_VALUE)
    return orders


def create_kpi_data(orders):
    """Aggregate the KPIs per year and reshape them into a wide, single-index table.

    Per ``Year`` the function sums ``Order_Value`` and ``Quantity`` and counts the
    distinct ``Survey ResponseID`` values, derives the average unit price and the
    order value in millions, and finally pivots the result so that every KPI/year
    combination becomes its own column named ``"<KPI>_<Year>"``.
    """
    yearly = (
        orders.groupby("Year")
        .agg(
            **{
                "Total order value": ("Order_Value", "sum"),
                "Number of customers": ("Survey ResponseID", pd.Series.nunique),
                "Total units ordered": ("Quantity", "sum"),
            }
        )
        .reset_index()
    )

    yearly = yearly.assign(
        **{
            "Avg product unit price": yearly["Total order value"] / yearly["Total units ordered"],
            "Total order value mil": yearly["Total order value"] / 1_000_000,
            # Constant pivot key so that all years end up in the same row.
            "index": 0,
        }
    )

    kpi_names = [
        "Total order value mil",
        "Number of customers",
        "Total units ordered",
        "Avg product unit price",
    ]
    wide = yearly.pivot(index="index", columns="Year", values=kpi_names)

    # Flatten the (KPI, Year) column MultiIndex into plain "<KPI>_<Year>" labels.
    wide.columns = [f"{kpi}_{year}" for kpi, year in wide.columns]
    return wide


def create_kpi_container(data_frame: pd.DataFrame, id: str, layout: vm.Layout):
    """Build a reusable container holding the four KPI reference cards.

    Args:
        data_frame: Data passed to every ``kpi_card_reference``; it is addressed through
            the ``"<KPI>_2021"`` (value) and ``"<KPI>_2020"`` (reference) columns.
        id: Suffix appended to ``"kpi-container-"`` to form the container id.
        layout: Layout for the container; a falsy value is passed on as ``None``.

    Returns:
        The configured ``vm.Container``.
    """
    # One keyword set per card, in display order.
    card_specs = [
        {
            "value_column": "Total order value mil_2021",
            "reference_column": "Total order value mil_2020",
            "title": "Total order value",
            "value_format": "${value:.2f}M",
            "reference_format": "{delta_relative:+.1%} vs. LY (${reference:.2f}M)",
            "icon": "payments",
        },
        {
            "value_column": "Number of customers_2021",
            "reference_column": "Number of customers_2020",
            "title": "No. of customers",
            "value_format": "{value:,.0f}",
            "reference_format": "{delta_relative:+.1%} vs. LY ({reference:,.0f})",
            "icon": "groups",
        },
        {
            "value_column": "Total units ordered_2021",
            "reference_column": "Total units ordered_2020",
            "title": "Total units ordered",
            "value_format": "{value:,.0f}",
            "reference_format": "{delta_relative:+.1%} vs. LY ({reference:,.0f})",
            "icon": "production_quantity_limits",
        },
        {
            "value_column": "Avg product unit price_2021",
            "reference_column": "Avg product unit price_2020",
            "title": "Avg. unit price",
            "value_format": "${value:.2f}",
            "reference_format": "{delta_relative:+.1%} vs. LY (${reference:.2f})",
            "icon": "price_change",
        },
    ]
    kpi_cards = [vm.Figure(figure=kpi_card_reference(data_frame, **spec)) for spec in card_specs]

    return vm.Container(
        id=f"kpi-container-{id}",
        title="",
        layout=layout or None,
        components=kpi_cards,
    )


def create_customer_df(orders_cy: pd.DataFrame, survey: pd.DataFrame):
    """Aggregate orders per customer and attach the survey columns.

    Orders are grouped by year, customer (``Survey ResponseID``), region and shipping
    state; the survey data is left-joined on ``Survey ResponseID``; the result is cut
    down to ``SELECTED_CUSTOMER_COLUMNS``, renamed, and extended with derived metrics.
    """
    group_keys = ["Year", "Survey ResponseID", "Region", "Shipping Address State"]
    aggregations = {
        "Order_Value": "sum",
        "Category": pd.Series.nunique,
        "ASIN/ISBN (Product Code)": pd.Series.nunique,
        "Order Date": pd.Series.nunique,
        "Quantity": "sum",
    }
    per_customer = orders_cy.groupby(group_keys).agg(aggregations).reset_index()
    with_survey = per_customer.merge(survey, on="Survey ResponseID", how="left")

    # Keep only the configured columns and give the aggregates descriptive names
    readable_names = {
        "Order_Value": "Total order value",
        "Category": "Number of unique categories",
        "ASIN/ISBN (Product Code)": "Number of unique products",
        "Order Date": "Number of unique order dates",
        "Quantity": "Total units ordered",
    }
    customers = with_survey[SELECTED_CUSTOMER_COLUMNS].rename(columns=readable_names)

    # Derived metrics; "Quintiles" holds the integer bin codes returned by qcut (labels=False)
    total_value = customers["Total order value"]
    return customers.assign(
        **{
            "Avg unit price": total_value / customers["Total units ordered"],
            "Avg order value": total_value / customers["Number of unique order dates"],
            "Quintiles": pd.qcut(total_value, 5, labels=False),
        }
    )