Spaces:
Sleeping
Sleeping
Add final app
Browse files- .gitignore +144 -0
- Dockerfile +18 -0
- README.md +67 -5
- app.py +448 -0
- assets/css/custom.css +73 -0
- assets/favicon.ico +0 -0
- assets/images/app.svg +9 -0
- assets/images/logo.svg +38 -0
- images/customer-overview.png +0 -0
- images/kpi-overview.png +0 -0
- images/product-overview.png +0 -0
- images/regional-comparison.png +0 -0
- images/regional-map.png +0 -0
- requirements.txt +2 -0
- utils/__init__.py +8 -0
- utils/charts.py +273 -0
- utils/config.py +29 -0
- utils/helper.py +163 -0
.gitignore
ADDED
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# npm node_modules
|
2 |
+
node_modules/
|
3 |
+
|
4 |
+
# Ipynb
|
5 |
+
.ipynb_checkpoints
|
6 |
+
*/.ipynb_checkpoints/*
|
7 |
+
|
8 |
+
# IPython
|
9 |
+
profile_default/
|
10 |
+
ipython_config.py
|
11 |
+
|
12 |
+
# Byte-compiled / optimized / DLL files
|
13 |
+
__pycache__/
|
14 |
+
*.py[cod]
|
15 |
+
*$py.class
|
16 |
+
|
17 |
+
# C extensions
|
18 |
+
*.so
|
19 |
+
|
20 |
+
# Distribution / packaging
|
21 |
+
.Python
|
22 |
+
build/
|
23 |
+
develop-eggs/
|
24 |
+
dist/
|
25 |
+
downloads/
|
26 |
+
eggs/
|
27 |
+
.eggs/
|
28 |
+
lib/
|
29 |
+
lib64/
|
30 |
+
parts/
|
31 |
+
sdist/
|
32 |
+
var/
|
33 |
+
wheels/
|
34 |
+
share/python-wheels/
|
35 |
+
*.egg-info/
|
36 |
+
.installed.cfg
|
37 |
+
*.egg
|
38 |
+
MANIFEST
|
39 |
+
|
40 |
+
# macOS
|
41 |
+
*.DS_Store
|
42 |
+
.DS_Store
|
43 |
+
.AppleDouble
|
44 |
+
.LSOverride
|
45 |
+
.Trashes
|
46 |
+
|
47 |
+
# PyInstaller
|
48 |
+
*.manifest
|
49 |
+
*.spec
|
50 |
+
|
51 |
+
# Installer logs
|
52 |
+
pip-log.txt
|
53 |
+
pip-delete-this-directory.txt
|
54 |
+
|
55 |
+
# Unit test / coverage reports
|
56 |
+
htmlcov/
|
57 |
+
.tox/
|
58 |
+
.nox/
|
59 |
+
.coverage
|
60 |
+
.coverage.*
|
61 |
+
.cache
|
62 |
+
nosetests.xml
|
63 |
+
coverage.xml
|
64 |
+
*.cover
|
65 |
+
*.py,cover
|
66 |
+
.hypothesis/
|
67 |
+
.pytest_cache/
|
68 |
+
cover/
|
69 |
+
|
70 |
+
# Translations
|
71 |
+
*.mo
|
72 |
+
*.pot
|
73 |
+
|
74 |
+
# Django
|
75 |
+
*.log
|
76 |
+
local_settings.py
|
77 |
+
db.sqlite3
|
78 |
+
db.sqlite3-journal
|
79 |
+
|
80 |
+
# Flask
|
81 |
+
instance/
|
82 |
+
.webassets-cache
|
83 |
+
|
84 |
+
# Scrapy
|
85 |
+
.scrapy
|
86 |
+
|
87 |
+
# PyBuilder
|
88 |
+
.pybuilder/
|
89 |
+
target/
|
90 |
+
|
91 |
+
# IntelliJ
|
92 |
+
.idea/
|
93 |
+
*.iml
|
94 |
+
out/
|
95 |
+
.idea_modules/
|
96 |
+
|
97 |
+
# Vscode
|
98 |
+
.vscode/
|
99 |
+
|
100 |
+
# PEP 582
|
101 |
+
__pypackages__/
|
102 |
+
|
103 |
+
# Celery
|
104 |
+
celerybeat-schedule
|
105 |
+
celerybeat.pid
|
106 |
+
|
107 |
+
# SageMath
|
108 |
+
*.sage.py
|
109 |
+
|
110 |
+
# Environments
|
111 |
+
.env
|
112 |
+
.venv
|
113 |
+
env/
|
114 |
+
venv/
|
115 |
+
ENV/
|
116 |
+
env.bak/
|
117 |
+
venv.bak/
|
118 |
+
|
119 |
+
# Spyder
|
120 |
+
.spyderproject
|
121 |
+
.spyproject
|
122 |
+
|
123 |
+
# Rope
|
124 |
+
.ropeproject
|
125 |
+
|
126 |
+
# Mkdocs
|
127 |
+
/site
|
128 |
+
|
129 |
+
# Mypy
|
130 |
+
.mypy_cache/
|
131 |
+
.dmypy.json
|
132 |
+
dmypy.json
|
133 |
+
|
134 |
+
# Pyre
|
135 |
+
.pyre/
|
136 |
+
|
137 |
+
# Pytype
|
138 |
+
.pytype/
|
139 |
+
|
140 |
+
# Cython
|
141 |
+
cython_debug/
|
142 |
+
|
143 |
+
# Ruff
|
144 |
+
.ruff_cache/
|
Dockerfile
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.12

# Create an unprivileged user with uid 1000 and switch to it, so that neither the
# build steps below nor the running container execute as root.
RUN useradd -m -u 1000 user
USER user
# pip installs for a non-root user go to ~/.local; expose its bin directory so the
# `gunicorn` entry point used in CMD is found.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies before copying the sources so this layer is cached
# until requirements.txt changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

EXPOSE 7860

# Serve the WSGI object `server` from app.py on the exposed port.
CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:server"]
|
README.md
CHANGED
@@ -1,11 +1,73 @@
|
|
1 |
---
|
2 |
-
title: Dash
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
license: apache-2.0
|
|
|
9 |
---
|
10 |
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Dash summer app challenge 2024
|
3 |
+
emoji: 📊
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: blue
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
license: apache-2.0
|
9 |
+
short_description: Entry for the dash summer app challenge 2024
|
10 |
---
|
11 |
|
12 |
+
# 🌞 Dash summer app challenge 2024
|
13 |
+
|
14 |
+
This is my entry for the [Dash summer app challenge 2024](https://community.plotly.com/t/summer-dash-app-building-challenge-amazon-purchases/84396)
|
15 |
+
where I have visualized Amazon purchase history data.
|
16 |
+
|
17 |
+
**Note:** The dashboard focuses on data from 2021 only. Observations with missing product information have been removed for the purpose of the analysis.
|
18 |
+
|
19 |
+
The dashboard showcases how purchase data can be visualized and is divided into four main pages:
|
20 |
+
|
21 |
+
- **Purchase Overview:** Displays key metrics and top performers across four major categories (product category, product items, region, states), with year-to-year comparisons.
|
22 |
+
- **Product Overview:** Highlights the top N best-performing products, allowing users to explore the product hierarchy and identify seasonal patterns. Users can also select the top N for more detailed analysis.
|
23 |
+
- **Regional Overview:** Provides a regional comparison of key metrics and top performers across the four major categories. Users can drill down from region to states to analyze product performance regionally.
|
24 |
+
- **Customer Overview:** Compares various key metrics across different socio-economic categories (e.g., age group, education, income group).
|
25 |
+
|
26 |
+
**Created by:** [Huong Li Nguyen](https://github.com/huong-li-nguyen)
|
27 |
+
|
28 |
+
---
|
29 |
+
|
30 |
+
### 🗓️ Data
|
31 |
+
|
32 |
+
A special thanks to the authors mentioned below for supplying the data set. The original data set can be accessed
|
33 |
+
[here](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/YGLYDY#).
|
34 |
+
|
35 |
+
- **Authors**: Alex Berke, Dan Calacci, Robert Mahari, Takahiro Yabe, Kent Larson, and Sandy Pentland
|
36 |
+
- **Publisher**: Harvard Dataverse
|
37 |
+
- **Title**: Open e-commerce 1.0: Five years of crowdsourced U.S. Amazon purchase histories with user demographics
|
38 |
+
- **Publisher**: Harvard Dataverse
|
39 |
+
- **Year**: 2023
|
40 |
+
- **Version**: V1
|
41 |
+
- **URL**: https://doi.org/10.7910/DVN/YGLYDY
|
42 |
+
|
43 |
+
### 📊 Plotly/Dash resources
|
44 |
+
|
45 |
+
- [Bar charts](https://plotly.com/python/bar-charts/)
|
46 |
+
- [Line charts](https://plotly.com/python/line-charts/)
|
47 |
+
- [Density heatmap](https://plotly.com/python/heatmaps/)
|
48 |
+
- [Choropleth maps](https://plotly.com/python/choropleth-maps/)
|
49 |
+
- [Dash AgGrid](https://dash.plotly.com/dash-ag-grid)
|
50 |
+
|
51 |
+
### 🚀 Vizro features applied
|
52 |
+
|
53 |
+
- [Vizro tutorial on pages, layouts and dashboards](https://vizro.readthedocs.io/en/stable/pages/tutorials/explore-components/)
|
54 |
+
- [Graphs](https://vizro.readthedocs.io/en/stable/pages/user-guides/graph/)
|
55 |
+
- [Tables](https://vizro.readthedocs.io/en/stable/pages/user-guides/table/)
|
56 |
+
- [KPI cards](https://vizro.readthedocs.io/en/stable/pages/user-guides/figure/)
|
57 |
+
- [Actions](https://vizro.readthedocs.io/en/stable/pages/user-guides/actions/)
|
58 |
+
- [Custom components](https://vizro.readthedocs.io/en/stable/pages/user-guides/custom-components/)
|
59 |
+
- [Custom charts](https://vizro.readthedocs.io/en/stable/pages/user-guides/custom-charts/)
|
60 |
+
- [Custom CSS](https://vizro.readthedocs.io/en/stable/pages/user-guides/assets/)
|
61 |
+
|
62 |
+
### 🖥️ App demo
|
63 |
+
|
64 |
+
<img src="./images/dash-summer-app.gif" alt="Gif to app" width="600">
|
65 |
+
|
66 |
+
---
|
67 |
+
|
68 |
+
## How to run the example locally
|
69 |
+
|
70 |
+
1. Install the `requirements.txt` in your environment.
|
71 |
+
2. Download the data files `survey.csv` and `amazon-purchases.csv` from [here](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/YGLYDY#) and place them in a folder called `data`.
|
72 |
+
3. Run the `app.py` file with your environment activated.
|
73 |
+
4. You should now be able to access the app locally via http://127.0.0.1:8050/.
|
app.py
ADDED
@@ -0,0 +1,448 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Contains Vizro app configuration."""
|
2 |
+
|
3 |
+
import dash_bootstrap_components as dbc
|
4 |
+
import pandas as pd
|
5 |
+
import vizro.models as vm
|
6 |
+
from dash import html
|
7 |
+
from utils.charts import COLUMNDEFS, FlexContainer, bar_avg, bar_top_n, choropleth, line, product_seasonality_heatmap
|
8 |
+
from utils.config import CURRENT_YEAR, LAST_YEAR, ORANGE_SEQUENTIAL_PALETTE
|
9 |
+
from utils.helper import create_customer_df, create_kpi_container, create_kpi_data, tidy_orders_data
|
10 |
+
from vizro import Vizro
|
11 |
+
from vizro.actions import filter_interaction
|
12 |
+
from vizro.tables import dash_ag_grid
|
13 |
+
|
14 |
+
# TIDY AND CREATE RELEVANT DATA SETS ---------------------
# Data set below was created by loading original `amazon-purchases.csv` and filtering data on CURRENT_YEAR and LAST_YEAR
orders = pd.read_csv("data/amazon-purchases.csv")
survey = pd.read_csv("data/survey.csv")

# Tidy and filter the data set on CURRENT_YEAR and LAST_YEAR.
# The years are converted to strings before being compared with the "Year" column.
orders = tidy_orders_data(orders)
orders = orders[orders["Year"].isin([str(CURRENT_YEAR), str(LAST_YEAR)])]

# Current-year subset, used by the charts that do not compare against last year.
orders_cy = orders[orders["Year"] == str(CURRENT_YEAR)]

# KPI tables: one for all orders plus one per region (both years are passed in).
kpi_overall_df = create_kpi_data(orders)
kpi_midwest_df = create_kpi_data(orders.query("Region=='Midwest'"))
kpi_northeast_df = create_kpi_data(orders.query("Region=='Northeast'"))
kpi_south_df = create_kpi_data(orders.query("Region=='South'"))
kpi_west_df = create_kpi_data(orders.query("Region=='West'"))

# Customer table built from the current-year orders and the survey data.
customer_df = create_customer_df(orders_cy, survey)
|
30 |
+
|
31 |
+
# CONFIGURE BIGGER COMPONENTS FOR PAGES -------------------
# The overall KPI container uses a 2x2 grid; every regional container uses a
# single-column grid of four rows with 8px gaps.
kpi_overall = create_kpi_container(kpi_overall_df, "overall", vm.Layout(grid=[[0, 1], [2, 3]]))


def _stacked_kpi_layout():
    """Return a new one-column, four-row layout with 8px row and column gaps."""
    return vm.Layout(grid=[[0], [1], [2], [3]], row_gap="8px", col_gap="8px")


kpi_northeast = create_kpi_container(kpi_northeast_df, "northeast", _stacked_kpi_layout())
kpi_midwest = create_kpi_container(kpi_midwest_df, "midwest", _stacked_kpi_layout())
kpi_south = create_kpi_container(kpi_south_df, "south", _stacked_kpi_layout())
kpi_west = create_kpi_container(kpi_west_df, "west", _stacked_kpi_layout())
|
53 |
+
|
54 |
+
|
55 |
+
# (tab title, grouping column) for each "top performers" tab, in display order.
_TOP_PERFORMER_TABS = (
    ("By Category", "Category"),
    ("By Product", "Short_Title"),
    ("By Region", "Region"),
    ("By State", "Shipping Address State"),
)

# Each tab holds one bar chart of current-year order value grouped by the tab's column.
tabs_overall = vm.Tabs(
    tabs=[
        vm.Container(
            title=tab_title,
            components=[
                vm.Graph(
                    figure=bar_top_n(
                        data_frame=orders_cy,
                        y=group_column,
                        x="Order_Value",
                    ),
                )
            ],
        )
        for tab_title, group_column in _TOP_PERFORMER_TABS
    ],
)
|
107 |
+
|
108 |
+
|
109 |
+
# CONFIGURE PAGES -------------------
# Order summary grid: component 0 (KPIs) top-left, component 2 (line chart) bottom-left,
# component 1 (top-performer tabs) spanning both rows of the right column.
_orders_layout = vm.Layout(grid=[[0, 1], [2, 1]], col_gap="40px", row_gap="40px")
_orders_top_performers = vm.Container(title="Top performers 🚀", components=[tabs_overall])
_orders_vs_last_year = vm.Graph(
    figure=line(
        data_frame=orders,
        x="Month_Day",
        y="Order_Value",
        color="Year",
        title="Performance vs. last year (LY) ⏳",
    )
)

page_orders = vm.Page(
    title="Order summary",
    layout=_orders_layout,
    components=[kpi_overall, _orders_top_performers, _orders_vs_last_year],
)
|
127 |
+
|
128 |
+
def _regional_column(region, kpi_container):
    """Build one column of the regional comparison page.

    The column stacks a choropleth restricted to the region, the region's KPI
    container and a top-6 bar chart.

    Args:
        region: Value of the `Region` column to select (e.g. "Midwest"). Its
            lower-cased form is used as the bar chart id prefix ("<region>-bar"),
            which the page-level parameter targets.
        kpi_container: KPI container already built for this region.

    Returns:
        A `FlexContainer` titled with the region name.
    """
    # Select the region's current-year orders once and share them between both charts.
    region_orders = orders_cy.query(f'Region=="{region}"')
    return FlexContainer(
        title=region,
        components=[
            vm.Graph(
                figure=choropleth(
                    data_frame=region_orders,
                    locations="Shipping Address State",
                    color="Order_Value",
                    custom_data=["Shipping Address State"],
                    color_continuous_scale=ORANGE_SEQUENTIAL_PALETTE,
                    show_region_only=True,
                ),
            ),
            kpi_container,
            vm.Graph(
                id=f"{region.lower()}-bar",
                figure=bar_top_n(
                    data_frame=region_orders,
                    y="Short_Title",
                    x="Order_Value",
                    top_n=6,
                    x_visible=False,
                    title="Top performers 🚀",
                ),
            ),
        ],
        classname="flex-container-regional",
    )


# Four side-by-side region columns; a single dropdown switches the grouping
# column of all four bar charts at once.
page_region_comparison = vm.Page(
    title="Regional comparison",
    layout=vm.Layout(grid=[[0, 1, 2, 3]], row_min_height="800px"),
    components=[
        _regional_column("Midwest", kpi_midwest),
        _regional_column("Northeast", kpi_northeast),
        _regional_column("South", kpi_south),
        _regional_column("West", kpi_west),
    ],
    controls=[
        vm.Parameter(
            targets=["south-bar.y", "northeast-bar.y", "west-bar.y", "midwest-bar.y"],
            selector=vm.Dropdown(
                options=[
                    {"value": "Shipping Address State", "label": "State"},
                    {"value": "Category", "label": "Category"},
                    {"value": "Short_Title", "label": "Product item"},
                ],
                title="Change y-axis on top performers:",
                multi=False,
                value="Shipping Address State",
            ),
        ),
    ],
)
|
261 |
+
|
262 |
+
# Regional map page: a choropleth on the left whose filter_interaction action
# targets the bar chart ("regional-bar") on the right.
_region_map_layout = vm.Layout(grid=[[0, 1]])

_region_map_choropleth = vm.Graph(
    figure=choropleth(
        data_frame=orders_cy,
        locations="Shipping Address State",
        color="Order_Value",
        title="Total order value by shipping state <br><sup> ⤵ Click on a state to filter the "
        "charts on the right. Refresh the page to deselect.</sup>",
        custom_data=["Shipping Address State"],
        color_continuous_scale=ORANGE_SEQUENTIAL_PALETTE,
    ),
    actions=[vm.Action(function=filter_interaction(targets=["regional-bar"]))],
)

_region_map_bar = vm.Graph(
    id="regional-bar",
    figure=bar_top_n(data_frame=orders_cy, y="Category", x="Order_Value", title="Top performers 🚀"),
)

# Three column filters, then two parameters that change the bar chart's
# grouping column and the number of bars shown.
_region_map_controls = [
    vm.Filter(column="Region"),
    vm.Filter(column="Shipping Address State"),
    vm.Filter(column="Category"),
    vm.Parameter(
        targets=["regional-bar.y"],
        selector=vm.Dropdown(
            options=[
                {"value": "Shipping Address State", "label": "State"},
                {"value": "Category", "label": "Category"},
                {"value": "Short_Title", "label": "Product item"},
            ],
            title="Change y-axis on bar chart:",
            multi=False,
            value="Category",
        ),
    ),
    vm.Parameter(
        targets=["regional-bar.top_n"],
        selector=vm.Slider(min=10, max=35, step=5, value=15, title="Display top N:"),
    ),
]

page_region_map = vm.Page(
    title="Regional map",
    layout=_region_map_layout,
    components=[_region_map_choropleth, _region_map_bar],
    controls=_region_map_controls,
)
|
310 |
+
|
311 |
+
# Product overview page: the bar chart takes two of five grid columns and the
# seasonality heatmap the remaining three, with no gap between them.
_product_layout = vm.Layout(grid=[[0, 0, 1, 1, 1]], col_gap="0px")

_product_bar = vm.Graph(
    id="product-bar",
    figure=bar_top_n(
        data_frame=orders_cy,
        y="Category",
        x="Order_Value",
        title="Top categories / product items by total order value",
    ),
)

_product_heatmap = vm.Graph(
    id="product-heatmap",
    figure=product_seasonality_heatmap(
        orders_cy, x="Month", y="Category", z="Order_Value", color_continuous_scale=ORANGE_SEQUENTIAL_PALETTE
    ),
)

# Both parameters target the bar chart and the heatmap together, so the two
# charts always use the same product level and the same top N.
_product_controls = [
    vm.Filter(column="Category"),
    vm.Parameter(
        targets=["product-bar.y", "product-heatmap.y"],
        selector=vm.Dropdown(
            options=[{"value": "Category", "label": "Category"}, {"value": "Short_Title", "label": "Product item"}],
            title="Change product level:",
            multi=False,
            value="Category",
        ),
    ),
    vm.Parameter(
        targets=["product-bar.top_n", "product-heatmap.top_n"],
        selector=vm.Slider(min=10, max=35, step=5, value=15, title="Display top N:"),
    ),
    vm.Filter(column="Region"),
    vm.Filter(column="Shipping Address State"),
]

page_product = vm.Page(
    title="Product overview",
    layout=_product_layout,
    components=[_product_bar, _product_heatmap],
    controls=_product_controls,
)
|
350 |
+
|
351 |
+
|
352 |
+
# Customer overview page: a bar chart ("average-bar") above a paginated grid of
# the customer table; two dropdowns switch the chart's x and y columns.
_customer_bar = vm.Graph(
    id="average-bar",
    figure=bar_avg(
        customer_df,
        x="Q-demos-age",
        y="Total order value",
        title="Average KPI numbers across selected categories 📊",
    ),
)

_customer_table = vm.AgGrid(
    figure=dash_ag_grid(customer_df, columnDefs=COLUMNDEFS, dashGridOptions={"pagination": True})
)

_customer_controls = [
    vm.Parameter(
        targets=["average-bar.x"],
        selector=vm.Dropdown(
            options=[
                {"value": "Q-demos-age", "label": "Age group"},
                {"value": "Q-demos-education", "label": "Education level"},
                {"value": "Q-demos-income", "label": "Income group"},
                {"value": "Q-demos-gender", "label": "Gender"},
            ],
            title="Change category on x-axis",
            multi=False,
            value="Q-demos-age",
        ),
    ),
    vm.Parameter(
        targets=["average-bar.y"],
        selector=vm.Dropdown(
            options=[
                "Total order value",
                "Number of unique categories",
                "Number of unique products",
                "Number of unique order dates",
                "Total units ordered",
                "Avg. unit price",
                "Avg. order value",
            ],
            title="Change KPI on y-axis",
            multi=False,
            value="Total order value",
        ),
    ),
]

page_customer = vm.Page(
    title="Customer overview",
    components=[_customer_bar, _customer_table],
    controls=_customer_controls,
)
|
400 |
+
|
401 |
+
# CONFIGURE DASHBOARD WITH PAGES AND NAVIGATION -------------------
# Four navigation entries; "Regional overview" groups the two regional pages.
_nav_links = [
    vm.NavLink(label="Order summary", pages=["Order summary"], icon="Home"),
    vm.NavLink(label="Product overview", pages=["Product overview"], icon="Shopping Cart"),
    vm.NavLink(label="Regional overview", pages=["Regional comparison", "Regional map"], icon="South America"),
    vm.NavLink(label="Customer overview", pages=["Customer overview"], icon="Groups"),
]

dashboard = vm.Dashboard(
    pages=[page_orders, page_product, page_region_comparison, page_region_map, page_customer],
    navigation=vm.Navigation(nav_selector=vm.NavBar(items=_nav_links)),
    title=f"Purchase history dashboard: {CURRENT_YEAR}",
)

app = Vizro().build(dashboard)

# Add footer: appended to the built Dash layout; styled via the "anchor-container" class.
_footer = html.Div(
    [
        dbc.NavLink("🌸 Created by Li Nguyen", href="https://github.com/huong-li-nguyen"),
        dbc.NavLink("💻 Code", href=""),
        dbc.NavLink("💾 Data", href="https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/YGLYDY"),
    ],
    className="anchor-container",
)
app.dash.layout.children.append(_footer)

# Underlying server object, exposed at module level so a WSGI server can load it as "app:server".
server = app.dash.server


if __name__ == "__main__":
    app.run()
|
assets/css/custom.css
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#logo {
|
2 |
+
padding: 12px 0 8px;
|
3 |
+
}
|
4 |
+
|
5 |
+
#logo-and-title {
|
6 |
+
gap: 16px;
|
7 |
+
}
|
8 |
+
|
9 |
+
#dashboard-title {
|
10 |
+
margin-bottom: 0.5rem;
|
11 |
+
}
|
12 |
+
|
13 |
+
#left-main {
|
14 |
+
width: 300px;
|
15 |
+
}
|
16 |
+
|
17 |
+
.card-kpi .card-footer {
|
18 |
+
font-size: 1rem;
|
19 |
+
}
|
20 |
+
|
21 |
+
.card-kpi .card-footer .material-symbols-outlined {
|
22 |
+
font-size: 1.25rem;
|
23 |
+
}
|
24 |
+
|
25 |
+
.flex-container-regional {
|
26 |
+
background: var(--surfaces-bg-02);
|
27 |
+
display: flex;
|
28 |
+
flex-direction: column;
|
29 |
+
gap: 20px;
|
30 |
+
padding: 12px;
|
31 |
+
}
|
32 |
+
|
33 |
+
.flex-container-regional .card-kpi .material-symbols-outlined {
|
34 |
+
font-size: 1.2rem;
|
35 |
+
}
|
36 |
+
|
37 |
+
.flex-container-regional .card-kpi .card-footer {
|
38 |
+
font-size: 0.8rem;
|
39 |
+
}
|
40 |
+
|
41 |
+
.flex-container-regional .card-kpi .card-footer .material-symbols-outlined {
|
42 |
+
font-size: 1rem;
|
43 |
+
}
|
44 |
+
|
45 |
+
.flex-container-regional .card-kpi,
|
46 |
+
.flex-container-regional .card {
|
47 |
+
height: unset;
|
48 |
+
padding: 0.5rem;
|
49 |
+
}
|
50 |
+
|
51 |
+
.flex-container-regional .card-kpi .card-body {
|
52 |
+
font-size: 2vh;
|
53 |
+
padding: 0.25rem;
|
54 |
+
}
|
55 |
+
|
56 |
+
.flex-container-regional .card-kpi-title {
|
57 |
+
font-size: 1rem;
|
58 |
+
}
|
59 |
+
|
60 |
+
.anchor-container {
|
61 |
+
background: var(--main-container-bg-color);
|
62 |
+
border-top: 1px solid var(--border-subtleAlpha01);
|
63 |
+
bottom: 0;
|
64 |
+
display: flex;
|
65 |
+
font-size: 11px;
|
66 |
+
font-weight: 600;
|
67 |
+
gap: 2rem;
|
68 |
+
height: 24px;
|
69 |
+
padding: 2px 12px;
|
70 |
+
place-content: baseline right;
|
71 |
+
position: fixed;
|
72 |
+
right: 0;
|
73 |
+
}
|
assets/favicon.ico
ADDED
|
assets/images/app.svg
ADDED
|
assets/images/logo.svg
ADDED
|
images/customer-overview.png
ADDED
![]() |
images/kpi-overview.png
ADDED
![]() |
images/product-overview.png
ADDED
![]() |
images/regional-comparison.png
ADDED
![]() |
images/regional-map.png
ADDED
![]() |
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
vizro
|
2 |
+
gunicorn
|
utils/__init__.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Contains helper functions and custom chart/components configurations.
|
2 |
+
|
3 |
+
For more information, see:
|
4 |
+
|
5 |
+
- Custom charts: https://vizro.readthedocs.io/en/stable/pages/user-guides/custom-charts/
|
6 |
+
- Custom components: https://vizro.readthedocs.io/en/stable/pages/user-guides/custom-components/
|
7 |
+
|
8 |
+
"""
|
utils/charts.py
ADDED
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Contains code for custom charts and table configurations."""
|
2 |
+
|
3 |
+
from typing import List, Literal, Optional
|
4 |
+
|
5 |
+
import pandas as pd
|
6 |
+
import vizro.models as vm
|
7 |
+
import vizro.plotly.express as px
|
8 |
+
from dash import html
|
9 |
+
from vizro.models.types import capture
|
10 |
+
|
11 |
+
from .config import PRIMARY_COLOR, SECONDARY_COLOR
|
12 |
+
from .helper import shorten_product_name
|
13 |
+
|
14 |
+
|
15 |
+
# CUSTOM CHARTS ---------------
|
16 |
+
@capture("graph")
def bar_top_n(
    data_frame: pd.DataFrame,
    x: str,
    y: str,
    top_n: int = 15,
    custom_data: Optional[List[str]] = None,
    title: Optional[str] = None,
    x_visible: bool = True,
):
    """Horizontal bar chart of the `top_n` groups of `y`, ranked by the sum of `x`.

    Built on [px.bar](https://plotly.com/python-api-reference/generated/plotly.express.bar).

    Args:
        data_frame: Data containing the `x` and `y` columns.
        x: Column that is summed per group and drawn along the x-axis.
        y: Grouping column; one bar per group.
        top_n: Maximum number of groups to show.
        custom_data: Forwarded to `px.bar` as `custom_data`.
        title: Figure title.
        x_visible: Whether the x-axis is visible.

    Returns:
        The configured plotly figure.
    """
    # Sum per group, then order the groups from largest to smallest total.
    totals = data_frame.groupby(y).agg({x: "sum"})
    ranked = totals.sort_values(by=x, ascending=False).reset_index()

    bar_fig = px.bar(
        data_frame=ranked.head(top_n),
        x=x,
        y=y,
        orientation="h",
        text_auto=".3s",
        color_discrete_sequence=[SECONDARY_COLOR],
        custom_data=custom_data,
    )

    transparent = "rgba(0, 0, 0, 0)"
    bar_fig.update_layout(
        title=title,
        xaxis={"title": "Total order value in USD", "visible": x_visible},
        # The y-axis range is reversed relative to plotly's default ordering.
        yaxis={"title": "", "autorange": "reversed"},
        margin={"r": 0, "b": 16, "t": 32},
        paper_bgcolor=transparent,
        plot_bgcolor=transparent,
    )
    return bar_fig
49 |
+
|
50 |
+
|
51 |
+
@capture("graph")
def line(data_frame: pd.DataFrame, x: str, y: str, color: str, title: Optional[str] = None):
    """Line chart of summed `y` per `x`, with one line per value of `color`.

    Built on [px.line](https://plotly.com/python-api-reference/generated/plotly.express.line).

    Args:
        data_frame: Data containing the `x`, `y` and `color` columns.
        x: Column of labels that, joined with `color` by "-", parse with the
            format "%b-%d-%Y" (so `x` supplies the "%b-%d" part).
        y: Column that is summed per (`x`, `color`) pair.
        color: Column supplying the "%Y" part of the date; also used as line color.
        title: Figure title.

    Returns:
        The configured plotly figure.
    """
    summed = data_frame.groupby([x, color]).agg({y: "sum"}).reset_index()

    # Create full order date for correct sorting
    full_date_text = summed[x] + "-" + summed[color]
    summed["order_date_sort"] = pd.to_datetime(full_date_text, format="%b-%d-%Y")
    summed = summed.sort_values(by="order_date_sort")

    line_fig = px.line(
        data_frame=summed,
        x=x,
        y=y,
        color=color,
        color_discrete_sequence=[SECONDARY_COLOR, PRIMARY_COLOR],
    )
    line_fig.update_layout(
        title=title,
        xaxis={"title": "", "nticks": 12, "showgrid": False},
        yaxis_title="Total order value in USD",
        legend_title="",
    )
    return line_fig
|
77 |
+
|
78 |
+
|
79 |
+
@capture("graph")
def product_seasonality_heatmap(
    data_frame: pd.DataFrame,
    x: str,
    y: str,
    z: str,
    top_n: int = 15,
    color_continuous_scale: Optional[List[str]] = None,
):
    """Heatmap of summed `z` per (`x`, `y`) cell, restricted to the `top_n` `y` values with the largest total `z`.

    Based on [px.density_heatmap](https://plotly.com/python-api-reference/generated/plotly.express.density_heatmap).

    The x-axis ticks 1 to 12 are labelled "Jan" to "Dec" and the y-axis is hidden.
    """
    # Rank the y values by their total z and keep the top_n largest
    totals_per_y = data_frame.groupby([y]).agg({z: "sum"})
    ranked = totals_per_y.sort_values(by=z, ascending=False).reset_index()
    top_products = ranked.head(top_n)[y]

    is_top_product = data_frame[y].isin(top_products)
    df_filtered = data_frame[is_top_product]

    # Sum z for every (x, y) combination, largest cells first
    cell_totals = df_filtered.groupby([x, y]).agg({z: "sum"})
    df_agg = cell_totals.sort_values(by=z, ascending=False).reset_index()

    fig = px.density_heatmap(
        data_frame=df_agg,
        x=x,
        y=y,
        z=z,
        text_auto=".2s",
        nbinsx=12,
        color_continuous_scale=color_continuous_scale,
    )

    month_axis = {
        "title": "",
        "showgrid": False,
        "tickmode": "array",
        "tickvals": list(range(1, 13)),
        "ticktext": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    }

    fig.update_coloraxes(colorbar_title="")
    # Keep the rows in ranking order
    fig.update_yaxes(categoryorder="array", categoryarray=top_products)
    fig.update_layout(
        title={"text": f"Seasonality of {top_n} categories / products", "pad_l": 0, "pad_r": 0},
        yaxis={"title": "", "autorange": "reversed", "visible": False},
        xaxis=month_axis,
        margin={"l": 0, "r": 0, "t": 32, "b": 0},
    )
    return fig
|
126 |
+
|
127 |
+
|
128 |
+
@capture("graph")
def choropleth(
    locations: str,
    color: str,
    data_frame: pd.DataFrame = None,
    title: Optional[str] = None,
    custom_data: Optional[List[str]] = None,
    color_continuous_scale: Optional[List[str]] = None,
    show_region_only: bool = False,
):
    """USA-states choropleth coloured by `color` summed per `locations` value.

    Based on [px.choropleth](https://plotly.com/python-api-reference/generated/plotly.express.choropleth).

    With `show_region_only=True` the map is fitted to the plotted locations, the colour scale is hidden,
    all margins and title padding are set to 0 and the figure height is fixed at 160.
    """
    totals_per_location = data_frame.groupby(locations).agg({color: "sum"}).reset_index()

    fig = px.choropleth(
        data_frame=totals_per_location,
        locations=locations,
        color=color,
        color_continuous_scale=color_continuous_scale,
        scope="usa",
        locationmode="USA-states",
        title=title,
        custom_data=custom_data,
    )
    fig.update_coloraxes(colorbar={"thickness": 10, "title": ""})
    fig.update_layout(geo_bgcolor="rgba(0,0,0,0)")

    if not show_region_only:
        return fig

    # Compact variant: zoom onto the plotted locations and strip everything around the map
    no_spacing = {"t": 0, "b": 0, "r": 0, "l": 0}
    fig.update_geos(fitbounds="locations", visible=False, projection_scale=10)
    fig.update_layout(
        coloraxis_showscale=False,
        margin=dict(no_spacing),
        title_pad=dict(no_spacing),
        height=160,
    )
    return fig
|
171 |
+
|
172 |
+
|
173 |
+
@capture("graph")
def bar_avg(data_frame: pd.DataFrame, x: str, y: str, title: Optional[str] = None):
    """Bar chart of the mean of `y` per `x` value, with `x` labels cut down by `shorten_product_name`.

    Based on [px.bar](https://plotly.com/python-api-reference/generated/plotly.express.bar).

    When `x` is "Q-demos-income" the bars follow a fixed income-bracket order; for any other
    column the categories are sorted ascending.
    """
    means = data_frame.groupby(x).agg({y: "mean"})
    means = means.sort_values(by=y, ascending=False).reset_index()
    means[x] = means[x].apply(shorten_product_name)

    fig = px.bar(
        data_frame=means,
        x=x,
        y=y,
        text_auto=".0f",
        color_discrete_sequence=[SECONDARY_COLOR],
    )
    fig.update_layout(title=title, xaxis={"title": ""}, yaxis={"title": f"{y}"})

    if x == "Q-demos-income":
        # Income brackets are ordered by amount, not alphabetically
        income_brackets = [
            "Less than $25,000",
            "$25,000 - $49,999",
            "$50,000 - $74,999",
            "$75,000 - $99,999",
            "$100,000 - $149,999",
            "$150,000 or more",
            "Prefer not to say",
        ]
        x_order = {"categoryorder": "array", "categoryarray": income_brackets}
    else:
        x_order = {"categoryorder": "category ascending"}

    fig.update_xaxes(**x_order)
    return fig
|
209 |
+
|
210 |
+
|
211 |
+
# CUSTOM COMPONENTS -----------
|
212 |
+
class FlexContainer(vm.Container):
    """Container variant that renders its title and components inside one `html.Div` with a CSS class."""

    type: Literal["flex_container"] = "flex_container"
    # Value passed as `className` to the outer Div
    classname: Optional[str] = "d-flex"

    def build(self):
        """Build every child component and wrap them, preceded by an H3 title, in a single Div."""
        built_components = [child.build() for child in self.components]
        heading = html.H3(children=self.title, className="container__title")

        return html.Div(
            id=self.id,
            children=[heading, *built_components],
            className=self.classname,
        )


# Register the custom container as an allowed entry of `Page.components`
vm.Page.add_type("components", FlexContainer)
|
230 |
+
|
231 |
+
|
232 |
+
# TABLE SPECIFICATIONS ----
|
233 |
+
# One background colour per `Quintiles` value 0-4, in that order
CELL_STYLE = {
    "styleConditions": [
        {
            "condition": f"params.data.Quintiles == {quintile}",
            "style": {"backgroundColor": shade},
        }
        for quintile, shade in enumerate(["#ffc495", "#ffb276", "#fe9f56", "#fb8d35", "#f77a00"])
    ],
}

# Column definitions: text columns first, then the metric columns
COLUMNDEFS = [
    *[
        {"field": text_field, "cellDataType": "text"}
        for text_field in (
            "Survey ResponseID",
            "Q-demos-age",
            "Q-demos-education",
            "Q-demos-income",
            "Q-demos-gender",
        )
    ],
    # Only this column is coloured by quintile
    {"field": "Total order value", "cellDataType": "dollar", "cellStyle": CELL_STYLE},
    {"field": "Quintiles"},
    {"field": "Avg unit price", "cellDataType": "dollar"},
    {"field": "Avg order value", "cellDataType": "dollar"},
    {"field": "Total units ordered"},
    {"field": "Number of unique categories"},
    {"field": "Number of unique products"},
    {"field": "Number of unique order dates"},
    {"field": "pop", "flex": 3},
]
|
utils/config.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Contains configuration constants to be reused inside dashboard."""
|
2 |
+
|
3 |
+
# Brand colours used by the charts
PRIMARY_COLOR = "#FF9900"
SECONDARY_COLOR = "#bfbdc1"

# Years compared in the dashboard
CURRENT_YEAR = 2021
LAST_YEAR = CURRENT_YEAR - 1

# State / territory code -> region name. The four named regions follow the U.S. Census Bureau
# regions; "AK" and "HI" belong to the West region there and were missing from the mapping.
REGION_MAPPING = {
    **dict.fromkeys(["CT", "ME", "MA", "NH", "RI", "VT", "NJ", "NY", "PA"], "Northeast"),
    **dict.fromkeys(["IL", "IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO", "NE", "ND", "SD"], "Midwest"),
    **dict.fromkeys(
        ["DE", "FL", "GA", "MD", "NC", "SC", "VA", "WV", "DC", "AL", "KY", "MS", "TN", "AR", "LA", "TX", "OK"], "South"
    ),
    **dict.fromkeys(["AZ", "NM", "CO", "ID", "MT", "NV", "UT", "WY", "CA", "OR", "WA", "AK", "HI"], "West"),
    **dict.fromkeys(["UM", "PR", "AP", "VI", "AE", "AS", "GU", "FM", "PW", "MP"], "Other"),
}

# Placeholder label for missing values
NULL_VALUE = "Unknown"
ORANGE_SEQUENTIAL_PALETTE = ["#ffffff", "#dedce0", "#f0c8aa", "#f9b578", "#fca14b", "#fb8e22", "#f77a00"]

# Columns kept for the customer-level data frame
SELECTED_CUSTOMER_COLUMNS = [
    "Survey ResponseID",
    "Q-demos-age",
    "Q-demos-education",
    "Q-demos-income",
    "Q-demos-gender",
    "Order_Value",
    "Category",
    "ASIN/ISBN (Product Code)",
    "Order Date",
    "Quantity",
]
|
utils/helper.py
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Contains helper functions to tidy/format data and refactor bigger components on screen."""
|
2 |
+
|
3 |
+
import pandas as pd
|
4 |
+
import vizro.models as vm
|
5 |
+
from vizro.figures import kpi_card_reference
|
6 |
+
|
7 |
+
from .config import NULL_VALUE, REGION_MAPPING, SELECTED_CUSTOMER_COLUMNS
|
8 |
+
|
9 |
+
|
10 |
+
def shorten_product_name(name, n_words=4):
    """Return the first `n_words` whitespace-separated words of `name`, joined by single spaces.

    `name` is converted with `str()` first, so non-string values are accepted as well.
    """
    leading_words = str(name).split()[:n_words]
    return " ".join(leading_words)
|
14 |
+
|
15 |
+
|
16 |
+
def tidy_orders_data(orders: pd.DataFrame) -> pd.DataFrame:
    """Filter the raw orders and add the derived columns used by the dashboard.

    Rows without a `Category` are dropped, as are rows whose `Order Date` is missing or does
    not match the format "%Y-%m-%d". The input frame is left untouched.

    Args:
        orders: Raw orders containing at least the columns `Category`, `Order Date`,
            `Shipping Address State`, `Purchase Price Per Unit`, `Quantity` and `Title`.

    Returns:
        A new data frame with `Order Date` converted to datetime, missing
        `Shipping Address State` values replaced by `NULL_VALUE`, and the added columns
        `Region`, `Order_Value`, `Short_Title`, `Year`, `Month_Day` and `Month`.
    """
    # Remove rows where 'Category' is missing
    orders = orders[orders["Category"].notna()]

    # Parse before dropping: unparseable dates are coerced to NaT and have to go together with the
    # missing ones. A single remaining NaT turns `.dt.year` into floats, so `Year` would read
    # "2021.0" instead of "2021". `assign` also avoids writing into the filtered slice above.
    parsed_dates = pd.to_datetime(orders["Order Date"], format="%Y-%m-%d", errors="coerce")
    orders = orders.assign(**{"Order Date": parsed_dates}).dropna(subset=["Order Date"])

    # Create new columns
    order_date = orders["Order Date"]
    orders = orders.assign(
        # States that are missing or not part of the mapping get the placeholder region
        Region=orders["Shipping Address State"].map(REGION_MAPPING).fillna(NULL_VALUE),
        Order_Value=orders["Purchase Price Per Unit"] * orders["Quantity"],
        Short_Title=orders["Title"].apply(shorten_product_name),
        Year=order_date.dt.year.astype(str),
        Month_Day=order_date.dt.strftime("%b-%d"),
        Month=order_date.dt.month,
    )

    # Fill null values
    orders["Shipping Address State"] = orders["Shipping Address State"].fillna(NULL_VALUE)
    return orders
|
38 |
+
|
39 |
+
|
40 |
+
def create_kpi_data(orders):
    """Aggregate the orders per `Year` and reshape the KPIs into a single wide row.

    Per year the function computes the summed `Order_Value` (also expressed in millions), the number
    of unique `Survey ResponseID` values, the summed `Quantity` and the order value per unit.
    The result has one row (index value 0) and one column per KPI and year, named "<KPI>_<Year>".
    """
    aggregations = {"Order_Value": "sum", "Survey ResponseID": pd.Series.nunique, "Quantity": "sum"}
    readable_names = {
        "Order_Value": "Total order value",
        "Survey ResponseID": "Number of customers",
        "Quantity": "Total units ordered",
    }
    yearly = orders.groupby("Year").agg(aggregations).reset_index()
    yearly = yearly.rename(columns=readable_names)

    yearly["Avg product unit price"] = yearly["Total order value"] / yearly["Total units ordered"]
    yearly["Total order value mil"] = yearly["Total order value"] / 1000000

    # A constant index puts every year on the same row once the frame is pivoted
    yearly["index"] = 0
    kpi_names = [
        "Total order value mil",
        "Number of customers",
        "Total units ordered",
        "Avg product unit price",
    ]
    wide = yearly.pivot(index="index", columns="Year", values=kpi_names)

    # Flatten the (KPI, year) column pairs into plain strings
    flat_names = []
    for kpi, year in wide.columns:
        flat_names.append(f"{kpi}_{year}")
    wide.columns = flat_names
    return wide
|
72 |
+
|
73 |
+
|
74 |
+
def create_kpi_container(data_frame: pd.DataFrame, id: str, layout: vm.Layout):
    """Build a `vm.Container` holding four KPI reference cards that compare 2021 against 2020.

    Each card reads the columns "<KPI>_2021" (value) and "<KPI>_2020" (reference) from `data_frame`.
    The container id is "kpi-container-<id>"; a falsy `layout` is passed on as `None`.
    """
    # (column prefix, card title, value format, reference format, icon)
    card_specs = [
        (
            "Total order value mil",
            "Total order value",
            "${value:.2f}M",
            "{delta_relative:+.1%} vs. LY (${reference:.2f}M)",
            "payments",
        ),
        (
            "Number of customers",
            "No. of customers",
            "{value:,.0f}",
            "{delta_relative:+.1%} vs. LY ({reference:,.0f})",
            "groups",
        ),
        (
            "Total units ordered",
            "Total units ordered",
            "{value:,.0f}",
            "{delta_relative:+.1%} vs. LY ({reference:,.0f})",
            "production_quantity_limits",
        ),
        (
            "Avg product unit price",
            "Avg. unit price",
            "${value:.2f}",
            "{delta_relative:+.1%} vs. LY (${reference:.2f})",
            "price_change",
        ),
    ]

    kpi_cards = [
        vm.Figure(
            figure=kpi_card_reference(
                data_frame,
                value_column=f"{kpi}_2021",
                reference_column=f"{kpi}_2020",
                title=card_title,
                value_format=value_format,
                reference_format=reference_format,
                icon=icon,
            ),
        )
        for kpi, card_title, value_format, reference_format, icon in card_specs
    ]

    return vm.Container(
        id=f"kpi-container-{id}",
        title="",
        layout=layout if layout else None,
        components=kpi_cards,
    )
|
128 |
+
|
129 |
+
|
130 |
+
def create_customer_df(orders_cy: pd.DataFrame, survey: pd.DataFrame):
    """Aggregate the orders per customer and attach the survey columns.

    The orders are grouped by `Year`, `Survey ResponseID`, `Region` and `Shipping Address State`,
    left-joined with `survey` on `Survey ResponseID`, reduced to `SELECTED_CUSTOMER_COLUMNS` and
    renamed. `Avg unit price`, `Avg order value` and `Quintiles` (five quantile bins of
    `Total order value`, labelled 0-4) are added afterwards.
    """
    group_keys = ["Year", "Survey ResponseID", "Region", "Shipping Address State"]
    aggregations = {
        "Order_Value": "sum",
        "Category": pd.Series.nunique,
        "ASIN/ISBN (Product Code)": pd.Series.nunique,
        "Order Date": pd.Series.nunique,
        "Quantity": "sum",
    }
    per_customer = orders_cy.groupby(group_keys).agg(aggregations).reset_index()
    per_customer = per_customer.merge(survey, on="Survey ResponseID", how="left")

    # Keep only the selected columns and give the aggregates descriptive names
    readable_names = {
        "Order_Value": "Total order value",
        "Category": "Number of unique categories",
        "ASIN/ISBN (Product Code)": "Number of unique products",
        "Order Date": "Number of unique order dates",
        "Quantity": "Total units ordered",
    }
    per_customer = per_customer[SELECTED_CUSTOMER_COLUMNS].rename(columns=readable_names)

    # Derived metrics
    total_value = per_customer["Total order value"]
    per_customer["Avg unit price"] = total_value / per_customer["Total units ordered"]
    per_customer["Avg order value"] = total_value / per_customer["Number of unique order dates"]
    per_customer["Quintiles"] = pd.qcut(total_value, 5, labels=False)
    return per_customer
|