maxschulz-COL committed on
Commit
7002c2f
·
verified ·
1 Parent(s): a881520
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/images/kpi_dashboard.gif filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,11 +1,36 @@
1
  ---
2
  title: KPI Demo
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
  license: mit
 
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: KPI Demo
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
+ short_description: Example of a Key Performance Indicator (KPI) dashboard
10
  ---
11
 
12
+ # KPI dashboard
13
+
14
+ This demo dashboard provides an example of a Key Performance Indicator (KPI) dashboard, designed to help users get started and extend further.
15
+ It uses fictional budget data to demonstrate the capabilities of Vizro in real-world applications.
16
+
17
+ Special thanks to the [#RWFD Real World Fake Data initiative](https://opendatainitiative.io/), a community project that
18
+ provides high-quality fake data for creating realistic dashboard examples for real-world applications.
19
+
20
+ Note: The data has been additionally edited for the purpose of this example.
21
+
22
+ <img src="./assets/images/kpi_dashboard.gif" alt="GIF of the KPI dashboard">
23
+
24
+ ## Possible future iterations
25
+
26
+ - Enable selection of year filter
27
+ - Enable current year vs. past year comparison
28
+ - Enable dynamic KPI Cards
29
+ - Bar - Enable drill-downs from Issue to Sub-issue and Product to Sub-product
30
+ - Bar - Reformat numbers with commas in bar chart
31
+ - Bar - Left-align y-axis labels
32
+ - Bar - Shorten labels
33
+ - Line - Customize function to always show selected year vs. past year
34
+ - Table-view - Check why date format does not work on `Date Received`
35
+ - Table-view - Add icons to `On time?` column
36
+ - Table-view - Improve speed by applying caching or by overcoming the limitation that the entire data set is loaded at once
app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Example to show dashboard configuration."""
2
+
3
+ import pandas as pd
4
+ import vizro.models as vm
5
+ from utils._charts import COLUMN_DEFS, KPI, bar, choropleth, line, pie
6
+ from utils._helper import clean_data_and_add_columns
7
+ from vizro import Vizro
8
+ from vizro.actions import filter_interaction
9
+ from vizro.tables import dash_ag_grid
10
+
11
# DATA --------------------------------------------------------------------------------------------
# The CSV is downloaded when this module is imported.
COMPLAINTS_CSV_URL = "https://query.data.world/s/glbdstahsuw3hjgunz3zssggk7dsfu?dws=00000"

df_complaints = clean_data_and_add_columns(pd.read_csv(COMPLAINTS_CSV_URL))

# Register the custom `KPI` model for the `components` field of `vm.Container`.
vm.Container.add_type("components", KPI)
15
+
16
# SUB-SECTIONS ------------------------------------------------------------------------------------
# One row per KPI card; the columns follow `_KPI_FIELDS`.
# Note: For some KPIs the icon/sign go in opposite directions as an increase e.g. in complaints is negative
_KPI_FIELDS = ("title", "value", "icon", "sign", "ref_value")
_KPI_ROWS = [
    ("Total Complaints", "75.513", "arrow_circle_up", "delta-neg", "6.8% vs. LY"),
    ("Closed Complaints", "99.6%", "arrow_circle_up", "delta-pos", "+0.2% vs. LY"),
    ("Open Complaints", "0.4%", "arrow_circle_down", "delta-pos", "-0.2% vs. LY"),
    ("Timely Response", "98.1%", "arrow_circle_up", "delta-pos", "+10.5% vs. LY"),
    ("Closed w/o cost", "84.5%", "arrow_circle_down", "delta-neg", "-8.5% vs. LY"),
    ("Consumer disputed", "9.5%", "arrow_circle_up", "delta-neg", "+2.3% vs. LY"),
]

# Container holding the static KPI cards; the empty title is intentional.
kpi_banner = vm.Container(
    id="kpi-banner",
    title="",
    components=[KPI(**dict(zip(_KPI_FIELDS, row))) for row in _KPI_ROWS],
)
66
+
67
# One tab per grouping column; every tab counts "Complaint ID" values with the custom `bar` chart.
bar_charts_tabbed = vm.Tabs(
    tabs=[
        vm.Container(
            title=f"By {dimension}",
            components=[
                vm.Graph(
                    figure=bar(
                        data_frame=df_complaints,
                        y=dimension,
                        x="Complaint ID",
                    ),
                )
            ],
        )
        for dimension in ("Issue", "Product", "Channel", "Region")
    ],
)
119
+
120
# PAGES --------------------------------------------------------------------------------------
# Rows whose "Company response - Closed" value is anything other than "Not closed".
_closed_complaints = df_complaints[df_complaints["Company response - Closed"] != "Not closed"]

# Order in which the response categories are handed to the pie chart.
_closed_response_order = [
    "Closed with explanation",
    "Closed without relief",
    "Closed with non-monetary relief",
    "Closed with relief",
    "Closed with monetary relief",
]

page_exec = vm.Page(
    title="Executive View",
    # Component 0 spans the first row; component 1 fills the left column of the remaining four rows,
    # components 2 and 3 share the right column (two rows each).
    layout=vm.Layout(grid=[[0, 0]] + [[1, 2]] * 2 + [[1, 3]] * 2),
    components=[
        kpi_banner,
        bar_charts_tabbed,
        vm.Graph(figure=line(data_frame=df_complaints, y="Complaint ID", x="Year-Month Received")),
        vm.Graph(
            figure=pie(
                data_frame=_closed_complaints,
                custom_order=_closed_response_order,
                values="Complaint ID",
                names="Company response - Closed",
                title="Closed company responses",
            )
        ),
    ],
)
153
+
154
# Graph ids targeted by the map's filter interaction, keyed by the column each bar chart groups on.
_regional_bar_ids = {dimension: f"regional-{dimension.lower()}" for dimension in ("Issue", "Product")}

# Intro card; the text is Markdown.
_regional_intro = vm.Card(
    text="""
##### Click on a state inside the map to filter the bar charts on the right.

- Which state has the most complaints?
- What are the three biggest issues in California?
- What is the product with the most complaints in Texas?
"""
)

# Map of complaint counts per state; it carries a filter interaction targeting the two bar charts.
_regional_map = vm.Graph(
    figure=choropleth(
        data_frame=df_complaints,
        locations="State",
        color="Complaint ID",
        title="Complaints by State",
        custom_data=["State"],
    ),
    actions=[
        vm.Action(
            function=filter_interaction(targets=list(_regional_bar_ids.values())),
        )
    ],
)

# One tab per grouping column, each holding a bar chart with a fixed id.
_regional_tabs = vm.Tabs(
    tabs=[
        vm.Container(
            title=f"By {dimension}",
            components=[
                vm.Graph(
                    id=graph_id,
                    figure=bar(
                        data_frame=df_complaints,
                        y=dimension,
                        x="Complaint ID",
                    ),
                )
            ],
        )
        for dimension, graph_id in _regional_bar_ids.items()
    ],
)

page_region = vm.Page(
    title="Regional View",
    # Component 0 spans the first row; components 1 and 2 sit side by side in the four rows below.
    layout=vm.Layout(grid=[[0, 0], [1, 2], [1, 2], [1, 2], [1, 2]]),
    components=[_regional_intro, _regional_map, _regional_tabs],
    controls=[
        vm.Filter(column="Region", selector=vm.Checklist()),
        *(vm.Filter(column=column) for column in ("State", "Product", "Issue")),
    ],
)
219
+
220
# AG Grid over the full complaints frame, using the column definitions from `utils._charts`.
_complaints_grid = dash_ag_grid(
    data_frame=df_complaints,
    columnDefs=COLUMN_DEFS,
    dashGridOptions={"pagination": True},
)

page_table = vm.Page(
    title="List of complaints",
    components=[vm.AgGrid(figure=_complaints_grid)],
)
232
+
233
# (label, icon, title of the page the link points to)
_NAV_ENTRIES = [
    ("Executive View", "Leaderboard", "Executive View"),
    ("Regional View", "South America", "Regional View"),
    ("Table View", "Table View", "List of complaints"),
]

dashboard = vm.Dashboard(
    pages=[page_exec, page_region, page_table],
    title="Cumulus Financial Corporation",
    navigation=vm.Navigation(
        nav_selector=vm.NavBar(
            items=[
                vm.NavLink(label=label, icon=icon, pages=[page_title])
                for label, icon, page_title in _NAV_ENTRIES
            ]
        )
    ),
)
246
+
247
if __name__ == "__main__":
    # Build and serve the dashboard only when this file is executed as a script.
    app = Vizro().build(dashboard)
    app.run()
assets/css/custom.css ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* ---- Page chrome ------------------------------------------------------- */

#page-header {
  padding-left: 4px;
}

/* ---- Generic cards ----------------------------------------------------- */

.card {
  padding: 8px;
}

.card ul {
  margin-bottom: 0;
}

/* ---- KPI cards (class set by the custom KPI component) ----------------- */

.kpi-card-ref {
  min-width: 168px;
}

/* Card heading. */
.kpi-card-ref h4 {
  margin: 0;
}

/* Headline value paragraph. */
.kpi-card-ref > p {
  align-items: center;
  color: var(--text-secondary);
  display: flex;
  flex-grow: 1;
  font-size: 1rem;
  font-size: 3.6vh; /* later declaration wins wherever `vh` is supported */
  font-weight: 600;
  line-height: unset;
}

/* Row holding the icon and the reference value. */
.kpi-card-ref > span {
  display: flex;
  font-weight: 600;
  gap: 4px;
}

.kpi-card-ref > div {
  display: flex;
  flex-direction: row;
  gap: 8px;
  margin: 0;
}

.kpi-card-ref .material-symbols-outlined {
  font-size: 16px;
  line-height: 20px;
}

/* Delta colors: text ... */
.kpi-card-ref .delta-pos {
  color: #1a85ff;
}

.kpi-card-ref .delta-neg {
  color: #d41159;
}

/* ... and the matching accent on the card's left edge. */
.kpi-card-ref:has(.delta-pos) {
  border-left: 4px solid #1a85ff;
}

.kpi-card-ref:has(.delta-neg) {
  border-left: 4px solid #d41159;
}

/* ---- KPI banner container (id set in app.py) --------------------------- */

#kpi-banner .container__title {
  display: none;
}

/* Lay the cards out in a single row that scrolls when it overflows. */
#kpi-banner .grid-layout {
  display: flex;
  flex-direction: row;
  overflow: auto;
}

#kpi-banner > ::-webkit-scrollbar-thumb {
  border: 6px solid;
  border-color: var(--main-container-bg-color);
}
assets/favicon.ico ADDED
assets/images/app.svg ADDED
assets/images/kpi_dashboard.gif ADDED

Git LFS Details

  • SHA256: 142385ec4872ebb32fe7405c8df2c5e93915fde660c053c374e2b27bd0c45ee5
  • Pointer size: 133 Bytes
  • Size of remote file: 15.6 MB
assets/images/logo.svg ADDED
utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Utils folder to contain helper functions and custom charts/components."""
utils/_charts.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Contains custom components and charts used inside the dashboard."""
2
+
3
+ from typing import List, Literal, Optional
4
+
5
+ import dash_bootstrap_components as dbc
6
+ import pandas as pd
7
+ import vizro.models as vm
8
+ import vizro.plotly.express as px
9
+ from dash import html
10
+ from vizro.models.types import capture
11
+
12
+
13
# CUSTOM COMPONENTS -------------------------------------------------------------
# Note: This is a static KPI Card only (it will not be reactive to controls). A new dynamic KPI Card component
# is currently in development.
class KPI(vm.VizroBaseModel):
    """Static custom `KPI` card: a heading, a headline value and a reference row.

    Every field is a plain string that is rendered as given; nothing is computed from data.
    """

    type: Literal["kpi"] = "kpi"
    title: str  # text of the card heading
    value: str  # headline figure, rendered as text
    icon: str  # text placed inside the `material-symbols-outlined` span
    sign: Literal["delta-pos", "delta-neg"]  # CSS class applied to the reference row
    ref_value: str  # text shown next to the icon

    def build(self):
        """Return the `dbc.Card` that renders this KPI."""
        icon_glyph = html.Span(self.icon, className="material-symbols-outlined")
        reference_text = html.Span(self.ref_value)
        reference_row = html.Span([icon_glyph, reference_text], className=self.sign)

        card_children = [html.H4(self.title), html.P(self.value), reference_row]
        return dbc.Card(card_children, className="kpi-card-ref")
41
+
42
+
43
# CUSTOM CHARTS ----------------------------------------------------------------
@capture("graph")
def bar(
    x: str,
    y: str,
    data_frame: pd.DataFrame,
    top_n: int = 15,
    custom_data: Optional[List[str]] = None,
):
    """Horizontal bar chart of the `top_n` most frequent `y` categories.

    Args:
        x: Column whose non-null values are counted per category.
        y: Column to group by; one bar per category.
        data_frame: Source data.
        top_n: Number of categories to keep after sorting by count, largest first.
        custom_data: Passed through to `px.bar`.

    Returns:
        The bar figure, with the largest category at the top.
    """
    counts = data_frame.groupby(y).agg({x: "count"})
    ranked = counts.sort_values(by=x, ascending=False).reset_index()
    top_categories = ranked.head(top_n)

    fig = px.bar(
        data_frame=top_categories,
        x=x,
        y=y,
        orientation="h",
        text=x,
        color_discrete_sequence=["#1A85FF"],
        custom_data=custom_data,
    )
    # Reversed y-axis so that the first (largest) row is drawn at the top.
    fig.update_layout(
        xaxis_title="# of Complaints",
        yaxis={"title": "", "autorange": "reversed"},
    )
    return fig
65
+
66
+
67
@capture("graph")
def line(x: str, y: str, data_frame: pd.DataFrame):
    """Area chart of the number of non-null `y` values per `x` group.

    Args:
        x: Column to group by; shown on the x-axis.
        y: Column whose non-null values are counted per group.
        data_frame: Source data.

    Returns:
        The area figure titled "Complaints over time".
    """
    counts_per_period = data_frame.groupby(x).agg({y: "count"})
    plot_data = counts_per_period.reset_index()

    fig = px.area(
        data_frame=plot_data,
        x=x,
        y=y,
        color_discrete_sequence=["#1A85FF"],
        title="Complaints over time",
    )
    fig.update_layout(
        xaxis_title="Date Received",
        yaxis_title="# of Complaints",
        title_pad_t=4,
    )
    return fig
79
+
80
+
81
@capture("graph")
def pie(
    names: str,
    values: str,
    data_frame: pd.DataFrame = None,
    title: Optional[str] = None,
    custom_order: Optional[List[str]] = None,
):
    """Donut chart of the number of non-null `values` entries per `names` category.

    Args:
        names: Column to group by; one slice per category.
        values: Column whose non-null values are counted per category.
        data_frame: Source data.
        title: Optional chart title.
        custom_order: Optional category order. When given, slices are arranged in this
            order so the fixed color sequence lines up with it; categories that are not
            listed are placed last. When omitted, the grouped order is kept.

    Returns:
        The pie figure with slice sorting disabled.
    """
    df_agg = data_frame.groupby(names).agg({values: "count"}).reset_index()

    # Apply custom order so colors are applied correctly to the pie chart.
    # Skipped when no order is supplied: `enumerate(None)` would raise a TypeError.
    if custom_order is not None:
        order_mapping = {category: index for index, category in enumerate(custom_order)}
        df_agg = df_agg.sort_values(by=names, key=lambda categories: categories.map(order_mapping))

    fig = px.pie(
        data_frame=df_agg,
        names=names,
        values=values,
        color_discrete_sequence=["#1a85ff", "#7ea1ee", "#adbedc", "#df658c", "#d41159"],
        title=title,
        hole=0.4,
    )

    fig.update_layout(legend_x=1, legend_y=1, title_pad_t=2, margin={"l": 0, "r": 0, "t": 60, "b": 0})
    # Keep the row order chosen above instead of letting plotly sort slices by size.
    fig.update_traces(sort=False)
    return fig
107
+
108
+
109
# Continuous color scale used by `choropleth`, from light grey-pink to deep pink.
_CHOROPLETH_COLOR_SCALE = [
    "#ded6d8",
    "#f3bdcb",
    "#f7a9be",
    "#f894b1",
    "#f780a3",
    "#f46b94",
    "#ee517f",
    "#e94777",
    "#e43d70",
    "#df3168",
    "#d92460",
    "#d41159",
]


@capture("graph")
def choropleth(
    locations: str,
    color: str,
    data_frame: pd.DataFrame = None,
    title: Optional[str] = None,
    custom_data: Optional[List[str]] = None,
):
    """US-states choropleth colored by the number of non-null `color` values per location.

    Args:
        locations: Column to group by; passed to plotly as the `USA-states` location column.
        color: Column whose non-null values are counted per location.
        data_frame: Source data.
        title: Optional chart title.
        custom_data: Passed through to `px.choropleth`.

    Returns:
        The choropleth figure.
    """
    counts_per_location = data_frame.groupby(locations).agg({color: "count"})
    plot_data = counts_per_location.reset_index()

    fig = px.choropleth(
        data_frame=plot_data,
        locations=locations,
        color=color,
        color_continuous_scale=_CHOROPLETH_COLOR_SCALE,
        scope="usa",
        locationmode="USA-states",
        title=title,
        custom_data=custom_data,
    )

    colorbar_options = {"thickness": 10, "title": {"side": "right"}}
    fig.update_coloraxes(colorbar=colorbar_options)
    return fig
145
+
146
+
147
# TABLE CONFIGURATIONS ---------------------------------------------------------
# Cell background color per cell value; one style condition is generated per entry, in this order.
_RESPONSE_COLORS = {
    "Closed with explanation": "#1a85ff",
    "Closed with monetary relief": "#d41159",
    "Closed with non-monetary relief": "#adbedc",
    "Closed without relief": "#7ea1ee",
    "Closed with relief": "#df658c",
    "Closed": "#1a85ff",
}

CELL_STYLE = {
    "styleConditions": [
        {
            "condition": f"params.value == '{response}'",
            "style": {"backgroundColor": color},
        }
        for response, color in _RESPONSE_COLORS.items()
    ]
}
176
+
177
+
178
def _text_column(field, flex, **extra):
    """Return a column definition for a text column; `extra` keys go between the type and `flex`."""
    return {"field": field, "cellDataType": "text", **extra, "flex": flex}


# Column definitions handed to `dash_ag_grid`; `flex` sets the relative column widths.
COLUMN_DEFS = [
    _text_column("Complaint ID", 3, headerName="ID"),
    _text_column("Date Received", 3, headerName="Date"),
    _text_column("Channel", 3),
    _text_column("State", 2),
    _text_column("Product", 5),
    _text_column("Issue", 5),
    _text_column("Company response - detailed", 6, cellStyle=CELL_STYLE, headerName="Company response"),
    {"field": "Timely response?", "cellRenderer": "markdown", "headerName": "On time?", "flex": 3},
]
utils/_helper.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Contains helper functions and variables."""
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
# Two-letter codes grouped by the region label they are assigned to.
_STATES_BY_REGION = {
    "North East": ["CT", "ME", "MA", "NH", "RI", "VT", "NJ", "NY", "PA"],
    "Mid West": ["IL", "IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO", "NE", "ND", "SD"],  # codespell:ignore
    "South": ["DE", "FL", "GA", "MD", "NC", "SC", "VA", "WV", "DC", "AL", "KY", "MS", "TN", "AR", "LA"],
    "South West": ["AZ", "NM", "OK", "TX"],
    "West": ["CO", "ID", "MT", "NV", "UT", "WY", "AK", "CA", "HI", "OR", "WA"],
    "Other": ["UM", "PR", "AP", "VI", "AE", "AS", "GU", "FM", "PW", "MP"],
}

# Flat lookup: code -> region label.
REGION_MAPPING = {
    state: region
    for region, states in _STATES_BY_REGION.items()
    for state in states
}
18
+
19
+
20
def fill_na_with_random(df, column):
    """Fill missing values in `df[column]` with random values drawn from the same column.

    Replacements are sampled (with replacement) from the column's non-missing values in
    a single vectorized draw. The column is updated on `df` itself and also returned.

    Args:
        df: Data frame containing `column`; its `column` is overwritten when values are filled.
        column: Name of the column to fill.

    Returns:
        The filled column. If the column has no missing values, or no non-missing
        values to sample from, it is returned unchanged.
    """
    series = df[column]
    missing = series.isna()
    candidates = series[~missing].to_numpy()

    # Nothing to fill, or nothing to draw from (np.random.choice rejects an empty pool).
    if not missing.any() or candidates.size == 0:
        return df[column]

    filled = series.copy()
    filled[missing] = np.random.choice(candidates, size=int(missing.sum()))
    df[column] = filled
    return df[column]
25
+
26
+
27
def clean_data_and_add_columns(data: pd.DataFrame):
    """Tidy the raw data set, add derived columns and adjust cell values for this example.

    Args:
        data: Raw complaints frame. It is not modified; all changes are made on the
            renamed copy returned by `DataFrame.rename`.

    Returns:
        A new frame with renamed columns, dates reformatted as `%Y-%m-%d` strings and
        the added columns "Year-Month Received", "Region", "Company response" and
        "Company response - Closed".
    """
    data = data.rename(
        columns={
            # "Date Sumbited" (sic) is the column name this function expects in the input.
            "Date Sumbited": "Date Submitted",
            "Submitted via": "Channel",
            "Company response to consumer": "Company response - detailed",
        },
    )

    # Clean cell values and/or assign different values for the purpose of this example
    data["Company response - detailed"] = data["Company response - detailed"].replace("Closed", "Closed without relief")
    data["State"] = data["State"].replace("UNITED STATES MINOR OUTLYING ISLANDS", "UM")
    data["State"] = fill_na_with_random(data, "State")

    # Parse each date column once and derive every string representation from the parsed values
    date_received = pd.to_datetime(data["Date Received"], format="%m/%d/%y")
    date_submitted = pd.to_datetime(data["Date Submitted"], format="%m/%d/%y")
    data["Date Received"] = date_received.dt.strftime("%Y-%m-%d")
    data["Date Submitted"] = date_submitted.dt.strftime("%Y-%m-%d")

    # Create additional columns
    data["Year-Month Received"] = date_received.dt.strftime("%Y-%m")
    data["Region"] = data["State"].map(REGION_MAPPING)

    response = data["Company response - detailed"]
    # na=False: a missing response must count as "not closed". Without it the mask holds NaN,
    # which np.where treats as truthy, so missing responses would be labelled "Closed".
    is_closed = response.str.contains("Closed", regex=False, na=False)
    data["Company response"] = np.where(is_closed, "Closed", response)
    data["Company response - Closed"] = np.where(is_closed, response, "Not closed")
    return data