File size: 16,742 Bytes
f8ab25d
 
638eb56
f8ab25d
638eb56
 
f8ab25d
638eb56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8ab25d
638eb56
 
 
f8ab25d
638eb56
 
f8ab25d
638eb56
 
 
 
 
 
 
 
 
 
f8ab25d
638eb56
 
f8ab25d
638eb56
 
 
 
f8ab25d
638eb56
 
 
f8ab25d
638eb56
 
 
f8ab25d
638eb56
 
 
f8ab25d
 
638eb56
 
 
 
 
 
f8ab25d
638eb56
 
f8ab25d
638eb56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e3c2c5
 
 
638eb56
9e3c2c5
 
 
 
638eb56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e3c2c5
 
638eb56
9e3c2c5
 
638eb56
9e3c2c5
 
638eb56
 
 
 
9e3c2c5
 
 
638eb56
9e3c2c5
638eb56
 
 
9e3c2c5
 
638eb56
 
 
9e3c2c5
638eb56
 
 
 
9e3c2c5
 
638eb56
 
 
9e3c2c5
638eb56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8ab25d
638eb56
 
 
f8ab25d
638eb56
 
f8ab25d
638eb56
 
 
 
 
f8ab25d
638eb56
 
f8ab25d
638eb56
 
f8ab25d
638eb56
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import plotly.express as px
import re
from datetime import datetime, timedelta
import warnings
# Load the data
SALES_CSV_PATH = "fy21-24.csv"

# Explicit dtypes for every column read from the sales extract.
SALES_DTYPES = {
    "FyWeek": "string",
    "Fy": "category",
    "Chaincode": "category",
    "Store": "category",
    "Address": "string",
    "Zipcode": "float",
    "City": "category",
    "State": "category",
    "Containercode": "category",
    "Itemtype": "category",
    "SalesVolume": "float",
    "UnitPrice": "float",
    "Sales": "float",
}


def load_sales_data(path=SALES_CSV_PATH):
    """Read the sales CSV at ``path`` and return it as a DataFrame.

    Columns are parsed with ``SALES_DTYPES``; ``Zipcode`` and ``SalesVolume``
    are then passed through ``convert_dtypes`` so pandas picks a nullable
    dtype for them.
    """
    data = pd.read_csv(path, dtype=SALES_DTYPES)

    # Convert columns
    data["Zipcode"] = data["Zipcode"].convert_dtypes()
    data["SalesVolume"] = data["SalesVolume"].convert_dtypes()
    return data


# Streamlit re-executes this script on every widget interaction, so an
# uncached read would re-parse the whole CSV each time.  ``st.cache_data``
# returns a fresh copy per call, which keeps the in-place column additions
# made by the cards below safe.  Older Streamlit releases without
# ``cache_data`` fall back to the plain, uncached loader.
_cache_data = getattr(st, "cache_data", None)
if _cache_data is not None:
    load_sales_data = _cache_data(load_sales_data)

df = load_sales_data()

# Page heading
st.title('Sales Data Dashboard')

# Seed the session state on the first run only: which card is open and the
# item type shared by the item-type dropdowns ('CORE' by default).
for state_key, default_value in (('active_card', None), ('selected_item_type', 'CORE')):
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value
# Same for the state dropdown (first state found in the data) and the
# grouping feature (defaults to 'Chaincode').
if 'selected_state' not in st.session_state:
    st.session_state['selected_state'] = df['State'].unique()[0]
if 'selected_feature' not in st.session_state:
    st.session_state['selected_feature'] = 'Chaincode'

# Four columns, one card button in each
col1, col2, col3, col4 = st.columns(4)

# Card id stored in the session state, paired with the label on its button
card_buttons = (
    ('card1', "Sales Volume Trend for Item Category"),
    ('card2', "Sales Volume & Unit Price Correlation for Item Category and Container Code"),
    ('card3', "Price vs Sales Trend by Year"),
    ('card4', "Total Sales Volume by Price Band"),
)

# Clicking a button records its card id as the active card
for card_column, (card_id, button_label) in zip((col1, col2, col3, col4), card_buttons):
    with card_column:
        if st.button(button_label):
            st.session_state['active_card'] = card_id


########################################### CARD #1 ####################################################
# Card 1: interactive bar chart of weekly SalesVolume for one state, optionally
# split by chain code or item type.
if st.session_state['active_card'] == 'card1':
    # Pull the fiscal year and week number out of FyWeek (presumably formatted
    # like 'FY 2021 Week 51' - confirm against the data) and sort the frame
    # chronologically.  Vectorised string ops replace the per-row lambdas.
    df['Fiscal Year'] = df['FyWeek'].str.split(' ').str[1].astype(int)
    df['Week Number'] = df['FyWeek'].str.split('Week ').str[1].astype(int)
    df = df.sort_values(by=['Fiscal Year', 'Week Number'])

    # Dropdown for selecting the state (using session_state).  Fall back to the
    # first option if the remembered state is not present in the data.
    state_options = list(df['State'].unique())
    stored_state = st.session_state['selected_state']
    st.session_state['selected_state'] = st.selectbox(
        'Select State', state_options,
        index=state_options.index(stored_state) if stored_state in state_options else 0)

    # Dropdown for selecting the feature for grouping (using session_state)
    feature_labels = {'Chaincode': 'Chain Code', 'Itemtype': 'Item Type', 'FyWeek': 'Fiscal Week'}
    feature_options = list(feature_labels)
    stored_feature = st.session_state['selected_feature']
    st.session_state['selected_feature'] = st.selectbox(
        'Select Feature for Grouping', feature_options,
        index=feature_options.index(stored_feature) if stored_feature in feature_options else 0)

    state = st.session_state['selected_state']
    feature = st.session_state['selected_feature']
    feature_label = feature_labels[feature]

    # Filter the dataframe based on selected state
    filtered_df = df[df['State'] == state]

    # groupby re-sorts the FyWeek strings lexicographically ('Week 10' before
    # 'Week 2'), so remember the chronological order established by the sort
    # above and hand it to plotly explicitly.
    week_order = filtered_df['FyWeek'].drop_duplicates().tolist()

    st.subheader(f'Sales Data for {state} - Grouped by {feature_label}')
    if feature == 'FyWeek':
        # Weekly totals only: no colour split.
        group_keys = ['FyWeek']
        color_column = None
        chart_title = f'Sales Volume over Fiscal Week in {state}'
    else:
        group_keys = ['FyWeek', feature]
        color_column = feature
        chart_title = f'Sales Volume over Fiscal Week in {state} by {feature_label}'

    # observed=True keeps only categories that actually occur in this state,
    # so the legend is not padded with all-zero groups.
    group_data = filtered_df.groupby(group_keys, observed=True)['SalesVolume'].sum().reset_index()
    fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color=color_column,
                 title=chart_title,
                 labels={'SalesVolume': 'Sales Volume'},
                 category_orders={'FyWeek': week_order})

    # Display the interactive plot
    st.plotly_chart(fig)
##########################################################################################################

########################################### CARD #2 ####################################################
# Card 2: Sales Volume & Unit Price Correlation plot for Item Category and Container Code
if st.session_state['active_card'] == 'card2':
    # Dropdown to select item type (using session_state).  Fall back to the
    # first option if the remembered item type is not present in the data.
    item_type_options = list(df['Itemtype'].unique())
    stored_item_type = st.session_state['selected_item_type']
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', item_type_options,
        index=item_type_options.index(stored_item_type) if stored_item_type in item_type_options else 0)

    # Dropdown to select the grouping category (container code, chain code, or state)
    group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])

    st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option}")

    # Collapse rows sharing the same week/store/container/item keys:
    # volumes and sales are summed, unit price is averaged.
    df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
        'Sales': 'sum'
    }).reset_index()

    def dt_from_fy_week(fyweek):
        """Return the start date of the fiscal week named by ``fyweek``.

        ``fyweek`` must contain exactly two integers, read as fiscal year and
        week number.  Week 1 is taken to start on 1 August of the calendar
        year before the fiscal year; each later week adds seven days.
        """
        fy, w = re.findall(r'\d+', fyweek)
        week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d")
        return (week1_start + timedelta(weeks=int(w) - 1)).date()

    # Add columns for date and promo to data.  Each distinct FyWeek label is
    # parsed once and mapped back, instead of running the regex on every row.
    week_dates = {fy_week: dt_from_fy_week(fy_week) for fy_week in df['FyWeek'].unique()}
    df['Dt'] = pd.to_datetime(df['FyWeek'].astype(object).map(week_dates), errors='coerce')
    # Weeks whose start date falls in March-June are labelled 'Promo'.
    df['Promo'] = np.where(df['Dt'].dt.month.isin([3, 4, 5, 6]), 'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")

    # Split FyWeek into fiscal year and week number
    df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
    df['Year'] = df['FyWeek'].str.split().str[1].astype(int)

    # Filter the dataframe based on the selected item type
    filtered_df = df[df['Itemtype'] == st.session_state['selected_item_type']]

    # Find the top 3 values based on total SalesVolume in the selected grouping category
    top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index

    # Filter the data for only the top 3 values
    top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]

    # Group by Year, Week, Dt, and the selected category and aggregate SalesVolume and UnitPrice
    agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()

    # One figure per top-3 value: sales volume on top, unit price below
    for value in top_3_values:
        value_data = agg_df[agg_df[group_by_option] == value]

        fig, (axd, axp) = plt.subplots(2, 1, figsize=(10, 6))

        # Plot SalesVolume
        sns.lineplot(data=value_data, x='Dt', y='SalesVolume', ax=axd)
        axd.set_title(f"SalesVolume - {value} ({group_by_option})")
        axd.grid(True, linestyle='--', color='gray', alpha=0.7)

        # Dashed red line at the mean SalesVolume; ticks every two months,
        # with the labels hidden because the lower panel carries them.
        axd.axhline(value_data['SalesVolume'].mean(), ls="--", color="r")
        axd.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
        axd.tick_params(axis='x', labelbottom=False)

        # Plot UnitPrice
        sns.lineplot(data=value_data, x='Dt', y='UnitPrice', ax=axp, color='green', errorbar='sd')
        axp.set_title(f"UnitPrice - {value} ({group_by_option})")
        axp.grid(True, linestyle='--', color='gray', alpha=0.7)

        # Dashed red line at the mean UnitPrice
        axp.axhline(value_data['UnitPrice'].mean(), ls="--", color="r")
        axp.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
        axp.tick_params(axis='x', rotation=90)

        # Adjust layout for this figure only (not whatever pyplot considers current)
        fig.tight_layout()

        # Display the plot in Streamlit, then release it: pyplot keeps every
        # figure alive until closed, which would pile up across reruns.
        st.pyplot(fig)
        plt.close(fig)
###############################################################################################

########################################### CARD #3 ####################################################
# Card 3: UnitPrice vs SalesVolume scatter with LOWESS trend, one panel per
# fiscal year, promo and non-promo weeks drawn separately.
if st.session_state['active_card'] == 'card3':
    # Dropdown for selecting the Item Type (using session_state).  Fall back to
    # the first option if the remembered item type is not present in the data.
    item_type_options = list(df['Itemtype'].unique())
    stored_item_type = st.session_state['selected_item_type']
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', item_type_options,
        index=item_type_options.index(stored_item_type) if stored_item_type in item_type_options else 0)

    # Collapse rows sharing the same week/store/container/item keys:
    # volumes and sales are summed, unit price is averaged.
    df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
        'Sales': 'sum'}).reset_index()

    def dt_from_fy_week(fyweek):
        """Return the start date of the fiscal week named by ``fyweek``.

        ``fyweek`` must contain exactly two integers, read as fiscal year and
        week number.  Week 1 is taken to start on 1 August of the calendar
        year before the fiscal year; each later week adds seven days.
        """
        fy, w = re.findall(r'\d+', fyweek)
        week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d")
        return (week1_start + timedelta(weeks=int(w) - 1)).date()

    # Add promo and date columns to data.  Each distinct FyWeek label is parsed
    # once and mapped back, instead of running the regex on every row.
    week_dates = {fy_week: dt_from_fy_week(fy_week) for fy_week in df['FyWeek'].unique()}
    df['Dt'] = pd.to_datetime(df['FyWeek'].astype(object).map(week_dates), errors='coerce')
    # Weeks whose start date falls in March-June are labelled 'Promo'.
    df['Promo'] = np.where(df['Dt'].dt.month.isin([3, 4, 5, 6]), 'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")
    # Split FyWeek into fiscal year and week number
    df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
    df['Year'] = df['FyWeek'].str.split().str[1].astype(int)

    # Define the fiscal years
    years = ["FY 2021", "FY 2022", "FY 2023", "FY 2024"]

    # The item-type filter does not change between panels, so apply it once.
    selected_item_type = st.session_state['selected_item_type']
    item_df = df[df["Itemtype"] == selected_item_type]

    # (Promo label, scatter marker, trend-line colour)
    promo_styles = (("Promo", '.', "r"), ("NoPromo", 'x', "g"))

    # Set up a 2x2 grid of subplots for the four years
    fig, axs = plt.subplots(2, 2, figsize=(12, 8))

    # One panel per fiscal year
    for ax, fy in zip(axs.flat, years):
        year_df = item_df[item_df["Fy"] == fy]

        for promo_label, marker, line_color in promo_styles:
            subset = year_df[year_df["Promo"] == promo_label]
            # Nothing to fit or draw for a year/promo combination with no rows.
            if subset.empty:
                continue
            sns.regplot(data=subset, x="UnitPrice", y="SalesVolume", lowess=True, ci=None,
                        marker=marker, line_kws=dict(color=line_color), ax=ax, label=promo_label)

        # Set the title of each subplot
        ax.set_title(f"{selected_item_type} - {fy}")
        # Only add a legend when at least one series was drawn on this panel.
        if ax.get_legend_handles_labels()[0]:
            ax.legend(loc="best")

    # Set the overall title for the figure
    fig.suptitle(f"Price vs SalesVolume for {selected_item_type} across years")

    # Adjust layout to prevent overlap (top 5% is left for the suptitle)
    fig.tight_layout(rect=[0, 0, 1, 0.95])

    # Display the plot, then release it: pyplot keeps every figure alive until
    # closed, which would pile up across reruns.
    st.pyplot(fig)
    plt.close(fig)

###############################################################################################

########################################### CARD #4 ####################################################
# Card 4: total SalesVolume per container code, split by unit-price band,
# one panel per fiscal year.
if st.session_state['active_card'] == 'card4':
    # Define the fiscal years
    years = ['FY 2021', 'FY 2022', 'FY 2023', 'FY 2024']

    # Collapse rows sharing the same week/store/container/item keys:
    # volumes and sales are summed, unit price is averaged.
    df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
        'Sales': 'sum'}).reset_index()

    # Dropdown for selecting the Item Type (using session_state).  Fall back to
    # the first option if the remembered item type is not present in the data.
    item_type_options = list(df['Itemtype'].unique())
    stored_item_type = st.session_state['selected_item_type']
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', item_type_options,
        index=item_type_options.index(stored_item_type) if stored_item_type in item_type_options else 0)
    selected_item_type = st.session_state['selected_item_type']

    # Set up a 2x2 grid of subplots for the four years
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    axes = axes.flatten()  # To access axes easily in a loop

    # Loop through each year and plot the data
    for i, year in enumerate(years):
        # Set the title and axis labels up front so a panel without data is
        # still labelled.
        axes[i].set_title(f"Total Sales Volume by Container Code and Price Band for {year}")
        axes[i].set_xlabel('Container Code')
        axes[i].set_ylabel('Total Sales Volume')

        # Filter data for the specific year and item type selected.  Copy the
        # slice so the column assignments below do not write into a view of df.
        cage_data = df[(df['Itemtype'] == selected_item_type) & (df['Fy'] == year)].copy()
        if cage_data.empty:
            continue
        # Drop categories that do not occur in this slice so they do not show
        # up as empty positions on the x axis.
        cage_data['Itemtype'] = cage_data['Itemtype'].cat.remove_unused_categories()
        cage_data['Containercode'] = cage_data['Containercode'].cat.remove_unused_categories()

        # Calculate price bands from the unit-price quartiles
        unit_price = cage_data['UnitPrice']
        lower_band = unit_price.quantile(0.25)
        median_band = unit_price.quantile(0.50)
        higher_band = unit_price.quantile(0.75)

        # NOTE(review): rows with median < UnitPrice <= 75th percentile belong
        # to no band and are left out of the chart.  Kept as-is; confirm
        # whether the 'Median Band' was meant to extend up to the 75th
        # percentile.
        band_masks = (
            ('Lower Band', unit_price <= lower_band),
            ('Median Band', (unit_price > lower_band) & (unit_price <= median_band)),
            ('Higher Band', unit_price > higher_band),
        )

        # Aggregate SalesVolume and average UnitPrice for each container code in each pricing band
        band_frames = []
        for band_label, band_mask in band_masks:
            band_agg = cage_data[band_mask].groupby('Containercode', observed=True).agg(
                total_sales_volume=('SalesVolume', 'sum'),
                avg_unit_price=('UnitPrice', 'mean')
            ).reset_index()
            band_agg['PriceBand'] = band_label
            band_frames.append(band_agg)

        # Combine the data for plotting.  Every container code here already
        # comes from cage_data, so no further filtering is needed.
        combined_data = pd.concat(band_frames)

        # Plot Total Sales Volume for each price band in the current subplot
        sns.barplot(x='Containercode', y='total_sales_volume', hue='PriceBand', data=combined_data, ax=axes[i])

        # barplot sets its own axis labels from the column names; restore ours.
        axes[i].set_xlabel('Container Code')
        axes[i].set_ylabel('Total Sales Volume')
        axes[i].tick_params(axis='x', rotation=45)

    # Adjust the layout so titles and labels don't overlap
    fig.tight_layout()

    # Display the plot in Streamlit, then release it: pyplot keeps every figure
    # alive until closed, which would pile up across reruns.
    st.pyplot(fig)
    plt.close(fig)

###############################################################################################