File size: 13,777 Bytes
f8ab25d
 
638eb56
f8ab25d
638eb56
 
f8ab25d
136ada0
638eb56
 
 
136ada0
 
8776749
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136ada0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cdf9bf
136ada0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cdf9bf
136ada0
4cdf9bf
136ada0
 
 
 
 
 
 
 
 
8776749
 
 
136ada0
 
4cdf9bf
 
 
 
136ada0
 
 
 
4cdf9bf
136ada0
4cdf9bf
 
136ada0
 
638eb56
 
 
 
136ada0
638eb56
 
f8ab25d
136ada0
4cdf9bf
638eb56
 
 
 
f8ab25d
638eb56
 
 
8776749
 
 
4cdf9bf
 
136ada0
 
 
 
 
f8ab25d
 
136ada0
 
 
f8ab25d
136ada0
638eb56
136ada0
 
 
 
 
 
638eb56
136ada0
 
 
 
 
 
4cdf9bf
136ada0
 
 
 
 
638eb56
136ada0
638eb56
 
4cdf9bf
136ada0
638eb56
 
9e3c2c5
 
 
638eb56
9e3c2c5
4cdf9bf
9e3c2c5
136ada0
 
638eb56
136ada0
638eb56
136ada0
638eb56
 
 
 
136ada0
 
638eb56
9e3c2c5
 
638eb56
9e3c2c5
 
4cdf9bf
136ada0
9e3c2c5
638eb56
 
 
 
136ada0
 
 
 
9e3c2c5
 
136ada0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cdf9bf
 
136ada0
 
 
 
 
 
4cdf9bf
8776749
4cdf9bf
8776749
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import plotly.express as px
import plotly.graph_objects as go
import re
from datetime import datetime, timedelta
import warnings
import time
import dask.dataframe as dd
# US Census-style sales regions, listed once per region and inverted below
# into the state -> region lookup the dashboard uses.
# NOTE(review): DC is not listed, so DC rows map to NaN in the 'Region'
# column — confirm whether that is intentional.
_REGION_STATES = {
    'WEST': [
        'AK', 'CA', 'CO', 'HI', 'ID', 'MT', 'NV', 'OR', 'UT', 'WA', 'WY',
    ],
    'SOUTHWEST': [
        'AZ', 'NM', 'OK', 'TX',
    ],
    'MIDWEST': [
        'IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI',
    ],
    'SOUTHEAST': [
        'AL', 'AR', 'DE', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC', 'SC', 'TN',
        'VA', 'WV',
    ],
    'NORTHEAST': [
        'CT', 'ME', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT',
    ],
}

# Flat state-abbreviation -> region mapping used with Series.map().
state_to_region = {
    abbr: region
    for region, states in _REGION_STATES.items()
    for abbr in states
}
@st.cache_data
def date_from_week(year, week):
    """Convert fiscal (year, week) Series into calendar dates.

    The fiscal year is assumed to begin on August 1 of the previous
    calendar year, so week 1 of FY ``year`` starts on (year-1)-08-01.
    Both arguments are expected to be integer pandas Series (``.astype``
    is called on ``year``); returns a datetime Series.
    """
    # Anchor each row at the start of its fiscal year...
    fy_start = pd.to_datetime((year - 1).astype(str) + '-08-01')
    # ...then step forward a whole number of 7-day weeks.
    week_offset = pd.to_timedelta((week - 1) * 7, unit='days')
    return fy_start + week_offset

@st.cache_data
def load_data(active_card):
    """Load "fy21-24.csv" with only the columns the active card needs.

    Cached by Streamlit, so each distinct ``active_card`` value triggers at
    most one CSV read per session. For card2/card3 the rows are additionally
    aggregated and the fiscal-week string is parsed into Year/Week/Dt
    columns. Always adds a 'Region' column derived from 'State'.

    Returns a pandas DataFrame (the dask frame is fully materialized).
    """
    # st.write(f"{active_card}")
    # Define columns common to multiple cards if there are any
    # (also the fallback column set when active_card is None / unknown)
    common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales']

    # Columns specific to cards
    card_specific_cols = {
        'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'],
        'card2': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'],
        'card3': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'] # Added for PE calculation card
    }

    # Choose columns based on the active card
    required_columns = card_specific_cols.get(active_card, common_cols)

    # Define the data types for efficient memory usage
    # (keys not present in required_columns are simply ignored by the reader)
    dtype_spec = {
        'FyWeek': 'string',
        'Fy': 'category',  # Add data type for 'Fy' if it's used
        'Itemtype': 'category',
        'Chaincode': 'category',
        'State': 'category',
        "Store": "category", 
        'Containercode': 'category',
        "Address": "string",
        "Zipcode": "float",
        "City": "category", 
        'SalesVolume': 'float',
        'UnitPrice': 'float',
        'Sales': 'float'
    }

    # Read only the necessary columns
    # st.write(required_columns)
    ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec)
    df = ddf.compute()  # materialize the dask frame into pandas

    # st.write("+++++++++++++++++++++++")

    # NOTE(review): 'card4' has no corresponding button in the UI below —
    # presumably a planned/removed card; confirm before relying on it.
    if active_card in ['card2', 'card3', 'card4']:
        # Collapse duplicate rows for the same store/week/item, summing
        # volume and sales but averaging the unit price.
        df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
        'Sales': 'sum'
        }).reset_index()
        # 'FyWeek' looks like "FY 2021 Week 5" -> FY="FY 2021", Week="5"
        df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True)
        df['Week'] = df['Week'].astype(int)  # Convert 'Week' to int
        df['Year'] = df['FY'].str[2:].astype(int)  # Extract year part and convert to int
        df['Dt'] = date_from_week(df['Year'], df['Week'])
    # Add the region column based on state
    # (states absent from state_to_region, e.g. DC, become NaN)
    df['Region'] = df['State'].map(state_to_region)

    return df

# ---------------------------------------------------------------------------
# Page header, session-state defaults, card-selection buttons, and data load.
# This top-level script re-runs on every Streamlit interaction; the data load
# itself is cheap after the first run because load_data is cached.
# ---------------------------------------------------------------------------
# st.image("bonnie.png", width=200) 
# # Main interactive section
# st.title('Bonnnie Plants Price vs Sales Volume Trcaker')

# Display logo
st.image("bonnie.png", width=150)  # Adjust width as needed

# Display title
st.title("Bonnie Plants Pricing & Sales Analytics Dashboard")

# Close the div for logo and title
# st.markdown('</div>', unsafe_allow_html=True)

#  Initialize session state for storing which card was clicked and item type
# (set only on the first run; later reruns keep the user's selections)
if 'active_card' not in st.session_state:
    st.session_state['active_card'] = None
if 'selected_item_type' not in st.session_state:
    st.session_state['selected_item_type'] = 'CORE'  # Set default to 'CORE'

if 'selected_feature' not in st.session_state:
    st.session_state['selected_feature'] = 'Chaincode'  # Default to 'Chain Code'

# Card selection buttons
col1, col2, col3= st.columns(3)
# Define buttons for plot categories, update session state when clicked
with col1:
    if st.button("Sales Volume Trend for Item Category"):
        st.session_state['active_card'] = 'card1'

with col2:
    if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"):
        st.session_state['active_card'] = 'card2'
with col3:
    if st.button("PE Coefficient Calculation for Regions & Item Categories"):
        st.session_state['active_card'] = 'card3'


# Load (or fetch from cache) the dataframe for the currently active card
# and surface the elapsed time in the UI.
start_time=time.time()
# st.write(st.session_state['active_card'])
df = load_data(st.session_state['active_card'])
time_taken = time.time() - start_time
st.write(f"Data loaded in {time_taken:.2f} seconds")


# Initialize session state for storing the selected state and feature
if 'selected_state' not in st.session_state:
    st.session_state['selected_state'] = df['State'].unique()[0]  # Default to the first state

############################################ CARD #1 ####################################################
if st.session_state['active_card'] == 'card1':
    # st.write("Processing card1...")
    # Card 1: weekly sales-volume bar chart for one state, colored by the
    # chosen grouping feature (chain code or item type).
    state_choice = st.selectbox('Select State', df['State'].unique())
    feature_choice = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype',])

    # Keep only the rows for the chosen state before aggregating.
    state_df = df[df['State'] == state_choice]

    # Time the grouping operation (diagnostic only; display is commented out).
    t0 = time.time()
    weekly_volume = (
        state_df
        .groupby(['FyWeek', feature_choice], observed=True)['SalesVolume']
        .sum()
        .reset_index()
    )
    grouping_secs = time.time() - t0
    # st.write(f"Data grouped in {grouping_secs:.2f} seconds")

    # Stacked bar chart of weekly volume, one color per feature value.
    fig = px.bar(
        weekly_volume,
        x='FyWeek',
        y='SalesVolume',
        color=feature_choice,
        title=f'Sales Volume over Fiscal Week in {state_choice} by {feature_choice}',
        labels={'SalesVolume': 'Sales Volume', 'Fiscal Week Short': 'Fiscal Week'},
    )
    st.plotly_chart(fig)

##########################################################################################################

##########################################################################################################

# Card 2: dual-axis weekly SalesVolume / UnitPrice line charts for the top-3
# groups (by total volume) of the chosen category, over the selected years.
if st.session_state['active_card'] == 'card2':
    # Dropdown to select item type (using session_state)
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', df['Itemtype'].unique(),
        index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))

    # Dropdown to select the grouping category (container code, chain code, or state)
    group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State','Region'])

    # Multi-select checkbox to select multiple years
    selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021])

    st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}")

    # Convert 'Dt' column to datetime (mutates the module-level df in place)
    df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
    # Flag March–June as the promo window.
    # NOTE(review): 'Promo' is computed here but never referenced below —
    # presumably left over from (or intended for) another feature; confirm.
    df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")

    # Filter the dataframe based on the selected item type and selected years
    filtered_df = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))]

    # Find the top 3 values based on total SalesVolume in the selected grouping category
    top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index

    # Filter the data for only the top 3 values
    top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]
    
    # Aggregate data to one row per group/week: total volume, mean price
    agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()

    # Create a new column 'week-year' for X-axis labels
    # (%U = calendar week-of-year starting Sunday, not the fiscal week)
    agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y')

    # Loop through the top 3 values and create separate plots using Plotly
    for value in top_3_values:
        value_data = agg_df[agg_df[group_by_option] == value]
        # Means are drawn as dashed reference lines on each chart
        mean_sales_volume = value_data['SalesVolume'].mean()
        mean_unit_price = value_data['UnitPrice'].mean()

        # Create a Plotly figure
        fig = go.Figure()

        # Add SalesVolume trace (left Y-axis)
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['SalesVolume'],
            mode='lines+markers',
            name='SalesVolume',
            line=dict(color='blue'),
            hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}'
        ))

        # Add UnitPrice trace on a secondary Y-axis
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['UnitPrice'],
            mode='lines+markers',
            name='UnitPrice',
            line=dict(color='green'),
            yaxis='y2',
            hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}'
        ))
        # Add mean line for SalesVolume
        fig.add_shape(type="line",
                    x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
                    y0=mean_sales_volume, y1=mean_sales_volume,
                    line=dict(color="blue", width=2, dash="dash"),
                    xref='x', yref='y')

        # Add mean line for UnitPrice (on secondary Y-axis)
        fig.add_shape(type="line",
                    x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
                    y0=mean_unit_price, y1=mean_unit_price,
                    line=dict(color="green", width=2, dash="dash"),
                    xref='x', yref='y2')

        # Update layout for dual axes
        fig.update_layout(
            template='plotly_white',
            title=f"SalesVolume and UnitPrice - {value} ({group_by_option})",
            xaxis_title='Week-Year',
            yaxis_title='Sales Volume',
            yaxis2=dict(title='UnitPrice', overlaying='y', side='right'),
            legend=dict(x=0.9, y=1.15),
            hovermode="x unified",  # Show both values in a tooltip
            height=600,
            margin=dict(l=50, r=50, t=50, b=50)
        )

        # Rotate X-axis labels
        fig.update_xaxes(tickangle=90)

        # Display the Plotly figure in Streamlit
        st.plotly_chart(fig, use_container_width=True)

################################ CARD #3: price-elasticity coefficient ######
# Card 3: year-over-year price elasticity (PE) per region for one item type:
# PE = (% change in SalesVolume) / (% change in UnitPrice), computed on
# fiscal-year aggregates.
if st.session_state['active_card'] == 'card3':
    # Dropdown for selecting the item type
    item_type_options = df['Itemtype'].unique()
    selected_item_type = st.selectbox("Select Item Type", item_type_options)

    # Dropdown for selecting the region (multiple selection allowed)
    region_options = df['Region'].unique()
    selected_regions = st.multiselect("Select Region(s)", region_options, default=region_options)

    # Filter data based on selected item type and selected regions
    filtered_df = df[(df['Itemtype'] == selected_item_type) & (df['Region'].isin(selected_regions))]

    # Group by fiscal year, Region and Itemtype, and aggregate SalesVolume and UnitPrice
    agg_df = filtered_df.groupby(['Fy', 'Region', 'Itemtype',]).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()

    # Sort values by Region, Itemtype, Fy for YOY calculation
    # (pct_change below relies on this ordering within each group)
    agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'Fy',])

    # Calculate YOY percentage changes in Sales Volume and Unit Price
    # (the first year of each group has no prior year and yields NaN)
    agg_df['SalesVolume_pct_change'] = agg_df.groupby(['Region', 'Itemtype',])['SalesVolume'].pct_change().round(3) * 100
    agg_df['UnitPrice_pct_change'] = agg_df.groupby(['Region', 'Itemtype', ])['UnitPrice'].pct_change().round(3) * 100

    # Calculate Price Elasticity Coefficient (PE)
    # NOTE(review): a 0% price change makes this division produce +/-inf,
    # which is not dropped by the NaN filter below — confirm intended.
    agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change'] / agg_df['UnitPrice_pct_change']).round(2)

    # Exclude FY 2025 but keep FY 2021 even with NaN values
    # NOTE(review): the CSV is fy21-24, so this filter looks defensive/stale.
    agg_df_filtered = agg_df[agg_df['Fy'] != 'FY 2025']

    # Drop rows where PE_Coeff is NaN (optional)
    agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff'])
    st.dataframe(agg_df_filtered)
    st.write(agg_df_filtered.shape)
    # Plot the PE Coefficient with Plotly
    fig = px.line(
        agg_df_filtered, 
        x='Fy', 
        y='PE_Coeff',  # PE coefficient on the Y-axis
        line_dash='Region',  # Differentiate lines by Region
        title=f"Price Elasticity Coefficient (PE) by Year for {selected_item_type}",
        labels={'Fy': 'Fiscal Year', 'PE_Coeff': 'Price Elasticity Coefficient'},
        markers=True
    )

    # Customize layout and show plot
    fig.update_layout(
        height=600,
        width=1000,
    )

    st.plotly_chart(fig, use_container_width=True)