# Streamlit "Sales Data Dashboard" script.
# NOTE: the lines originally here were web file-viewer residue rather than
# Python: a "Spaces: Sleeping" status banner, "File size: 16,742 Bytes", a
# per-line commit gutter (hashes f8ab25d, 638eb56, 9e3c2c5) and the 1-341
# line-number column.
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import plotly.express as px
import re
from datetime import datetime, timedelta
import warnings
# Load the data
@st.cache_data
def load_sales_data(csv_path="fy21-24.csv"):
    """Read the sales CSV into a DataFrame with explicit column dtypes.

    Streamlit re-executes this script from the top on every widget
    interaction, so the parse is wrapped in ``st.cache_data``: the file is
    read once per distinct ``csv_path`` and later runs receive a copy of the
    cached frame. Because each run gets its own copy, the card sections
    below can keep adding columns to / reassigning ``df`` without affecting
    the cached data.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame, with ``Zipcode`` and ``SalesVolume`` converted
        via ``convert_dtypes()``.
    """
    sales = pd.read_csv(
        csv_path,
        dtype={
            "FyWeek": "string",
            "Fy": "category",
            "Chaincode": "category",
            "Store": "category",
            "Address": "string",
            "Zipcode": "float",
            "City": "category",
            "State": "category",
            "Containercode": "category",
            "Itemtype": "category",
            "SalesVolume": "float",
            "UnitPrice": "float",
            "Sales": "float",
        },
    )
    # Convert columns: both are read as float first and then handed to
    # convert_dtypes() so pandas can pick a nullable dtype for them.
    sales["Zipcode"] = sales["Zipcode"].convert_dtypes()
    sales["SalesVolume"] = sales["SalesVolume"].convert_dtypes()
    return sales


df = load_sales_data()
# Title for the app
st.title('Sales Data Dashboard')

# Seed st.session_state on the first run only; keys that already exist keep
# whatever the user last chose. Each default is wrapped in a callable so it
# is evaluated only when its key is actually missing.
session_defaults = (
    ('active_card', lambda: None),                        # which card button was clicked
    ('selected_item_type', lambda: 'CORE'),               # default item type
    ('selected_state', lambda: df['State'].unique()[0]),  # first state in the data
    ('selected_feature', lambda: 'Chaincode'),            # default grouping feature
)
for state_key, make_default in session_defaults:
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()

# Four columns, one card button in each
col1, col2, col3, col4 = st.columns(4)

# (column, button label, card id stored in session state when clicked)
card_buttons = (
    (col1, "Sales Volume Trend for Item Category", 'card1'),
    (col2, "Sales Volume & Unit Price Correlation for Item Category and Container Code", 'card2'),
    (col3, "Price vs Sales Trend by Year", 'card3'),
    (col4, "Total Sales Volume by Price Band", 'card4'),
)
for column, button_label, card_id in card_buttons:
    with column:
        # st.button returns True only on the run triggered by the click, so
        # the choice is persisted in session state for subsequent reruns.
        if st.button(button_label):
            st.session_state['active_card'] = card_id
########################################### CARD #1 ####################################################
# Card 1: interactive bar chart of weekly sales volume for one state,
# optionally split by chain code or item type.
if st.session_state['active_card'] == 'card1':
    # Parse 'FyWeek': the fiscal year is the second space-separated token and
    # the week number is whatever follows "Week ".
    df['Fiscal Year'] = df['FyWeek'].str.split(' ').str[1].astype(int)
    df['Week Number'] = df['FyWeek'].str.split('Week ').str[1].astype(int)
    df = df.sort_values(by=['Fiscal Year', 'Week Number'])

    # Short display label (e.g. 'FY21W51'), built with vectorized string ops
    # instead of a row-wise apply.
    short_week = ('FY' + (df['Fiscal Year'] % 100).astype(str)
                  + 'W' + df['Week Number'].astype(str))
    # Ordered categorical whose category order is the order of appearance in
    # the sorted frame, i.e. chronological.
    df['Fiscal Week Short'] = pd.Categorical(short_week, categories=short_week.unique(), ordered=True)

    # Dropdown for selecting the state (using session_state). Fall back to
    # the first option if the remembered value is not among the options.
    state_options = list(df['State'].unique())
    remembered_state = st.session_state['selected_state']
    state_index = state_options.index(remembered_state) if remembered_state in state_options else 0
    st.session_state['selected_state'] = st.selectbox('Select State', state_options, index=state_index)

    # Dropdown for selecting the feature for grouping (using session_state)
    feature_options = ['Chaincode', 'Itemtype', 'FyWeek']
    remembered_feature = st.session_state['selected_feature']
    feature_index = feature_options.index(remembered_feature) if remembered_feature in feature_options else 0
    st.session_state['selected_feature'] = st.selectbox('Select Feature for Grouping', feature_options,
                                                        index=feature_index)

    selected_state = st.session_state['selected_state']
    selected_feature = st.session_state['selected_feature']

    # Filter the dataframe based on selected state
    filtered_df = df[df['State'] == selected_state]
    # Weeks that actually occur for this state, still in chronological order.
    week_order = filtered_df['Fiscal Week Short'].cat.remove_unused_categories().cat.categories.tolist()

    feature_labels = {'Itemtype': 'Item Type', 'Chaincode': 'Chain Code', 'FyWeek': 'Fiscal Week'}
    st.subheader(f'Sales Data for {selected_state} - Grouped by {feature_labels[selected_feature]}')

    chart_title = f'Sales Volume over Fiscal Week in {selected_state}'
    if selected_feature == 'FyWeek':
        # One bar per week, no colour split.
        group_keys = ['Fiscal Week Short']
        color_column = None
    else:
        group_keys = ['Fiscal Week Short', selected_feature]
        color_column = selected_feature
        chart_title += f' by {feature_labels[selected_feature]}'

    # Group on the ordered short label rather than the raw 'FyWeek' string:
    # the raw string sorts lexicographically ("Week 10" before "Week 2").
    # observed=True keeps only category combinations present in the filtered
    # data, matching the groupby calls in the other cards.
    group_data = filtered_df.groupby(group_keys, observed=True)['SalesVolume'].sum().reset_index()

    fig = px.bar(group_data, x='Fiscal Week Short', y='SalesVolume', color=color_column,
                 title=chart_title,
                 labels={'SalesVolume': 'Sales Volume', 'Fiscal Week Short': 'Fiscal Week'},
                 # Pin the x-axis to chronological order regardless of which
                 # colour group happens to be drawn first.
                 category_orders={'Fiscal Week Short': week_order})

    # Display the interactive plot
    st.plotly_chart(fig)
##########################################################################################################
########################################### CARD #2 ####################################################
# Card 2: Sales Volume & Unit Price Correlation plot for Item Category and Container Code
if st.session_state['active_card'] == 'card2':
    # Dropdown to select item type (using session_state). Fall back to the
    # first option if the remembered value is not among the options.
    item_type_options = list(df['Itemtype'].unique())
    remembered_item_type = st.session_state['selected_item_type']
    item_type_index = (item_type_options.index(remembered_item_type)
                       if remembered_item_type in item_type_options else 0)
    st.session_state['selected_item_type'] = st.selectbox('Select Item Type', item_type_options,
                                                          index=item_type_index)

    # Dropdown to select the grouping category (container code, chain code, or state)
    group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])
    st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option}")

    # Collapse duplicate rows: one row per week/store/container/item combination.
    df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State',
                     'Containercode', 'Itemtype'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
        'Sales': 'sum'
    }).reset_index()

    # Function to extract date from fiscal week
    def dt_from_fy_week(fyweek):
        """Return the start date of the fiscal week named by ``fyweek``.

        The label must contain exactly two runs of digits: the fiscal year
        followed by the week number. Week 1 is taken to start on 1 August of
        the calendar year before the fiscal year; each later week starts
        seven days after the previous one.

        Raises:
            ValueError: if the label does not contain exactly two digit runs.
        """
        fy, w = re.findall(r'\d+', fyweek)
        week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d")
        return (week1_start + timedelta(weeks=int(w) - 1)).date()

    # Add columns for date and promo to data. The date is computed once per
    # distinct week label and mapped back, instead of once per row.
    week_start_by_label = {label: dt_from_fy_week(label) for label in df['FyWeek'].unique()}
    df['Dt'] = pd.to_datetime(df['FyWeek'].map(week_start_by_label), errors='coerce')
    # Weeks starting in March-June are flagged as promo. Months are compared
    # as numbers: a string comparison breaks if any date is NaT, because the
    # month column then becomes float and renders as "3.0".
    df['Promo'] = np.where(df['Dt'].dt.month.isin([3, 4, 5, 6]), 'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")

    # Split FyWeek into fiscal year and week number
    df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
    df['Year'] = df['FyWeek'].str.split().str[1].astype(int)

    # Filter the dataframe based on the selected item type
    filtered_df = df[df['Itemtype'] == st.session_state['selected_item_type']]

    # Find the top 3 values based on total SalesVolume in the selected grouping category
    top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index

    # Filter the data for only the top 3 values
    top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]

    # Group by Year, Week, Dt, and the selected category and aggregate SalesVolume and UnitPrice
    agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()

    # Loop through the top 3 values and create separate plots
    for value in top_3_values:
        value_data = agg_df[agg_df[group_by_option] == value]

        # One figure per group: sales volume on top, unit price below.
        fig, (axd, axp) = plt.subplots(2, 1, figsize=(10, 6))

        # Plot SalesVolume
        sns.lineplot(data=value_data, x='Dt', y='SalesVolume', ax=axd)
        axd.set_title(f"SalesVolume - {value} ({group_by_option})")
        axd.grid(True, linestyle='--', color='gray', alpha=0.7)
        # Plot mean line for SalesVolume
        axd.axhline(value_data['SalesVolume'].mean(), ls="--", color="r")
        axd.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
        # Hide the top panel's tick labels without installing a fixed
        # formatter on top of the MonthLocator.
        axd.tick_params(axis='x', labelbottom=False)

        # Plot UnitPrice
        sns.lineplot(data=value_data, x='Dt', y='UnitPrice', ax=axp, color='green', errorbar='sd')
        axp.set_title(f"UnitPrice - {value} ({group_by_option})")
        axp.grid(True, linestyle='--', color='gray', alpha=0.7)
        # Plot mean line for UnitPrice
        axp.axhline(value_data['UnitPrice'].mean(), ls="--", color="r")
        axp.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
        axp.tick_params(axis='x', rotation=90)

        # Adjust layout for this figure explicitly (not the implicit "current" one)
        fig.tight_layout()

        # Display the plot in Streamlit
        st.pyplot(fig)
        # pyplot keeps every figure it creates alive until closed; the script
        # reruns on each interaction, so release the figure once rendered.
        plt.close(fig)
###############################################################################################
########################################### CARD #3 ####################################################
# Card 3: unit price vs. sales volume scatter with LOWESS trend per fiscal
# year, promo and non-promo weeks drawn separately.
if st.session_state['active_card'] == 'card3':
    # Dropdown for selecting the Item Type. Fall back to the first option if
    # the remembered value is not among the options.
    item_type_options = list(df['Itemtype'].unique())
    remembered_item_type = st.session_state['selected_item_type']
    item_type_index = (item_type_options.index(remembered_item_type)
                       if remembered_item_type in item_type_options else 0)
    st.session_state['selected_item_type'] = st.selectbox('Select Item Type', item_type_options,
                                                          index=item_type_index)
    selected_item_type = st.session_state['selected_item_type']

    # Collapse duplicate rows: one row per week/store/container/item combination.
    df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State',
                     'Containercode', 'Itemtype'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
        'Sales': 'sum'}).reset_index()

    # add promo and date columns to data
    def dt_from_fy_week(fyweek):
        """Return the start date of the fiscal week named by ``fyweek``.

        The label must contain exactly two runs of digits: the fiscal year
        followed by the week number. Week 1 is taken to start on 1 August of
        the calendar year before the fiscal year; each later week starts
        seven days after the previous one.

        Raises:
            ValueError: if the label does not contain exactly two digit runs.
        """
        fy, w = re.findall(r'\d+', fyweek)
        week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d")
        return (week1_start + timedelta(weeks=int(w) - 1)).date()

    # Compute the date once per distinct week label and map it back.
    week_start_by_label = {label: dt_from_fy_week(label) for label in df['FyWeek'].unique()}
    df['Dt'] = pd.to_datetime(df['FyWeek'].map(week_start_by_label), errors='coerce')
    # Weeks starting in March-June are flagged as promo. Months are compared
    # as numbers: a string comparison breaks if any date is NaT, because the
    # month column then becomes float and renders as "3.0".
    df['Promo'] = np.where(df['Dt'].dt.month.isin([3, 4, 5, 6]), 'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")

    # Split FyWeek into fiscal year and week number
    df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
    df['Year'] = df['FyWeek'].str.split().str[1].astype(int)

    # Define the fiscal years
    years = ["FY 2021", "FY 2022", "FY 2023", "FY 2024"]

    # Set up a 2x2 grid of subplots for the four years
    fig, axs = plt.subplots(2, 2, figsize=(12, 8))

    # The item-type filter is the same for every panel, so apply it once.
    item_df = df[df["Itemtype"] == selected_item_type]
    # (promo label, scatter marker, trend-line colour)
    promo_styles = (("Promo", '.', "r"), ("NoPromo", 'x', "g"))

    # Loop through each year and create a plot in the grid
    for ax, fy in zip(axs.flat, years):
        year_df = item_df[item_df["Fy"] == fy]
        drew_series = False
        for promo_label, marker, line_color in promo_styles:
            subset = year_df[year_df["Promo"] == promo_label]
            if subset.empty:
                # Nothing to scatter or smooth for this year/promo combination.
                continue
            sns.regplot(data=subset, x="UnitPrice", y="SalesVolume", lowess=True, ci=None,
                        marker=marker, line_kws=dict(color=line_color), ax=ax, label=promo_label)
            drew_series = True

        # Set the title of each subplot
        ax.set_title(f"{selected_item_type} - {fy}")
        if drew_series:
            ax.legend(loc="best")

    # Set the overall title for the figure
    fig.suptitle(f"Price vs SalesVolume for {selected_item_type} across years")
    # Adjust layout to prevent overlap (leave room at the top for the suptitle)
    fig.tight_layout(rect=[0, 0, 1, 0.95])

    # Display the plot
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until closed; the script
    # reruns on each interaction, so release the figure once rendered.
    plt.close(fig)
###############################################################################################
########################################### CARD #4 ####################################################
# Card 4: total sales volume per container code, split into three unit-price
# bands, one subplot per fiscal year.
if st.session_state['active_card'] == 'card4':
    # Define the fiscal years
    years = ['FY 2021', 'FY 2022', 'FY 2023', 'FY 2024']
    # Fixed hue order so every subplot uses the same colour per band even
    # when one of the bands is empty for that year.
    band_order = ['Lower Band', 'Median Band', 'Higher Band']

    # Collapse duplicate rows: one row per week/store/container/item combination.
    df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State',
                     'Containercode', 'Itemtype'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
        'Sales': 'sum'}).reset_index()

    # Dropdown for selecting the Item Type (using session_state). Fall back
    # to the first option if the remembered value is not among the options.
    item_type_options = list(df['Itemtype'].unique())
    remembered_item_type = st.session_state['selected_item_type']
    item_type_index = (item_type_options.index(remembered_item_type)
                       if remembered_item_type in item_type_options else 0)
    st.session_state['selected_item_type'] = st.selectbox('Select Item Type', item_type_options,
                                                          index=item_type_index)

    # Set up a 2x2 grid of subplots for the four years
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    axes = axes.flatten()  # To access axes easily in a loop

    # The item-type filter is the same for every year, so apply it once.
    item_df = df[df['Itemtype'] == st.session_state['selected_item_type']]

    # Loop through each year and plot the data
    for ax, year in zip(axes, years):
        # Rows for this year with a usable price. .copy() makes the column
        # assignments below operate on an independent frame rather than on a
        # slice of df.
        cage_data = item_df[item_df['Fy'] == year].dropna(subset=['UnitPrice']).copy()
        # Drop container codes that do not occur for this year/item type so
        # they do not become empty slots on the categorical x-axis.
        cage_data['Containercode'] = cage_data['Containercode'].cat.remove_unused_categories()

        ax.set_title(f"Total Sales Volume by Container Code and Price Band for {year}")
        if cage_data.empty:
            # No rows to band or plot; leave a labelled placeholder panel.
            ax.text(0.5, 0.5, 'No data', ha='center', va='center', transform=ax.transAxes)
            continue

        # Price band edges: 25th and 75th percentile of unit price.
        lower_band = cage_data['UnitPrice'].quantile(0.25)
        higher_band = cage_data['UnitPrice'].quantile(0.75)

        # Assign every row to exactly one band:
        #   Lower  : price <= 25th percentile
        #   Median : 25th percentile < price <= 75th percentile
        #   Higher : price > 75th percentile
        # NOTE(review): the middle band previously stopped at the median, so
        # rows between the median and the 75th percentile fell into no band
        # and were left out of the chart. Confirm the full inter-quartile
        # range is the intended middle band.
        unit_price = cage_data['UnitPrice']
        cage_data['PriceBand'] = np.where(
            unit_price <= lower_band, 'Lower Band',
            np.where(unit_price <= higher_band, 'Median Band', 'Higher Band'))

        # Aggregate SalesVolume and average UnitPrice for each container code in each pricing band
        combined_data = cage_data.groupby(['Containercode', 'PriceBand'], observed=True).agg(
            total_sales_volume=('SalesVolume', 'sum'),
            avg_unit_price=('UnitPrice', 'mean')
        ).reset_index()

        # Plot Total Sales Volume for each price band in the current subplot
        sns.barplot(x='Containercode', y='total_sales_volume', hue='PriceBand', hue_order=band_order,
                    data=combined_data, ax=ax)

        # Customize axis labels after plotting, since seaborn sets its own.
        ax.set_xlabel('Container Code')
        ax.set_ylabel('Total Sales Volume')
        ax.tick_params(axis='x', rotation=45)

    # Adjust the layout so titles and labels don't overlap
    fig.tight_layout()

    # Display the plot in Streamlit
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until closed; the script
    # reruns on each interaction, so release the figure once rendered.
    plt.close(fig)
###############################################################################################
|