Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import matplotlib.dates as mdates | |
import plotly.express as px | |
import re | |
from datetime import datetime, timedelta | |
import warnings | |
# Load the data | |
df = pd.read_csv(r"fy21-24.csv", | |
dtype={"FyWeek": "string", | |
"Fy": "category", | |
"Chaincode": "category", | |
"Store": "category", | |
"Address": "string", | |
"Zipcode": "float", | |
"City": "category", | |
"State": "category", | |
"Containercode": "category", | |
"Itemtype": "category", | |
"SalesVolume":"float", | |
"UnitPrice":"float", | |
"Sales":"float"}) | |
# Convert columns | |
df["Zipcode"] = df["Zipcode"].convert_dtypes() | |
df["SalesVolume"] = df["SalesVolume"].convert_dtypes() | |
# Title for the app | |
st.title('Sales Data Dashboard') | |
# Initialize session state for storing which card was clicked and item type | |
if 'active_card' not in st.session_state: | |
st.session_state['active_card'] = None | |
if 'selected_item_type' not in st.session_state: | |
st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE' | |
# Initialize session state for storing the selected state and feature | |
if 'selected_state' not in st.session_state: | |
st.session_state['selected_state'] = df['State'].unique()[0] # Default to the first state | |
if 'selected_feature' not in st.session_state: | |
st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code' | |
# Two columns for the card buttons | |
col1, col2, col3, col4 = st.columns(4) | |
# Define buttons for plot categories, update session state when clicked | |
with col1: | |
if st.button("Sales Volume Trend for Item Category"): | |
st.session_state['active_card'] = 'card1' | |
with col2: | |
if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"): | |
st.session_state['active_card'] = 'card2' | |
with col3: | |
if st.button("Price vs Sales Trend by Year"): | |
st.session_state['active_card'] = 'card3' | |
with col4: | |
if st.button("Total Sales Volume by Price Band"): | |
st.session_state['active_card'] = 'card4' | |
########################################### CARD #1 #################################################### | |
if st.session_state['active_card'] == 'card1': | |
# Create short fiscal week display | |
df['Fiscal Year'] = df['FyWeek'].apply(lambda x: int(x.split(' ')[1])) | |
df['Week Number'] = df['FyWeek'].apply(lambda x: int(x.split('Week ')[1])) | |
df = df.sort_values(by=['Fiscal Year', 'Week Number']) | |
# Reformat 'Fiscal Week' for display (e.g., 'FY21W51') | |
df['Fiscal Week Short'] = df.apply(lambda x: f"FY{x['Fiscal Year']%100}W{x['Week Number']}", axis=1) | |
# Ensure the short fiscal week column is treated as a categorical variable and sorted by the order of appearance | |
df['Fiscal Week Short'] = pd.Categorical(df['Fiscal Week Short'], categories=df['Fiscal Week Short'].unique(), ordered=True) | |
# Dropdown for selecting the state (using session_state) | |
st.session_state['selected_state'] = st.selectbox('Select State', df['State'].unique(), | |
index=list(df['State'].unique()).index(st.session_state['selected_state'])) | |
# Dropdown for selecting the feature for grouping (using session_state) | |
st.session_state['selected_feature'] = st.selectbox('Select Feature for Grouping', | |
['Chaincode', 'Itemtype', 'FyWeek'], | |
index=['Chaincode', 'Itemtype', 'FyWeek'].index(st.session_state['selected_feature'])) | |
# Filter the dataframe based on selected state | |
filtered_df = df[df['State'] == st.session_state['selected_state']] | |
# Plot based on user's selected feature | |
if st.session_state['selected_feature'] == 'Itemtype': | |
st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Item Type') | |
group_data = filtered_df.groupby(['FyWeek', 'Itemtype'])['SalesVolume'].sum().reset_index() | |
fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color='Itemtype', | |
title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]} by Item Type', | |
labels={'SalesVolume': 'Sales Volume'}) | |
elif st.session_state['selected_feature'] == 'Chaincode': | |
st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Chain Code') | |
group_data = filtered_df.groupby(['FyWeek', 'Chaincode'])['SalesVolume'].sum().reset_index() | |
fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color='Chaincode', | |
title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]} by Chain Code', | |
labels={'SalesVolume': 'Sales Volume'}) | |
elif st.session_state['selected_feature'] == 'FyWeek': | |
st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Fiscal Week') | |
group_data = filtered_df.groupby(['FyWeek'])['SalesVolume'].sum().reset_index() | |
fig = px.bar(group_data, x='FyWeek', y='SalesVolume', | |
title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]}', | |
labels={'SalesVolume': 'Sales Volume'}) | |
# Display the interactive plot | |
st.plotly_chart(fig) | |
########################################################################################################## | |
########################################### CARD #2 #################################################### | |
# Card 2: Sales Volume & Unit Price Correlation plot for Item Category and Container Code | |
if st.session_state['active_card'] == 'card2': | |
# Dropdown to select item type (using session_state) | |
st.session_state['selected_item_type'] = st.selectbox( | |
'Select Item Type', df['Itemtype'].unique(), | |
index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type'])) | |
# Dropdown to select the grouping category (container code, chain code, or state) | |
group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State']) | |
st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option}") | |
# Group the dataframe and prepare for plotting | |
df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice': 'mean', | |
'Sales': 'sum' | |
}).reset_index() | |
# Function to extract date from fiscal week | |
def dt_from_fy_week(fyweek): | |
fy, w = re.findall(r'\d+', fyweek) | |
week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d") | |
return (week1_start + timedelta(weeks=int(w) - 1)).date() | |
# Add columns for date and promo to data | |
df['Dt'] = df['FyWeek'].apply(dt_from_fy_week) | |
df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce') | |
df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo') | |
df["Promo"] = df["Promo"].astype("category") | |
# Split FyWeek into fiscal year and week number | |
df['Week'] = df['FyWeek'].str.split().str[-1].astype(int) | |
df['Year'] = df['FyWeek'].str.split().str[1].astype(int) | |
# Filter the dataframe based on the selected item type | |
filtered_df = df[df['Itemtype'] == st.session_state['selected_item_type']] | |
# Find the top 3 values based on total SalesVolume in the selected grouping category | |
top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index | |
# Filter the data for only the top 3 values | |
top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)] | |
# Group by Year, Week, Dt, and the selected category and aggregate SalesVolume and UnitPrice | |
agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice': 'mean' | |
}).reset_index() | |
# Loop through the top 3 values and create separate plots | |
for value in top_3_values: | |
value_data = agg_df[agg_df[group_by_option] == value] | |
# Create a new figure for each group | |
fig, (axd, axp) = plt.subplots(2, 1, figsize=(10, 6)) | |
# Plot SalesVolume | |
sns.lineplot(data=value_data, x='Dt', y='SalesVolume', ax=axd) | |
axd.set_title(f"SalesVolume - {value} ({group_by_option})") | |
axd.grid(True, linestyle='--', color='gray', alpha=0.7) | |
# Plot mean line for SalesVolume | |
axd.axhline(value_data['SalesVolume'].mean(), ls="--", color="r") | |
axd.xaxis.set_major_locator(mdates.MonthLocator(interval=2)) | |
axd.set_xticklabels([]) | |
# Plot UnitPrice | |
sns.lineplot(data=value_data, x='Dt', y='UnitPrice', ax=axp, color='green', errorbar='sd') | |
axp.set_title(f"UnitPrice - {value} ({group_by_option})") | |
axp.grid(True, linestyle='--', color='gray', alpha=0.7) | |
# Plot mean line for UnitPrice | |
axp.axhline(value_data['UnitPrice'].mean(), ls="--", color="r") | |
axp.xaxis.set_major_locator(mdates.MonthLocator(interval=2)) | |
axp.tick_params(axis='x', rotation=90) | |
# Adjust layout for each figure | |
plt.tight_layout() | |
# Display the plot in Streamlit | |
st.pyplot(fig) | |
############################################################################################### | |
########################################### CARD #3 #################################################### | |
# Check which card was selected using session state | |
if st.session_state['active_card'] == 'card3': | |
# Dropdown for selecting the Item Type | |
st.session_state['selected_item_type'] = st.selectbox('Select Item Type', df['Itemtype'].unique(), | |
index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type'])) | |
df = df.groupby(['FyWeek','Fy','Chaincode','Store','Address','Zipcode','City','State','Containercode','Itemtype'],observed=True).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice':'mean', | |
'Sales': 'sum'}).reset_index() | |
# add promo and date columns to data | |
def dt_from_fy_week(fyweek): | |
fy, w = re.findall(r'\d+', fyweek) | |
week1_start = datetime.strptime("{}-08-01".format(int(fy)-1), "%Y-%m-%d") | |
return (week1_start + timedelta(weeks=int(w)-1)).date() | |
df['Dt'] = df['FyWeek'].apply(dt_from_fy_week) | |
df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce') | |
df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3','4','5','6']) , 'Promo', 'NoPromo') | |
df["Promo"] = df["Promo"].astype("category") | |
# Split FyWeek into fiscal year and week number | |
df['Week'] = df['FyWeek'].str.split().str[-1].astype(int) | |
# df_21['Year'] = df_21['Fy'].str.extract(r'(\d+)').astype(int) | |
df['Year'] = df['FyWeek'].str.split().str[1].astype(int) | |
# Define the fiscal years | |
years = ["FY 2021", "FY 2022", "FY 2023", "FY 2024"] | |
# Set up a 2x2 grid of subplots for the four years | |
fig, axs = plt.subplots(2, 2, figsize=(12, 8)) | |
# Loop through each year and create a plot in the grid | |
for i, fy in enumerate(years): | |
ax = axs.flat[i] | |
# Plot Promo data | |
sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "Promo")], | |
x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='.', line_kws=dict(color="r"), ax=ax, label="Promo") | |
# Plot NoPromo data | |
sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "NoPromo")], | |
x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='x', line_kws=dict(color="g"), ax=ax, label="NoPromo") | |
# Set the title of each subplot | |
ax.set_title(f"{st.session_state['selected_item_type']} - {fy}") | |
ax.legend(loc="best") | |
# Set the overall title for the figure | |
fig.suptitle(f"Price vs SalesVolume for {st.session_state['selected_item_type']} across years") | |
# Adjust layout to prevent overlap | |
fig.tight_layout(rect=[0, 0, 1, 0.95]) | |
# Display the plot | |
st.pyplot(fig) | |
############################################################################################### | |
########################################### CARD #4 #################################################### | |
if st.session_state['active_card'] == 'card4': | |
# Define the fiscal years | |
years = ['FY 2021', 'FY 2022', 'FY 2023', 'FY 2024'] | |
df = df.groupby(['FyWeek','Fy','Chaincode','Store','Address','Zipcode','City','State','Containercode','Itemtype'],observed=True).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice':'mean', | |
'Sales': 'sum'}).reset_index() | |
# Dropdown for selecting the Item Type (using session_state) | |
st.session_state['selected_item_type'] = st.selectbox('Select Item Type', | |
df['Itemtype'].unique(), | |
index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type'])) | |
# Set up a 2x2 grid of subplots for the four years | |
fig, axes = plt.subplots(2, 2, figsize=(16, 12)) | |
axes = axes.flatten() # To access axes easily in a loop | |
# Loop through each year and plot the data | |
for i, year in enumerate(years): | |
# print(st.session_state['selected_item_type']) | |
# Filter data for the specific year and item type selected | |
cage_data = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Fy'] == year)] | |
cage_data['Itemtype'] = cage_data['Itemtype'].cat.remove_unused_categories() | |
cage_data['Containercode'] = cage_data['Containercode'].cat.remove_unused_categories() | |
# print(cage_data['Itemtype'].unique()) | |
relevant_container_codes = cage_data['Containercode'].unique() | |
print(relevant_container_codes) | |
# Calculate price bands | |
lower_band = cage_data['UnitPrice'].quantile(0.25) | |
median_band = cage_data['UnitPrice'].quantile(0.50) | |
higher_band = cage_data['UnitPrice'].quantile(0.75) | |
# Get data for each price band | |
lower_band_data = cage_data[cage_data['UnitPrice'] <= lower_band] | |
median_band_data = cage_data[(cage_data['UnitPrice'] > lower_band) & (cage_data['UnitPrice'] <= median_band)] | |
higher_band_data = cage_data[cage_data['UnitPrice'] > higher_band] | |
# print(lower_band_data['Containercode'].unique()) | |
# Aggregate SalesVolume and average UnitPrice for each container code in each pricing band | |
lower_band_agg = lower_band_data.groupby('Containercode',observed=True).agg( | |
total_sales_volume=('SalesVolume', 'sum'), | |
avg_unit_price=('UnitPrice', 'mean') | |
).reset_index() | |
median_band_agg = median_band_data.groupby('Containercode',observed=True).agg( | |
total_sales_volume=('SalesVolume', 'sum'), | |
avg_unit_price=('UnitPrice', 'mean') | |
).reset_index() | |
higher_band_agg = higher_band_data.groupby('Containercode',observed=True).agg( | |
total_sales_volume=('SalesVolume', 'sum'), | |
avg_unit_price=('UnitPrice', 'mean') | |
).reset_index() | |
# Add the price band labels | |
lower_band_agg['PriceBand'] = 'Lower Band' | |
median_band_agg['PriceBand'] = 'Median Band' | |
higher_band_agg['PriceBand'] = 'Higher Band' | |
# Combine the data for plotting | |
combined_data = pd.concat([lower_band_agg, median_band_agg, higher_band_agg]) | |
combined_data = combined_data[combined_data['Containercode'].isin(relevant_container_codes)] | |
# Plot Total Sales Volume for each price band in the current subplot | |
sns.barplot(x='Containercode', y='total_sales_volume', hue='PriceBand', data=combined_data, ax=axes[i]) | |
# Set the title and customize x-axis for each subplot | |
axes[i].set_title(f"Total Sales Volume by Container Code and Price Band for {year}") | |
axes[i].set_xlabel('Container Code') | |
axes[i].set_ylabel('Total Sales Volume') | |
axes[i].tick_params(axis='x', rotation=45) | |
# Adjust the layout so titles and labels don't overlap | |
plt.tight_layout() | |
# Display the plot in Streamlit | |
st.pyplot(fig) | |
############################################################################################### | |