Spaces:

Sk4467
/

Bonnie-Internal-demo

Sleeping

App Files Files Community

Bonnie-Internal-demo / app.py

ProtonDataLabs

Update app.py

9e3c2c5 unverified 6 months ago

raw

history blame

16.7 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import matplotlib.dates as mdates
	import plotly.express as px
	import re
	from datetime import datetime, timedelta
	import warnings
	# Load the data
	@st.cache_data
	def load_sales_data(path):
	    """Read the sales CSV at *path* and return it as a typed DataFrame.

	    Wrapped in ``st.cache_data`` so the file is parsed once per path and
	    not on every script rerun; each call hands back its own copy, so the
	    caller may add columns or reassign freely.  The cache is keyed on the
	    path only, so edits to the file on disk are not picked up until the
	    cache is cleared.
	    """
	    data = pd.read_csv(
	        path,
	        dtype={
	            "FyWeek": "string",
	            "Fy": "category",
	            "Chaincode": "category",
	            "Store": "category",
	            "Address": "string",
	            "Zipcode": "float",
	            "City": "category",
	            "State": "category",
	            "Containercode": "category",
	            "Itemtype": "category",
	            "SalesVolume": "float",
	            "UnitPrice": "float",
	            "Sales": "float",
	        },
	    )

	    # Convert columns: let pandas pick the best nullable dtype for the
	    # two columns that were read as plain floats.
	    data["Zipcode"] = data["Zipcode"].convert_dtypes()
	    data["SalesVolume"] = data["SalesVolume"].convert_dtypes()
	    return data


	df = load_sales_data(r"fy21-24.csv")

	# Page heading
	st.title('Sales Data Dashboard')

	# Seed the session state once per browser session.  Each entry pairs a key
	# with a zero-argument factory so a default is only computed when its key is
	# still missing:
	#   active_card        - which card button was clicked last (None = none yet)
	#   selected_item_type - item type shared by the item-type dropdowns
	#   selected_state     - state dropdown value, defaults to the first state in df
	#   selected_feature   - grouping feature dropdown value
	session_defaults = (
	    ('active_card', lambda: None),
	    ('selected_item_type', lambda: 'CORE'),
	    ('selected_state', lambda: df['State'].unique()[0]),
	    ('selected_feature', lambda: 'Chaincode'),
	)
	for state_key, make_default in session_defaults:
	    if state_key not in st.session_state:
	        st.session_state[state_key] = make_default()

	# Four side-by-side columns, one per card button
	col1, col2, col3, col4 = st.columns(4)

	# Card id -> button caption, in left-to-right display order.  Clicking a
	# button records its card id in the session state so the matching section
	# further down renders on this and later reruns.
	card_captions = {
	    'card1': "Sales Volume Trend for Item Category",
	    'card2': "Sales Volume & Unit Price Correlation for Item Category and Container Code",
	    'card3': "Price vs Sales Trend by Year",
	    'card4': "Total Sales Volume by Price Band",
	}
	for card_column, (card_id, caption) in zip((col1, col2, col3, col4), card_captions.items()):
	    with card_column:
	        if st.button(caption):
	            st.session_state['active_card'] = card_id


	########################################### CARD #1 ####################################################
	if st.session_state['active_card'] == 'card1':
	    # Card 1: stacked bar chart of SalesVolume per fiscal week for one state,
	    # optionally split by chain code or item type.

	    # Pull the fiscal year and week number out of the FyWeek label
	    # (presumably shaped like 'FY 2021 Week 51' - the second space-separated
	    # token is read as the year and the text after 'Week ' as the week).
	    df['Fiscal Year'] = df['FyWeek'].str.split(' ').str[1].astype(int)
	    df['Week Number'] = df['FyWeek'].str.split('Week ').str[1].astype(int)
	    df = df.sort_values(by=['Fiscal Year', 'Week Number'])

	    # Short display label (e.g. 'FY21W51'), built column-wise instead of with
	    # a row-by-row apply.
	    short_labels = (
	        'FY' + (df['Fiscal Year'] % 100).astype(str)
	        + 'W' + df['Week Number'].astype(str)
	    )

	    # Ordered categorical whose category order is the order of appearance,
	    # i.e. chronological because df was just sorted by year and week.
	    df['Fiscal Week Short'] = pd.Categorical(
	        short_labels, categories=short_labels.unique(), ordered=True)

	    # Dropdown for selecting the state (using session_state).  Fall back to
	    # the first option if the remembered value is no longer offered.
	    state_options = list(df['State'].unique())
	    remembered_state = st.session_state['selected_state']
	    st.session_state['selected_state'] = st.selectbox(
	        'Select State', state_options,
	        index=state_options.index(remembered_state) if remembered_state in state_options else 0)
	    selected_state = st.session_state['selected_state']

	    # Dropdown for selecting the feature for grouping (using session_state)
	    feature_options = ['Chaincode', 'Itemtype', 'FyWeek']
	    remembered_feature = st.session_state['selected_feature']
	    st.session_state['selected_feature'] = st.selectbox(
	        'Select Feature for Grouping', feature_options,
	        index=feature_options.index(remembered_feature) if remembered_feature in feature_options else 0)
	    selected_feature = st.session_state['selected_feature']

	    # feature -> (human-readable name, column used to colour/split the bars)
	    feature_specs = {
	        'Itemtype': ('Item Type', 'Itemtype'),
	        'Chaincode': ('Chain Code', 'Chaincode'),
	        'FyWeek': ('Fiscal Week', None),
	    }
	    feature_label, color_column = feature_specs[selected_feature]

	    # Filter the dataframe based on selected state
	    filtered_df = df[df['State'] == selected_state]

	    st.subheader(f'Sales Data for {selected_state} - Grouped by {feature_label}')

	    # Group on the ordered short label so the rows come out chronologically;
	    # grouping on the raw FyWeek string sorts 'Week 10' ahead of 'Week 2'.
	    # observed=True keeps categories that do not occur in this state out of
	    # the result (and out of the legend).
	    group_keys = ['Fiscal Week Short'] + ([color_column] if color_column else [])
	    group_data = (
	        filtered_df.groupby(group_keys, observed=True)['SalesVolume']
	        .sum()
	        .reset_index()
	    )
	    group_data['Fiscal Week Short'] = group_data['Fiscal Week Short'].astype(str)
	    # Rows are already in chronological order, so order of appearance is the
	    # axis order we want; pin it explicitly so it does not depend on which
	    # colour trace happens to be drawn first.
	    week_order = group_data['Fiscal Week Short'].unique().tolist()

	    chart_title = f'Sales Volume over Fiscal Week in {selected_state}'
	    if color_column is not None:
	        chart_title += f' by {feature_label}'

	    fig = px.bar(
	        group_data, x='Fiscal Week Short', y='SalesVolume', color=color_column,
	        title=chart_title,
	        labels={'SalesVolume': 'Sales Volume', 'Fiscal Week Short': 'Fiscal Week'},
	        category_orders={'Fiscal Week Short': week_order})

	    # Display the interactive plot
	    st.plotly_chart(fig)
	##########################################################################################################

	########################################### CARD #2 ####################################################
	# Card 2: Sales Volume & Unit Price Correlation plot for Item Category and Container Code
	if st.session_state['active_card'] == 'card2':
	    # Card 2: for the chosen item type, plot weekly SalesVolume and mean
	    # UnitPrice over time for the three largest groups of the chosen category.

	    # Dropdown to select item type (using session_state).  The stored default
	    # ('CORE') may not exist in the data, so fall back to the first option
	    # instead of letting list.index raise.
	    item_type_options = list(df['Itemtype'].unique())
	    remembered_item_type = st.session_state['selected_item_type']
	    st.session_state['selected_item_type'] = st.selectbox(
	        'Select Item Type', item_type_options,
	        index=item_type_options.index(remembered_item_type) if remembered_item_type in item_type_options else 0)
	    selected_item_type = st.session_state['selected_item_type']

	    # Dropdown to select the grouping category (container code, chain code, or state)
	    group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])

	    st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option}")

	    def dt_from_fy_week(fyweek):
	        """Return the start date of the fiscal week named by *fyweek*.

	        The first two digit runs in the label are read as fiscal year and
	        week number; week 1 is taken to start on 1 August of the previous
	        calendar year and each later week is offset by whole weeks.
	        """
	        fy, w = re.findall(r'\d+', fyweek)
	        week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d")
	        return (week1_start + timedelta(weeks=int(w) - 1)).date()

	    # Keep only the selected item type first: Itemtype is one of the grouping
	    # keys, so filtering before the aggregation yields the same rows while
	    # avoiding aggregating and date-parsing the rest of the dataset.
	    item_rows = df[df['Itemtype'] == selected_item_type]
	    filtered_df = item_rows.groupby(
	        ['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City',
	         'State', 'Containercode', 'Itemtype'],
	        observed=True,
	    ).agg({
	        'SalesVolume': 'sum',
	        'UnitPrice': 'mean',
	        'Sales': 'sum',
	    }).reset_index()

	    # Add the week start date plus numeric year / week columns
	    filtered_df['Dt'] = pd.to_datetime(filtered_df['FyWeek'].apply(dt_from_fy_week), errors='coerce')
	    fy_week_tokens = filtered_df['FyWeek'].str.split()
	    filtered_df['Week'] = fy_week_tokens.str[-1].astype(int)
	    filtered_df['Year'] = fy_week_tokens.str[1].astype(int)

	    # Find the top 3 values based on total SalesVolume in the selected grouping category
	    top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index

	    # Filter the data for only the top 3 values
	    top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]

	    # Group by Year, Week, Dt, and the selected category and aggregate SalesVolume and UnitPrice
	    agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
	        'SalesVolume': 'sum',
	        'UnitPrice': 'mean',
	    }).reset_index()

	    # One figure per top value: SalesVolume on top, UnitPrice below
	    for value in top_3_values:
	        value_data = agg_df[agg_df[group_by_option] == value]

	        fig, (axd, axp) = plt.subplots(2, 1, figsize=(10, 6))

	        # SalesVolume line with its mean as a dashed red reference line
	        sns.lineplot(data=value_data, x='Dt', y='SalesVolume', ax=axd)
	        axd.set_title(f"SalesVolume - {value} ({group_by_option})")
	        axd.grid(True, linestyle='--', color='gray', alpha=0.7)
	        axd.axhline(value_data['SalesVolume'].mean(), ls="--", color="r")
	        axd.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
	        # The lower panel carries the date labels, so blank them here
	        axd.set_xticklabels([])

	        # UnitPrice line with its mean as a dashed red reference line
	        sns.lineplot(data=value_data, x='Dt', y='UnitPrice', ax=axp, color='green', errorbar='sd')
	        axp.set_title(f"UnitPrice - {value} ({group_by_option})")
	        axp.grid(True, linestyle='--', color='gray', alpha=0.7)
	        axp.axhline(value_data['UnitPrice'].mean(), ls="--", color="r")
	        axp.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
	        axp.tick_params(axis='x', rotation=90)

	        # Lay out this figure explicitly rather than the implicit current one
	        fig.tight_layout()

	        # Display the plot in Streamlit, then release it: pyplot keeps every
	        # figure it creates alive until closed, and this script reruns on
	        # each interaction.
	        st.pyplot(fig)
	        plt.close(fig)
	###############################################################################################

	########################################### CARD #3 ####################################################
	# Check which card was selected using session state
	if st.session_state['active_card'] == 'card3':
	    # Card 3: UnitPrice vs SalesVolume scatter with a lowess trend line, one
	    # panel per fiscal year, promo and non-promo weeks drawn separately.

	    # Dropdown for selecting the Item Type.  The stored default ('CORE') may
	    # not exist in the data, so fall back to the first option instead of
	    # letting list.index raise.
	    item_type_options = list(df['Itemtype'].unique())
	    remembered_item_type = st.session_state['selected_item_type']
	    st.session_state['selected_item_type'] = st.selectbox(
	        'Select Item Type', item_type_options,
	        index=item_type_options.index(remembered_item_type) if remembered_item_type in item_type_options else 0)
	    selected_item_type = st.session_state['selected_item_type']

	    def dt_from_fy_week(fyweek):
	        """Return the start date of the fiscal week named by *fyweek*.

	        The first two digit runs in the label are read as fiscal year and
	        week number; week 1 is taken to start on 1 August of the previous
	        calendar year and each later week is offset by whole weeks.
	        """
	        fy, w = re.findall(r'\d+', fyweek)
	        week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d")
	        return (week1_start + timedelta(weeks=int(w) - 1)).date()

	    # Keep only the selected item type first: Itemtype is one of the grouping
	    # keys, so filtering before the aggregation yields the same rows while
	    # avoiding aggregating and date-parsing the rest of the dataset.
	    item_rows = df[df['Itemtype'] == selected_item_type]
	    item_df = item_rows.groupby(
	        ['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City',
	         'State', 'Containercode', 'Itemtype'],
	        observed=True,
	    ).agg({
	        'SalesVolume': 'sum',
	        'UnitPrice': 'mean',
	        'Sales': 'sum',
	    }).reset_index()

	    # Add promo and date columns.  Weeks starting in March-June are tagged
	    # 'Promo'.  Months are compared as numbers: comparing their string form
	    # stops matching ('3.0' vs '3') as soon as one missing date turns the
	    # month column into floats.
	    item_df['Dt'] = pd.to_datetime(item_df['FyWeek'].apply(dt_from_fy_week), errors='coerce')
	    item_df['Promo'] = np.where(item_df['Dt'].dt.month.isin([3, 4, 5, 6]), 'Promo', 'NoPromo')
	    item_df['Promo'] = item_df['Promo'].astype('category')

	    # Split FyWeek into fiscal year and week number
	    fy_week_tokens = item_df['FyWeek'].str.split()
	    item_df['Week'] = fy_week_tokens.str[-1].astype(int)
	    item_df['Year'] = fy_week_tokens.str[1].astype(int)

	    # Define the fiscal years (one subplot each, matched against the Fy column)
	    years = ["FY 2021", "FY 2022", "FY 2023", "FY 2024"]

	    # Set up a 2x2 grid of subplots for the four years
	    fig, axs = plt.subplots(2, 2, figsize=(12, 8))

	    # (Promo value, scatter marker, trend-line colour) for the two series
	    promo_styles = (('Promo', '.', 'r'), ('NoPromo', 'x', 'g'))

	    for ax, fy in zip(axs.flat, years):
	        year_rows = item_df[item_df['Fy'] == fy]

	        for promo_value, marker, line_color in promo_styles:
	            subset = year_rows[year_rows['Promo'] == promo_value]
	            # Skip series with no rows so a fit is never attempted on an
	            # empty frame (e.g. an item type with no sales in this year).
	            if subset.empty:
	                continue
	            sns.regplot(data=subset, x="UnitPrice", y="SalesVolume",
	                        lowess=True, ci=None, marker=marker,
	                        line_kws=dict(color=line_color), ax=ax, label=promo_value)

	        # Set the title of each subplot; only draw a legend when something was plotted
	        ax.set_title(f"{selected_item_type} - {fy}")
	        if ax.get_legend_handles_labels()[0]:
	            ax.legend(loc="best")

	    # Set the overall title for the figure
	    fig.suptitle(f"Price vs SalesVolume for {selected_item_type} across years")

	    # Adjust layout to prevent overlap (leave room at the top for the suptitle)
	    fig.tight_layout(rect=[0, 0, 1, 0.95])

	    # Display the plot, then release it: pyplot keeps every figure it
	    # creates alive until closed, and this script reruns on each interaction.
	    st.pyplot(fig)
	    plt.close(fig)

	###############################################################################################

	########################################### CARD #4 ####################################################
	if st.session_state['active_card'] == 'card4':
	    # Card 4: total SalesVolume per container code, split into unit-price
	    # bands, one panel per fiscal year for the chosen item type.

	    # Define the fiscal years (one subplot each, matched against the Fy column)
	    years = ['FY 2021', 'FY 2022', 'FY 2023', 'FY 2024']

	    # Dropdown for selecting the Item Type (using session_state).  Options
	    # come from the raw data, as in the other cards.  The stored default
	    # ('CORE') may not exist in the data, so fall back to the first option
	    # instead of letting list.index raise.
	    item_type_options = list(df['Itemtype'].unique())
	    remembered_item_type = st.session_state['selected_item_type']
	    st.session_state['selected_item_type'] = st.selectbox(
	        'Select Item Type', item_type_options,
	        index=item_type_options.index(remembered_item_type) if remembered_item_type in item_type_options else 0)
	    selected_item_type = st.session_state['selected_item_type']

	    # Keep only the selected item type first: Itemtype is one of the grouping
	    # keys, so filtering before the aggregation yields the same rows while
	    # avoiding aggregating the rest of the dataset.
	    item_rows = df[df['Itemtype'] == selected_item_type]
	    item_df = item_rows.groupby(
	        ['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City',
	         'State', 'Containercode', 'Itemtype'],
	        observed=True,
	    ).agg({
	        'SalesVolume': 'sum',
	        'UnitPrice': 'mean',
	        'Sales': 'sum',
	    }).reset_index()

	    # Set up a 2x2 grid of subplots for the four years
	    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
	    axes = axes.flatten()  # To access axes easily in a loop

	    for ax, year in zip(axes, years):
	        panel_title = f"Total Sales Volume by Container Code and Price Band for {year}"

	        # Work on a copy so the column assignment below does not write into
	        # a filtered view of item_df.
	        cage_data = item_df[item_df['Fy'] == year].copy()

	        if cage_data.empty:
	            # Nothing sold for this item type in this year: label the panel
	            # instead of handing an empty frame to the plotting call.
	            ax.set_title(panel_title)
	            ax.set_xlabel('Container Code')
	            ax.set_ylabel('Total Sales Volume')
	            ax.text(0.5, 0.5, 'No data', ha='center', va='center', transform=ax.transAxes)
	            continue

	        # Drop container codes that do not occur in this slice so they do
	        # not show up as empty positions on the x axis.
	        cage_data['Containercode'] = cage_data['Containercode'].cat.remove_unused_categories()

	        # Calculate price bands from the unit-price quartiles
	        unit_price = cage_data['UnitPrice']
	        lower_band = unit_price.quantile(0.25)
	        median_band = unit_price.quantile(0.50)
	        higher_band = unit_price.quantile(0.75)

	        # NOTE(review): rows priced above the median but at or below the
	        # 75th percentile fall into none of the three bands and are left
	        # out of the chart.  Kept as in the original - confirm whether the
	        # median band should extend up to higher_band.
	        band_masks = (
	            ('Lower Band', unit_price <= lower_band),
	            ('Median Band', (unit_price > lower_band) & (unit_price <= median_band)),
	            ('Higher Band', unit_price > higher_band),
	        )

	        # Aggregate SalesVolume and average UnitPrice for each container
	        # code in each pricing band, tagging every result with its band.
	        band_frames = []
	        for band_name, band_mask in band_masks:
	            band_agg = cage_data[band_mask].groupby('Containercode', observed=True).agg(
	                total_sales_volume=('SalesVolume', 'sum'),
	                avg_unit_price=('UnitPrice', 'mean'),
	            ).reset_index()
	            band_agg['PriceBand'] = band_name
	            band_frames.append(band_agg)

	        # Combine the data for plotting
	        combined_data = pd.concat(band_frames)

	        # Plot Total Sales Volume for each price band in the current subplot
	        sns.barplot(x='Containercode', y='total_sales_volume', hue='PriceBand', data=combined_data, ax=ax)

	        # Set the title and axis labels after plotting so they replace the
	        # column names seaborn puts on the axes.
	        ax.set_title(panel_title)
	        ax.set_xlabel('Container Code')
	        ax.set_ylabel('Total Sales Volume')
	        ax.tick_params(axis='x', rotation=45)

	    # Adjust the layout so titles and labels don't overlap
	    fig.tight_layout()

	    # Display the plot in Streamlit, then release it: pyplot keeps every
	    # figure it creates alive until closed, and this script reruns on each
	    # interaction.
	    st.pyplot(fig)
	    plt.close(fig)

	###############################################################################################