Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import matplotlib.dates as mdates | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import re | |
from datetime import datetime, timedelta | |
import warnings | |
import time | |
import dask.dataframe as dd | |
def date_from_week(year, week): | |
# Assuming the fiscal year starts in August and the week starts from August 1st | |
base_date = pd.to_datetime((year - 1).astype(str) + '-08-01') | |
dates = base_date + pd.to_timedelta((week - 1) * 7, unit='days') | |
return dates | |
def load_data(active_card): | |
# st.write(f"{active_card}") | |
# Define columns common to multiple cards if there are any | |
common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales'] | |
# Columns specific to cards | |
card_specific_cols = { | |
'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'], | |
'card2': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'], | |
} | |
# Choose columns based on the active card | |
required_columns = card_specific_cols.get(active_card, common_cols) | |
# Define the data types for efficient memory usage | |
dtype_spec = { | |
'FyWeek': 'string', | |
'Fy': 'category', # Add data type for 'Fy' if it's used | |
'Itemtype': 'category', | |
'Chaincode': 'category', | |
'State': 'category', | |
"Store": "category", | |
'Containercode': 'category', | |
"Address": "string", | |
"Zipcode": "float", | |
"City": "category", | |
'SalesVolume': 'float', | |
'UnitPrice': 'float', | |
'Sales': 'float' | |
} | |
# Read only the necessary columns | |
# st.write(required_columns) | |
ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec) | |
df = ddf.compute() | |
if active_card in ['card2']: | |
df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice': 'mean', | |
'Sales': 'sum' | |
}).reset_index() | |
df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True) | |
df['Week'] = df['Week'].astype(int) # Convert 'Week' to int | |
df['Year'] = df['FY'].str[2:].astype(int) # Extract year part and convert to int | |
df['Dt'] = date_from_week(df['Year'], df['Week']) | |
# st.write(df.columns) | |
return df | |
# Display logo | |
st.image("bonnie.png", width=150) # Adjust width as needed | |
# Display title | |
st.title("Bonnie Plants Pricing & Sales Analytics Dashboard") | |
# Close the div for logo and title | |
st.markdown('</div>', unsafe_allow_html=True) | |
# Initialize session state for storing which card was clicked and item type | |
if 'active_card' not in st.session_state: | |
st.session_state['active_card'] = None | |
if 'selected_item_type' not in st.session_state: | |
st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE' | |
if 'selected_feature' not in st.session_state: | |
st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code' | |
# Card selection buttons | |
col1, col2 = st.columns(2) | |
# Define buttons for plot categories, update session state when clicked | |
with col1: | |
if st.button("Sales Volume Trend for Item Category"): | |
st.session_state['active_card'] = 'card1' | |
with col2: | |
if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"): | |
st.session_state['active_card'] = 'card2' | |
start_time=time.time() | |
# st.write(st.session_state['active_card']) | |
df = load_data(st.session_state['active_card']) | |
time_taken = time.time() - start_time | |
st.write(f"Data loaded in {time_taken:.2f} seconds") | |
# Initialize session state for storing the selected state and feature | |
if 'selected_state' not in st.session_state: | |
st.session_state['selected_state'] = df['State'].unique()[0] # Default to the first state | |
############################################ CARD #1 #################################################### | |
if st.session_state['active_card'] == 'card1': | |
# st.write("Processing card1...") | |
# Dropdown for selecting the state | |
selected_state = st.selectbox('Select State', df['State'].unique()) | |
# Dropdown for selecting the feature for grouping | |
selected_feature = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype',]) | |
# Filter the dataframe based on selected state | |
filtered_df = df[df['State'] == selected_state] | |
# Time the grouping operation | |
start_time = time.time() | |
group_data = filtered_df.groupby(['FyWeek', selected_feature],observed=True)['SalesVolume'].sum().reset_index() | |
time_taken = time.time() - start_time | |
# Plotting | |
fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color=selected_feature, | |
title=f'Sales Volume over Fiscal Week in {selected_state} by {selected_feature}', | |
labels={'SalesVolume': 'Sales Volume', 'Fiscal Week Short': 'Fiscal Week'}) | |
st.plotly_chart(fig) | |
########################################################################################################## | |
########################################### CARD #2 #################################################### | |
if st.session_state['active_card'] == 'card2': | |
# Dropdown to select item type (using session_state) | |
st.session_state['selected_item_type'] = st.selectbox( | |
'Select Item Type', df['Itemtype'].unique(), | |
index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type'])) | |
# Dropdown to select the grouping category (container code, chain code, or state) | |
group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State']) | |
# Multi-select checkbox to select multiple years | |
selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021]) | |
st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}") | |
# Convert 'Dt' column to datetime | |
df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce') | |
df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo') | |
df["Promo"] = df["Promo"].astype("category") | |
# Filter the dataframe based on the selected item type and selected years | |
filtered_df = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))] | |
# Find the top 3 values based on total SalesVolume in the selected grouping category | |
top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index | |
# Filter the data for only the top 3 values | |
top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)] | |
# Aggregate data | |
agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({ | |
'SalesVolume': 'sum', | |
'UnitPrice': 'mean' | |
}).reset_index() | |
# Create a new column 'week-year' for X-axis labels | |
agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y') | |
# Loop through the top 3 values and create separate plots using Plotly | |
for value in top_3_values: | |
value_data = agg_df[agg_df[group_by_option] == value] | |
# Assuming you have 'value_data' from your previous code | |
mean_sales_volume = value_data['SalesVolume'].mean() | |
mean_unit_price = value_data['UnitPrice'].mean() | |
# Create a Plotly figure | |
fig = go.Figure() | |
# Add SalesVolume trace | |
fig.add_trace(go.Scatter( | |
x=value_data['week-year'], | |
y=value_data['SalesVolume'], | |
mode='lines+markers', | |
name='SalesVolume', | |
line=dict(color='blue'), | |
hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}' | |
)) | |
# Add UnitPrice trace on a secondary Y-axis | |
fig.add_trace(go.Scatter( | |
x=value_data['week-year'], | |
y=value_data['UnitPrice'], | |
mode='lines+markers', | |
name='UnitPrice', | |
line=dict(color='green'), | |
yaxis='y2', | |
hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}' | |
)) | |
# Add mean line for SalesVolume | |
fig.add_shape(type="line", | |
x0=value_data['week-year'].min(), x1=value_data['week-year'].max(), | |
y0=mean_sales_volume, y1=mean_sales_volume, | |
line=dict(color="blue", width=2, dash="dash"), | |
xref='x', yref='y') | |
# Add mean line for UnitPrice (on secondary Y-axis) | |
fig.add_shape(type="line", | |
x0=value_data['week-year'].min(), x1=value_data['week-year'].max(), | |
y0=mean_unit_price, y1=mean_unit_price, | |
line=dict(color="green", width=2, dash="dash"), | |
xref='x', yref='y2') | |
# Update layout for dual axes | |
fig.update_layout( | |
template='plotly_white', | |
title=f"SalesVolume and UnitPrice - {value} ({group_by_option})", | |
xaxis_title='Week-Year', | |
yaxis_title='Sales Volume', | |
yaxis2=dict(title='UnitPrice', overlaying='y', side='right'), | |
legend=dict(x=0.9, y=1.15), | |
hovermode="x unified", # Show both values in a tooltip | |
height=600, | |
margin=dict(l=50, r=50, t=50, b=50) | |
) | |
# Rotate X-axis labels | |
fig.update_xaxes(tickangle=90) | |
# Display the Plotly figure in Streamlit | |
st.plotly_chart(fig, use_container_width=True) | |
########################################################################################################## | |