import re
import time
import warnings
from datetime import datetime, timedelta

import dask.dataframe as dd
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import streamlit as st

# CSV file every card reads from.
_DATA_FILE = "fy21-24.csv"

# Calendar months (of 'Dt') flagged as 'Promo' on card 2.
_PROMO_MONTHS = [3, 4, 5, 6]

# Years offered in the card 2 year picker.
_YEAR_CHOICES = [2021, 2022, 2023, 2024]


@st.cache_data
def date_from_week(year, week):
    """Return the start date of each fiscal week.

    The fiscal year is taken to start on August 1st of the calendar year
    before ``year``; week 1 maps to that date and every further week adds
    seven days.

    Args:
        year: pandas Series of integer fiscal years (``.astype`` is called
            on ``year - 1``, so a plain Python int is not accepted).
        week: pandas Series of 1-based integer week numbers.

    Returns:
        A datetime Series aligned with the inputs.
    """
    # NOTE(review): the year is rendered with astype(str) and parsed as
    # '<year>-08-01'; this presumably expects 4-digit years -- confirm
    # against the 'FyWeek' values in the CSV.
    base_date = pd.to_datetime((year - 1).astype(str) + '-08-01')
    return base_date + pd.to_timedelta((week - 1) * 7, unit='days')


@st.cache_data
def load_data(active_card):
    """Load the sales CSV with only the columns the active card needs.

    Args:
        active_card: ``'card1'``, ``'card2'`` or anything else (including
            ``None``); unknown values fall back to the common column set.

    Returns:
        A pandas DataFrame with the requested columns plus the derived
        columns ``FY``, ``Week``, ``Year`` and ``Dt``. For ``'card2'`` the
        rows are first aggregated per week/store/item combination
        (volume and sales summed, unit price averaged).
    """
    # Columns used when no specific card is active.
    common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State',
                   'SalesVolume', 'UnitPrice', 'Sales']

    # Columns specific to each card.
    card_specific_cols = {
        'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'],
        'card2': ['FyWeek', 'Fy', 'State', 'Store', 'Address', 'Zipcode',
                  'City', 'Itemtype', 'Chaincode', 'Containercode',
                  'SalesVolume', 'UnitPrice', 'Sales'],
    }
    required_columns = card_specific_cols.get(active_card, common_cols)

    # Categorical dtypes keep the memory footprint of repeated labels low.
    dtype_spec = {
        'FyWeek': 'string',
        'Fy': 'category',
        'Itemtype': 'category',
        'Chaincode': 'category',
        'State': 'category',
        'Store': 'category',
        'Containercode': 'category',
        'Address': 'string',
        'Zipcode': 'float',
        'City': 'category',
        'SalesVolume': 'float',
        'UnitPrice': 'float',
        'Sales': 'float',
    }

    # Read only the necessary columns, then materialise as pandas.
    ddf = dd.read_csv(_DATA_FILE, usecols=required_columns, dtype=dtype_spec)
    df = ddf.compute()

    if active_card == 'card2':
        group_keys = ['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address',
                      'Zipcode', 'City', 'State', 'Containercode', 'Itemtype']
        df = df.groupby(group_keys, observed=True).agg({
            'SalesVolume': 'sum',
            'UnitPrice': 'mean',
            'Sales': 'sum',
        }).reset_index()

    # 'FyWeek' is split on the literal ' Week '; the left part minus its
    # first two characters is read as the year, the right part as the week.
    df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True)
    df['Week'] = df['Week'].astype(int)
    df['Year'] = df['FY'].str[2:].astype(int)
    df['Dt'] = date_from_week(df['Year'], df['Week'])
    return df


def _build_dual_axis_figure(value_data, value, group_by_option):
    """Build the SalesVolume / UnitPrice dual-axis chart for one group value.

    Args:
        value_data: rows of the aggregated frame for a single group value;
            must contain 'week-year', 'SalesVolume' and 'UnitPrice'.
        value: the group value, used in the chart title.
        group_by_option: name of the grouping column, used in the title.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    mean_sales_volume = value_data['SalesVolume'].mean()
    mean_unit_price = value_data['UnitPrice'].mean()

    fig = go.Figure()

    # SalesVolume on the primary Y-axis.
    fig.add_trace(go.Scatter(
        x=value_data['week-year'],
        y=value_data['SalesVolume'],
        mode='lines+markers',
        name='SalesVolume',
        line=dict(color='blue'),
        hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}',
    ))

    # UnitPrice on a secondary Y-axis.
    fig.add_trace(go.Scatter(
        x=value_data['week-year'],
        y=value_data['UnitPrice'],
        mode='lines+markers',
        name='UnitPrice',
        line=dict(color='green'),
        yaxis='y2',
        hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}',
    ))

    # Dashed mean lines. They are anchored to the plot width ('paper'
    # coordinates) because the x values are 'week-year' strings whose
    # lexicographic min/max are not the first/last plotted weeks.
    mean_lines = (
        (mean_sales_volume, 'blue', 'y'),
        (mean_unit_price, 'green', 'y2'),
    )
    for level, colour, axis_ref in mean_lines:
        fig.add_shape(
            type="line",
            x0=0, x1=1, xref='paper',
            y0=level, y1=level, yref=axis_ref,
            line=dict(color=colour, width=2, dash="dash"),
        )

    fig.update_layout(
        template='plotly_white',
        title=f"SalesVolume and UnitPrice - {value} ({group_by_option})",
        xaxis_title='Week-Year',
        yaxis_title='Sales Volume',
        yaxis2=dict(title='UnitPrice', overlaying='y', side='right'),
        legend=dict(x=0.9, y=1.15),
        hovermode="x unified",  # Show both values in a single tooltip
        height=600,
        margin=dict(l=50, r=50, t=50, b=50),
    )
    fig.update_xaxes(tickangle=90)
    return fig


# Display logo and title
st.image("bonnie.png", width=150)
st.title("Bonnie Plants Pricing & Sales Analytics Dashboard")

# Initialise session state: which card was clicked, item type and feature
_SESSION_DEFAULTS = {
    'active_card': None,
    'selected_item_type': 'CORE',
    'selected_feature': 'Chaincode',
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Card selection buttons; a click records the card in session state
col1, col2 = st.columns(2)
with col1:
    if st.button("Sales Volume Trend for Item Category"):
        st.session_state['active_card'] = 'card1'
with col2:
    if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"):
        st.session_state['active_card'] = 'card2'

start_time = time.time()
df = load_data(st.session_state['active_card'])
time_taken = time.time() - start_time
st.write(f"Data loaded in {time_taken:.2f} seconds")

# Default the selected state to the first state present in the data
# (skipped when the data is empty, where there is no first state).
if 'selected_state' not in st.session_state and not df.empty:
    st.session_state['selected_state'] = df['State'].unique()[0]

############################################ CARD #1 ####################################################
if st.session_state['active_card'] == 'card1':
    selected_state = st.selectbox('Select State', df['State'].unique())
    selected_feature = st.selectbox('Select Feature for Grouping',
                                    ['Chaincode', 'Itemtype'])

    # Restrict to the chosen state, then total the volume per week/feature
    filtered_df = df[df['State'] == selected_state]
    group_data = (
        filtered_df
        .groupby(['FyWeek', selected_feature], observed=True)['SalesVolume']
        .sum()
        .reset_index()
    )

    # 'FyWeek' is a string, so its natural sort is lexical; order the axis
    # by the week start date instead so the bars read chronologically.
    week_order = (
        filtered_df[['FyWeek', 'Dt']]
        .drop_duplicates()
        .sort_values('Dt')['FyWeek']
        .tolist()
    )

    fig = px.bar(
        group_data,
        x='FyWeek',
        y='SalesVolume',
        color=selected_feature,
        title=f'Sales Volume over Fiscal Week in {selected_state} by {selected_feature}',
        labels={'SalesVolume': 'Sales Volume', 'FyWeek': 'Fiscal Week'},
        category_orders={'FyWeek': week_order},
    )
    st.plotly_chart(fig)
##########################################################################################################

########################################### CARD #2 ####################################################
if st.session_state['active_card'] == 'card2':
    # Item type dropdown, pre-selecting the remembered type when it exists
    item_types = list(df['Itemtype'].unique())
    remembered_type = st.session_state['selected_item_type']
    default_index = (item_types.index(remembered_type)
                     if remembered_type in item_types else 0)
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', item_types, index=default_index)

    # Grouping category and year pickers
    group_by_option = st.selectbox('Group by',
                                   ['Containercode', 'Chaincode', 'State'])
    selected_years = st.multiselect('Select Year(s)', _YEAR_CHOICES,
                                    default=[2021])

    st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}")

    # Make sure 'Dt' is datetime, then flag promo months. Months are
    # compared as integers so a NaT (float month) cannot break the match.
    df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
    df['Promo'] = np.where(df['Dt'].dt.month.isin(_PROMO_MONTHS),
                           'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")

    # Keep only the selected item type and calendar years
    filtered_df = df[
        (df['Itemtype'] == st.session_state['selected_item_type'])
        & (df['Dt'].dt.year.isin(selected_years))
    ]

    # Top 3 group values by total SalesVolume
    top_3_values = (
        filtered_df
        .groupby(group_by_option, observed=True)['SalesVolume']
        .sum()
        .nlargest(3)
        .index
    )
    top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]

    # Weekly aggregate per group value
    agg_df = top_group_data.groupby(
        [group_by_option, 'Year', 'Week', 'Dt'], observed=True
    ).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
    }).reset_index()

    # X-axis label: calendar week-of-year and year of the week start date
    agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y')

    if len(top_3_values) == 0:
        st.info("No data available for the current selection.")

    # One dual-axis chart per top value
    for value in top_3_values:
        value_data = agg_df[agg_df[group_by_option] == value]
        fig = _build_dual_axis_figure(value_data, value, group_by_option)
        st.plotly_chart(fig, use_container_width=True)
##########################################################################################################