Spaces:

Sk4467
/

Bonnie-Internal-demo

Sleeping

File size: 9,853 Bytes

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import plotly.express as px
import plotly.graph_objects as go
import re
from datetime import datetime, timedelta
import warnings
import time
import dask.dataframe as dd

@st.cache_data
def date_from_week(year, week):
    # Assuming the fiscal year starts in August and the week starts from August 1st
    base_date = pd.to_datetime((year - 1).astype(str) + '-08-01')
    dates = base_date + pd.to_timedelta((week - 1) * 7, unit='days')
    return dates

@st.cache_data
def load_data(active_card):
    # st.write(f"{active_card}")
    # Define columns common to multiple cards if there are any
    common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales']

    # Columns specific to cards
    card_specific_cols = {
        'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'],
        'card2': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'],
    }

    # Choose columns based on the active card
    required_columns = card_specific_cols.get(active_card, common_cols)

    # Define the data types for efficient memory usage
    dtype_spec = {
        'FyWeek': 'string',
        'Fy': 'category',  # Add data type for 'Fy' if it's used
        'Itemtype': 'category',
        'Chaincode': 'category',
        'State': 'category',
        "Store": "category", 
        'Containercode': 'category',
        "Address": "string",
        "Zipcode": "float",
        "City": "category", 
        'SalesVolume': 'float',
        'UnitPrice': 'float',
        'Sales': 'float'
    }

    # Read only the necessary columns
    # st.write(required_columns)
    ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec)
    df = ddf.compute()


    if active_card in ['card2']:
        df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
        'Sales': 'sum'
        }).reset_index()
        df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True)
        df['Week'] = df['Week'].astype(int)  # Convert 'Week' to int
        df['Year'] = df['FY'].str[2:].astype(int)  # Extract year part and convert to int
        df['Dt'] = date_from_week(df['Year'], df['Week'])

    # st.write(df.columns)
    return df

# Display logo
st.image("bonnie.png", width=150)  # Adjust width as needed

# Display title
st.title("Bonnie Plants Pricing & Sales Analytics Dashboard")

# Close the div for logo and title
st.markdown('</div>', unsafe_allow_html=True)

#  Initialize session state for storing which card was clicked and item type
if 'active_card' not in st.session_state:
    st.session_state['active_card'] = None
if 'selected_item_type' not in st.session_state:
    st.session_state['selected_item_type'] = 'CORE'  # Set default to 'CORE'

if 'selected_feature' not in st.session_state:
    st.session_state['selected_feature'] = 'Chaincode'  # Default to 'Chain Code'

# Card selection buttons
col1, col2 = st.columns(2)
# Define buttons for plot categories, update session state when clicked
with col1:
    if st.button("Sales Volume Trend for Item Category"):
        st.session_state['active_card'] = 'card1'

with col2:
    if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"):
        st.session_state['active_card'] = 'card2'

start_time=time.time()
# st.write(st.session_state['active_card'])
df = load_data(st.session_state['active_card'])
time_taken = time.time() - start_time
st.write(f"Data loaded in {time_taken:.2f} seconds")


# Initialize session state for storing the selected state and feature
if 'selected_state' not in st.session_state:
    st.session_state['selected_state'] = df['State'].unique()[0]  # Default to the first state

############################################ CARD #1 ####################################################
if st.session_state['active_card'] == 'card1':
    # st.write("Processing card1...")
    # Dropdown for selecting the state
    selected_state = st.selectbox('Select State', df['State'].unique())
    # Dropdown for selecting the feature for grouping
    selected_feature = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype',])
    
    # Filter the dataframe based on selected state
    filtered_df = df[df['State'] == selected_state]
    
    # Time the grouping operation
    start_time = time.time()
    group_data = filtered_df.groupby(['FyWeek', selected_feature],observed=True)['SalesVolume'].sum().reset_index()
    time_taken = time.time() - start_time

    # Plotting
    fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color=selected_feature,
                 title=f'Sales Volume over Fiscal Week in {selected_state} by {selected_feature}',
                 labels={'SalesVolume': 'Sales Volume', 'Fiscal Week Short': 'Fiscal Week'})
    st.plotly_chart(fig)

##########################################################################################################


########################################### CARD #2 ####################################################





if st.session_state['active_card'] == 'card2':
    # Dropdown to select item type (using session_state)
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', df['Itemtype'].unique(),
        index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))

    # Dropdown to select the grouping category (container code, chain code, or state)
    group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])

    # Multi-select checkbox to select multiple years
    selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021])

    st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}")

    # Convert 'Dt' column to datetime
    df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
    df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")

    # Filter the dataframe based on the selected item type and selected years
    filtered_df = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))]

    # Find the top 3 values based on total SalesVolume in the selected grouping category
    top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index

    # Filter the data for only the top 3 values
    top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]

    # Aggregate data
    agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()

    # Create a new column 'week-year' for X-axis labels
    agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y')

    # Loop through the top 3 values and create separate plots using Plotly
    for value in top_3_values:
        value_data = agg_df[agg_df[group_by_option] == value]
        # Assuming you have 'value_data' from your previous code
        mean_sales_volume = value_data['SalesVolume'].mean()
        mean_unit_price = value_data['UnitPrice'].mean()

        # Create a Plotly figure
        fig = go.Figure()

        # Add SalesVolume trace
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['SalesVolume'],
            mode='lines+markers',
            name='SalesVolume',
            line=dict(color='blue'),
            hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}'
        ))

        # Add UnitPrice trace on a secondary Y-axis
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['UnitPrice'],
            mode='lines+markers',
            name='UnitPrice',
            line=dict(color='green'),
            yaxis='y2',
            hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}'
        ))
        # Add mean line for SalesVolume
        fig.add_shape(type="line",
                    x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
                    y0=mean_sales_volume, y1=mean_sales_volume,
                    line=dict(color="blue", width=2, dash="dash"),
                    xref='x', yref='y')

        # Add mean line for UnitPrice (on secondary Y-axis)
        fig.add_shape(type="line",
                    x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
                    y0=mean_unit_price, y1=mean_unit_price,
                    line=dict(color="green", width=2, dash="dash"),
                    xref='x', yref='y2')

        # Update layout for dual axes
        fig.update_layout(
            template='plotly_white',
            title=f"SalesVolume and UnitPrice - {value} ({group_by_option})",
            xaxis_title='Week-Year',
            yaxis_title='Sales Volume',
            yaxis2=dict(title='UnitPrice', overlaying='y', side='right'),
            legend=dict(x=0.9, y=1.15),
            hovermode="x unified",  # Show both values in a tooltip
            height=500,
            autosize=True,
            margin=dict(l=50, r=50, t=50, b=50)
        )

        # Rotate X-axis labels
        fig.update_xaxes(tickangle=90)

        # Display the Plotly figure in Streamlit
        st.plotly_chart(fig, use_container_width=True)



##########################################################################################################