BlendMMM commited on
Commit
fdbbbbf
·
verified ·
1 Parent(s): ef850e9

Delete pages

Browse files
pages/10_Saved_Scenarios.py DELETED
@@ -1,407 +0,0 @@
1
- import streamlit as st
2
- from numerize.numerize import numerize
3
- import io
4
- import pandas as pd
5
- from utilities import (
6
- format_numbers,
7
- decimal_formater,
8
- channel_name_formating,
9
- load_local_css,
10
- set_header,
11
- initialize_data,
12
- load_authenticator,
13
- )
14
- from openpyxl import Workbook
15
- from openpyxl.styles import Alignment, Font, PatternFill
16
- import pickle
17
- import streamlit_authenticator as stauth
18
- import yaml
19
- from yaml import SafeLoader
20
- from classes import class_from_dict
21
- from utilities import update_db
22
-
23
- st.set_page_config(layout="wide")
24
- load_local_css("styles.css")
25
- set_header()
26
-
27
- # for k, v in st.session_state.items():
28
- # if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
29
- # st.session_state[k] = v
30
-
31
-
32
- def create_scenario_summary(scenario_dict):
33
- summary_rows = []
34
- for channel_dict in scenario_dict["channels"]:
35
- name_mod = channel_name_formating(channel_dict["name"])
36
- summary_rows.append(
37
- [
38
- name_mod,
39
- channel_dict.get("actual_total_spends")
40
- * channel_dict.get("conversion_rate"),
41
- channel_dict.get("modified_total_spends")
42
- * channel_dict.get("conversion_rate"),
43
- channel_dict.get("actual_total_sales"),
44
- channel_dict.get("modified_total_sales"),
45
- channel_dict.get("actual_total_sales")
46
- / (
47
- channel_dict.get("actual_total_spends")
48
- * channel_dict.get("conversion_rate")
49
- ),
50
- channel_dict.get("modified_total_sales")
51
- / (
52
- channel_dict.get("modified_total_spends")
53
- * channel_dict.get("conversion_rate")
54
- ),
55
- channel_dict.get("actual_mroi"),
56
- channel_dict.get("modified_mroi"),
57
- channel_dict.get("actual_total_spends")
58
- * channel_dict.get("conversion_rate")
59
- / channel_dict.get("actual_total_sales"),
60
- channel_dict.get("modified_total_spends")
61
- * channel_dict.get("conversion_rate")
62
- / channel_dict.get("modified_total_sales"),
63
- ]
64
- )
65
-
66
- summary_rows.append(
67
- [
68
- "Total",
69
- scenario_dict.get("actual_total_spends"),
70
- scenario_dict.get("modified_total_spends"),
71
- scenario_dict.get("actual_total_sales"),
72
- scenario_dict.get("modified_total_sales"),
73
- scenario_dict.get("actual_total_sales")
74
- / scenario_dict.get("actual_total_spends"),
75
- scenario_dict.get("modified_total_sales")
76
- / scenario_dict.get("modified_total_spends"),
77
- "-",
78
- "-",
79
- scenario_dict.get("actual_total_spends")
80
- / scenario_dict.get("actual_total_sales"),
81
- scenario_dict.get("modified_total_spends")
82
- / scenario_dict.get("modified_total_sales"),
83
- ]
84
- )
85
-
86
- columns_index = pd.MultiIndex.from_product(
87
- [[""], ["Channel"]], names=["first", "second"]
88
- )
89
- columns_index = columns_index.append(
90
- pd.MultiIndex.from_product(
91
- [
92
- ["Spends", "NRPU", "ROI", "MROI", "Spend per NRPU"],
93
- ["Actual", "Simulated"],
94
- ],
95
- names=["first", "second"],
96
- )
97
- )
98
- return pd.DataFrame(summary_rows, columns=columns_index)
99
-
100
-
101
- def summary_df_to_worksheet(df, ws):
102
- heading_fill = PatternFill(
103
- fill_type="solid", start_color="FF11B6BD", end_color="FF11B6BD"
104
- )
105
- for j, header in enumerate(df.columns.values):
106
- col = j + 1
107
- for i in range(1, 3):
108
- ws.cell(row=i, column=j + 1, value=header[i - 1]).font = Font(
109
- bold=True, color="FF11B6BD"
110
- )
111
- ws.cell(row=i, column=j + 1).fill = heading_fill
112
- if col > 1 and (col - 6) % 5 == 0:
113
- ws.merge_cells(start_row=1, end_row=1, start_column=col - 3, end_column=col)
114
- ws.cell(row=1, column=col).alignment = Alignment(horizontal="center")
115
- for i, row in enumerate(df.itertuples()):
116
- for j, value in enumerate(row):
117
- if j == 0:
118
- continue
119
- elif (j - 2) % 4 == 0 or (j - 3) % 4 == 0:
120
- ws.cell(row=i + 3, column=j, value=value).number_format = "$#,##0.0"
121
- else:
122
- ws.cell(row=i + 3, column=j, value=value)
123
-
124
-
125
- from openpyxl.utils import get_column_letter
126
- from openpyxl.styles import Font, PatternFill
127
- import logging
128
-
129
-
130
- def scenario_df_to_worksheet(df, ws):
131
- heading_fill = PatternFill(
132
- start_color="FF11B6BD", end_color="FF11B6BD", fill_type="solid"
133
- )
134
-
135
- for j, header in enumerate(df.columns.values):
136
- cell = ws.cell(row=1, column=j + 1, value=header)
137
- cell.font = Font(bold=True, color="FF11B6BD")
138
- cell.fill = heading_fill
139
-
140
- for i, row in enumerate(df.itertuples()):
141
- for j, value in enumerate(
142
- row[1:], start=1
143
- ): # Start from index 1 to skip the index column
144
- try:
145
- cell = ws.cell(row=i + 2, column=j, value=value)
146
- if isinstance(value, (int, float)):
147
- cell.number_format = "$#,##0.0"
148
- elif isinstance(value, str):
149
- cell.value = value[:32767]
150
- else:
151
- cell.value = str(value)
152
- except ValueError as e:
153
- logging.error(
154
- f"Error assigning value '{value}' to cell {get_column_letter(j)}{i+2}: {e}"
155
- )
156
- cell.value = None # Assign None to the cell where the error occurred
157
-
158
- return ws
159
-
160
-
161
- def download_scenarios():
162
- """
163
- Makes a excel with all saved scenarios and saves it locally
164
- """
165
- ## create summary page
166
- if len(scenarios_to_download) == 0:
167
- return
168
- wb = Workbook()
169
- wb.iso_dates = True
170
- wb.remove(wb.active)
171
- st.session_state["xlsx_buffer"] = io.BytesIO()
172
- summary_df = None
173
- # print(scenarios_to_download)
174
- for scenario_name in scenarios_to_download:
175
- scenario_dict = st.session_state["saved_scenarios"][scenario_name]
176
- _spends = []
177
- column_names = ["Date"]
178
- _sales = None
179
- dates = None
180
- summary_rows = []
181
- for channel in scenario_dict["channels"]:
182
- if dates is None:
183
- dates = channel.get("dates")
184
- _spends.append(dates)
185
- if _sales is None:
186
- _sales = channel.get("modified_sales")
187
- else:
188
- _sales += channel.get("modified_sales")
189
- _spends.append(
190
- channel.get("modified_spends") * channel.get("conversion_rate")
191
- )
192
- column_names.append(channel.get("name"))
193
-
194
- name_mod = channel_name_formating(channel["name"])
195
- summary_rows.append(
196
- [
197
- name_mod,
198
- channel.get("modified_total_spends")
199
- * channel.get("conversion_rate"),
200
- channel.get("modified_total_sales"),
201
- channel.get("modified_total_sales")
202
- / channel.get("modified_total_spends")
203
- * channel.get("conversion_rate"),
204
- channel.get("modified_mroi"),
205
- channel.get("modified_total_sales")
206
- / channel.get("modified_total_spends")
207
- * channel.get("conversion_rate"),
208
- ]
209
- )
210
- _spends.append(_sales)
211
- column_names.append("NRPU")
212
- scenario_df = pd.DataFrame(_spends).T
213
- scenario_df.columns = column_names
214
- ## write to sheet
215
- ws = wb.create_sheet(scenario_name)
216
- scenario_df_to_worksheet(scenario_df, ws)
217
- summary_rows.append(
218
- [
219
- "Total",
220
- scenario_dict.get("modified_total_spends"),
221
- scenario_dict.get("modified_total_sales"),
222
- scenario_dict.get("modified_total_sales")
223
- / scenario_dict.get("modified_total_spends"),
224
- "-",
225
- scenario_dict.get("modified_total_spends")
226
- / scenario_dict.get("modified_total_sales"),
227
- ]
228
- )
229
- columns_index = pd.MultiIndex.from_product(
230
- [[""], ["Channel"]], names=["first", "second"]
231
- )
232
- columns_index = columns_index.append(
233
- pd.MultiIndex.from_product(
234
- [[scenario_name], ["Spends", "NRPU", "ROI", "MROI", "Spends per NRPU"]],
235
- names=["first", "second"],
236
- )
237
- )
238
- if summary_df is None:
239
- summary_df = pd.DataFrame(summary_rows, columns=columns_index)
240
- summary_df = summary_df.set_index(("", "Channel"))
241
- else:
242
- _df = pd.DataFrame(summary_rows, columns=columns_index)
243
- _df = _df.set_index(("", "Channel"))
244
- summary_df = summary_df.merge(_df, left_index=True, right_index=True)
245
- ws = wb.create_sheet("Summary", 0)
246
- summary_df_to_worksheet(summary_df.reset_index(), ws)
247
- wb.save(st.session_state["xlsx_buffer"])
248
- st.session_state["disable_download_button"] = False
249
-
250
-
251
- def disable_download_button():
252
- st.session_state["disable_download_button"] = True
253
-
254
-
255
- def transform(x):
256
- if x.name == ("", "Channel"):
257
- return x
258
- elif x.name[0] == "ROI" or x.name[0] == "MROI":
259
- return x.apply(
260
- lambda y: (
261
- y
262
- if isinstance(y, str)
263
- else decimal_formater(
264
- format_numbers(y, include_indicator=False, n_decimals=4),
265
- n_decimals=4,
266
- )
267
- )
268
- )
269
- else:
270
- return x.apply(lambda y: y if isinstance(y, str) else format_numbers(y))
271
-
272
-
273
- def delete_scenario():
274
- if selected_scenario in st.session_state["saved_scenarios"]:
275
- del st.session_state["saved_scenarios"][selected_scenario]
276
- with open("../saved_scenarios.pkl", "wb") as f:
277
- pickle.dump(st.session_state["saved_scenarios"], f)
278
-
279
-
280
- def load_scenario():
281
- if selected_scenario in st.session_state["saved_scenarios"]:
282
- st.session_state["scenario"] = class_from_dict(selected_scenario_details)
283
-
284
-
285
- authenticator = st.session_state.get("authenticator")
286
- if authenticator is None:
287
- authenticator = load_authenticator()
288
-
289
- name, authentication_status, username = authenticator.login("Login", "main")
290
- auth_status = st.session_state.get("authentication_status")
291
-
292
- if auth_status == True:
293
- is_state_initiaized = st.session_state.get("initialized", False)
294
- if not is_state_initiaized:
295
- # print("Scenario page state reloaded")
296
- initialize_data()
297
-
298
- saved_scenarios = st.session_state["saved_scenarios"]
299
-
300
- if len(saved_scenarios) == 0:
301
- st.header("No saved scenarios")
302
-
303
- else:
304
- selected_scenario_list = list(saved_scenarios.keys())
305
- if "selected_scenario_selectbox_key" not in st.session_state:
306
- st.session_state["selected_scenario_selectbox_key"] = (
307
- selected_scenario_list[
308
- st.session_state["project_dct"]["saved_scenarios"][
309
- "selected_scenario_selectbox_key"
310
- ]
311
- ]
312
- )
313
-
314
- col_a, col_b = st.columns(2)
315
- selected_scenario = col_a.selectbox(
316
- "Pick a scenario to view details",
317
- selected_scenario_list,
318
- # key="selected_scenario_selectbox_key",
319
- index=st.session_state["project_dct"]["saved_scenarios"][
320
- "selected_scenario_selectbox_key"
321
- ],
322
- )
323
- st.session_state["project_dct"]["saved_scenarios"][
324
- "selected_scenario_selectbox_key"
325
- ] = selected_scenario_list.index(selected_scenario)
326
-
327
- scenarios_to_download = col_b.multiselect(
328
- "Select scenarios to download",
329
- list(saved_scenarios.keys()),
330
- on_change=disable_download_button,
331
- )
332
-
333
- with col_a:
334
- col3, col4 = st.columns(2)
335
-
336
- col4.button(
337
- "Delete scenarios",
338
- on_click=delete_scenario,
339
- use_container_width=True,
340
- )
341
- col3.button(
342
- "Load Scenario",
343
- on_click=load_scenario,
344
- use_container_width=True,
345
- )
346
-
347
- with col_b:
348
- col1, col2 = st.columns(2)
349
-
350
- col1.button(
351
- "Prepare download",
352
- on_click=download_scenarios,
353
- use_container_width=True,
354
- )
355
- col2.download_button(
356
- label="Download Scenarios",
357
- data=st.session_state["xlsx_buffer"].getvalue(),
358
- file_name="scenarios.xlsx",
359
- mime="application/vnd.ms-excel",
360
- disabled=st.session_state["disable_download_button"],
361
- on_click=disable_download_button,
362
- use_container_width=True,
363
- )
364
-
365
- # column_1, column_2, column_3 = st.columns((6, 1, 1))
366
- # with column_1:
367
- # st.header(selected_scenario)
368
- # with column_2:
369
- # st.button("Delete scenarios", on_click=delete_scenario)
370
- # with column_3:
371
- # st.button("Load Scenario", on_click=load_scenario)
372
-
373
- selected_scenario_details = saved_scenarios[selected_scenario]
374
-
375
- pd.set_option("display.max_colwidth", 100)
376
-
377
- st.markdown(
378
- create_scenario_summary(selected_scenario_details)
379
- .transform(transform)
380
- .style.set_table_styles(
381
- [
382
- {"selector": "th", "props": [("background-color", "#11B6BD")]},
383
- {
384
- "selector": "tr:nth-child(even)",
385
- "props": [("background-color", "#11B6BD")],
386
- },
387
- ]
388
- )
389
- .to_html(),
390
- unsafe_allow_html=True,
391
- )
392
-
393
- elif auth_status == False:
394
- st.error("Username/Password is incorrect")
395
-
396
- if auth_status != True:
397
- try:
398
- username_forgot_pw, email_forgot_password, random_password = (
399
- authenticator.forgot_password("Forgot password")
400
- )
401
- if username_forgot_pw:
402
- st.success("New password sent securely")
403
- # Random password to be transferred to user securely
404
- elif username_forgot_pw == False:
405
- st.error("Username not found")
406
- except Exception as e:
407
- st.error(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/11_Optimized_Result_Analysis.py DELETED
@@ -1,453 +0,0 @@
1
- import streamlit as st
2
- from numerize.numerize import numerize
3
- import pandas as pd
4
- from utilities import (format_numbers,decimal_formater,
5
- load_local_css,set_header,
6
- initialize_data,
7
- load_authenticator)
8
- import pickle
9
- import streamlit_authenticator as stauth
10
- import yaml
11
- from yaml import SafeLoader
12
- from classes import class_from_dict
13
- import plotly.express as px
14
- import numpy as np
15
- import plotly.graph_objects as go
16
- import pandas as pd
17
- from plotly.subplots import make_subplots
18
- import sqlite3
19
- from utilities import update_db
20
- def format_number(x):
21
- if x >= 1_000_000:
22
- return f'{x / 1_000_000:.2f}M'
23
- elif x >= 1_000:
24
- return f'{x / 1_000:.2f}K'
25
- else:
26
- return f'{x:.2f}'
27
-
28
- def summary_plot(data, x, y, title, text_column, color, format_as_percent=False, format_as_decimal=False):
29
- fig = px.bar(data, x=x, y=y, orientation='h',
30
- title=title, text=text_column, color=color)
31
- fig.update_layout(showlegend=False)
32
- data[text_column] = pd.to_numeric(data[text_column], errors='coerce')
33
-
34
- # Update the format of the displayed text based on the chosen format
35
- if format_as_percent:
36
- fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
37
- elif format_as_decimal:
38
- fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
39
- else:
40
- fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')
41
-
42
- fig.update_layout(xaxis_title=x, yaxis_title='Channel Name', showlegend=False)
43
- return fig
44
-
45
-
46
- def stacked_summary_plot(data, x, y, title, text_column, color_column, stack_column=None, format_as_percent=False, format_as_decimal=False):
47
- fig = px.bar(data, x=x, y=y, orientation='h',
48
- title=title, text=text_column, color=color_column, facet_col=stack_column)
49
- fig.update_layout(showlegend=False)
50
- data[text_column] = pd.to_numeric(data[text_column], errors='coerce')
51
-
52
- # Update the format of the displayed text based on the chosen format
53
- if format_as_percent:
54
- fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
55
- elif format_as_decimal:
56
- fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
57
- else:
58
- fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')
59
-
60
- fig.update_layout(xaxis_title=x, yaxis_title='', showlegend=False)
61
- return fig
62
-
63
-
64
-
65
- def funnel_plot(data, x, y, title, text_column, color_column, format_as_percent=False, format_as_decimal=False):
66
- data[text_column] = pd.to_numeric(data[text_column], errors='coerce')
67
-
68
- # Round the numeric values in the text column to two decimal points
69
- data[text_column] = data[text_column].round(2)
70
-
71
- # Create a color map for categorical data
72
- color_map = {category: f'rgb({i * 30 % 255},{i * 50 % 255},{i * 70 % 255})' for i, category in enumerate(data[color_column].unique())}
73
-
74
- fig = go.Figure(go.Funnel(
75
- y=data[y],
76
- x=data[x],
77
- text=data[text_column],
78
- marker=dict(color=data[color_column].map(color_map)),
79
- textinfo="value",
80
- hoverinfo='y+x+text'
81
- ))
82
-
83
- # Update the format of the displayed text based on the chosen format
84
- if format_as_percent:
85
- fig.update_layout(title=title, funnelmode="percent")
86
- elif format_as_decimal:
87
- fig.update_layout(title=title, funnelmode="overlay")
88
- else:
89
- fig.update_layout(title=title, funnelmode="group")
90
-
91
- return fig
92
-
93
-
94
- st.set_page_config(layout='wide')
95
- load_local_css('styles.css')
96
- set_header()
97
-
98
- # for k, v in st.session_state.items():
99
- # if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
100
- # st.session_state[k] = v
101
-
102
- st.empty()
103
- st.header('Model Result Analysis')
104
- spends_data=pd.read_excel('Overview_data_test.xlsx')
105
-
106
- with open('summary_df.pkl', 'rb') as file:
107
- summary_df_sorted = pickle.load(file)
108
- #st.write(summary_df_sorted)
109
-
110
- selected_scenario= st.selectbox('Select Saved Scenarios',['S1','S2'])
111
- summary_df_sorted=summary_df_sorted.sort_values(by=['Optimized_spend'],ascending=False)
112
- st.header('Optimized Spends Overview')
113
- ___columns=st.columns(3)
114
- with ___columns[2]:
115
- fig=summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent',color='Channel_name')
116
- st.plotly_chart(fig,use_container_width=True)
117
- with ___columns[0]:
118
- fig=summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend',color='Channel_name')
119
- st.plotly_chart(fig,use_container_width=True)
120
- with ___columns[1]:
121
- fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend',color='Channel_name')
122
- st.plotly_chart(fig,use_container_width=False)
123
-
124
- st.header(' Budget Allocation')
125
- summary_df_sorted['Perc_alloted']=np.round(summary_df_sorted['Optimized_spend']/summary_df_sorted['Optimized_spend'].sum(),2)
126
- columns2=st.columns(2)
127
- with columns2[0]:
128
- fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend',color='Channel_name')
129
- st.plotly_chart(fig,use_container_width=True)
130
- with columns2[1]:
131
- fig=summary_plot(summary_df_sorted, x='Perc_alloted', y='Channel_name', title='% Split', text_column='Perc_alloted',color='Channel_name',format_as_percent=True)
132
- st.plotly_chart(fig,use_container_width=True)
133
-
134
-
135
- if 'raw_data' not in st.session_state:
136
- st.session_state['raw_data']=pd.read_excel('raw_data_nov7_combined1.xlsx')
137
- st.session_state['raw_data']=st.session_state['raw_data'][st.session_state['raw_data']['MediaChannelName'].isin(summary_df_sorted['Channel_name'].unique())]
138
- st.session_state['raw_data']=st.session_state['raw_data'][st.session_state['raw_data']['Date'].isin(spends_data["Date"].unique())]
139
-
140
-
141
-
142
- #st.write(st.session_state['raw_data']['ResponseMetricName'])
143
- # st.write(st.session_state['raw_data'])
144
-
145
-
146
- st.header('Response Forecast Overview')
147
- raw_data=st.session_state['raw_data']
148
- effectiveness_overall=raw_data.groupby('ResponseMetricName').agg({'ResponseMetricValue': 'sum'}).reset_index()
149
- effectiveness_overall['Efficiency']=effectiveness_overall['ResponseMetricValue'].map(lambda x: x/raw_data['Media Spend'].sum() )
150
- # st.write(effectiveness_overall)
151
-
152
- columns6=st.columns(3)
153
-
154
- effectiveness_overall.sort_values(by=['ResponseMetricValue'],ascending=False,inplace=True)
155
- effectiveness_overall=np.round(effectiveness_overall,2)
156
- effectiveness_overall['ResponseMetric'] = effectiveness_overall['ResponseMetricName'].apply(lambda x: 'BAU' if 'BAU' in x else ('Gamified' if 'Gamified' in x else x))
157
- # effectiveness_overall=np.where(effectiveness_overall[effectiveness_overall['ResponseMetricName']=="Adjusted Account Approval BAU"],"Adjusted Account Approval BAU",effectiveness_overall['ResponseMetricName'])
158
-
159
- effectiveness_overall.replace({'ResponseMetricName':{'BAU approved clients - Appsflyer':'Approved clients - Appsflyer',
160
- 'Gamified approved clients - Appsflyer':'Approved clients - Appsflyer'}},inplace=True)
161
-
162
- # st.write(effectiveness_overall.sort_values(by=['ResponseMetricValue'],ascending=False))
163
-
164
-
165
- condition = effectiveness_overall['ResponseMetricName'] == "Adjusted Account Approval BAU"
166
- condition1= effectiveness_overall['ResponseMetricName'] == "Approved clients - Appsflyer"
167
- effectiveness_overall['ResponseMetric'] = np.where(condition, "Adjusted Account Approval BAU", effectiveness_overall['ResponseMetric'])
168
-
169
- effectiveness_overall['ResponseMetricName'] = np.where(condition1, "Approved clients - Appsflyer (BAU, Gamified)", effectiveness_overall['ResponseMetricName'])
170
- # effectiveness_overall=pd.DataFrame({'ResponseMetricName':["App Installs - Appsflyer",'Account Requests - Appsflyer',
171
- # 'Total Adjusted Account Approval','Adjusted Account Approval BAU',
172
- # 'Approved clients - Appsflyer','Approved clients - Appsflyer'],
173
- # 'ResponseMetricValue':[683067,367020,112315,79768,36661,16834],
174
- # 'Efficiency':[1.24,0.67,0.2,0.14,0.07,0.03],
175
- custom_colors = {
176
- 'App Installs - Appsflyer': 'rgb(255, 135, 0)', # Steel Blue (Blue)
177
- 'Account Requests - Appsflyer': 'rgb(125, 239, 161)', # Cornflower Blue (Blue)
178
- 'Adjusted Account Approval': 'rgb(129, 200, 255)', # Dodger Blue (Blue)
179
- 'Adjusted Account Approval BAU': 'rgb(255, 207, 98)', # Light Sky Blue (Blue)
180
- 'Approved clients - Appsflyer': 'rgb(0, 97, 198)', # Light Blue (Blue)
181
- "BAU": 'rgb(41, 176, 157)', # Steel Blue (Blue)
182
- "Gamified": 'rgb(213, 218, 229)' # Silver (Gray)
183
- # Add more categories and their respective shades of blue as needed
184
- }
185
-
186
-
187
-
188
-
189
-
190
-
191
- with columns6[0]:
192
- revenue=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='Total Approved Accounts - Revenue']['ResponseMetricValue']).iloc[0]
193
- revenue=round(revenue / 1_000_000, 2)
194
-
195
- # st.metric('Total Revenue', f"${revenue} M")
196
- # with columns6[1]:
197
- # BAU=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='BAU approved clients - Revenue']['ResponseMetricValue']).iloc[0]
198
- # BAU=round(BAU / 1_000_000, 2)
199
- # st.metric('BAU approved clients - Revenue', f"${BAU} M")
200
- # with columns6[2]:
201
- # Gam=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='Gamified approved clients - Revenue']['ResponseMetricValue']).iloc[0]
202
- # Gam=round(Gam / 1_000_000, 2)
203
- # st.metric('Gamified approved clients - Revenue', f"${Gam} M")
204
-
205
- # st.write(effectiveness_overall)
206
- data = {'Revenue': ['BAU approved clients - Revenue', 'Gamified approved clients- Revenue'],
207
- 'ResponseMetricValue': [70200000, 1770000],
208
- 'Efficiency':[127.54,3.21]}
209
- df = pd.DataFrame(data)
210
-
211
-
212
- columns9=st.columns([0.60,0.40])
213
- with columns9[0]:
214
- figd = px.pie(df,
215
- names='Revenue',
216
- values='ResponseMetricValue',
217
- hole=0.3, # set the size of the hole in the donut
218
- title='Effectiveness')
219
- figd.update_layout(
220
- margin=dict(l=0, r=0, b=0, t=0),width=100, height=180,legend=dict(
221
- orientation='v', # set orientation to horizontal
222
- x=0, # set x to 0 to move to the left
223
- y=0.8 # adjust y as needed
224
- )
225
- )
226
-
227
- st.plotly_chart(figd, use_container_width=True)
228
-
229
- with columns9[1]:
230
- figd1 = px.pie(df,
231
- names='Revenue',
232
- values='Efficiency',
233
- hole=0.3, # set the size of the hole in the donut
234
- title='Efficiency')
235
- figd1.update_layout(
236
- margin=dict(l=0, r=0, b=0, t=0),width=100,height=180,showlegend=False
237
- )
238
- st.plotly_chart(figd1, use_container_width=True)
239
-
240
- effectiveness_overall['Response Metric Name']=effectiveness_overall['ResponseMetricName']
241
-
242
-
243
-
244
- columns4= st.columns([0.55,0.45])
245
- with columns4[0]:
246
- fig=px.funnel(effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
247
- 'BAU approved clients - Revenue',
248
- 'Gamified approved clients - Revenue',
249
- "Total Approved Accounts - Appsflyer"]))],
250
- x='ResponseMetricValue', y='Response Metric Name',color='ResponseMetric',
251
- color_discrete_map=custom_colors,title='Effectiveness',
252
- labels=None)
253
- custom_y_labels=['App Installs - Appsflyer','Account Requests - Appsflyer','Adjusted Account Approval','Adjusted Account Approval BAU',
254
- "Approved clients - Appsflyer (BAU, Gamified)"
255
- ]
256
- fig.update_layout(showlegend=False,
257
- yaxis=dict(
258
- tickmode='array',
259
- ticktext=custom_y_labels,
260
- )
261
- )
262
- fig.update_traces(textinfo='value', textposition='inside', texttemplate='%{x:.2s} ', hoverinfo='y+x+percent initial')
263
-
264
- last_trace_index = len(fig.data) - 1
265
- fig.update_traces(marker=dict(line=dict(color='black', width=2)), selector=dict(marker=dict(color='blue')))
266
-
267
- st.plotly_chart(fig,use_container_width=True)
268
-
269
-
270
-
271
-
272
-
273
- with columns4[1]:
274
-
275
- # Your existing code for creating the bar chart
276
- fig1 = px.bar((effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
277
- 'BAU approved clients - Revenue',
278
- 'Gamified approved clients - Revenue',
279
- "Total Approved Accounts - Appsflyer"]))]).sort_values(by='ResponseMetricValue'),
280
- x='Efficiency', y='Response Metric Name',
281
- color_discrete_map=custom_colors, color='ResponseMetric',
282
- labels=None,text_auto=True,title='Efficiency'
283
- )
284
-
285
- # Update layout and traces
286
- fig1.update_traces(customdata=effectiveness_overall['Efficiency'],
287
- textposition='auto')
288
- fig1.update_layout(showlegend=False)
289
- fig1.update_yaxes(title='',showticklabels=False)
290
- fig1.update_xaxes(title='',showticklabels=False)
291
- fig1.update_xaxes(tickfont=dict(size=20))
292
- fig1.update_yaxes(tickfont=dict(size=20))
293
- st.plotly_chart(fig1, use_container_width=True)
294
-
295
-
296
- effectiveness_overall_revenue=pd.DataFrame({'ResponseMetricName':['Approved Clients','Approved Clients'],
297
- 'ResponseMetricValue':[70201070,1768900],
298
- 'Efficiency':[127.54,3.21],
299
- 'ResponseMetric':['BAU','Gamified']
300
- })
301
- # from plotly.subplots import make_subplots
302
- # fig = make_subplots(rows=1, cols=2,
303
- # subplot_titles=["Effectiveness", "Efficiency"])
304
-
305
- # # Add first plot as subplot
306
- # fig.add_trace(go.Funnel(
307
- # x = fig.data[0].x,
308
- # y = fig.data[0].y,
309
- # textinfo = 'value+percent initial',
310
- # hoverinfo = 'x+y+percent initial'
311
- # ), row=1, col=1)
312
-
313
- # # Update layout for first subplot
314
- # fig.update_xaxes(title_text="Response Metric Value", row=1, col=1)
315
- # fig.update_yaxes(ticktext = custom_y_labels, row=1, col=1)
316
-
317
- # # Add second plot as subplot
318
- # fig.add_trace(go.Bar(
319
- # x = fig1.data[0].x,
320
- # y = fig1.data[0].y,
321
- # customdata = fig1.data[0].customdata,
322
- # textposition = 'auto'
323
- # ), row=1, col=2)
324
-
325
- # # Update layout for second subplot
326
- # fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)
327
- # fig.update_yaxes(title='', showticklabels=False, row=1, col=2)
328
-
329
- # fig.update_layout(height=600, width=800, title_text="Key Metrics")
330
- # st.plotly_chart(fig)
331
-
332
-
333
- st.header('Return Forecast by Media Channel')
334
- with st.expander("Return Forecast by Media Channel"):
335
- metric_data=[val for val in list(st.session_state['raw_data']['ResponseMetricName'].unique()) if val!=np.NaN]
336
- # st.write(metric_data)
337
- metric=st.selectbox('Select Metric',metric_data,index=1)
338
-
339
- selected_metric=st.session_state['raw_data'][st.session_state['raw_data']['ResponseMetricName']==metric]
340
- # st.dataframe(selected_metric.head(2))
341
- selected_metric=st.session_state['raw_data'][st.session_state['raw_data']['ResponseMetricName']==metric]
342
- effectiveness=selected_metric.groupby(by=['MediaChannelName'])['ResponseMetricValue'].sum()
343
- effectiveness_df=pd.DataFrame({'Channel':effectiveness.index,"ResponseMetricValue":effectiveness.values})
344
-
345
- summary_df_sorted=summary_df_sorted.merge(effectiveness_df,left_on="Channel_name",right_on='Channel')
346
-
347
- #
348
- summary_df_sorted['Efficiency'] = summary_df_sorted['ResponseMetricValue'] / summary_df_sorted['Optimized_spend']
349
- summary_df_sorted=summary_df_sorted.sort_values(by='Optimized_spend',ascending=True)
350
- #st.dataframe(summary_df_sorted)
351
-
352
- channel_colors = px.colors.qualitative.Plotly
353
-
354
- fig = make_subplots(rows=1, cols=3, subplot_titles=('Optimized Spends', 'Effectiveness', 'Efficiency'), horizontal_spacing=0.05)
355
-
356
- for i, channel in enumerate(summary_df_sorted['Channel_name'].unique()):
357
- channel_df = summary_df_sorted[summary_df_sorted['Channel_name'] == channel]
358
- channel_color = channel_colors[i % len(channel_colors)]
359
-
360
- fig.add_trace(go.Bar(x=channel_df['Optimized_spend'],
361
- y=channel_df['Channel_name'],
362
- text=channel_df['Optimized_spend'].apply(format_number),
363
- marker_color=channel_color,
364
- orientation='h'), row=1, col=1)
365
-
366
- fig.add_trace(go.Bar(x=channel_df['ResponseMetricValue'],
367
- y=channel_df['Channel_name'],
368
- text=channel_df['ResponseMetricValue'].apply(format_number),
369
- marker_color=channel_color,
370
- orientation='h', showlegend=False), row=1, col=2)
371
-
372
- fig.add_trace(go.Bar(x=channel_df['Efficiency'],
373
- y=channel_df['Channel_name'],
374
- text=channel_df['Efficiency'].apply(format_number),
375
- marker_color=channel_color,
376
- orientation='h', showlegend=False), row=1, col=3)
377
-
378
- fig.update_layout(
379
- height=600,
380
- width=900,
381
- title='Media Channel Performance',
382
- showlegend=False
383
- )
384
-
385
- fig.update_yaxes(showticklabels=False ,row=1, col=2 )
386
- fig.update_yaxes(showticklabels=False, row=1, col=3)
387
-
388
- fig.update_xaxes(showticklabels=False, row=1, col=1)
389
- fig.update_xaxes(showticklabels=False, row=1, col=2)
390
- fig.update_xaxes(showticklabels=False, row=1, col=3)
391
-
392
-
393
- st.plotly_chart(fig, use_container_width=True)
394
-
395
-
396
-
397
- # columns= st.columns(3)
398
- # with columns[0]:
399
- # fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='', text_column='Optimized_spend',color='Channel_name')
400
- # st.plotly_chart(fig,use_container_width=True)
401
- # with columns[1]:
402
-
403
- # # effectiveness=(selected_metric.groupby(by=['MediaChannelName'])['ResponseMetricValue'].sum()).values
404
- # # effectiveness_df=pd.DataFrame({'Channel':st.session_state['raw_data']['MediaChannelName'].unique(),"ResponseMetricValue":effectiveness})
405
- # # # effectiveness.reset_index(inplace=True)
406
- # # # st.dataframe(effectiveness.head())
407
-
408
-
409
- # fig=summary_plot(summary_df_sorted, x='ResponseMetricValue', y='Channel_name', title='Effectiveness', text_column='ResponseMetricValue',color='Channel_name')
410
- # st.plotly_chart(fig,use_container_width=True)
411
-
412
- # with columns[2]:
413
- # fig=summary_plot(summary_df_sorted, x='Efficiency', y='Channel_name', title='Efficiency', text_column='Efficiency',color='Channel_name',format_as_decimal=True)
414
- # st.plotly_chart(fig,use_container_width=True)
415
-
416
-
417
- # Create figure with subplots
418
- # fig = make_subplots(rows=1, cols=2)
419
-
420
- # # Add funnel plot to subplot 1
421
- # fig.add_trace(
422
- # go.Funnel(
423
- # x=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricValue'],
424
- # y=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricName'],
425
- # textposition="inside",
426
- # texttemplate="%{x:.2s}",
427
- # customdata=effectiveness_overall['Efficiency'],
428
- # hovertemplate="%{customdata:.2f}<extra></extra>"
429
- # ),
430
- # row=1, col=1
431
- # )
432
-
433
- # # Add bar plot to subplot 2
434
- # fig.add_trace(
435
- # go.Bar(
436
- # x=effectiveness_overall.sort_values(by='ResponseMetricValue')['Efficiency'],
437
- # y=effectiveness_overall.sort_values(by='ResponseMetricValue')['ResponseMetricName'],
438
- # marker_color=effectiveness_overall['ResponseMetric'],
439
- # customdata=effectiveness_overall['Efficiency'],
440
- # hovertemplate="%{customdata:.2f}<extra></extra>",
441
- # textposition="outside"
442
- # ),
443
- # row=1, col=2
444
- # )
445
-
446
- # # Update layout
447
- # fig.update_layout(title_text="Effectiveness")
448
- # fig.update_yaxes(title_text="", row=1, col=1)
449
- # fig.update_yaxes(title_text="", showticklabels=False, row=1, col=2)
450
- # fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)
451
-
452
- # # Show figure
453
- # st.plotly_chart(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/1_Data_Import.py DELETED
@@ -1,1547 +0,0 @@
1
- # Importing necessary libraries
2
- import streamlit as st
3
- import os
4
-
5
- # from Home_redirecting import home
6
- from utilities import update_db
7
-
8
- st.set_page_config(
9
- page_title="Data Import",
10
- page_icon=":shark:",
11
- layout="wide",
12
- initial_sidebar_state="collapsed",
13
- )
14
-
15
- import pickle
16
- import pandas as pd
17
- from utilities import set_header, load_local_css
18
- import streamlit_authenticator as stauth
19
- import yaml
20
- from yaml import SafeLoader
21
- import sqlite3
22
-
23
- load_local_css("styles.css")
24
- set_header()
25
-
26
- for k, v in st.session_state.items():
27
- if (
28
- k not in ["logout", "login", "config"]
29
- and not k.startswith("FormSubmitter")
30
- and not k.startswith("data-editor")
31
- ):
32
- st.session_state[k] = v
33
- with open("config.yaml") as file:
34
- config = yaml.load(file, Loader=SafeLoader)
35
- st.session_state["config"] = config
36
- authenticator = stauth.Authenticate(
37
- config["credentials"],
38
- config["cookie"]["name"],
39
- config["cookie"]["key"],
40
- config["cookie"]["expiry_days"],
41
- config["preauthorized"],
42
- )
43
- st.session_state["authenticator"] = authenticator
44
- name, authentication_status, username = authenticator.login("Login", "main")
45
- auth_status = st.session_state.get("authentication_status")
46
-
47
- if auth_status == True:
48
- authenticator.logout("Logout", "main")
49
- is_state_initiaized = st.session_state.get("initialized", False)
50
-
51
- if not is_state_initiaized:
52
-
53
- if "session_name" not in st.session_state:
54
- st.session_state["session_name"] = None
55
-
56
- # Function to validate date column in dataframe
57
-
58
- if "project_dct" not in st.session_state:
59
- # home()
60
- st.warning("please select a project from Home page")
61
- st.stop()
62
-
63
- def validate_date_column(df):
64
- try:
65
- # Attempt to convert the 'Date' column to datetime
66
- df["date"] = pd.to_datetime(df["date"], format="%d-%m-%Y")
67
- return True
68
- except:
69
- return False
70
-
71
- # Function to determine data interval
72
- def determine_data_interval(common_freq):
73
- if common_freq == 1:
74
- return "daily"
75
- elif common_freq == 7:
76
- return "weekly"
77
- elif 28 <= common_freq <= 31:
78
- return "monthly"
79
- else:
80
- return "irregular"
81
-
82
- # Function to read each uploaded Excel file into a pandas DataFrame and stores them in a dictionary
83
- st.cache_resource(show_spinner=False)
84
-
85
- def files_to_dataframes(uploaded_files):
86
- df_dict = {}
87
- for uploaded_file in uploaded_files:
88
- # Extract file name without extension
89
- file_name = uploaded_file.name.rsplit(".", 1)[0]
90
-
91
- # Check for duplicate file names
92
- if file_name in df_dict:
93
- st.warning(
94
- f"Duplicate File: {file_name}. This file will be skipped.",
95
- icon="⚠️",
96
- )
97
- continue
98
-
99
- # Read the file into a DataFrame
100
- df = pd.read_excel(uploaded_file)
101
-
102
- # Convert all column names to lowercase
103
- df.columns = df.columns.str.lower().str.strip()
104
-
105
- # Separate numeric and non-numeric columns
106
- numeric_cols = list(df.select_dtypes(include=["number"]).columns)
107
- non_numeric_cols = [
108
- col
109
- for col in df.select_dtypes(exclude=["number"]).columns
110
- if col.lower() != "date"
111
- ]
112
-
113
- # Check for 'Date' column
114
- if not (validate_date_column(df) and len(numeric_cols) > 0):
115
- st.warning(
116
- f"File Name: {file_name} ➜ Please upload data with Date column in 'DD-MM-YYYY' format and at least one media/exogenous column. This file will be skipped.",
117
- icon="⚠️",
118
- )
119
- continue
120
-
121
- # Check for interval
122
- common_freq = common_freq = (
123
- pd.Series(df["date"].unique())
124
- .diff()
125
- .dt.days.dropna()
126
- .mode()[0]
127
- )
128
- # Calculate the data interval (daily, weekly, monthly or irregular)
129
- interval = determine_data_interval(common_freq)
130
- if interval == "irregular":
131
- st.warning(
132
- f"File Name: {file_name} ➜ Please upload data in daily, weekly or monthly interval. This file will be skipped.",
133
- icon="⚠️",
134
- )
135
- continue
136
-
137
- # Store both DataFrames in the dictionary under their respective keys
138
- df_dict[file_name] = {
139
- "numeric": numeric_cols,
140
- "non_numeric": non_numeric_cols,
141
- "interval": interval,
142
- "df": df,
143
- }
144
-
145
- return df_dict
146
-
147
- # Function to adjust dataframe granularity
148
- def adjust_dataframe_granularity(
149
- df, current_granularity, target_granularity
150
- ):
151
- # Set index
152
- df.set_index("date", inplace=True)
153
-
154
- # Define aggregation rules for resampling
155
- aggregation_rules = {
156
- col: "sum" if pd.api.types.is_numeric_dtype(df[col]) else "first"
157
- for col in df.columns
158
- }
159
-
160
- # Initialize resampled_df
161
- resampled_df = df
162
- if current_granularity == "daily" and target_granularity == "weekly":
163
- resampled_df = df.resample(
164
- "W-MON", closed="left", label="left"
165
- ).agg(aggregation_rules)
166
-
167
- elif (
168
- current_granularity == "daily" and target_granularity == "monthly"
169
- ):
170
- resampled_df = df.resample("MS", closed="left", label="left").agg(
171
- aggregation_rules
172
- )
173
-
174
- elif current_granularity == "daily" and target_granularity == "daily":
175
- resampled_df = df.resample("D").agg(aggregation_rules)
176
-
177
- elif (
178
- current_granularity in ["weekly", "monthly"]
179
- and target_granularity == "daily"
180
- ):
181
- # For higher to lower granularity, distribute numeric and replicate non-numeric values equally across the new period
182
- expanded_data = []
183
- for _, row in df.iterrows():
184
- if current_granularity == "weekly":
185
- period_range = pd.date_range(start=row.name, periods=7)
186
- elif current_granularity == "monthly":
187
- period_range = pd.date_range(
188
- start=row.name, periods=row.name.days_in_month
189
- )
190
-
191
- for date in period_range:
192
- new_row = {}
193
- for col in df.columns:
194
- if pd.api.types.is_numeric_dtype(df[col]):
195
- if current_granularity == "weekly":
196
- new_row[col] = row[col] / 7
197
- elif current_granularity == "monthly":
198
- new_row[col] = (
199
- row[col] / row.name.days_in_month
200
- )
201
- else:
202
- new_row[col] = row[col]
203
- expanded_data.append((date, new_row))
204
-
205
- resampled_df = pd.DataFrame(
206
- [data for _, data in expanded_data],
207
- index=[date for date, _ in expanded_data],
208
- )
209
-
210
- # Reset index
211
- resampled_df = resampled_df.reset_index().rename(
212
- columns={"index": "date"}
213
- )
214
-
215
- return resampled_df
216
-
217
- # Function to clean and extract unique values of Panel_1 and Panel_2
218
- st.cache_resource(show_spinner=False)
219
-
220
- def clean_and_extract_unique_values(files_dict, selections):
221
- all_panel1_values = set()
222
- all_panel2_values = set()
223
-
224
- for file_name, file_data in files_dict.items():
225
- df = file_data["df"]
226
-
227
- # 'Panel_1' and 'Panel_2' selections
228
- selected_panel1 = selections[file_name].get("Panel_1")
229
- selected_panel2 = selections[file_name].get("Panel_2")
230
-
231
- # Clean and standardize Panel_1 column if it exists and is selected
232
- if (
233
- selected_panel1
234
- and selected_panel1 != "N/A"
235
- and selected_panel1 in df.columns
236
- ):
237
- df[selected_panel1] = (
238
- df[selected_panel1]
239
- .str.lower()
240
- .str.strip()
241
- .str.replace("_", " ")
242
- )
243
- all_panel1_values.update(df[selected_panel1].dropna().unique())
244
-
245
- # Clean and standardize Panel_2 column if it exists and is selected
246
- if (
247
- selected_panel2
248
- and selected_panel2 != "N/A"
249
- and selected_panel2 in df.columns
250
- ):
251
- df[selected_panel2] = (
252
- df[selected_panel2]
253
- .str.lower()
254
- .str.strip()
255
- .str.replace("_", " ")
256
- )
257
- all_panel2_values.update(df[selected_panel2].dropna().unique())
258
-
259
- # Update the processed DataFrame back in the dictionary
260
- files_dict[file_name]["df"] = df
261
-
262
- return all_panel1_values, all_panel2_values
263
-
264
- # Function to format values for display
265
- st.cache_resource(show_spinner=False)
266
-
267
- def format_values_for_display(values_list):
268
- # Capitalize the first letter of each word and replace underscores with spaces
269
- formatted_list = [
270
- value.replace("_", " ").title() for value in values_list
271
- ]
272
- # Join values with commas and 'and' before the last value
273
- if len(formatted_list) > 1:
274
- return (
275
- ", ".join(formatted_list[:-1]) + ", and " + formatted_list[-1]
276
- )
277
- elif formatted_list:
278
- return formatted_list[0]
279
- return "No values available"
280
-
281
- # Function to normalizes all data within files_dict to a daily granularity
282
- st.cache(show_spinner=False, allow_output_mutation=True)
283
-
284
- def standardize_data_to_daily(files_dict, selections):
285
- # Normalize all data to a daily granularity using a provided function
286
- files_dict = apply_granularity_to_all(files_dict, "daily", selections)
287
-
288
- # Update the "interval" attribute for each dataset to indicate the new granularity
289
- for files_name, files_data in files_dict.items():
290
- files_data["interval"] = "daily"
291
-
292
- return files_dict
293
-
294
- # Function to apply granularity transformation to all DataFrames in files_dict
295
- st.cache_resource(show_spinner=False)
296
-
297
- def apply_granularity_to_all(
298
- files_dict, granularity_selection, selections
299
- ):
300
- for file_name, file_data in files_dict.items():
301
- df = file_data["df"].copy()
302
-
303
- # Handling when Panel_1 or Panel_2 might be 'N/A'
304
- selected_panel1 = selections[file_name].get("Panel_1")
305
- selected_panel2 = selections[file_name].get("Panel_2")
306
-
307
- # Correcting the segment selection logic & handling 'N/A'
308
- if selected_panel1 != "N/A" and selected_panel2 != "N/A":
309
- unique_combinations = df[
310
- [selected_panel1, selected_panel2]
311
- ].drop_duplicates()
312
- elif selected_panel1 != "N/A":
313
- unique_combinations = df[[selected_panel1]].drop_duplicates()
314
- selected_panel2 = None # Ensure Panel_2 is ignored if N/A
315
- elif selected_panel2 != "N/A":
316
- unique_combinations = df[[selected_panel2]].drop_duplicates()
317
- selected_panel1 = None # Ensure Panel_1 is ignored if N/A
318
- else:
319
- # If both are 'N/A', process the entire dataframe as is
320
- df = adjust_dataframe_granularity(
321
- df, file_data["interval"], granularity_selection
322
- )
323
- files_dict[file_name]["df"] = df
324
- continue # Skip to the next file
325
-
326
- transformed_segments = []
327
- for _, combo in unique_combinations.iterrows():
328
- if selected_panel1 and selected_panel2:
329
- segment = df[
330
- (df[selected_panel1] == combo[selected_panel1])
331
- & (df[selected_panel2] == combo[selected_panel2])
332
- ]
333
- elif selected_panel1:
334
- segment = df[df[selected_panel1] == combo[selected_panel1]]
335
- elif selected_panel2:
336
- segment = df[df[selected_panel2] == combo[selected_panel2]]
337
-
338
- # Adjust granularity of the segment
339
- transformed_segment = adjust_dataframe_granularity(
340
- segment, file_data["interval"], granularity_selection
341
- )
342
- transformed_segments.append(transformed_segment)
343
-
344
- # Combine all transformed segments into a single DataFrame for this file
345
- transformed_df = pd.concat(transformed_segments, ignore_index=True)
346
- files_dict[file_name]["df"] = transformed_df
347
-
348
- return files_dict
349
-
350
- # Function to create main dataframe structure
351
- st.cache_resource(show_spinner=False)
352
-
353
- def create_main_dataframe(
354
- files_dict, all_panel1_values, all_panel2_values, granularity_selection
355
- ):
356
- # Determine the global start and end dates across all DataFrames
357
- global_start = min(
358
- df["df"]["date"].min() for df in files_dict.values()
359
- )
360
- global_end = max(df["df"]["date"].max() for df in files_dict.values())
361
-
362
- # Adjust the date_range generation based on the granularity_selection
363
- if granularity_selection == "weekly":
364
- # Generate a weekly range, with weeks starting on Monday
365
- date_range = pd.date_range(
366
- start=global_start, end=global_end, freq="W-MON"
367
- )
368
- elif granularity_selection == "monthly":
369
- # Generate a monthly range, starting from the first day of each month
370
- date_range = pd.date_range(
371
- start=global_start, end=global_end, freq="MS"
372
- )
373
- else: # Default to daily if not weekly or monthly
374
- date_range = pd.date_range(
375
- start=global_start, end=global_end, freq="D"
376
- )
377
-
378
- # Collect all unique Panel_1 and Panel_2 values, excluding 'N/A'
379
- all_panel1s = all_panel1_values
380
- all_panel2s = all_panel2_values
381
-
382
- # Dynamically build the list of dimensions (Panel_1, Panel_2) to include in the main DataFrame based on availability
383
- dimensions, merge_keys = [], []
384
- if all_panel1s:
385
- dimensions.append(all_panel1s)
386
- merge_keys.append("Panel_1")
387
- if all_panel2s:
388
- dimensions.append(all_panel2s)
389
- merge_keys.append("Panel_2")
390
-
391
- dimensions.append(date_range) # Date range is always included
392
- merge_keys.append("date") # Date range is always included
393
-
394
- # Create a main DataFrame template with the dimensions
395
- main_df = pd.MultiIndex.from_product(
396
- dimensions,
397
- names=[name for name, _ in zip(merge_keys, dimensions)],
398
- ).to_frame(index=False)
399
-
400
- return main_df.reset_index(drop=True)
401
-
402
- # Function to prepare and merge dataFrames
403
- st.cache_resource(show_spinner=False)
404
-
405
- def merge_into_main_df(main_df, files_dict, selections):
406
- for file_name, file_data in files_dict.items():
407
- df = file_data["df"].copy()
408
-
409
- # Rename selected Panel_1 and Panel_2 columns if not 'N/A'
410
- selected_panel1 = selections[file_name].get("Panel_1", "N/A")
411
- selected_panel2 = selections[file_name].get("Panel_2", "N/A")
412
- if selected_panel1 != "N/A":
413
- df.rename(columns={selected_panel1: "Panel_1"}, inplace=True)
414
- if selected_panel2 != "N/A":
415
- df.rename(columns={selected_panel2: "Panel_2"}, inplace=True)
416
-
417
- # Merge current DataFrame into main_df based on 'date', and where applicable, 'Panel_1' and 'Panel_2'
418
- merge_keys = ["date"]
419
- if "Panel_1" in df.columns:
420
- merge_keys.append("Panel_1")
421
- if "Panel_2" in df.columns:
422
- merge_keys.append("Panel_2")
423
- main_df = pd.merge(main_df, df, on=merge_keys, how="left")
424
-
425
- # After all merges, sort by 'date' and reset index for cleanliness
426
- sort_by = ["date"]
427
- if "Panel_1" in main_df.columns:
428
- sort_by.append("Panel_1")
429
- if "Panel_2" in main_df.columns:
430
- sort_by.append("Panel_2")
431
- main_df.sort_values(by=sort_by, inplace=True)
432
- main_df.reset_index(drop=True, inplace=True)
433
-
434
- return main_df
435
-
436
- # Function to categorize column
437
- def categorize_column(column_name):
438
- # Define keywords for each category
439
- internal_keywords = [
440
- "Price",
441
- "Discount",
442
- "product_price",
443
- "cost",
444
- "margin",
445
- "inventory",
446
- "sales",
447
- "revenue",
448
- "turnover",
449
- "expense",
450
- ]
451
- exogenous_keywords = [
452
- "GDP",
453
- "Tax",
454
- "Inflation",
455
- "interest_rate",
456
- "employment_rate",
457
- "exchange_rate",
458
- "consumer_spending",
459
- "retail_sales",
460
- "oil_prices",
461
- "weather",
462
- ]
463
-
464
- # Check if the column name matches any of the keywords for Internal or Exogenous categories
465
-
466
- if (
467
- column_name
468
- in st.session_state["project_dct"]["data_import"]["cat_dct"].keys()
469
- and st.session_state["project_dct"]["data_import"]["cat_dct"][
470
- column_name
471
- ]
472
- is not None
473
- ):
474
-
475
- return st.session_state["project_dct"]["data_import"]["cat_dct"][
476
- column_name
477
- ] # resume project manoj
478
-
479
- else:
480
- for keyword in internal_keywords:
481
- if keyword.lower() in column_name.lower():
482
- return "Internal"
483
- for keyword in exogenous_keywords:
484
- if keyword.lower() in column_name.lower():
485
- return "Exogenous"
486
-
487
- # Default to Media if no match found
488
- return "Media"
489
-
490
- # Function to calculate missing stats and prepare for editable DataFrame
491
- st.cache_resource(show_spinner=False)
492
-
493
- def prepare_missing_stats_df(df):
494
- missing_stats = []
495
- for column in df.columns:
496
- if (
497
- column == "date" or column == "Panel_2" or column == "Panel_1"
498
- ): # Skip Date, Panel_1 and Panel_2 column
499
- continue
500
-
501
- missing = df[column].isnull().sum()
502
- pct_missing = round((missing / len(df)) * 100, 2)
503
-
504
- # Dynamically assign category based on column name
505
- category = categorize_column(column)
506
- # category = "Media" # Keep default bin as Media
507
-
508
- missing_stats.append(
509
- {
510
- "Column": column,
511
- "Missing Values": missing,
512
- "Missing Percentage": pct_missing,
513
- "Impute Method": "Fill with 0", # Default value
514
- "Category": category,
515
- }
516
- )
517
- stats_df = pd.DataFrame(missing_stats)
518
-
519
- return stats_df
520
-
521
- # Function to add API DataFrame details to the files dictionary
522
- st.cache_resource(show_spinner=False)
523
-
524
- def add_api_dataframe_to_dict(main_df, files_dict):
525
- files_dict["API"] = {
526
- "numeric": list(main_df.select_dtypes(include=["number"]).columns),
527
- "non_numeric": [
528
- col
529
- for col in main_df.select_dtypes(exclude=["number"]).columns
530
- if col.lower() != "date"
531
- ],
532
- "interval": determine_data_interval(
533
- pd.Series(main_df["date"].unique())
534
- .diff()
535
- .dt.days.dropna()
536
- .mode()[0]
537
- ),
538
- "df": main_df,
539
- }
540
-
541
- return files_dict
542
-
543
- # Function to reads an API into a DataFrame, parsing specified columns as datetime
544
- @st.cache_resource(show_spinner=False)
545
- def read_API_data():
546
- return pd.read_excel(
547
- r"./upf_data_converted_randomized_resp_metrics.xlsx",
548
- parse_dates=["Date"],
549
- )
550
-
551
- # Function to set the 'Panel_1_Panel_2_Selected' session state variable to False
552
- def set_Panel_1_Panel_2_Selected_false():
553
-
554
- st.session_state["Panel_1_Panel_2_Selected"] = False
555
-
556
- # restoring project_dct to default values when user modify any widjets
557
- st.session_state["project_dct"]["data_import"][
558
- "edited_stats_df"
559
- ] = None
560
- st.session_state["project_dct"]["data_import"]["merged_df"] = None
561
- st.session_state["project_dct"]["data_import"][
562
- "missing_stats_df"
563
- ] = None
564
- st.session_state["project_dct"]["data_import"]["cat_dct"] = {}
565
- st.session_state["project_dct"]["data_import"][
566
- "numeric_columns"
567
- ] = None
568
- st.session_state["project_dct"]["data_import"]["default_df"] = None
569
- st.session_state["project_dct"]["data_import"]["final_df"] = None
570
- st.session_state["project_dct"]["data_import"]["edited_df"] = None
571
-
572
- # Function to serialize and save the objects into a pickle file
573
- @st.cache_resource(show_spinner=False)
574
- def save_to_pickle(file_path, final_df, bin_dict):
575
- # Open the file in write-binary mode and dump the objects
576
- with open(file_path, "wb") as f:
577
- pickle.dump({"final_df": final_df, "bin_dict": bin_dict}, f)
578
- # Data is now saved to file
579
-
580
- # Function to processes the merged_df DataFrame based on operations defined in edited_df
581
- @st.cache_resource(show_spinner=False)
582
- def process_dataframes(merged_df, edited_df, edited_stats_df):
583
- # Ensure there are operations defined by the user
584
- if edited_df.empty:
585
-
586
- return merged_df, edited_stats_df # No operations to apply
587
-
588
- # Perform operations as defined by the user
589
- else:
590
-
591
- for index, row in edited_df.iterrows():
592
- result_column_name = (
593
- f"{row['Column 1']}{row['Operator']}{row['Column 2']}"
594
- )
595
- col1 = row["Column 1"]
596
- col2 = row["Column 2"]
597
- op = row["Operator"]
598
-
599
- # Apply the specified operation
600
- if op == "+":
601
- merged_df[result_column_name] = (
602
- merged_df[col1] + merged_df[col2]
603
- )
604
- elif op == "-":
605
- merged_df[result_column_name] = (
606
- merged_df[col1] - merged_df[col2]
607
- )
608
- elif op == "*":
609
- merged_df[result_column_name] = (
610
- merged_df[col1] * merged_df[col2]
611
- )
612
- elif op == "/":
613
- merged_df[result_column_name] = merged_df[
614
- col1
615
- ] / merged_df[col2].replace(0, 1e-9)
616
-
617
- # Add summary of operation to edited_stats_df
618
- new_row = {
619
- "Column": result_column_name,
620
- "Missing Values": None,
621
- "Missing Percentage": None,
622
- "Impute Method": None,
623
- "Category": row["Category"],
624
- }
625
- new_row_df = pd.DataFrame([new_row])
626
-
627
- # Use pd.concat to add the new_row_df to edited_stats_df
628
- edited_stats_df = pd.concat(
629
- [edited_stats_df, new_row_df], ignore_index=True, axis=0
630
- )
631
-
632
- # Combine column names from edited_df for cleanup
633
- combined_columns = set(edited_df["Column 1"]).union(
634
- set(edited_df["Column 2"])
635
- )
636
-
637
- # Filter out rows in edited_stats_df and drop columns from merged_df
638
- edited_stats_df = edited_stats_df[
639
- ~edited_stats_df["Column"].isin(combined_columns)
640
- ]
641
- merged_df.drop(
642
- columns=list(combined_columns), errors="ignore", inplace=True
643
- )
644
-
645
- return merged_df, edited_stats_df
646
-
647
- # Function to prepare a list of numeric column names and initialize an empty DataFrame with predefined structure
648
- st.cache_resource(show_spinner=False)
649
-
650
- def prepare_numeric_columns_and_default_df(merged_df, edited_stats_df):
651
- # Get columns categorized as 'Response Metrics'
652
- columns_response_metrics = edited_stats_df[
653
- edited_stats_df["Category"] == "Response Metrics"
654
- ]["Column"].tolist()
655
-
656
- # Filter numeric columns, excluding those categorized as 'Response Metrics'
657
- numeric_columns = [
658
- col
659
- for col in merged_df.select_dtypes(include=["number"]).columns
660
- if col not in columns_response_metrics
661
- ]
662
-
663
- # Define the structure of the empty DataFrame
664
- data = {
665
- "Column 1": pd.Series([], dtype="str"),
666
- "Operator": pd.Series([], dtype="str"),
667
- "Column 2": pd.Series([], dtype="str"),
668
- "Category": pd.Series([], dtype="str"),
669
- }
670
- default_df = pd.DataFrame(data)
671
-
672
- return numeric_columns, default_df
673
-
674
- # function to reset to default values in project_dct:
675
-
676
- # Initialize 'final_df' in session state
677
- if "final_df" not in st.session_state:
678
- st.session_state["final_df"] = pd.DataFrame()
679
-
680
- # Initialize 'bin_dict' in session state
681
- if "bin_dict" not in st.session_state:
682
- st.session_state["bin_dict"] = {}
683
-
684
- # Initialize 'Panel_1_Panel_2_Selected' in session state
685
- if "Panel_1_Panel_2_Selected" not in st.session_state:
686
- st.session_state["Panel_1_Panel_2_Selected"] = False
687
-
688
- # Page Title
689
- st.write("") # Top padding
690
- st.title("Data Import")
691
-
692
- conn = sqlite3.connect(
693
- r"DB\User.db", check_same_thread=False
694
- ) # connection with sql db
695
- c = conn.cursor()
696
-
697
- #########################################################################################################################################################
698
- # Create a dictionary to hold all DataFrames and collect user input to specify "Panel_2" and "Panel_1" columns for each file
699
- #########################################################################################################################################################
700
-
701
- # Read the Excel file, parsing 'Date' column as datetime
702
- main_df = read_API_data()
703
-
704
- # Convert all column names to lowercase
705
- main_df.columns = main_df.columns.str.lower().str.strip()
706
-
707
- # File uploader
708
- uploaded_files = st.file_uploader(
709
- "Upload additional data",
710
- type=["xlsx"],
711
- accept_multiple_files=True,
712
- on_change=set_Panel_1_Panel_2_Selected_false,
713
- )
714
-
715
- # Custom HTML for upload instructions
716
- recommendation_html = f"""
717
- <div style="text-align: justify;">
718
- <strong>Recommendation:</strong> For optimal processing, please ensure that all uploaded datasets including panel, media, internal, and exogenous data adhere to the following guidelines: Each dataset must include a <code>Date</code> column formatted as <code>DD-MM-YYYY</code>, be free of missing values.
719
- </div>
720
- """
721
- st.markdown(recommendation_html, unsafe_allow_html=True)
722
-
723
- # Choose Desired Granularity
724
- st.markdown("#### Choose Desired Granularity")
725
- # Granularity Selection
726
-
727
- granularity_selection = st.selectbox(
728
- "Choose Date Granularity",
729
- ["Daily", "Weekly", "Monthly"],
730
- label_visibility="collapsed",
731
- on_change=set_Panel_1_Panel_2_Selected_false,
732
- index=st.session_state["project_dct"]["data_import"][
733
- "granularity_selection"
734
- ], # resume
735
- )
736
-
737
- # st.write(st.session_state['project_dct']['data_import']['granularity_selection'])
738
-
739
- st.session_state["project_dct"]["data_import"]["granularity_selection"] = [
740
- "Daily",
741
- "Weekly",
742
- "Monthly",
743
- ].index(granularity_selection)
744
- # st.write(st.session_state['project_dct']['data_import']['granularity_selection'])
745
- granularity_selection = str(granularity_selection).lower()
746
-
747
- # Convert files to dataframes
748
- files_dict = files_to_dataframes(uploaded_files)
749
-
750
- # Add API Dataframe
751
- if main_df is not None:
752
- files_dict = add_api_dataframe_to_dict(main_df, files_dict)
753
-
754
- # Display a warning message if no files have been uploaded and halt further execution
755
- if not files_dict:
756
- st.warning(
757
- "Please upload at least one file to proceed.",
758
- icon="⚠️",
759
- )
760
- st.stop() # Halts further execution until file is uploaded
761
-
762
- # Select Panel_1 and Panel_2 columns
763
- st.markdown("#### Select Panel columns")
764
- selections = {}
765
- with st.expander("Select Panel columns", expanded=False):
766
- count = (
767
- 0 # Initialize counter to manage the visibility of labels and keys
768
- )
769
- for file_name, file_data in files_dict.items():
770
-
771
- # generatimg project dct keys dynamically
772
- if (
773
- f"Panel_1_selectbox{file_name}"
774
- not in st.session_state["project_dct"]["data_import"].keys()
775
- ):
776
- st.session_state["project_dct"]["data_import"][
777
- f"Panel_1_selectbox{file_name}"
778
- ] = 0
779
-
780
- if (
781
- f"Panel_2_selectbox{file_name}"
782
- not in st.session_state["project_dct"]["data_import"].keys()
783
- ):
784
-
785
- st.session_state["project_dct"]["data_import"][
786
- f"Panel_2_selectbox{file_name}"
787
- ] = 0
788
-
789
- # Determine visibility of the label based on the count
790
- if count == 0:
791
- label_visibility = "visible"
792
- else:
793
- label_visibility = "collapsed"
794
-
795
- # Extract non-numeric columns
796
- non_numeric_cols = file_data["non_numeric"]
797
-
798
- # Prepare Panel_1 and Panel_2 values for dropdown, adding "N/A" as an option
799
- panel1_values = non_numeric_cols + ["N/A"]
800
- panel2_values = non_numeric_cols + ["N/A"]
801
-
802
- # Skip if only one option is available
803
- if len(panel1_values) == 1 and len(panel2_values) == 1:
804
- selected_panel1, selected_panel2 = "N/A", "N/A"
805
- # Update the selections for Panel_1 and Panel_2 for the current file
806
- selections[file_name] = {
807
- "Panel_1": selected_panel1,
808
- "Panel_2": selected_panel2,
809
- }
810
- continue
811
-
812
- # Create layout columns for File Name, Panel_2, and Panel_1 selections
813
- file_name_col, Panel_1_col, Panel_2_col = st.columns([2, 4, 4])
814
-
815
- with file_name_col:
816
- # Display "File Name" label only for the first file
817
- if count == 0:
818
- st.write("File Name")
819
- else:
820
- st.write("")
821
- st.write(file_name) # Display the file name
822
-
823
- with Panel_1_col:
824
- # Display a selectbox for Panel_1 values
825
- selected_panel1 = st.selectbox(
826
- "Select Panel Level 1",
827
- panel2_values,
828
- on_change=set_Panel_1_Panel_2_Selected_false,
829
- label_visibility=label_visibility, # Control visibility of the label
830
- key=f"Panel_1_selectbox{count}", # Ensure unique key for each selectbox
831
- index=st.session_state["project_dct"]["data_import"][
832
- f"Panel_1_selectbox{file_name}"
833
- ],
834
- )
835
-
836
- st.session_state["project_dct"]["data_import"][
837
- f"Panel_1_selectbox{file_name}"
838
- ] = panel2_values.index(selected_panel1)
839
-
840
- with Panel_2_col:
841
- # Display a selectbox for Panel_2 values
842
- selected_panel2 = st.selectbox(
843
- "Select Panel Level 2",
844
- panel1_values,
845
- on_change=set_Panel_1_Panel_2_Selected_false,
846
- label_visibility=label_visibility, # Control visibility of the label
847
- key=f"Panel_2_selectbox{count}", # Ensure unique key for each selectbox
848
- index=st.session_state["project_dct"]["data_import"][
849
- f"Panel_2_selectbox{file_name}"
850
- ],
851
- )
852
-
853
- st.session_state["project_dct"]["data_import"][
854
- f"Panel_2_selectbox{file_name}"
855
- ] = panel1_values.index(selected_panel2)
856
-
857
- # st.write(st.session_state['project_dct']['data_import'][f"Panel_2_selectbox{file_name}"])
858
-
859
- # Skip processing if the same column is selected for both Panel_1 and Panel_2 due to potential data integrity issues
860
-
861
- if selected_panel2 == selected_panel1 and not (
862
- selected_panel2 == "N/A" and selected_panel1 == "N/A"
863
- ):
864
- st.warning(
865
- f"File: {file_name} → The same column cannot serve as both Panel_1 and Panel_2. Please adjust your selections.",
866
- )
867
- selected_panel1, selected_panel2 = "N/A", "N/A"
868
- st.stop()
869
-
870
- # Update the selections for Panel_1 and Panel_2 for the current file
871
- selections[file_name] = {
872
- "Panel_1": selected_panel1,
873
- "Panel_2": selected_panel2,
874
- }
875
-
876
- count += 1 # Increment the counter after processing each file
877
- st.write()
878
- # Accept Panel_1 and Panel_2 selection
879
- accept = st.button(
880
- "Accept and Process", use_container_width=True
881
- ) # resume project manoj
882
-
883
- if (
884
- accept == False
885
- and st.session_state["project_dct"]["data_import"]["edited_stats_df"]
886
- is not None
887
- ):
888
-
889
- # st.write(st.session_state['project_dct'])
890
- st.markdown("#### Unique Panel values")
891
- # Display Panel_1 and Panel_2 values
892
- with st.expander("Unique Panel values"):
893
- st.write("")
894
- st.markdown(
895
- f"""
896
- <style>
897
- .justify-text {{
898
- text-align: justify;
899
- }}
900
- </style>
901
- <div class="justify-text">
902
- <strong>Panel Level 1 Values:</strong> {st.session_state['project_dct']['data_import']['formatted_panel1_values']}<br>
903
- <strong>Panel Level 2 Values:</strong> {st.session_state['project_dct']['data_import']['formatted_panel2_values']}
904
- </div>
905
- """,
906
- unsafe_allow_html=True,
907
- )
908
-
909
- # Display total Panel_1 and Panel_2
910
- st.write("")
911
- st.markdown(
912
- f"""
913
- <div style="text-align: justify;">
914
- <strong>Number of Level 1 Panels detected:</strong> {len(st.session_state['project_dct']['data_import']['formatted_panel2_values'])}<br>
915
- <strong>Number of Level 2 Panels detected:</strong> {len(st.session_state['project_dct']['data_import']['formatted_panel2_values'])}
916
- </div>
917
- """,
918
- unsafe_allow_html=True,
919
- )
920
- st.write("")
921
-
922
- # Create an editable DataFrame in Streamlit
923
-
924
- st.markdown("#### Select Variables Category & Impute Missing Values")
925
-
926
- # data_temp_path=os.path.join(st.session_state['project_path'],"edited_stats_df.pkl")
927
-
928
- # with open(data_temp_path,"rb") as f:
929
- # saved_edited_stats_df=pickle.load(f)
930
-
931
- # a=st.data_editor(saved_edited_stats_df)
932
-
933
- merged_df = st.session_state["project_dct"]["data_import"][
934
- "merged_df"
935
- ].copy()
936
-
937
- missing_stats_df = st.session_state["project_dct"]["data_import"][
938
- "missing_stats_df"
939
- ]
940
-
941
- edited_stats_df = st.data_editor(
942
- st.session_state["project_dct"]["data_import"]["edited_stats_df"],
943
- column_config={
944
- "Impute Method": st.column_config.SelectboxColumn(
945
- options=[
946
- "Drop Column",
947
- "Fill with Mean",
948
- "Fill with Median",
949
- "Fill with 0",
950
- ],
951
- required=True,
952
- default="Fill with 0",
953
- ),
954
- "Category": st.column_config.SelectboxColumn(
955
- options=[
956
- "Media",
957
- "Exogenous",
958
- "Internal",
959
- "Response Metrics",
960
- ],
961
- required=True,
962
- default="Media",
963
- ),
964
- },
965
- disabled=["Column", "Missing Values", "Missing Percentage"],
966
- hide_index=True,
967
- use_container_width=True,
968
- key="data-editor-1",
969
- )
970
-
971
- st.session_state["project_dct"]["data_import"]["cat_dct"] = {
972
- col: cat
973
- for col, cat in zip(
974
- edited_stats_df["Column"], edited_stats_df["Category"]
975
- )
976
- }
977
-
978
- for i, row in edited_stats_df.iterrows():
979
- column = row["Column"]
980
- if row["Impute Method"] == "Drop Column":
981
- merged_df.drop(columns=[column], inplace=True)
982
-
983
- elif row["Impute Method"] == "Fill with Mean":
984
- merged_df[column].fillna(
985
- st.session_state["project_dct"]["data_import"][
986
- "merged_df"
987
- ][column].mean(),
988
- inplace=True,
989
- )
990
-
991
- elif row["Impute Method"] == "Fill with Median":
992
- merged_df[column].fillna(
993
- st.session_state["project_dct"]["data_import"][
994
- "merged_df"
995
- ][column].median(),
996
- inplace=True,
997
- )
998
-
999
- elif row["Impute Method"] == "Fill with 0":
1000
- merged_df[column].fillna(0, inplace=True)
1001
-
1002
- # st.session_state['project_dct']['data_import']['edited_stats_df']=edited_stats_df
1003
- #########################################################################################################################################################
1004
- # Group columns
1005
- #########################################################################################################################################################
1006
-
1007
- # Display Group columns header
1008
- numeric_columns = st.session_state["project_dct"]["data_import"][
1009
- "numeric_columns"
1010
- ]
1011
- default_df = st.session_state["project_dct"]["data_import"][
1012
- "default_df"
1013
- ]
1014
-
1015
- st.markdown("#### Feature engineering")
1016
-
1017
- edited_df = st.data_editor(
1018
- st.session_state["project_dct"]["data_import"]["edited_df"],
1019
- column_config={
1020
- "Column 1": st.column_config.SelectboxColumn(
1021
- options=numeric_columns,
1022
- required=True,
1023
- width=400,
1024
- ),
1025
- "Operator": st.column_config.SelectboxColumn(
1026
- options=["+", "-", "*", "/"],
1027
- required=True,
1028
- default="+",
1029
- width=100,
1030
- ),
1031
- "Column 2": st.column_config.SelectboxColumn(
1032
- options=numeric_columns,
1033
- required=True,
1034
- default=numeric_columns[0],
1035
- width=400,
1036
- ),
1037
- "Category": st.column_config.SelectboxColumn(
1038
- options=[
1039
- "Media",
1040
- "Exogenous",
1041
- "Internal",
1042
- "Response Metrics",
1043
- ],
1044
- required=True,
1045
- default="Media",
1046
- width=200,
1047
- ),
1048
- },
1049
- num_rows="dynamic",
1050
- key="data-editor-4",
1051
- )
1052
-
1053
- final_df, edited_stats_df = process_dataframes(
1054
- merged_df, edited_df, edited_stats_df
1055
- )
1056
-
1057
- st.markdown("#### Final DataFrame")
1058
- st.dataframe(final_df, hide_index=True)
1059
-
1060
- # Initialize an empty dictionary to hold categories and their variables
1061
- category_dict = {}
1062
-
1063
- # Iterate over each row in the edited DataFrame to populate the dictionary
1064
- for i, row in edited_stats_df.iterrows():
1065
- column = row["Column"]
1066
- category = row[
1067
- "Category"
1068
- ] # The category chosen by the user for this variable
1069
-
1070
- # Check if the category already exists in the dictionary
1071
- if category not in category_dict:
1072
- # If not, initialize it with the current column as its first element
1073
- category_dict[category] = [column]
1074
- else:
1075
- # If it exists, append the current column to the list of variables under this category
1076
- category_dict[category].append(column)
1077
-
1078
- # Add Date, Panel_1 and Panel_12 in category dictionary
1079
- category_dict.update({"Date": ["date"]})
1080
- if "Panel_1" in final_df.columns:
1081
- category_dict["Panel Level 1"] = ["Panel_1"]
1082
- if "Panel_2" in final_df.columns:
1083
- category_dict["Panel Level 2"] = ["Panel_2"]
1084
-
1085
- # Display the dictionary
1086
- st.markdown("#### Variable Category")
1087
- for category, variables in category_dict.items():
1088
- # Check if there are multiple variables to handle "and" insertion correctly
1089
- if len(variables) > 1:
1090
- # Join all but the last variable with ", ", then add " and " before the last variable
1091
- variables_str = (
1092
- ", ".join(variables[:-1]) + " and " + variables[-1]
1093
- )
1094
- else:
1095
- # If there's only one variable, no need for "and"
1096
- variables_str = variables[0]
1097
-
1098
- # Display the category and its variables in the desired format
1099
- st.markdown(
1100
- f"<div style='text-align: justify;'><strong>{category}:</strong> {variables_str}</div>",
1101
- unsafe_allow_html=True,
1102
- )
1103
-
1104
- # Function to check if Response Metrics is selected
1105
- st.write("")
1106
- response_metrics_col = category_dict.get("Response Metrics", [])
1107
- if len(response_metrics_col) == 0:
1108
- st.warning("Please select Response Metrics column", icon="⚠️")
1109
- st.stop()
1110
- # elif len(response_metrics_col) > 1:
1111
- # st.warning("Please select only one Response Metrics column", icon="⚠️")
1112
- # st.stop()
1113
-
1114
- # Store final dataframe and bin dictionary into session state
1115
- st.session_state["final_df"], st.session_state["bin_dict"] = (
1116
- final_df,
1117
- category_dict,
1118
- )
1119
-
1120
- # Save the DataFrame and dictionary from the session state to the pickle file
1121
- if st.button(
1122
- "Accept and Save",
1123
- use_container_width=True,
1124
- key="data-editor-button",
1125
- ):
1126
- print("test*************")
1127
- update_db("1_Data_Import.py")
1128
- final_df = final_df.loc[:, ~final_df.columns.duplicated()]
1129
-
1130
- project_dct_path = os.path.join(
1131
- st.session_state["project_path"], "project_dct.pkl"
1132
- )
1133
-
1134
- with open(project_dct_path, "wb") as f:
1135
- pickle.dump(st.session_state["project_dct"], f)
1136
-
1137
- data_path = os.path.join(
1138
- st.session_state["project_path"], "data_import.pkl"
1139
- )
1140
-
1141
- st.session_state["data_path"] = data_path
1142
-
1143
- save_to_pickle(
1144
- data_path,
1145
- st.session_state["final_df"],
1146
- st.session_state["bin_dict"],
1147
- )
1148
-
1149
- st.session_state["project_dct"]["data_import"][
1150
- "edited_stats_df"
1151
- ] = edited_stats_df
1152
- st.session_state["project_dct"]["data_import"][
1153
- "merged_df"
1154
- ] = merged_df
1155
- st.session_state["project_dct"]["data_import"][
1156
- "missing_stats_df"
1157
- ] = missing_stats_df
1158
- st.session_state["project_dct"]["data_import"]["cat_dct"] = {
1159
- col: cat
1160
- for col, cat in zip(
1161
- edited_stats_df["Column"], edited_stats_df["Category"]
1162
- )
1163
- }
1164
- st.session_state["project_dct"]["data_import"][
1165
- "numeric_columns"
1166
- ] = numeric_columns
1167
- st.session_state["project_dct"]["data_import"][
1168
- "default_df"
1169
- ] = default_df
1170
- st.session_state["project_dct"]["data_import"][
1171
- "final_df"
1172
- ] = final_df
1173
- st.session_state["project_dct"]["data_import"][
1174
- "edited_df"
1175
- ] = edited_df
1176
-
1177
- st.toast("💾 Saved Successfully!")
1178
-
1179
- if accept:
1180
- # Normalize all data to a daily granularity. This initial standardization simplifies subsequent conversions to other levels of granularity
1181
- with st.spinner("Processing..."):
1182
- files_dict = standardize_data_to_daily(files_dict, selections)
1183
-
1184
- # Convert all data to daily level granularity
1185
- files_dict = apply_granularity_to_all(
1186
- files_dict, granularity_selection, selections
1187
- )
1188
-
1189
- # Update the 'files_dict' in the session state
1190
- st.session_state["files_dict"] = files_dict
1191
-
1192
- # Set a flag in the session state to indicate that selection has been made
1193
- st.session_state["Panel_1_Panel_2_Selected"] = True
1194
-
1195
- #########################################################################################################################################################
1196
- # Display unique Panel_1 and Panel_2 values
1197
- #########################################################################################################################################################
1198
-
1199
- # Halts further execution until Panel_1 and Panel_2 columns are selected
1200
- if (
1201
- st.session_state["project_dct"]["data_import"]["edited_stats_df"]
1202
- is None
1203
- ):
1204
-
1205
- if (
1206
- "files_dict" in st.session_state
1207
- and st.session_state["Panel_1_Panel_2_Selected"]
1208
- ):
1209
- files_dict = st.session_state["files_dict"]
1210
-
1211
- st.session_state["project_dct"]["data_import"][
1212
- "files_dict"
1213
- ] = files_dict # resume
1214
- else:
1215
- st.stop()
1216
-
1217
- # Set to store unique values of Panel_1 and Panel_2
1218
- with st.spinner("Fetching Panel values..."):
1219
- all_panel1_values, all_panel2_values = (
1220
- clean_and_extract_unique_values(files_dict, selections)
1221
- )
1222
-
1223
- # List of Panel_1 and Panel_2 columns unique values
1224
- list_of_all_panel1_values = list(all_panel1_values)
1225
- list_of_all_panel2_values = list(all_panel2_values)
1226
-
1227
- # Format Panel_1 and Panel_2 values for display
1228
- formatted_panel1_values = format_values_for_display(
1229
- list_of_all_panel1_values
1230
- ) ##
1231
- formatted_panel2_values = format_values_for_display(
1232
- list_of_all_panel2_values
1233
- ) ##
1234
-
1235
- # storing panel values in project_dct
1236
-
1237
- st.session_state["project_dct"]["data_import"][
1238
- "formatted_panel1_values"
1239
- ] = formatted_panel1_values
1240
- st.session_state["project_dct"]["data_import"][
1241
- "formatted_panel2_values"
1242
- ] = formatted_panel2_values
1243
-
1244
- # Unique Panel_1 and Panel_2 values
1245
- st.markdown("#### Unique Panel values")
1246
- # Display Panel_1 and Panel_2 values
1247
- with st.expander("Unique Panel values"):
1248
- st.write("")
1249
- st.markdown(
1250
- f"""
1251
- <style>
1252
- .justify-text {{
1253
- text-align: justify;
1254
- }}
1255
- </style>
1256
- <div class="justify-text">
1257
- <strong>Panel Level 1 Values:</strong> {formatted_panel1_values}<br>
1258
- <strong>Panel Level 2 Values:</strong> {formatted_panel2_values}
1259
- </div>
1260
- """,
1261
- unsafe_allow_html=True,
1262
- )
1263
-
1264
- # Display total Panel_1 and Panel_2
1265
- st.write("")
1266
- st.markdown(
1267
- f"""
1268
- <div style="text-align: justify;">
1269
- <strong>Number of Level 1 Panels detected:</strong> {len(list_of_all_panel1_values)}<br>
1270
- <strong>Number of Level 2 Panels detected:</strong> {len(list_of_all_panel2_values)}
1271
- </div>
1272
- """,
1273
- unsafe_allow_html=True,
1274
- )
1275
- st.write("")
1276
-
1277
- #########################################################################################################################################################
1278
- # Merge all DataFrames
1279
- #########################################################################################################################################################
1280
-
1281
- # Merge all DataFrames selected
1282
-
1283
- main_df = create_main_dataframe(
1284
- files_dict,
1285
- all_panel1_values,
1286
- all_panel2_values,
1287
- granularity_selection,
1288
- )
1289
-
1290
- merged_df = merge_into_main_df(main_df, files_dict, selections) ##
1291
-
1292
- #########################################################################################################################################################
1293
- # Categorize Variables and Impute Missing Values
1294
- #########################################################################################################################################################
1295
-
1296
- # Create an editable DataFrame in Streamlit
1297
-
1298
- st.markdown("#### Select Variables Category & Impute Missing Values")
1299
-
1300
- # Prepare missing stats DataFrame for editing
1301
- missing_stats_df = prepare_missing_stats_df(merged_df)
1302
-
1303
- # storing missing stats df
1304
-
1305
- edited_stats_df = st.data_editor(
1306
- missing_stats_df,
1307
- column_config={
1308
- "Impute Method": st.column_config.SelectboxColumn(
1309
- options=[
1310
- "Drop Column",
1311
- "Fill with Mean",
1312
- "Fill with Median",
1313
- "Fill with 0",
1314
- ],
1315
- required=True,
1316
- default="Fill with 0",
1317
- ),
1318
- "Category": st.column_config.SelectboxColumn(
1319
- options=[
1320
- "Media",
1321
- "Exogenous",
1322
- "Internal",
1323
- "Response Metrics",
1324
- ],
1325
- required=True,
1326
- default="Media",
1327
- ),
1328
- },
1329
- disabled=["Column", "Missing Values", "Missing Percentage"],
1330
- hide_index=True,
1331
- use_container_width=True,
1332
- key="data-editor-2",
1333
- )
1334
-
1335
- # edited_stats_df_path=os.path.join(st.session_state['project_path'],"edited_stats_df.pkl")
1336
-
1337
- # edited_stats_df.to_pickle(edited_stats_df_path)
1338
-
1339
- # Apply changes based on edited DataFrame
1340
- for i, row in edited_stats_df.iterrows():
1341
- column = row["Column"]
1342
- if row["Impute Method"] == "Drop Column":
1343
- merged_df.drop(columns=[column], inplace=True)
1344
-
1345
- elif row["Impute Method"] == "Fill with Mean":
1346
- merged_df[column].fillna(
1347
- merged_df[column].mean(), inplace=True
1348
- )
1349
-
1350
- elif row["Impute Method"] == "Fill with Median":
1351
- merged_df[column].fillna(
1352
- merged_df[column].median(), inplace=True
1353
- )
1354
-
1355
- elif row["Impute Method"] == "Fill with 0":
1356
- merged_df[column].fillna(0, inplace=True)
1357
-
1358
- # st.session_state['project_dct']['data_import']['edited_stats_df']=edited_stats_df
1359
-
1360
- #########################################################################################################################################################
1361
- # Group columns
1362
- #########################################################################################################################################################
1363
-
1364
- # Display Group columns header
1365
- st.markdown("#### Feature engineering")
1366
-
1367
- # Prepare the numeric columns and an empty DataFrame for user input
1368
- numeric_columns, default_df = prepare_numeric_columns_and_default_df(
1369
- merged_df, edited_stats_df
1370
- )
1371
-
1372
- # st.session_state['project_dct']['data_import']['edited_stats_df']=edited_stats_df
1373
-
1374
- # Display editable Dataframe
1375
- edited_df = st.data_editor(
1376
- default_df,
1377
- column_config={
1378
- "Column 1": st.column_config.SelectboxColumn(
1379
- options=numeric_columns,
1380
- required=True,
1381
- width=400,
1382
- ),
1383
- "Operator": st.column_config.SelectboxColumn(
1384
- options=["+", "-", "*", "/"],
1385
- required=True,
1386
- default="+",
1387
- width=100,
1388
- ),
1389
- "Column 2": st.column_config.SelectboxColumn(
1390
- options=numeric_columns,
1391
- required=True,
1392
- default=numeric_columns[0],
1393
- width=400,
1394
- ),
1395
- "Category": st.column_config.SelectboxColumn(
1396
- options=[
1397
- "Media",
1398
- "Exogenous",
1399
- "Internal",
1400
- "Response Metrics",
1401
- ],
1402
- required=True,
1403
- default="Media",
1404
- width=200,
1405
- ),
1406
- },
1407
- num_rows="dynamic",
1408
- key="data-editor-3",
1409
- )
1410
-
1411
- # Process the DataFrame based on user inputs and operations specified in edited_df
1412
- final_df, edited_stats_df = process_dataframes(
1413
- merged_df, edited_df, edited_stats_df
1414
- )
1415
-
1416
- # edited_df_path=os.path.join(st.session_state['project_path'],'edited_df.pkl')
1417
- # edited_df.to_pickle(edited_df_path)
1418
-
1419
- #########################################################################################################################################################
1420
- # Display the Final DataFrame and variables
1421
- #########################################################################################################################################################
1422
-
1423
- # Display the Final DataFrame and variables
1424
-
1425
- st.markdown("#### Final DataFrame")
1426
-
1427
- st.dataframe(final_df, hide_index=True)
1428
-
1429
- # Initialize an empty dictionary to hold categories and their variables
1430
- category_dict = {}
1431
-
1432
- # Iterate over each row in the edited DataFrame to populate the dictionary
1433
- for i, row in edited_stats_df.iterrows():
1434
- column = row["Column"]
1435
- category = row[
1436
- "Category"
1437
- ] # The category chosen by the user for this variable
1438
-
1439
- # Check if the category already exists in the dictionary
1440
- if category not in category_dict:
1441
- # If not, initialize it with the current column as its first element
1442
- category_dict[category] = [column]
1443
- else:
1444
- # If it exists, append the current column to the list of variables under this category
1445
- category_dict[category].append(column)
1446
-
1447
- # Add Date, Panel_1 and Panel_12 in category dictionary
1448
- category_dict.update({"Date": ["date"]})
1449
- if "Panel_1" in final_df.columns:
1450
- category_dict["Panel Level 1"] = ["Panel_1"]
1451
- if "Panel_2" in final_df.columns:
1452
- category_dict["Panel Level 2"] = ["Panel_2"]
1453
-
1454
- # Display the dictionary
1455
- st.markdown("#### Variable Category")
1456
- for category, variables in category_dict.items():
1457
- # Check if there are multiple variables to handle "and" insertion correctly
1458
- if len(variables) > 1:
1459
- # Join all but the last variable with ", ", then add " and " before the last variable
1460
- variables_str = (
1461
- ", ".join(variables[:-1]) + " and " + variables[-1]
1462
- )
1463
- else:
1464
- # If there's only one variable, no need for "and"
1465
- variables_str = variables[0]
1466
-
1467
- # Display the category and its variables in the desired format
1468
- st.markdown(
1469
- f"<div style='text-align: justify;'><strong>{category}:</strong> {variables_str}</div>",
1470
- unsafe_allow_html=True,
1471
- )
1472
-
1473
- # Function to check if Response Metrics is selected
1474
- st.write("")
1475
-
1476
- response_metrics_col = category_dict.get("Response Metrics", [])
1477
- if len(response_metrics_col) == 0:
1478
- st.warning("Please select Response Metrics column", icon="⚠️")
1479
- st.stop()
1480
- # elif len(response_metrics_col) > 1:
1481
- # st.warning("Please select only one Response Metrics column", icon="⚠️")
1482
- # st.stop()
1483
-
1484
- # Store final dataframe and bin dictionary into session state
1485
-
1486
- st.session_state["final_df"], st.session_state["bin_dict"] = (
1487
- final_df,
1488
- category_dict,
1489
- )
1490
-
1491
- # Save the DataFrame and dictionary from the session state to the pickle file
1492
-
1493
- if st.button("Accept and Save", use_container_width=True):
1494
-
1495
- print("test*************")
1496
- update_db("1_Data_Import.py")
1497
-
1498
- project_dct_path = os.path.join(
1499
- st.session_state["project_path"], "project_dct.pkl"
1500
- )
1501
-
1502
- with open(project_dct_path, "wb") as f:
1503
- pickle.dump(st.session_state["project_dct"], f)
1504
-
1505
- data_path = os.path.join(
1506
- st.session_state["project_path"], "data_import.pkl"
1507
- )
1508
- st.session_state["data_path"] = data_path
1509
-
1510
- save_to_pickle(
1511
- data_path,
1512
- st.session_state["final_df"],
1513
- st.session_state["bin_dict"],
1514
- )
1515
-
1516
- st.session_state["project_dct"]["data_import"][
1517
- "edited_stats_df"
1518
- ] = edited_stats_df
1519
- st.session_state["project_dct"]["data_import"][
1520
- "merged_df"
1521
- ] = merged_df
1522
- st.session_state["project_dct"]["data_import"][
1523
- "missing_stats_df"
1524
- ] = missing_stats_df
1525
- st.session_state["project_dct"]["data_import"]["cat_dct"] = {
1526
- col: cat
1527
- for col, cat in zip(
1528
- edited_stats_df["Column"], edited_stats_df["Category"]
1529
- )
1530
- }
1531
- st.session_state["project_dct"]["data_import"][
1532
- "numeric_columns"
1533
- ] = numeric_columns
1534
- st.session_state["project_dct"]["data_import"][
1535
- "default_df"
1536
- ] = default_df
1537
- st.session_state["project_dct"]["data_import"][
1538
- "final_df"
1539
- ] = final_df
1540
- st.session_state["project_dct"]["data_import"][
1541
- "edited_df"
1542
- ] = edited_df
1543
-
1544
- st.toast("💾 Saved Successfully!")
1545
-
1546
- # *****************************************************************
1547
- # *********************************Persistant flow****************
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/2_Data_Validation.py DELETED
@@ -1,509 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import plotly.express as px
4
- import plotly.graph_objects as go
5
- from Eda_functions import *
6
- import numpy as np
7
- import pickle
8
-
9
- # from streamlit_pandas_profiling import st_profile_report
10
- import streamlit as st
11
- import streamlit.components.v1 as components
12
- import sweetviz as sv
13
- from utilities import set_header, load_local_css
14
- from st_aggrid import GridOptionsBuilder, GridUpdateMode
15
- from st_aggrid import GridOptionsBuilder
16
- from st_aggrid import AgGrid
17
- import base64
18
- import os
19
- import tempfile
20
- #import pandas_profiling
21
- #from pydantic_settings import BaseSettings
22
- from ydata_profiling import ProfileReport
23
- import re
24
-
25
- # from pygwalker.api.streamlit import StreamlitRenderer
26
- # from Home_redirecting import home
27
- import sqlite3
28
- from utilities import update_db
29
-
30
- st.set_page_config(
31
- page_title="Data Validation",
32
- page_icon=":shark:",
33
- layout="wide",
34
- initial_sidebar_state="collapsed",
35
- )
36
- load_local_css("styles.css")
37
- set_header()
38
-
39
-
40
- if "project_dct" not in st.session_state:
41
- # home()
42
- st.warning("Please select a project from home page")
43
- st.stop()
44
-
45
-
46
- data_path = os.path.join(st.session_state["project_path"], "data_import.pkl")
47
-
48
- try:
49
- with open(data_path, "rb") as f:
50
- data = pickle.load(f)
51
- except Exception as e:
52
- st.error(f"Please import data from the Data Import Page")
53
- st.stop()
54
-
55
- conn = sqlite3.connect(
56
- r"DB\User.db", check_same_thread=False
57
- ) # connection with sql db
58
- c = conn.cursor()
59
- st.session_state["cleaned_data"] = data["final_df"]
60
- st.session_state["category_dict"] = data["bin_dict"]
61
- # st.write(st.session_state['category_dict'])
62
-
63
- st.title("Data Validation and Insights")
64
-
65
-
66
- target_variables = [
67
- st.session_state["category_dict"][key]
68
- for key in st.session_state["category_dict"].keys()
69
- if key == "Response Metrics"
70
- ]
71
- target_variables = list(*target_variables)
72
- target_column = st.selectbox(
73
- "Select the Target Feature/Dependent Variable (will be used in all charts as reference)",
74
- target_variables,
75
- index=st.session_state["project_dct"]["data_validation"]["target_column"],
76
- )
77
-
78
- st.session_state["project_dct"]["data_validation"]["target_column"] = (
79
- target_variables.index(target_column)
80
- )
81
-
82
- st.session_state["target_column"] = target_column
83
-
84
- panels = st.session_state["category_dict"]["Panel Level 1"][0]
85
-
86
- selected_panels = st.multiselect(
87
- "Please choose the panels you wish to analyze.If no panels are selected, insights will be derived from the overall data.",
88
- st.session_state["cleaned_data"][panels].unique(),
89
- default=st.session_state["project_dct"]["data_validation"][
90
- "selected_panels"
91
- ],
92
- )
93
-
94
- st.session_state["project_dct"]["data_validation"][
95
- "selected_panels"
96
- ] = selected_panels
97
-
98
- aggregation_dict = {
99
- item: "sum" if key == "Media" else "mean"
100
- for key, value in st.session_state["category_dict"].items()
101
- for item in value
102
- if item not in ["date", "Panel_1"]
103
- }
104
-
105
- with st.expander("**Reponse Metric Analysis**"):
106
-
107
- if len(selected_panels) > 0:
108
- st.session_state["Cleaned_data_panel"] = st.session_state[
109
- "cleaned_data"
110
- ][st.session_state["cleaned_data"]["Panel_1"].isin(selected_panels)]
111
-
112
- st.session_state["Cleaned_data_panel"] = (
113
- st.session_state["Cleaned_data_panel"]
114
- .groupby(by="date")
115
- .agg(aggregation_dict)
116
- )
117
- st.session_state["Cleaned_data_panel"] = st.session_state[
118
- "Cleaned_data_panel"
119
- ].reset_index()
120
- else:
121
- # st.write(st.session_state['cleaned_data'])
122
- st.session_state["Cleaned_data_panel"] = (
123
- st.session_state["cleaned_data"]
124
- .groupby(by="date")
125
- .agg(aggregation_dict)
126
- )
127
- st.session_state["Cleaned_data_panel"] = st.session_state[
128
- "Cleaned_data_panel"
129
- ].reset_index()
130
-
131
- fig = line_plot_target(
132
- st.session_state["Cleaned_data_panel"],
133
- target=target_column,
134
- title=f"{target_column} Over Time",
135
- )
136
- st.plotly_chart(fig, use_container_width=True)
137
-
138
- media_channel = list(
139
- *[
140
- st.session_state["category_dict"][key]
141
- for key in st.session_state["category_dict"].keys()
142
- if key == "Media"
143
- ]
144
- )
145
- # st.write(media_channel)
146
-
147
- exo_var = list(
148
- *[
149
- st.session_state["category_dict"][key]
150
- for key in st.session_state["category_dict"].keys()
151
- if key == "Exogenous"
152
- ]
153
- )
154
- internal_var = list(
155
- *[
156
- st.session_state["category_dict"][key]
157
- for key in st.session_state["category_dict"].keys()
158
- if key == "Internal"
159
- ]
160
- )
161
- Non_media_variables = exo_var + internal_var
162
-
163
- st.markdown("### Annual Data Summary")
164
-
165
- st.dataframe(
166
- summary(
167
- st.session_state["Cleaned_data_panel"],
168
- media_channel + [target_column],
169
- spends=None,
170
- Target=True,
171
- ),
172
- use_container_width=True,
173
- )
174
-
175
- if st.checkbox("Show raw data"):
176
- st.write(
177
- pd.concat(
178
- [
179
- pd.to_datetime(
180
- st.session_state["Cleaned_data_panel"]["date"]
181
- ).dt.strftime("%m/%d/%Y"),
182
- st.session_state["Cleaned_data_panel"]
183
- .select_dtypes(np.number)
184
- .applymap(format_numbers),
185
- ],
186
- axis=1,
187
- )
188
- )
189
- col1 = st.columns(1)
190
-
191
- if "selected_feature" not in st.session_state:
192
- st.session_state["selected_feature"] = None
193
-
194
-
195
- def generate_report_with_target(channel_data, target_feature):
196
- report = sv.analyze([channel_data, "Dataset"], target_feat=target_feature)
197
- temp_dir = tempfile.mkdtemp()
198
- report_path = os.path.join(temp_dir, "report.html")
199
- report.show_html(
200
- filepath=report_path, open_browser=False
201
- ) # Generate the report as an HTML file
202
- return report_path
203
-
204
-
205
- def generate_profile_report(df):
206
- pr = df.profile_report()
207
- temp_dir = tempfile.mkdtemp()
208
- report_path = os.path.join(temp_dir, "report.html")
209
- pr.to_file(report_path)
210
- return report_path
211
-
212
-
213
- # st.header()
214
- with st.expander("Univariate and Bivariate Report"):
215
- eda_columns = st.columns(2)
216
- with eda_columns[0]:
217
- if st.button(
218
- "Generate Profile Report",
219
- help="Univariate report which inlcudes all statistical analysis",
220
- ):
221
- with st.spinner("Generating Report"):
222
- report_file = generate_profile_report(
223
- st.session_state["Cleaned_data_panel"]
224
- )
225
-
226
- if os.path.exists(report_file):
227
- with open(report_file, "rb") as f:
228
- st.success("Report Generated")
229
- st.download_button(
230
- label="Download EDA Report",
231
- data=f.read(),
232
- file_name="pandas_profiling_report.html",
233
- mime="text/html",
234
- )
235
- else:
236
- st.warning(
237
- "Report generation failed. Unable to find the report file."
238
- )
239
-
240
- with eda_columns[1]:
241
- if st.button(
242
- "Generate Sweetviz Report",
243
- help="Bivariate report for selected response metric",
244
- ):
245
- with st.spinner("Generating Report"):
246
- report_file = generate_report_with_target(
247
- st.session_state["Cleaned_data_panel"], target_column
248
- )
249
-
250
- if os.path.exists(report_file):
251
- with open(report_file, "rb") as f:
252
- st.success("Report Generated")
253
- st.download_button(
254
- label="Download EDA Report",
255
- data=f.read(),
256
- file_name="report.html",
257
- mime="text/html",
258
- )
259
- else:
260
- st.warning(
261
- "Report generation failed. Unable to find the report file."
262
- )
263
-
264
-
265
- # st.warning('Work in Progress')
266
- with st.expander("Media Variables Analysis"):
267
- # Get the selected feature
268
-
269
- media_variables = [
270
- col
271
- for col in media_channel
272
- if "cost" not in col.lower() and "spend" not in col.lower()
273
- ]
274
-
275
- st.session_state["selected_feature"] = st.selectbox(
276
- "Select media", media_variables
277
- )
278
-
279
- st.session_state["project_dct"]["data_validation"]["selected_feature"] = (
280
- media_variables.index(st.session_state["selected_feature"])
281
- )
282
-
283
- # Filter spends features based on the selected feature
284
- spends_features = [
285
- col
286
- for col in st.session_state["Cleaned_data_panel"].columns
287
- if any(keyword in col.lower() for keyword in ["cost", "spend"])
288
- ]
289
- spends_feature = [
290
- col
291
- for col in spends_features
292
- if re.split(r"_cost|_spend", col.lower())[0]
293
- in st.session_state["selected_feature"]
294
- ]
295
-
296
- if "validation" not in st.session_state:
297
-
298
- st.session_state["validation"] = st.session_state["project_dct"][
299
- "data_validation"
300
- ]["validated_variables"]
301
-
302
- val_variables = [col for col in media_channel if col != "date"]
303
-
304
- if not set(
305
- st.session_state["project_dct"]["data_validation"][
306
- "validated_variables"
307
- ]
308
- ).issubset(set(val_variables)):
309
-
310
- st.session_state["validation"] = []
311
-
312
- if len(spends_feature) == 0:
313
- st.warning(
314
- "No spends varaible available for the selected metric in data"
315
- )
316
-
317
- else:
318
- fig_row1 = line_plot(
319
- st.session_state["Cleaned_data_panel"],
320
- x_col="date",
321
- y1_cols=[st.session_state["selected_feature"]],
322
- y2_cols=[target_column],
323
- title=f'Analysis of {st.session_state["selected_feature"]} and {[target_column][0]} Over Time',
324
- )
325
- st.plotly_chart(fig_row1, use_container_width=True)
326
- st.markdown("### Summary")
327
- st.dataframe(
328
- summary(
329
- st.session_state["cleaned_data"],
330
- [st.session_state["selected_feature"]],
331
- spends=spends_feature[0],
332
- ),
333
- use_container_width=True,
334
- )
335
-
336
- cols2 = st.columns(2)
337
-
338
- if len(
339
- set(st.session_state["validation"]).intersection(val_variables)
340
- ) == len(val_variables):
341
- disable = True
342
- help = "All media variables are validated"
343
- else:
344
- disable = False
345
- help = ""
346
-
347
- with cols2[0]:
348
- if st.button("Validate", disabled=disable, help=help):
349
- st.session_state["validation"].append(
350
- st.session_state["selected_feature"]
351
- )
352
- with cols2[1]:
353
-
354
- if st.checkbox("Validate all", disabled=disable, help=help):
355
- st.session_state["validation"].extend(val_variables)
356
- st.success("All media variables are validated ✅")
357
-
358
- if len(
359
- set(st.session_state["validation"]).intersection(val_variables)
360
- ) != len(val_variables):
361
- validation_data = pd.DataFrame(
362
- {
363
- "Validate": [
364
- (
365
- True
366
- if col in st.session_state["validation"]
367
- else False
368
- )
369
- for col in val_variables
370
- ],
371
- "Variables": val_variables,
372
- }
373
- )
374
- cols3 = st.columns([1, 30])
375
- with cols3[1]:
376
- validation_df = st.data_editor(
377
- validation_data,
378
- # column_config={
379
- # 'Validate':st.column_config.CheckboxColumn(wi)
380
- # },
381
- column_config={
382
- "Validate": st.column_config.CheckboxColumn(
383
- default=False,
384
- width=100,
385
- ),
386
- "Variables": st.column_config.TextColumn(width=1000),
387
- },
388
- hide_index=True,
389
- )
390
-
391
- selected_rows = validation_df[
392
- validation_df["Validate"] == True
393
- ]["Variables"]
394
-
395
- # st.write(selected_rows)
396
-
397
- st.session_state["validation"].extend(selected_rows)
398
-
399
- st.session_state["project_dct"]["data_validation"][
400
- "validated_variables"
401
- ] = st.session_state["validation"]
402
-
403
- not_validated_variables = [
404
- col
405
- for col in val_variables
406
- if col not in st.session_state["validation"]
407
- ]
408
-
409
- if not_validated_variables:
410
- not_validated_message = f'The following variables are not validated:\n{" , ".join(not_validated_variables)}'
411
- st.warning(not_validated_message)
412
-
413
-
414
- with st.expander("Non Media Variables Analysis"):
415
- selected_columns_row4 = st.selectbox(
416
- "Select Channel",
417
- Non_media_variables,
418
- index=st.session_state["project_dct"]["data_validation"][
419
- "Non_media_variables"
420
- ],
421
- )
422
-
423
- st.session_state["project_dct"]["data_validation"][
424
- "Non_media_variables"
425
- ] = Non_media_variables.index(selected_columns_row4)
426
-
427
- # # Create the dual-axis line plot
428
- fig_row4 = line_plot(
429
- st.session_state["Cleaned_data_panel"],
430
- x_col="date",
431
- y1_cols=[selected_columns_row4],
432
- y2_cols=[target_column],
433
- title=f"Analysis of {selected_columns_row4} and {target_column} Over Time",
434
- )
435
- st.plotly_chart(fig_row4, use_container_width=True)
436
- selected_non_media = selected_columns_row4
437
- sum_df = st.session_state["Cleaned_data_panel"][
438
- ["date", selected_non_media, target_column]
439
- ]
440
- sum_df["Year"] = pd.to_datetime(
441
- st.session_state["Cleaned_data_panel"]["date"]
442
- ).dt.year
443
- # st.dataframe(df)
444
- # st.dataframe(sum_df.head(2))
445
- print(sum_df)
446
- sum_df = sum_df.drop("date", axis=1).groupby("Year").agg("sum")
447
- sum_df.loc["Grand Total"] = sum_df.sum()
448
- sum_df = sum_df.applymap(format_numbers)
449
- sum_df.fillna("-", inplace=True)
450
- sum_df = sum_df.replace({"0.0": "-", "nan": "-"})
451
- st.markdown("### Summary")
452
- st.dataframe(sum_df, use_container_width=True)
453
-
454
- # with st.expander('Interactive Dashboard'):
455
-
456
- # pygg_app=StreamlitRenderer(st.session_state['cleaned_data'])
457
-
458
- # pygg_app.explorer()
459
-
460
- with st.expander("Correlation Analysis"):
461
- options = list(
462
- st.session_state["Cleaned_data_panel"].select_dtypes(np.number).columns
463
- )
464
-
465
- # selected_options = []
466
- # num_columns = 4
467
- # num_rows = -(-len(options) // num_columns) # Ceiling division to calculate rows
468
-
469
- # # Create a grid of checkboxes
470
- # st.header('Select Features for Correlation Plot')
471
- # tick=False
472
- # if st.checkbox('Select all'):
473
- # tick=True
474
- # selected_options = []
475
- # for row in range(num_rows):
476
- # cols = st.columns(num_columns)
477
- # for col in cols:
478
- # if options:
479
- # option = options.pop(0)
480
- # selected = col.checkbox(option,value=tick)
481
- # if selected:
482
- # selected_options.append(option)
483
- # # Display selected options
484
-
485
- selected_options = st.multiselect(
486
- "Select Variables For correlation plot",
487
- [var for var in options if var != target_column],
488
- default=options[3],
489
- )
490
-
491
- st.pyplot(
492
- correlation_plot(
493
- st.session_state["Cleaned_data_panel"],
494
- selected_options,
495
- target_column,
496
- )
497
- )
498
-
499
- if st.button("Save Changes", use_container_width=True):
500
-
501
- update_db("2_Data_Validation.py")
502
-
503
- project_dct_path = os.path.join(
504
- st.session_state["project_path"], "project_dct.pkl"
505
- )
506
-
507
- with open(project_dct_path, "wb") as f:
508
- pickle.dump(st.session_state["project_dct"], f)
509
- st.success("Changes saved")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/3_Transformations.py DELETED
@@ -1,686 +0,0 @@
1
- # Importing necessary libraries
2
- import streamlit as st
3
-
4
- st.set_page_config(
5
- page_title="Transformations",
6
- page_icon=":shark:",
7
- layout="wide",
8
- initial_sidebar_state="collapsed",
9
- )
10
-
11
- import pickle
12
- import numpy as np
13
- import pandas as pd
14
- from utilities import set_header, load_local_css
15
- import streamlit_authenticator as stauth
16
- import yaml
17
- from yaml import SafeLoader
18
- import os
19
- import sqlite3
20
- from utilities import update_db
21
-
22
-
23
- load_local_css("styles.css")
24
- set_header()
25
-
26
-
27
- # Check for authentication status
28
- for k, v in st.session_state.items():
29
- if k not in ["logout", "login", "config"] and not k.startswith(
30
- "FormSubmitter"
31
- ):
32
- st.session_state[k] = v
33
- with open("config.yaml") as file:
34
- config = yaml.load(file, Loader=SafeLoader)
35
- st.session_state["config"] = config
36
- authenticator = stauth.Authenticate(
37
- config["credentials"],
38
- config["cookie"]["name"],
39
- config["cookie"]["key"],
40
- config["cookie"]["expiry_days"],
41
- config["preauthorized"],
42
- )
43
- st.session_state["authenticator"] = authenticator
44
- name, authentication_status, username = authenticator.login("Login", "main")
45
- auth_status = st.session_state.get("authentication_status")
46
-
47
- if auth_status == True:
48
- authenticator.logout("Logout", "main")
49
- is_state_initiaized = st.session_state.get("initialized", False)
50
-
51
- if "project_dct" not in st.session_state:
52
- st.error("Please load a project from Home page")
53
- st.stop()
54
-
55
- conn = sqlite3.connect(
56
- r"DB/User.db", check_same_thread=False
57
- ) # connection with sql db
58
- c = conn.cursor()
59
-
60
- if not is_state_initiaized:
61
- if "session_name" not in st.session_state:
62
- st.session_state["session_name"] = None
63
-
64
- if not os.path.exists(
65
- os.path.join(st.session_state["project_path"], "data_import.pkl")
66
- ):
67
- st.error("Please move to Data Import page")
68
- # Deserialize and load the objects from the pickle file
69
- with open(
70
- os.path.join(st.session_state["project_path"], "data_import.pkl"), "rb"
71
- ) as f:
72
- data = pickle.load(f)
73
-
74
- # Accessing the loaded objects
75
- final_df_loaded = data["final_df"]
76
- bin_dict_loaded = data["bin_dict"]
77
- # final_df_loaded.to_csv("Test/final_df_loaded.csv",index=False)
78
- # Initialize session state==-
79
- if "transformed_columns_dict" not in st.session_state:
80
- st.session_state["transformed_columns_dict"] = (
81
- {}
82
- ) # Default empty dictionary
83
-
84
- if "final_df" not in st.session_state:
85
- st.session_state["final_df"] = (
86
- final_df_loaded # Default as original dataframe
87
- )
88
-
89
- if "summary_string" not in st.session_state:
90
- st.session_state["summary_string"] = None # Default as None
91
-
92
- # Extract original columns for specified categories
93
- original_columns = {
94
- category: bin_dict_loaded[category]
95
- for category in ["Media", "Internal", "Exogenous"]
96
- if category in bin_dict_loaded
97
- }
98
-
99
- # Retrive Panel columns
100
- panel_1 = bin_dict_loaded.get("Panel Level 1")
101
- panel_2 = bin_dict_loaded.get("Panel Level 2")
102
-
103
- # # For testing on non panel level
104
- # final_df_loaded = final_df_loaded.drop("Panel_1", axis=1)
105
- # final_df_loaded = final_df_loaded.groupby("date").mean().reset_index()
106
- # panel_1 = None
107
-
108
- # Apply transformations on panel level
109
- if panel_1:
110
- panel = panel_1 + panel_2 if panel_2 else panel_1
111
- else:
112
- panel = []
113
-
114
- # Function to build transformation widgets
115
- def transformation_widgets(category, transform_params, date_granularity):
116
-
117
- if (
118
- st.session_state["project_dct"]["transformations"] is None
119
- or st.session_state["project_dct"]["transformations"] == {}
120
- ):
121
- st.session_state["project_dct"]["transformations"] = {}
122
- if (
123
- category
124
- not in st.session_state["project_dct"]["transformations"].keys()
125
- ):
126
- st.session_state["project_dct"]["transformations"][category] = {}
127
-
128
- # Define a dict of pre-defined default values of every transformation
129
- predefined_defualts = {
130
- "Lag": (1, 2),
131
- "Lead": (1, 2),
132
- "Moving Average": (1, 2),
133
- "Saturation": (10, 20),
134
- "Power": (2, 4),
135
- "Adstock": (0.5, 0.7),
136
- }
137
-
138
- def selection_change():
139
- # Handles removing transformations
140
- if f"transformation_{category}" in st.session_state:
141
- current_selection = st.session_state[
142
- f"transformation_{category}"
143
- ]
144
- past_selection = st.session_state["project_dct"][
145
- "transformations"
146
- ][category][f"transformation_{category}"]
147
- removed_selection = list(
148
- set(past_selection) - set(current_selection)
149
- )
150
- for selection in removed_selection:
151
- # Option 1 - revert to defualt
152
- # st.session_state['project_dct']['transformations'][category][selection] = predefined_defualts[selection]
153
-
154
- # option 2 - delete from dict
155
- del st.session_state["project_dct"]["transformations"][
156
- category
157
- ][selection]
158
-
159
- # Transformation Options
160
- transformation_options = {
161
- "Media": [
162
- "Lag",
163
- "Moving Average",
164
- "Saturation",
165
- "Power",
166
- "Adstock",
167
- ],
168
- "Internal": ["Lead", "Lag", "Moving Average"],
169
- "Exogenous": ["Lead", "Lag", "Moving Average"],
170
- }
171
-
172
- expanded = st.session_state["project_dct"]["transformations"][
173
- category
174
- ].get("expanded", False)
175
- st.session_state["project_dct"]["transformations"][category][
176
- "expanded"
177
- ] = False
178
- with st.expander(f"{category} Transformations", expanded=expanded):
179
- st.session_state["project_dct"]["transformations"][category][
180
- "expanded"
181
- ] = True
182
-
183
- # Let users select which transformations to apply
184
- sel_transformations = st.session_state["project_dct"][
185
- "transformations"
186
- ][category].get(f"transformation_{category}", [])
187
- transformations_to_apply = st.multiselect(
188
- "Select transformations to apply",
189
- options=transformation_options[category],
190
- default=sel_transformations,
191
- key=f"transformation_{category}",
192
- # on_change=selection_change(),
193
- )
194
- st.session_state["project_dct"]["transformations"][category][
195
- "transformation_" + category
196
- ] = transformations_to_apply
197
- # Determine the number of transformations to put in each column
198
- transformations_per_column = (
199
- len(transformations_to_apply) // 2
200
- + len(transformations_to_apply) % 2
201
- )
202
-
203
- # Create two columns
204
- col1, col2 = st.columns(2)
205
-
206
- # Assign transformations to each column
207
- transformations_col1 = transformations_to_apply[
208
- :transformations_per_column
209
- ]
210
- transformations_col2 = transformations_to_apply[
211
- transformations_per_column:
212
- ]
213
-
214
- # Define a helper function to create widgets for each transformation
215
- def create_transformation_widgets(column, transformations):
216
- with column:
217
- for transformation in transformations:
218
- # Conditionally create widgets for selected transformations
219
- if transformation == "Lead":
220
- lead_default = st.session_state["project_dct"][
221
- "transformations"
222
- ][category].get(
223
- "Lead", predefined_defualts["Lead"]
224
- )
225
- st.markdown(f"**Lead ({date_granularity})**")
226
- lead = st.slider(
227
- "Lead periods",
228
- 1,
229
- 10,
230
- lead_default,
231
- 1,
232
- key=f"lead_{category}",
233
- label_visibility="collapsed",
234
- )
235
- st.session_state["project_dct"]["transformations"][
236
- category
237
- ]["Lead"] = lead
238
- start = lead[0]
239
- end = lead[1]
240
- step = 1
241
- transform_params[category]["Lead"] = np.arange(
242
- start, end + step, step
243
- )
244
-
245
- if transformation == "Lag":
246
- lag_default = st.session_state["project_dct"][
247
- "transformations"
248
- ][category].get("Lag", predefined_defualts["Lag"])
249
- st.markdown(f"**Lag ({date_granularity})**")
250
- lag = st.slider(
251
- "Lag periods",
252
- 1,
253
- 10,
254
- (1, 2), # lag_default,
255
- 1,
256
- key=f"lag_{category}",
257
- label_visibility="collapsed",
258
- )
259
- st.session_state["project_dct"]["transformations"][
260
- category
261
- ]["Lag"] = lag
262
- start = lag[0]
263
- end = lag[1]
264
- step = 1
265
- transform_params[category]["Lag"] = np.arange(
266
- start, end + step, step
267
- )
268
-
269
- if transformation == "Moving Average":
270
- ma_default = st.session_state["project_dct"][
271
- "transformations"
272
- ][category].get(
273
- "MA", predefined_defualts["Moving Average"]
274
- )
275
- st.markdown(
276
- f"**Moving Average ({date_granularity})**"
277
- )
278
- window = st.slider(
279
- "Window size for Moving Average",
280
- 1,
281
- 10,
282
- ma_default,
283
- 1,
284
- key=f"ma_{category}",
285
- label_visibility="collapsed",
286
- )
287
- st.session_state["project_dct"]["transformations"][
288
- category
289
- ]["MA"] = window
290
- start = window[0]
291
- end = window[1]
292
- step = 1
293
- transform_params[category]["Moving Average"] = (
294
- np.arange(start, end + step, step)
295
- )
296
-
297
- if transformation == "Saturation":
298
- st.markdown("**Saturation (%)**")
299
- saturation_default = st.session_state[
300
- "project_dct"
301
- ]["transformations"][category].get(
302
- "Saturation", predefined_defualts["Saturation"]
303
- )
304
- saturation_point = st.slider(
305
- f"Saturation Percentage",
306
- 0,
307
- 100,
308
- saturation_default,
309
- 10,
310
- key=f"sat_{category}",
311
- label_visibility="collapsed",
312
- )
313
- st.session_state["project_dct"]["transformations"][
314
- category
315
- ]["Saturation"] = saturation_point
316
- start = saturation_point[0]
317
- end = saturation_point[1]
318
- step = 10
319
- transform_params[category]["Saturation"] = (
320
- np.arange(start, end + step, step)
321
- )
322
-
323
- if transformation == "Power":
324
- st.markdown("**Power**")
325
- power_default = st.session_state["project_dct"][
326
- "transformations"
327
- ][category].get(
328
- "Power", predefined_defualts["Power"]
329
- )
330
- power = st.slider(
331
- f"Power",
332
- 0,
333
- 10,
334
- power_default,
335
- 1,
336
- key=f"power_{category}",
337
- label_visibility="collapsed",
338
- )
339
- st.session_state["project_dct"]["transformations"][
340
- category
341
- ]["Power"] = power
342
- start = power[0]
343
- end = power[1]
344
- step = 1
345
- transform_params[category]["Power"] = np.arange(
346
- start, end + step, step
347
- )
348
-
349
- if transformation == "Adstock":
350
- ads_default = st.session_state["project_dct"][
351
- "transformations"
352
- ][category].get(
353
- "Adstock", predefined_defualts["Adstock"]
354
- )
355
- st.markdown("**Adstock**")
356
- rate = st.slider(
357
- f"Factor ({category})",
358
- 0.0,
359
- 1.0,
360
- ads_default,
361
- 0.05,
362
- key=f"adstock_{category}",
363
- label_visibility="collapsed",
364
- )
365
- st.session_state["project_dct"]["transformations"][
366
- category
367
- ]["Adstock"] = rate
368
- start = rate[0]
369
- end = rate[1]
370
- step = 0.05
371
- adstock_range = [
372
- round(a, 3)
373
- for a in np.arange(start, end + step, step)
374
- ]
375
- transform_params[category][
376
- "Adstock"
377
- ] = adstock_range
378
-
379
- # Create widgets in each column
380
- create_transformation_widgets(col1, transformations_col1)
381
- create_transformation_widgets(col2, transformations_col2)
382
-
383
- # Function to apply Lag transformation
384
- def apply_lag(df, lag):
385
- return df.shift(lag)
386
-
387
- # Function to apply Lead transformation
388
- def apply_lead(df, lead):
389
- return df.shift(-lead)
390
-
391
- # Function to apply Moving Average transformation
392
- def apply_moving_average(df, window_size):
393
- return df.rolling(window=window_size).mean()
394
-
395
- # Function to apply Saturation transformation
396
- def apply_saturation(df, saturation_percent_100):
397
- # Convert saturation percentage from 100-based to fraction
398
- saturation_percent = saturation_percent_100 / 100.0
399
-
400
- # Calculate saturation point and steepness
401
- column_max = df.max()
402
- column_min = df.min()
403
- saturation_point = (column_min + column_max) / 2
404
-
405
- numerator = np.log(
406
- (1 / (saturation_percent if saturation_percent != 1 else 1 - 1e-9))
407
- - 1
408
- )
409
- denominator = np.log(saturation_point / max(column_max, 1e-9))
410
-
411
- steepness = numerator / max(
412
- denominator, 1e-9
413
- ) # Avoid division by zero with a small constant
414
-
415
- # Apply the saturation transformation
416
- transformed_series = df.apply(
417
- lambda x: (1 / (1 + (saturation_point / x) ** steepness)) * x
418
- )
419
-
420
- return transformed_series
421
-
422
- # Function to apply Power transformation
423
- def apply_power(df, power):
424
- return df**power
425
-
426
- # Function to apply Adstock transformation
427
- def apply_adstock(df, factor):
428
- x = 0
429
- # Use the walrus operator to update x iteratively with the Adstock formula
430
- adstock_var = [x := x * factor + v for v in df]
431
- ans = pd.Series(adstock_var, index=df.index)
432
- return ans
433
-
434
- # Function to generate transformed columns names
435
- @st.cache_resource(show_spinner=False)
436
- def generate_transformed_columns(original_columns, transform_params):
437
- transformed_columns, summary = {}, {}
438
-
439
- for category, columns in original_columns.items():
440
- for column in columns:
441
- transformed_columns[column] = []
442
- summary_details = (
443
- []
444
- ) # List to hold transformation details for the current column
445
-
446
- if category in transform_params:
447
- for transformation, values in transform_params[
448
- category
449
- ].items():
450
- # Generate transformed column names for each value
451
- for value in values:
452
- transformed_name = (
453
- f"{column}@{transformation}_{value}"
454
- )
455
- transformed_columns[column].append(
456
- transformed_name
457
- )
458
-
459
- # Format the values list as a string with commas and "and" before the last item
460
- if len(values) > 1:
461
- formatted_values = (
462
- ", ".join(map(str, values[:-1]))
463
- + " and "
464
- + str(values[-1])
465
- )
466
- else:
467
- formatted_values = str(values[0])
468
-
469
- # Add transformation details
470
- summary_details.append(
471
- f"{transformation} ({formatted_values})"
472
- )
473
-
474
- # Only add to summary if there are transformation details for the column
475
- if summary_details:
476
- formatted_summary = "⮕ ".join(summary_details)
477
- # Use <strong> tags to make the column name bold
478
- summary[column] = (
479
- f"<strong>{column}</strong>: {formatted_summary}"
480
- )
481
-
482
- # Generate a comprehensive summary string for all columns
483
- summary_items = [
484
- f"{idx + 1}. {details}"
485
- for idx, details in enumerate(summary.values())
486
- ]
487
-
488
- summary_string = "\n".join(summary_items)
489
-
490
- return transformed_columns, summary_string
491
-
492
- # Function to apply transformations to DataFrame slices based on specified categories and parameters
493
- @st.cache_resource(show_spinner=False)
494
- def apply_category_transformations(df, bin_dict, transform_params, panel):
495
- # Dictionary for function mapping
496
- transformation_functions = {
497
- "Lead": apply_lead,
498
- "Lag": apply_lag,
499
- "Moving Average": apply_moving_average,
500
- "Saturation": apply_saturation,
501
- "Power": apply_power,
502
- "Adstock": apply_adstock,
503
- }
504
-
505
- # Initialize category_df as an empty DataFrame
506
- category_df = pd.DataFrame()
507
-
508
- # Iterate through each category specified in transform_params
509
- for category in ["Media", "Internal", "Exogenous"]:
510
- if (
511
- category not in transform_params
512
- or category not in bin_dict
513
- or not transform_params[category]
514
- ):
515
- continue # Skip categories without transformations
516
-
517
- # Slice the DataFrame based on the columns specified in bin_dict for the current category
518
- df_slice = df[bin_dict[category] + panel]
519
-
520
- # Iterate through each transformation and its parameters for the current category
521
- for transformation, parameters in transform_params[
522
- category
523
- ].items():
524
- transformation_function = transformation_functions[
525
- transformation
526
- ]
527
-
528
- # Check if there is panel data to group by
529
- if len(panel) > 0:
530
- # Apply the transformation to each group
531
- category_df = pd.concat(
532
- [
533
- df_slice.groupby(panel)
534
- .transform(transformation_function, p)
535
- .add_suffix(f"@{transformation}_{p}")
536
- for p in parameters
537
- ],
538
- axis=1,
539
- )
540
-
541
- # Replace all NaN or null values in category_df with 0
542
- category_df.fillna(0, inplace=True)
543
-
544
- # Update df_slice
545
- df_slice = pd.concat(
546
- [df[panel], category_df],
547
- axis=1,
548
- )
549
-
550
- else:
551
- for p in parameters:
552
- # Apply the transformation function to each column
553
- temp_df = df_slice.apply(
554
- lambda x: transformation_function(x, p), axis=0
555
- ).rename(
556
- lambda x: f"{x}@{transformation}_{p}",
557
- axis="columns",
558
- )
559
- # Concatenate the transformed DataFrame slice to the category DataFrame
560
- category_df = pd.concat([category_df, temp_df], axis=1)
561
-
562
- # Replace all NaN or null values in category_df with 0
563
- category_df.fillna(0, inplace=True)
564
-
565
- # Update df_slice
566
- df_slice = pd.concat(
567
- [df[panel], category_df],
568
- axis=1,
569
- )
570
-
571
- # If category_df has been modified, concatenate it with the panel and response metrics from the original DataFrame
572
- if not category_df.empty:
573
- final_df = pd.concat([df, category_df], axis=1)
574
- else:
575
- # If no transformations were applied, use the original DataFrame
576
- final_df = df
577
-
578
- return final_df
579
-
580
- # Function to infers the granularity of the date column in a DataFrame
581
- @st.cache_resource(show_spinner=False)
582
- def infer_date_granularity(df):
583
- # Find the most common difference
584
- common_freq = (
585
- pd.Series(df["date"].unique()).diff().dt.days.dropna().mode()[0]
586
- )
587
-
588
- # Map the most common difference to a granularity
589
- if common_freq == 1:
590
- return "daily"
591
- elif common_freq == 7:
592
- return "weekly"
593
- elif 28 <= common_freq <= 31:
594
- return "monthly"
595
- else:
596
- return "irregular"
597
-
598
- #########################################################################################################################################################
599
- # User input for transformations
600
- #########################################################################################################################################################
601
-
602
- # Infer date granularity
603
- date_granularity = infer_date_granularity(final_df_loaded)
604
-
605
- # Initialize the main dictionary to store the transformation parameters for each category
606
- transform_params = {"Media": {}, "Internal": {}, "Exogenous": {}}
607
-
608
- # User input for transformations
609
- st.markdown("### Select Transformations to Apply")
610
- for category in ["Media", "Internal", "Exogenous"]:
611
- # Skip Internal
612
- if category == "Internal":
613
- continue
614
-
615
- transformation_widgets(category, transform_params, date_granularity)
616
-
617
- #########################################################################################################################################################
618
- # Apply transformations
619
- #########################################################################################################################################################
620
-
621
- # Apply category-based transformations to the DataFrame
622
- if st.button("Accept and Proceed", use_container_width=True):
623
- with st.spinner("Applying transformations..."):
624
- final_df = apply_category_transformations(
625
- final_df_loaded, bin_dict_loaded, transform_params, panel
626
- )
627
-
628
- # Generate a dictionary mapping original column names to lists of transformed column names
629
- transformed_columns_dict, summary_string = (
630
- generate_transformed_columns(
631
- original_columns, transform_params
632
- )
633
- )
634
-
635
- # Store into transformed dataframe and summary session state
636
- st.session_state["final_df"] = final_df
637
- st.session_state["summary_string"] = summary_string
638
-
639
- #########################################################################################################################################################
640
- # Display the transformed DataFrame and summary
641
- #########################################################################################################################################################
642
-
643
- # Display the transformed DataFrame in the Streamlit app
644
- st.markdown("### Transformed DataFrame")
645
- st.dataframe(st.session_state["final_df"], hide_index=True)
646
-
647
- # Total rows and columns
648
- total_rows, total_columns = st.session_state["final_df"].shape
649
- st.markdown(
650
- f"<p style='text-align: justify;'>The transformed DataFrame contains <strong>{total_rows}</strong> rows and <strong>{total_columns}</strong> columns.</p>",
651
- unsafe_allow_html=True,
652
- )
653
-
654
- # Display the summary of transformations as markdown
655
- if st.session_state["summary_string"]:
656
- with st.expander("Summary of Transformations"):
657
- st.markdown("### Summary of Transformations")
658
- st.markdown(
659
- st.session_state["summary_string"], unsafe_allow_html=True
660
- )
661
-
662
- @st.cache_resource(show_spinner=False)
663
- def save_to_pickle(file_path, final_df):
664
- # Open the file in write-binary mode and dump the objects
665
- with open(file_path, "wb") as f:
666
- pickle.dump({"final_df_transformed": final_df}, f)
667
- # Data is now saved to file
668
-
669
- if st.button("Accept and Save", use_container_width=True):
670
-
671
- save_to_pickle(
672
- os.path.join(
673
- st.session_state["project_path"], "final_df_transformed.pkl"
674
- ),
675
- st.session_state["final_df"],
676
- )
677
- project_dct_path = os.path.join(
678
- st.session_state["project_path"], "project_dct.pkl"
679
- )
680
-
681
- with open(project_dct_path, "wb") as f:
682
- pickle.dump(st.session_state["project_dct"], f)
683
-
684
- update_db("3_Transformations.py")
685
-
686
- st.toast("💾 Saved Successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/4_Model_Build.py DELETED
@@ -1,1062 +0,0 @@
1
- """
2
- MMO Build Sprint 3
3
- additions : adding more variables to session state for saved model : random effect, predicted train & test
4
-
5
- MMO Build Sprint 4
6
- additions : ability to run models for different response metrics
7
- """
8
-
9
- import streamlit as st
10
- import pandas as pd
11
- import plotly.express as px
12
- import plotly.graph_objects as go
13
- from Eda_functions import format_numbers
14
- import numpy as np
15
- import pickle
16
- from st_aggrid import AgGrid
17
- from st_aggrid import GridOptionsBuilder, GridUpdateMode
18
- from utilities import set_header, load_local_css
19
- from st_aggrid import GridOptionsBuilder
20
- import time
21
- import itertools
22
- import statsmodels.api as sm
23
- import numpy as npc
24
- import re
25
- import itertools
26
- from sklearn.metrics import (
27
- mean_absolute_error,
28
- r2_score,
29
- mean_absolute_percentage_error,
30
- )
31
- from sklearn.preprocessing import MinMaxScaler
32
- import os
33
- import matplotlib.pyplot as plt
34
- from statsmodels.stats.outliers_influence import variance_inflation_factor
35
- import yaml
36
- from yaml import SafeLoader
37
- import streamlit_authenticator as stauth
38
-
39
- st.set_option("deprecation.showPyplotGlobalUse", False)
40
- import statsmodels.api as sm
41
- import statsmodels.formula.api as smf
42
-
43
- from datetime import datetime
44
- import seaborn as sns
45
- from Data_prep_functions import *
46
- import sqlite3
47
- from utilities import update_db
48
-
49
-
50
- @st.cache_resource(show_spinner=False)
51
- # def save_to_pickle(file_path, final_df):
52
- # # Open the file in write-binary mode and dump the objects
53
- # with open(file_path, "wb") as f:
54
- # pickle.dump({file_path: final_df}, f)
55
-
56
-
57
- def get_random_effects(media_data, panel_col, _mdf):
58
- random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
59
-
60
- for i, market in enumerate(media_data[panel_col].unique()):
61
- print(i, end="\r")
62
- intercept = _mdf.random_effects[market].values[0]
63
- random_eff_df.loc[i, "random_effect"] = intercept
64
- random_eff_df.loc[i, panel_col] = market
65
-
66
- return random_eff_df
67
-
68
-
69
- def mdf_predict(X_df, mdf, random_eff_df):
70
- X = X_df.copy()
71
- X["fixed_effect"] = mdf.predict(X)
72
- X = pd.merge(X, random_eff_df, on=panel_col, how="left")
73
- X["pred"] = X["fixed_effect"] + X["random_effect"]
74
- # X.to_csv('Test/megred_df.csv',index=False)
75
- X.drop(columns=["fixed_effect", "random_effect"], inplace=True)
76
- return X["pred"]
77
-
78
-
79
- st.set_page_config(
80
- page_title="Model Build",
81
- page_icon=":shark:",
82
- layout="wide",
83
- initial_sidebar_state="collapsed",
84
- )
85
-
86
- load_local_css("styles.css")
87
- set_header()
88
-
89
- # Check for authentication status
90
- for k, v in st.session_state.items():
91
- if k not in [
92
- "logout",
93
- "login",
94
- "config",
95
- "model_build_button",
96
- ] and not k.startswith("FormSubmitter"):
97
- st.session_state[k] = v
98
- with open("config.yaml") as file:
99
- config = yaml.load(file, Loader=SafeLoader)
100
- st.session_state["config"] = config
101
- authenticator = stauth.Authenticate(
102
- config["credentials"],
103
- config["cookie"]["name"],
104
- config["cookie"]["key"],
105
- config["cookie"]["expiry_days"],
106
- config["preauthorized"],
107
- )
108
- st.session_state["authenticator"] = authenticator
109
- name, authentication_status, username = authenticator.login("Login", "main")
110
- auth_status = st.session_state.get("authentication_status")
111
-
112
- if auth_status == True:
113
- authenticator.logout("Logout", "main")
114
- is_state_initiaized = st.session_state.get("initialized", False)
115
-
116
- conn = sqlite3.connect(
117
- r"DB/User.db", check_same_thread=False
118
- ) # connection with sql db
119
- c = conn.cursor()
120
-
121
- if not is_state_initiaized:
122
-
123
- if "session_name" not in st.session_state:
124
- st.session_state["session_name"] = None
125
-
126
- if "project_dct" not in st.session_state:
127
- st.error("Please load a project from Home page")
128
- st.stop()
129
-
130
- st.title("1. Build Your Model")
131
-
132
- if not os.path.exists(
133
- os.path.join(st.session_state["project_path"], "data_import.pkl")
134
- ):
135
- st.error("Please move to Data Import Page and save.")
136
- st.stop()
137
- with open(
138
- os.path.join(st.session_state["project_path"], "data_import.pkl"), "rb"
139
- ) as f:
140
- data = pickle.load(f)
141
- st.session_state["bin_dict"] = data["bin_dict"]
142
-
143
- if not os.path.exists(
144
- os.path.join(
145
- st.session_state["project_path"], "final_df_transformed.pkl"
146
- )
147
- ):
148
- st.error(
149
- "Please move to Transformation Page and save transformations."
150
- )
151
- st.stop()
152
- with open(
153
- os.path.join(
154
- st.session_state["project_path"], "final_df_transformed.pkl"
155
- ),
156
- "rb",
157
- ) as f:
158
- data = pickle.load(f)
159
- media_data = data["final_df_transformed"]
160
- #media_data.to_csv("Test/media_data.csv", index=False)
161
- train_idx = int(len(media_data) / 5) * 4
162
- # Sprint4 - available response metrics is a list of all reponse metrics in the data
163
- ## these will be put in a drop down
164
-
165
- st.session_state["media_data"] = media_data
166
-
167
- if "available_response_metrics" not in st.session_state:
168
- # st.session_state['available_response_metrics'] = ['Total Approved Accounts - Revenue',
169
- # 'Total Approved Accounts - Appsflyer',
170
- # 'Account Requests - Appsflyer',
171
- # 'App Installs - Appsflyer']
172
-
173
- st.session_state["available_response_metrics"] = st.session_state[
174
- "bin_dict"
175
- ]["Response Metrics"]
176
- # Sprint4
177
- if "is_tuned_model" not in st.session_state:
178
- st.session_state["is_tuned_model"] = {}
179
- for resp_metric in st.session_state["available_response_metrics"]:
180
- resp_metric = (
181
- resp_metric.lower()
182
- .replace(" ", "_")
183
- .replace("-", "")
184
- .replace(":", "")
185
- .replace("__", "_")
186
- )
187
- st.session_state["is_tuned_model"][resp_metric] = False
188
-
189
- # Sprint4 - used_response_metrics is a list of resp metrics for which user has created & saved a model
190
- if "used_response_metrics" not in st.session_state:
191
- st.session_state["used_response_metrics"] = []
192
-
193
- # Sprint4 - saved_model_names
194
- if "saved_model_names" not in st.session_state:
195
- st.session_state["saved_model_names"] = []
196
-
197
- if "Model" not in st.session_state:
198
- if (
199
- "session_state_saved"
200
- in st.session_state["project_dct"]["model_build"].keys()
201
- and st.session_state["project_dct"]["model_build"][
202
- "session_state_saved"
203
- ]
204
- is not None
205
- and "Model"
206
- in st.session_state["project_dct"]["model_build"][
207
- "session_state_saved"
208
- ].keys()
209
- ):
210
- st.session_state["Model"] = st.session_state["project_dct"][
211
- "model_build"
212
- ]["session_state_saved"]["Model"]
213
- else:
214
- st.session_state["Model"] = {}
215
-
216
- # Sprint4 - select a response metric
217
- default_target_idx = (
218
- st.session_state["project_dct"]["model_build"].get(
219
- "sel_target_col", None
220
- )
221
- if st.session_state["project_dct"]["model_build"].get(
222
- "sel_target_col", None
223
- )
224
- is not None
225
- else st.session_state["available_response_metrics"][0]
226
- )
227
-
228
- sel_target_col = st.selectbox(
229
- "Select the response metric",
230
- st.session_state["available_response_metrics"],
231
- index=st.session_state["available_response_metrics"].index(
232
- default_target_idx
233
- ),
234
- )
235
- # , on_change=reset_save())
236
- st.session_state["project_dct"]["model_build"][
237
- "sel_target_col"
238
- ] = sel_target_col
239
-
240
- target_col = (
241
- sel_target_col.lower()
242
- .replace(" ", "_")
243
- .replace("-", "")
244
- .replace(":", "")
245
- .replace("__", "_")
246
- )
247
- new_name_dct = {
248
- col: col.lower()
249
- .replace(".", "_")
250
- .lower()
251
- .replace("@", "_")
252
- .replace(" ", "_")
253
- .replace("-", "")
254
- .replace(":", "")
255
- .replace("__", "_")
256
- for col in media_data.columns
257
- }
258
- media_data.columns = [
259
- col.lower()
260
- .replace(".", "_")
261
- .replace("@", "_")
262
- .replace(" ", "_")
263
- .replace("-", "")
264
- .replace(":", "")
265
- .replace("__", "_")
266
- for col in media_data.columns
267
- ]
268
- panel_col = [
269
- col.lower()
270
- .replace(".", "_")
271
- .replace("@", "_")
272
- .replace(" ", "_")
273
- .replace("-", "")
274
- .replace(":", "")
275
- .replace("__", "_")
276
- for col in st.session_state["bin_dict"]["Panel Level 1"]
277
- ][
278
- 0
279
- ] # set the panel column
280
- date_col = "date"
281
-
282
- is_panel = True if len(panel_col) > 0 else False
283
-
284
- if "is_panel" not in st.session_state:
285
- st.session_state["is_panel"] = is_panel
286
-
287
- if is_panel:
288
- media_data.sort_values([date_col, panel_col], inplace=True)
289
- else:
290
- media_data.sort_values(date_col, inplace=True)
291
-
292
- media_data.reset_index(drop=True, inplace=True)
293
-
294
- date = media_data[date_col]
295
- st.session_state["date"] = date
296
- y = media_data[target_col]
297
-
298
- if is_panel:
299
- spends_data = media_data[
300
- [
301
- c
302
- for c in media_data.columns
303
- if "_cost" in c.lower() or "_spend" in c.lower()
304
- ]
305
- + [date_col, panel_col]
306
- ]
307
- # Sprint3 - spends for resp curves
308
- else:
309
- spends_data = media_data[
310
- [
311
- c
312
- for c in media_data.columns
313
- if "_cost" in c.lower() or "_spend" in c.lower()
314
- ]
315
- + [date_col]
316
- ]
317
-
318
- y = media_data[target_col]
319
- media_data.drop([date_col], axis=1, inplace=True)
320
- media_data.reset_index(drop=True, inplace=True)
321
-
322
- columns = st.columns(2)
323
-
324
- old_shape = media_data.shape
325
-
326
- if "old_shape" not in st.session_state:
327
- st.session_state["old_shape"] = old_shape
328
-
329
- if "media_data" not in st.session_state:
330
- st.session_state["media_data"] = pd.DataFrame()
331
-
332
- # Sprint3
333
- if "orig_media_data" not in st.session_state:
334
- st.session_state["orig_media_data"] = pd.DataFrame()
335
-
336
- # Sprint3 additions
337
- if "random_effects" not in st.session_state:
338
- st.session_state["random_effects"] = pd.DataFrame()
339
- if "pred_train" not in st.session_state:
340
- st.session_state["pred_train"] = []
341
- if "pred_test" not in st.session_state:
342
- st.session_state["pred_test"] = []
343
- # end of Sprint3 additions
344
-
345
- # Section 3 - Create combinations
346
-
347
- # bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','FB: Level Achieved - Tier 1 Impressions',
348
- # ' FB: Level Achieved - Tier 2 Impressions','paid_social_others',
349
- # ' GA App: Will And Cid Pequena Baixo Risco Clicks',
350
- # 'digital_tactic_others',"programmatic"
351
- # ]
352
-
353
- # srishti - bucket names changed
354
- bucket = [
355
- "paid_search",
356
- "kwai",
357
- "indicacao",
358
- "infleux",
359
- "influencer",
360
- "fb_level_achieved_tier_2",
361
- "fb_level_achieved_tier_1",
362
- "paid_social_others",
363
- "ga_app",
364
- "digital_tactic_others",
365
- "programmatic",
366
- ]
367
-
368
- # with columns[0]:
369
- # if st.button('Create Combinations of Variables'):
370
-
371
- top_3_correlated_features = []
372
- # # for col in st.session_state['media_data'].columns[:19]:
373
- # original_cols = [c for c in st.session_state['media_data'].columns if
374
- # "_clicks" in c.lower() or "_impressions" in c.lower()]
375
- # original_cols = [c for c in original_cols if "_lag" not in c.lower() and "_adstock" not in c.lower()]
376
-
377
- original_cols = (
378
- st.session_state["bin_dict"]["Media"]
379
- + st.session_state["bin_dict"]["Internal"]
380
- )
381
-
382
- original_cols = [
383
- col.lower()
384
- .replace(".", "_")
385
- .replace("@", "_")
386
- .replace(" ", "_")
387
- .replace("-", "")
388
- .replace(":", "")
389
- .replace("__", "_")
390
- for col in original_cols
391
- ]
392
- original_cols = [col for col in original_cols if "_cost" not in col]
393
- # for col in st.session_state['media_data'].columns[:19]:
394
- for col in original_cols: # srishti - new
395
- corr_df = (
396
- pd.concat(
397
- [st.session_state["media_data"].filter(regex=col), y], axis=1
398
- )
399
- .corr()[target_col]
400
- .iloc[:-1]
401
- )
402
- top_3_correlated_features.append(
403
- list(corr_df.sort_values(ascending=False).head(2).index)
404
- )
405
- flattened_list = [
406
- item for sublist in top_3_correlated_features for item in sublist
407
- ]
408
- # all_features_set={var:[col for col in flattened_list if var in col] for var in bucket}
409
- all_features_set = {
410
- var: [col for col in flattened_list if var in col]
411
- for var in bucket
412
- if len([col for col in flattened_list if var in col]) > 0
413
- } # srishti
414
- channels_all = [values for values in all_features_set.values()]
415
- st.session_state["combinations"] = list(itertools.product(*channels_all))
416
- # if 'combinations' not in st.session_state:
417
- # st.session_state['combinations']=combinations_all
418
-
419
- st.session_state["final_selection"] = st.session_state["combinations"]
420
- # st.success('Created combinations')
421
-
422
- # revenue.reset_index(drop=True,inplace=True)
423
- y.reset_index(drop=True, inplace=True)
424
- if "Model_results" not in st.session_state:
425
- st.session_state["Model_results"] = {
426
- "Model_object": [],
427
- "Model_iteration": [],
428
- "Feature_set": [],
429
- "MAPE": [],
430
- "R2": [],
431
- "ADJR2": [],
432
- "pos_count": [],
433
- }
434
-
435
- def reset_model_result_dct():
436
- st.session_state["Model_results"] = {
437
- "Model_object": [],
438
- "Model_iteration": [],
439
- "Feature_set": [],
440
- "MAPE": [],
441
- "R2": [],
442
- "ADJR2": [],
443
- "pos_count": [],
444
- }
445
-
446
- # if st.button('Build Model'):
447
-
448
- if "iterations" not in st.session_state:
449
- st.session_state["iterations"] = 0
450
-
451
- if "final_selection" not in st.session_state:
452
- st.session_state["final_selection"] = False
453
-
454
- save_path = r"Model/"
455
- if st.session_state["final_selection"]:
456
- st.write(
457
- f'Total combinations created {format_numbers(len(st.session_state["final_selection"]))}'
458
- )
459
-
460
- # st.session_state["project_dct"]["model_build"]["all_iters_check"] = False
461
-
462
- checkbox_default = (
463
- st.session_state["project_dct"]["model_build"]["all_iters_check"]
464
- if st.session_state["project_dct"]["model_build"]["all_iters_check"]
465
- is not None
466
- else False
467
- )
468
-
469
- if st.checkbox("Build all iterations", value=checkbox_default):
470
- # st.session_state["project_dct"]["model_build"]["all_iters_check"]
471
- iterations = len(st.session_state["final_selection"])
472
- st.session_state["project_dct"]["model_build"][
473
- "all_iters_check"
474
- ] = True
475
-
476
- else:
477
- iterations = st.number_input(
478
- "Select the number of iterations to perform",
479
- min_value=0,
480
- step=100,
481
- value=st.session_state["iterations"],
482
- on_change=reset_model_result_dct,
483
- )
484
- st.session_state["project_dct"]["model_build"][
485
- "all_iters_check"
486
- ] = False
487
- st.session_state["project_dct"]["model_build"][
488
- "iterations"
489
- ] = iterations
490
-
491
- # st.stop()
492
-
493
- # build_button = st.session_state["project_dct"]["model_build"]["build_button"] if \
494
- # "build_button" in st.session_state["project_dct"]["model_build"].keys() else False
495
- # model_button =st.button('Build Model', on_click=reset_model_result_dct, key='model_build_button')
496
- # if
497
- # if model_button:
498
- if st.button(
499
- "Build Model",
500
- on_click=reset_model_result_dct,
501
- key="model_build_button",
502
- ):
503
- if iterations < 1:
504
- st.error("Please select number of iterations")
505
- st.stop()
506
- st.session_state["project_dct"]["model_build"]["build_button"] = True
507
- st.session_state["iterations"] = iterations
508
-
509
- # Section 4 - Model
510
- # st.session_state['media_data'] = st.session_state['media_data'].fillna(method='ffill')
511
- st.session_state["media_data"] = st.session_state["media_data"].ffill()
512
- st.markdown(
513
- "Data Split -- Training Period: May 9th, 2023 - October 5th,2023 , Testing Period: October 6th, 2023 - November 7th, 2023 "
514
- )
515
- progress_bar = st.progress(0) # Initialize the progress bar
516
- # time_remaining_text = st.empty() # Create an empty space for time remaining text
517
- start_time = time.time() # Record the start time
518
- progress_text = st.empty()
519
-
520
- # time_elapsed_text = st.empty()
521
- # for i, selected_features in enumerate(st.session_state["final_selection"][40000:40000 + int(iterations)]):
522
- # for i, selected_features in enumerate(st.session_state["final_selection"]):
523
-
524
- if is_panel == True:
525
- for i, selected_features in enumerate(
526
- st.session_state["final_selection"][0 : int(iterations)]
527
- ): # srishti
528
- df = st.session_state["media_data"]
529
-
530
- fet = [var for var in selected_features if len(var) > 0]
531
- inp_vars_str = " + ".join(fet) # new
532
-
533
- X = df[fet]
534
- y = df[target_col]
535
- ss = MinMaxScaler()
536
- X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
537
-
538
- X[target_col] = y # Sprint2
539
- X[panel_col] = df[panel_col] # Sprint2
540
-
541
- X_train = X.iloc[:train_idx]
542
- X_test = X.iloc[train_idx:]
543
- y_train = y.iloc[:train_idx]
544
- y_test = y.iloc[train_idx:]
545
-
546
- print(X_train.shape)
547
- # model = sm.OLS(y_train, X_train).fit()
548
- md_str = target_col + " ~ " + inp_vars_str
549
- # md = smf.mixedlm("total_approved_accounts_revenue ~ {}".format(inp_vars_str),
550
- # data=X_train[[target_col] + fet],
551
- # groups=X_train[panel_col])
552
- md = smf.mixedlm(
553
- md_str,
554
- data=X_train[[target_col] + fet],
555
- groups=X_train[panel_col],
556
- )
557
- mdf = md.fit()
558
- predicted_values = mdf.fittedvalues
559
-
560
- coefficients = mdf.fe_params.to_dict()
561
- model_positive = [
562
- col for col in coefficients.keys() if coefficients[col] > 0
563
- ]
564
-
565
- pvalues = [var for var in list(mdf.pvalues) if var <= 0.06]
566
-
567
- if (len(model_positive) / len(selected_features)) > 0 and (
568
- len(pvalues) / len(selected_features)
569
- ) >= 0: # srishti - changed just for testing, revert later
570
- # predicted_values = model.predict(X_train)
571
- mape = mean_absolute_percentage_error(
572
- y_train, predicted_values
573
- )
574
- r2 = r2_score(y_train, predicted_values)
575
- adjr2 = 1 - (1 - r2) * (len(y_train) - 1) / (
576
- len(y_train) - len(selected_features) - 1
577
- )
578
-
579
- filename = os.path.join(save_path, f"model_{i}.pkl")
580
- with open(filename, "wb") as f:
581
- pickle.dump(mdf, f)
582
- # with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
583
- # model = pickle.load(file)
584
-
585
- st.session_state["Model_results"]["Model_object"].append(
586
- filename
587
- )
588
- st.session_state["Model_results"][
589
- "Model_iteration"
590
- ].append(i)
591
- st.session_state["Model_results"]["Feature_set"].append(
592
- fet
593
- )
594
- st.session_state["Model_results"]["MAPE"].append(mape)
595
- st.session_state["Model_results"]["R2"].append(r2)
596
- st.session_state["Model_results"]["pos_count"].append(
597
- len(model_positive)
598
- )
599
- st.session_state["Model_results"]["ADJR2"].append(adjr2)
600
-
601
- current_time = time.time()
602
- time_taken = current_time - start_time
603
- time_elapsed_minutes = time_taken / 60
604
- completed_iterations_text = f"{i + 1}/{iterations}"
605
- progress_bar.progress((i + 1) / int(iterations))
606
- progress_text.text(
607
- f"Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}"
608
- )
609
- st.write(
610
- f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models'
611
- )
612
-
613
- else:
614
-
615
- for i, selected_features in enumerate(
616
- st.session_state["final_selection"][0 : int(iterations)]
617
- ): # srishti
618
- df = st.session_state["media_data"]
619
-
620
- fet = [var for var in selected_features if len(var) > 0]
621
- inp_vars_str = " + ".join(fet)
622
-
623
- X = df[fet]
624
- y = df[target_col]
625
- ss = MinMaxScaler()
626
- X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
627
- X = sm.add_constant(X)
628
- X_train = X.iloc[:130]
629
- X_test = X.iloc[130:]
630
- y_train = y.iloc[:130]
631
- y_test = y.iloc[130:]
632
-
633
- model = sm.OLS(y_train, X_train).fit()
634
-
635
- coefficients = model.params.to_list()
636
- model_positive = [coef for coef in coefficients if coef > 0]
637
- predicted_values = model.predict(X_train)
638
- pvalues = [var for var in list(model.pvalues) if var <= 0.06]
639
-
640
- # if (len(model_possitive) / len(selected_features)) > 0.9 and (len(pvalues) / len(selected_features)) >= 0.8:
641
- if (len(model_positive) / len(selected_features)) > 0 and (
642
- len(pvalues) / len(selected_features)
643
- ) >= 0.5: # srishti - changed just for testing, revert later VALID MODEL CRITERIA
644
- # predicted_values = model.predict(X_train)
645
- mape = mean_absolute_percentage_error(
646
- y_train, predicted_values
647
- )
648
- adjr2 = model.rsquared_adj
649
- r2 = model.rsquared
650
-
651
- filename = os.path.join(save_path, f"model_{i}.pkl")
652
- with open(filename, "wb") as f:
653
- pickle.dump(model, f)
654
- # with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
655
- # model = pickle.load(file)
656
-
657
- st.session_state["Model_results"]["Model_object"].append(
658
- filename
659
- )
660
- st.session_state["Model_results"][
661
- "Model_iteration"
662
- ].append(i)
663
- st.session_state["Model_results"]["Feature_set"].append(
664
- fet
665
- )
666
- st.session_state["Model_results"]["MAPE"].append(mape)
667
- st.session_state["Model_results"]["R2"].append(r2)
668
- st.session_state["Model_results"]["ADJR2"].append(adjr2)
669
- st.session_state["Model_results"]["pos_count"].append(
670
- len(model_positive)
671
- )
672
-
673
- current_time = time.time()
674
- time_taken = current_time - start_time
675
- time_elapsed_minutes = time_taken / 60
676
- completed_iterations_text = f"{i + 1}/{iterations}"
677
- progress_bar.progress((i + 1) / int(iterations))
678
- progress_text.text(
679
- f"Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}"
680
- )
681
- st.write(
682
- f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models'
683
- )
684
-
685
- pd.DataFrame(st.session_state["Model_results"]).to_csv(
686
- "model_output.csv"
687
- )
688
-
689
- def to_percentage(value):
690
- return f"{value * 100:.1f}%"
691
-
692
- ## Section 5 - Select Model
693
- st.title("2. Select Models")
694
- show_results_defualt = (
695
- st.session_state["project_dct"]["model_build"]["show_results_check"]
696
- if st.session_state["project_dct"]["model_build"]["show_results_check"]
697
- is not None
698
- else False
699
- )
700
- if "tick" not in st.session_state:
701
- st.session_state["tick"] = False
702
- if st.checkbox(
703
- "Show results of top 10 models (based on MAPE and Adj. R2)",
704
- value=show_results_defualt,
705
- ):
706
- st.session_state["project_dct"]["model_build"][
707
- "show_results_check"
708
- ] = True
709
- st.session_state["tick"] = True
710
- st.write(
711
- "Select one model iteration to generate performance metrics for it:"
712
- )
713
- data = pd.DataFrame(st.session_state["Model_results"])
714
- data = data[data["pos_count"] == data["pos_count"].max()].reset_index(
715
- drop=True
716
- ) # Sprint4 -- Srishti -- only show models with the lowest num of neg coeffs
717
- data.sort_values(by=["ADJR2"], ascending=False, inplace=True)
718
- data.drop_duplicates(subset="Model_iteration", inplace=True)
719
- top_10 = data.head(10)
720
- top_10["Rank"] = np.arange(1, len(top_10) + 1, 1)
721
- top_10[["MAPE", "R2", "ADJR2"]] = np.round(
722
- top_10[["MAPE", "R2", "ADJR2"]], 4
723
- ).applymap(to_percentage)
724
- top_10_table = top_10[
725
- ["Rank", "Model_iteration", "MAPE", "ADJR2", "R2"]
726
- ]
727
- # top_10_table.columns=[['Rank','Model Iteration Index','MAPE','Adjusted R2','R2']]
728
- gd = GridOptionsBuilder.from_dataframe(top_10_table)
729
- gd.configure_pagination(enabled=True)
730
-
731
- gd.configure_selection(
732
- use_checkbox=True,
733
- selection_mode="single",
734
- pre_select_all_rows=False,
735
- pre_selected_rows=[1],
736
- )
737
-
738
- gridoptions = gd.build()
739
-
740
- table = AgGrid(
741
- top_10,
742
- gridOptions=gridoptions,
743
- update_mode=GridUpdateMode.SELECTION_CHANGED,
744
- )
745
-
746
- selected_rows = table.selected_rows
747
- # if st.session_state["selected_rows"] != selected_rows:
748
- # st.session_state["build_rc_cb"] = False
749
- st.session_state["selected_rows"] = selected_rows
750
-
751
- # Section 6 - Display Results
752
-
753
- if len(selected_rows) > 0:
754
- st.header("2.1 Results Summary")
755
-
756
- model_object = data[
757
- data["Model_iteration"] == selected_rows[0]["Model_iteration"]
758
- ]["Model_object"]
759
- features_set = data[
760
- data["Model_iteration"] == selected_rows[0]["Model_iteration"]
761
- ]["Feature_set"]
762
-
763
- with open(str(model_object.values[0]), "rb") as file:
764
- # print(file)
765
- model = pickle.load(file)
766
- st.write(model.summary())
767
- st.header("2.2 Actual vs. Predicted Plot")
768
-
769
- if is_panel:
770
- df = st.session_state["media_data"]
771
- X = df[features_set.values[0]]
772
- y = df[target_col]
773
-
774
- ss = MinMaxScaler()
775
- X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
776
-
777
- # Sprint2 changes
778
- X[target_col] = y # new
779
- X[panel_col] = df[panel_col]
780
- X[date_col] = date
781
-
782
- X_train = X.iloc[:train_idx]
783
- X_test = X.iloc[train_idx:].reset_index(drop=True)
784
- y_train = y.iloc[:train_idx]
785
- y_test = y.iloc[train_idx:].reset_index(drop=True)
786
-
787
- test_spends = spends_data[
788
- train_idx:
789
- ] # Sprint3 - test spends for resp curves
790
- random_eff_df = get_random_effects(
791
- media_data, panel_col, model
792
- )
793
- train_pred = model.fittedvalues
794
- test_pred = mdf_predict(X_test, model, random_eff_df)
795
- print("__" * 20, test_pred.isna().sum())
796
-
797
- else:
798
- df = st.session_state["media_data"]
799
- X = df[features_set.values[0]]
800
- y = df[target_col]
801
-
802
- ss = MinMaxScaler()
803
- X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
804
- X = sm.add_constant(X)
805
-
806
- X[date_col] = date
807
-
808
- X_train = X.iloc[:130]
809
- X_test = X.iloc[130:].reset_index(drop=True)
810
- y_train = y.iloc[:130]
811
- y_test = y.iloc[130:].reset_index(drop=True)
812
-
813
- test_spends = spends_data[
814
- 130:
815
- ] # Sprint3 - test spends for resp curves
816
- train_pred = model.predict(
817
- X_train[features_set.values[0] + ["const"]]
818
- )
819
- test_pred = model.predict(
820
- X_test[features_set.values[0] + ["const"]]
821
- )
822
-
823
- # save x test to test - srishti
824
- # x_test_to_save = X_test.copy()
825
- # x_test_to_save['Actuals'] = y_test
826
- # x_test_to_save['Predictions'] = test_pred
827
- #
828
- # x_train_to_save = X_train.copy()
829
- # x_train_to_save['Actuals'] = y_train
830
- # x_train_to_save['Predictions'] = train_pred
831
- #
832
- # x_train_to_save.to_csv('Test/x_train_to_save.csv', index=False)
833
- # x_test_to_save.to_csv('Test/x_test_to_save.csv', index=False)
834
-
835
- st.session_state["X"] = X_train
836
- st.session_state["features_set"] = features_set.values[0]
837
- print(
838
- "**" * 20, "selected model features : ", features_set.values[0]
839
- )
840
- metrics_table, line, actual_vs_predicted_plot = (
841
- plot_actual_vs_predicted(
842
- X_train[date_col],
843
- y_train,
844
- train_pred,
845
- model,
846
- target_column=sel_target_col,
847
- is_panel=is_panel,
848
- )
849
- ) # Sprint2
850
-
851
- st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
852
-
853
- st.markdown("## 2.3 Residual Analysis")
854
- columns = st.columns(2)
855
- with columns[0]:
856
- fig = plot_residual_predicted(
857
- y_train, train_pred, X_train
858
- ) # Sprint2
859
- st.plotly_chart(fig)
860
-
861
- with columns[1]:
862
- st.empty()
863
- fig = qqplot(y_train, train_pred) # Sprint2
864
- st.plotly_chart(fig)
865
-
866
- with columns[0]:
867
- fig = residual_distribution(y_train, train_pred) # Sprint2
868
- st.pyplot(fig)
869
-
870
- vif_data = pd.DataFrame()
871
- # X=X.drop('const',axis=1)
872
- X_train_orig = (
873
- X_train.copy()
874
- ) # Sprint2 -- creating a copy of xtrain. Later deleting panel, target & date from xtrain
875
- del_col_list = list(
876
- set([target_col, panel_col, date_col]).intersection(
877
- set(X_train.columns)
878
- )
879
- )
880
- X_train.drop(columns=del_col_list, inplace=True) # Sprint2
881
-
882
- vif_data["Variable"] = X_train.columns
883
- vif_data["VIF"] = [
884
- variance_inflation_factor(X_train.values, i)
885
- for i in range(X_train.shape[1])
886
- ]
887
- vif_data.sort_values(by=["VIF"], ascending=False, inplace=True)
888
- vif_data = np.round(vif_data)
889
- vif_data["VIF"] = vif_data["VIF"].astype(float)
890
- st.header("2.4 Variance Inflation Factor (VIF)")
891
- # st.dataframe(vif_data)
892
- color_mapping = {
893
- "darkgreen": (vif_data["VIF"] < 3),
894
- "orange": (vif_data["VIF"] >= 3) & (vif_data["VIF"] <= 10),
895
- "darkred": (vif_data["VIF"] > 10),
896
- }
897
-
898
- # Create a horizontal bar plot
899
- fig, ax = plt.subplots()
900
- fig.set_figwidth(10) # Adjust the width of the figure as needed
901
-
902
- # Sort the bars by descending VIF values
903
- vif_data = vif_data.sort_values(by="VIF", ascending=False)
904
-
905
- # Iterate through the color mapping and plot bars with corresponding colors
906
- for color, condition in color_mapping.items():
907
- subset = vif_data[condition]
908
- bars = ax.barh(
909
- subset["Variable"], subset["VIF"], color=color, label=color
910
- )
911
-
912
- # Add text annotations on top of the bars
913
- for bar in bars:
914
- width = bar.get_width()
915
- ax.annotate(
916
- f"{width:}",
917
- xy=(width, bar.get_y() + bar.get_height() / 2),
918
- xytext=(5, 0),
919
- textcoords="offset points",
920
- va="center",
921
- )
922
-
923
- # Customize the plot
924
- ax.set_xlabel("VIF Values")
925
- # ax.set_title('2.4 Variance Inflation Factor (VIF)')
926
- # ax.legend(loc='upper right')
927
-
928
- # Display the plot in Streamlit
929
- st.pyplot(fig)
930
-
931
- with st.expander("Results Summary Test data"):
932
- # ss = MinMaxScaler()
933
- # X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.columns)
934
- st.header("2.2 Actual vs. Predicted Plot")
935
-
936
- metrics_table, line, actual_vs_predicted_plot = (
937
- plot_actual_vs_predicted(
938
- X_test[date_col],
939
- y_test,
940
- test_pred,
941
- model,
942
- target_column=sel_target_col,
943
- is_panel=is_panel,
944
- )
945
- ) # Sprint2
946
-
947
- st.plotly_chart(
948
- actual_vs_predicted_plot, use_container_width=True
949
- )
950
-
951
- st.markdown("## 2.3 Residual Analysis")
952
- columns = st.columns(2)
953
- with columns[0]:
954
- fig = plot_residual_predicted(
955
- y, test_pred, X_test
956
- ) # Sprint2
957
- st.plotly_chart(fig)
958
-
959
- with columns[1]:
960
- st.empty()
961
- fig = qqplot(y, test_pred) # Sprint2
962
- st.plotly_chart(fig)
963
-
964
- with columns[0]:
965
- fig = residual_distribution(y, test_pred) # Sprint2
966
- st.pyplot(fig)
967
-
968
- value = False
969
- save_button_model = st.checkbox(
970
- "Save this model to tune", key="build_rc_cb"
971
- ) # , on_click=set_save())
972
-
973
- if save_button_model:
974
- mod_name = st.text_input("Enter model name")
975
- if len(mod_name) > 0:
976
- mod_name = (
977
- mod_name + "__" + target_col
978
- ) # Sprint4 - adding target col to model name
979
- if is_panel:
980
- pred_train = model.fittedvalues
981
- pred_test = mdf_predict(X_test, model, random_eff_df)
982
- else:
983
- st.session_state["features_set"] = st.session_state[
984
- "features_set"
985
- ] + ["const"]
986
- pred_train = model.predict(
987
- X_train_orig[st.session_state["features_set"]]
988
- )
989
- pred_test = model.predict(
990
- X_test[st.session_state["features_set"]]
991
- )
992
-
993
- st.session_state["Model"][mod_name] = {
994
- "Model_object": model,
995
- "feature_set": st.session_state["features_set"],
996
- "X_train": X_train_orig,
997
- "X_test": X_test,
998
- "y_train": y_train,
999
- "y_test": y_test,
1000
- "pred_train": pred_train,
1001
- "pred_test": pred_test,
1002
- }
1003
- st.session_state["X_train"] = X_train_orig
1004
- st.session_state["X_test_spends"] = test_spends
1005
- st.session_state["saved_model_names"].append(mod_name)
1006
- # Sprint3 additions
1007
- if is_panel:
1008
- random_eff_df = get_random_effects(
1009
- media_data, panel_col, model
1010
- )
1011
- st.session_state["random_effects"] = random_eff_df
1012
-
1013
- with open(
1014
- os.path.join(
1015
- st.session_state["project_path"], "best_models.pkl"
1016
- ),
1017
- "wb",
1018
- ) as f:
1019
- pickle.dump(st.session_state["Model"], f)
1020
- st.success(
1021
- mod_name
1022
- + " model saved! Proceed to the next page to tune the model"
1023
- )
1024
-
1025
- urm = st.session_state["used_response_metrics"]
1026
- urm.append(sel_target_col)
1027
- st.session_state["used_response_metrics"] = list(
1028
- set(urm)
1029
- )
1030
- mod_name = ""
1031
- # Sprint4 - add the formatted name of the target col to used resp metrics
1032
- value = False
1033
-
1034
- st.session_state["project_dct"]["model_build"][
1035
- "session_state_saved"
1036
- ] = {}
1037
- for key in [
1038
- "Model",
1039
- "bin_dict",
1040
- "used_response_metrics",
1041
- "date",
1042
- "saved_model_names",
1043
- "media_data",
1044
- "X_test_spends",
1045
- ]:
1046
- st.session_state["project_dct"]["model_build"][
1047
- "session_state_saved"
1048
- ][key] = st.session_state[key]
1049
-
1050
- project_dct_path = os.path.join(
1051
- st.session_state["project_path"], "project_dct.pkl"
1052
- )
1053
- with open(project_dct_path, "wb") as f:
1054
- pickle.dump(st.session_state["project_dct"], f)
1055
-
1056
- update_db("4_Model_Build.py")
1057
-
1058
- st.toast("💾 Saved Successfully!")
1059
- else:
1060
- st.session_state["project_dct"]["model_build"][
1061
- "show_results_check"
1062
- ] = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/5_Model_Tuning.py DELETED
@@ -1,912 +0,0 @@
1
- """
2
- MMO Build Sprint 3
3
- date :
4
- changes : capability to tune MixedLM as well as simple LR in the same page
5
- """
6
-
7
- import os
8
-
9
- import streamlit as st
10
- import pandas as pd
11
- from Eda_functions import format_numbers
12
- import pickle
13
- from utilities import set_header, load_local_css
14
- import statsmodels.api as sm
15
- import re
16
- from sklearn.preprocessing import MinMaxScaler
17
- import matplotlib.pyplot as plt
18
- from statsmodels.stats.outliers_influence import variance_inflation_factor
19
- import yaml
20
- from yaml import SafeLoader
21
- import streamlit_authenticator as stauth
22
-
23
- st.set_option("deprecation.showPyplotGlobalUse", False)
24
- import statsmodels.formula.api as smf
25
- from Data_prep_functions import *
26
- import sqlite3
27
- from utilities import update_db
28
-
29
- # for i in ["model_tuned", "X_train_tuned", "X_test_tuned", "tuned_model_features", "tuned_model", "tuned_model_dict"] :
30
-
31
- st.set_page_config(
32
- page_title="Model Tuning",
33
- page_icon=":shark:",
34
- layout="wide",
35
- initial_sidebar_state="collapsed",
36
- )
37
- load_local_css("styles.css")
38
- set_header()
39
- # Check for authentication status
40
- for k, v in st.session_state.items():
41
- # print(k, v)
42
- if k not in [
43
- "logout",
44
- "login",
45
- "config",
46
- "build_tuned_model",
47
- ] and not k.startswith("FormSubmitter"):
48
- st.session_state[k] = v
49
- with open("config.yaml") as file:
50
- config = yaml.load(file, Loader=SafeLoader)
51
- st.session_state["config"] = config
52
- authenticator = stauth.Authenticate(
53
- config["credentials"],
54
- config["cookie"]["name"],
55
- config["cookie"]["key"],
56
- config["cookie"]["expiry_days"],
57
- config["preauthorized"],
58
- )
59
- st.session_state["authenticator"] = authenticator
60
- name, authentication_status, username = authenticator.login("Login", "main")
61
- auth_status = st.session_state.get("authentication_status")
62
-
63
- if auth_status == True:
64
- authenticator.logout("Logout", "main")
65
- is_state_initiaized = st.session_state.get("initialized", False)
66
-
67
- if "project_dct" not in st.session_state:
68
- st.error("Please load a project from Home page")
69
- st.stop()
70
-
71
- if not os.path.exists(
72
- os.path.join(st.session_state["project_path"], "best_models.pkl")
73
- ):
74
- st.error("Please save a model before tuning")
75
- st.stop()
76
-
77
- conn = sqlite3.connect(
78
- r"DB/User.db", check_same_thread=False
79
- ) # connection with sql db
80
- c = conn.cursor()
81
-
82
- if not is_state_initiaized:
83
- if "session_name" not in st.session_state:
84
- st.session_state["session_name"] = None
85
-
86
- if (
87
- "session_state_saved"
88
- in st.session_state["project_dct"]["model_build"].keys()
89
- ):
90
- for key in [
91
- "Model",
92
- "date",
93
- "saved_model_names",
94
- "media_data",
95
- "X_test_spends",
96
- ]:
97
- if key not in st.session_state:
98
- st.session_state[key] = st.session_state["project_dct"][
99
- "model_build"
100
- ]["session_state_saved"][key]
101
- st.session_state["bin_dict"] = st.session_state["project_dct"][
102
- "model_build"
103
- ]["session_state_saved"]["bin_dict"]
104
- if (
105
- "used_response_metrics" not in st.session_state
106
- or st.session_state["used_response_metrics"] == []
107
- ):
108
- st.session_state["used_response_metrics"] = st.session_state[
109
- "project_dct"
110
- ]["model_build"]["session_state_saved"][
111
- "used_response_metrics"
112
- ]
113
- else:
114
- st.error("Please load a session with a built model")
115
- st.stop()
116
-
117
- # if 'sel_model' not in st.session_state["project_dct"]["model_tuning"].keys():
118
- # st.session_state["project_dct"]["model_tuning"]['sel_model']= {}
119
-
120
- for key in ["select_all_flags_check", "selected_flags", "sel_model"]:
121
- if key not in st.session_state["project_dct"]["model_tuning"].keys():
122
- st.session_state["project_dct"]["model_tuning"][key] = {}
123
- # Sprint3
124
- # is_panel = st.session_state['is_panel']
125
- # panel_col = 'markets' # set the panel column
126
- date_col = "date"
127
-
128
- panel_col = [
129
- col.lower()
130
- .replace(".", "_")
131
- .replace("@", "_")
132
- .replace(" ", "_")
133
- .replace("-", "")
134
- .replace(":", "")
135
- .replace("__", "_")
136
- for col in st.session_state["bin_dict"]["Panel Level 1"]
137
- ][
138
- 0
139
- ] # set the panel column
140
- is_panel = True if len(panel_col) > 0 else False
141
-
142
- # flag indicating there is not tuned model till now
143
-
144
- # Sprint4 - model tuned dict
145
- if "Model_Tuned" not in st.session_state:
146
- st.session_state["Model_Tuned"] = {}
147
-
148
- st.title("1. Model Tuning")
149
-
150
- if "is_tuned_model" not in st.session_state:
151
- st.session_state["is_tuned_model"] = {}
152
- # Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
153
- if (
154
- "used_response_metrics" in st.session_state
155
- and st.session_state["used_response_metrics"] != []
156
- ):
157
- default_target_idx = (
158
- st.session_state["project_dct"]["model_tuning"].get(
159
- "sel_target_col", None
160
- )
161
- if st.session_state["project_dct"]["model_tuning"].get(
162
- "sel_target_col", None
163
- )
164
- is not None
165
- else st.session_state["used_response_metrics"][0]
166
- )
167
- sel_target_col = st.selectbox(
168
- "Select the response metric",
169
- st.session_state["used_response_metrics"],
170
- index=st.session_state["used_response_metrics"].index(
171
- default_target_idx
172
- ),
173
- )
174
- target_col = (
175
- sel_target_col.lower()
176
- .replace(" ", "_")
177
- .replace("-", "")
178
- .replace(":", "")
179
- .replace("__", "_")
180
- )
181
- st.session_state["project_dct"]["model_tuning"][
182
- "sel_target_col"
183
- ] = sel_target_col
184
-
185
- else:
186
- sel_target_col = "Total Approved Accounts - Revenue"
187
- target_col = "total_approved_accounts_revenue"
188
-
189
- # Sprint4 - Look through all saved models, only show saved models of the sel resp metric (target_col)
190
- # saved_models = st.session_state['saved_model_names']
191
- with open(
192
- os.path.join(st.session_state["project_path"], "best_models.pkl"), "rb"
193
- ) as file:
194
- model_dict = pickle.load(file)
195
-
196
- saved_models = model_dict.keys()
197
- required_saved_models = [
198
- m.split("__")[0]
199
- for m in saved_models
200
- if m.split("__")[1] == target_col
201
- ]
202
-
203
- if len(required_saved_models) > 0:
204
- default_model_idx = st.session_state["project_dct"]["model_tuning"][
205
- "sel_model"
206
- ].get(sel_target_col, required_saved_models[0])
207
- sel_model = st.selectbox(
208
- "Select the model to tune",
209
- required_saved_models,
210
- index=required_saved_models.index(default_model_idx),
211
- )
212
- else:
213
- default_model_idx = st.session_state["project_dct"]["model_tuning"][
214
- "sel_model"
215
- ].get(sel_target_col, 0)
216
- sel_model = st.selectbox(
217
- "Select the model to tune", required_saved_models
218
- )
219
-
220
- st.session_state["project_dct"]["model_tuning"]["sel_model"][
221
- sel_target_col
222
- ] = default_model_idx
223
-
224
- sel_model_dict = model_dict[
225
- sel_model + "__" + target_col
226
- ] # Sprint4 - get the model obj of the selected model
227
-
228
- X_train = sel_model_dict["X_train"]
229
- X_test = sel_model_dict["X_test"]
230
- y_train = sel_model_dict["y_train"]
231
- y_test = sel_model_dict["y_test"]
232
- df = st.session_state["media_data"]
233
-
234
- if "selected_model" not in st.session_state:
235
- st.session_state["selected_model"] = 0
236
-
237
- st.markdown("### 1.1 Event Flags")
238
- st.markdown(
239
- "Helps in quantifying the impact of specific occurrences of events"
240
- )
241
-
242
- flag_expander_default = (
243
- st.session_state["project_dct"]["model_tuning"].get(
244
- "flag_expander", None
245
- )
246
- if st.session_state["project_dct"]["model_tuning"].get(
247
- "flag_expander", None
248
- )
249
- is not None
250
- else False
251
- )
252
-
253
- with st.expander("Apply Event Flags", flag_expander_default):
254
- st.session_state["project_dct"]["model_tuning"]["flag_expander"] = True
255
-
256
- model = sel_model_dict["Model_object"]
257
- date = st.session_state["date"]
258
- date = pd.to_datetime(date)
259
- X_train = sel_model_dict["X_train"]
260
-
261
- # features_set= model_dict[st.session_state["selected_model"]]['feature_set']
262
- features_set = sel_model_dict["feature_set"]
263
-
264
- col = st.columns(3)
265
- min_date = min(date)
266
- max_date = max(date)
267
-
268
- start_date_default = (
269
- st.session_state["project_dct"]["model_tuning"].get(
270
- "start_date_default"
271
- )
272
- if st.session_state["project_dct"]["model_tuning"].get(
273
- "start_date_default"
274
- )
275
- is not None
276
- else min_date
277
- )
278
- end_date_default = (
279
- st.session_state["project_dct"]["model_tuning"].get(
280
- "end_date_default"
281
- )
282
- if st.session_state["project_dct"]["model_tuning"].get(
283
- "end_date_default"
284
- )
285
- is not None
286
- else max_date
287
- )
288
- with col[0]:
289
- start_date = st.date_input(
290
- "Select Start Date",
291
- start_date_default,
292
- min_value=min_date,
293
- max_value=max_date,
294
- )
295
- with col[1]:
296
- end_date_default = (
297
- end_date_default
298
- if end_date_default >= start_date
299
- else start_date
300
- )
301
- end_date = st.date_input(
302
- "Select End Date",
303
- end_date_default,
304
- min_value=max(min_date, start_date),
305
- max_value=max_date,
306
- )
307
- with col[2]:
308
- repeat_default = (
309
- st.session_state["project_dct"]["model_tuning"].get(
310
- "repeat_default"
311
- )
312
- if st.session_state["project_dct"]["model_tuning"].get(
313
- "repeat_default"
314
- )
315
- is not None
316
- else "No"
317
- )
318
- repeat_default_idx = 0 if repeat_default.lower() == "yes" else 1
319
- repeat = st.selectbox(
320
- "Repeat Annually", ["Yes", "No"], index=repeat_default_idx
321
- )
322
- st.session_state["project_dct"]["model_tuning"][
323
- "start_date_default"
324
- ] = start_date
325
- st.session_state["project_dct"]["model_tuning"][
326
- "end_date_default"
327
- ] = end_date
328
- st.session_state["project_dct"]["model_tuning"][
329
- "repeat_default"
330
- ] = repeat
331
-
332
- if repeat == "Yes":
333
- repeat = True
334
- else:
335
- repeat = False
336
-
337
- if "Flags" not in st.session_state:
338
- st.session_state["Flags"] = {}
339
- if "flags" in st.session_state["project_dct"]["model_tuning"].keys():
340
- st.session_state["Flags"] = st.session_state["project_dct"][
341
- "model_tuning"
342
- ]["flags"]
343
- # print("**"*50)
344
- # print(y_train)
345
- # print("**"*50)
346
- # print(model.fittedvalues)
347
- if is_panel: # Sprint3
348
- met, line_values, fig_flag = plot_actual_vs_predicted(
349
- X_train[date_col],
350
- y_train,
351
- model.fittedvalues,
352
- model,
353
- target_column=sel_target_col,
354
- flag=(start_date, end_date),
355
- repeat_all_years=repeat,
356
- is_panel=True,
357
- )
358
- st.plotly_chart(fig_flag, use_container_width=True)
359
-
360
- # create flag on test
361
- met, test_line_values, fig_flag = plot_actual_vs_predicted(
362
- X_test[date_col],
363
- y_test,
364
- sel_model_dict["pred_test"],
365
- model,
366
- target_column=sel_target_col,
367
- flag=(start_date, end_date),
368
- repeat_all_years=repeat,
369
- is_panel=True,
370
- )
371
-
372
- else:
373
- pred_train = model.predict(X_train[features_set])
374
- met, line_values, fig_flag = plot_actual_vs_predicted(
375
- X_train[date_col],
376
- y_train,
377
- pred_train,
378
- model,
379
- flag=(start_date, end_date),
380
- repeat_all_years=repeat,
381
- is_panel=False,
382
- )
383
- st.plotly_chart(fig_flag, use_container_width=True)
384
-
385
- pred_test = model.predict(X_test[features_set])
386
- met, test_line_values, fig_flag = plot_actual_vs_predicted(
387
- X_test[date_col],
388
- y_test,
389
- pred_test,
390
- model,
391
- flag=(start_date, end_date),
392
- repeat_all_years=repeat,
393
- is_panel=False,
394
- )
395
- flag_name = "f1_flag"
396
- flag_name = st.text_input("Enter Flag Name")
397
- # Sprint4 - add selected target col to flag name
398
- if st.button("Update flag"):
399
- st.session_state["Flags"][flag_name + "__" + target_col] = {}
400
- st.session_state["Flags"][flag_name + "__" + target_col][
401
- "train"
402
- ] = line_values
403
- st.session_state["Flags"][flag_name + "__" + target_col][
404
- "test"
405
- ] = test_line_values
406
- st.success(f'{flag_name + "__" + target_col} stored')
407
-
408
- st.session_state["project_dct"]["model_tuning"]["flags"] = (
409
- st.session_state["Flags"]
410
- )
411
- # Sprint4 - only show flag created for the particular target col
412
- if st.session_state["Flags"] is None:
413
- st.session_state["Flags"] = {}
414
- target_model_flags = [
415
- f.split("__")[0]
416
- for f in st.session_state["Flags"].keys()
417
- if f.split("__")[1] == target_col
418
- ]
419
- options = list(target_model_flags)
420
- selected_options = []
421
- num_columns = 4
422
- num_rows = -(-len(options) // num_columns)
423
-
424
- tick = False
425
- if st.checkbox(
426
- "Select all",
427
- value=st.session_state["project_dct"]["model_tuning"][
428
- "select_all_flags_check"
429
- ].get(sel_target_col, False),
430
- ):
431
- tick = True
432
- st.session_state["project_dct"]["model_tuning"][
433
- "select_all_flags_check"
434
- ][sel_target_col] = True
435
- else:
436
- st.session_state["project_dct"]["model_tuning"][
437
- "select_all_flags_check"
438
- ][sel_target_col] = False
439
- selection_defualts = st.session_state["project_dct"]["model_tuning"][
440
- "selected_flags"
441
- ].get(sel_target_col, [])
442
- selected_options = selection_defualts
443
- for row in range(num_rows):
444
- cols = st.columns(num_columns)
445
- for col in cols:
446
- if options:
447
- option = options.pop(0)
448
- option_default = (
449
- True if option in selection_defualts else False
450
- )
451
- selected = col.checkbox(option, value=(tick or option_default))
452
- if selected:
453
- selected_options.append(option)
454
- st.session_state["project_dct"]["model_tuning"]["selected_flags"][
455
- sel_target_col
456
- ] = selected_options
457
-
458
- st.markdown("### 1.2 Select Parameters to Apply")
459
- parameters = st.columns(3)
460
- with parameters[0]:
461
- Trend = st.checkbox(
462
- "**Trend**",
463
- value=st.session_state["project_dct"]["model_tuning"].get(
464
- "trend_check", False
465
- ),
466
- )
467
- st.markdown(
468
- "Helps account for long-term trends or seasonality that could influence advertising effectiveness"
469
- )
470
- with parameters[1]:
471
- week_number = st.checkbox(
472
- "**Week_number**",
473
- value=st.session_state["project_dct"]["model_tuning"].get(
474
- "week_num_check", False
475
- ),
476
- )
477
- st.markdown(
478
- "Assists in detecting and incorporating weekly patterns or seasonality"
479
- )
480
- with parameters[2]:
481
- sine_cosine = st.checkbox(
482
- "**Sine and Cosine Waves**",
483
- value=st.session_state["project_dct"]["model_tuning"].get(
484
- "sine_cosine_check", False
485
- ),
486
- )
487
- st.markdown(
488
- "Helps in capturing cyclical patterns or seasonality in the data"
489
- )
490
- #
491
- # def get_tuned_model():
492
- # st.session_state['build_tuned_model']=True
493
-
494
- if st.button(
495
- "Build model with Selected Parameters and Flags",
496
- key="build_tuned_model",
497
- ):
498
- new_features = features_set
499
- st.header("2.1 Results Summary")
500
- # date=list(df.index)
501
- # df = df.reset_index(drop=True)
502
- # X_train=df[features_set]
503
- ss = MinMaxScaler()
504
- if is_panel == True:
505
- X_train_tuned = X_train[features_set]
506
- # X_train_tuned = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
507
- X_train_tuned[target_col] = X_train[target_col]
508
- X_train_tuned[date_col] = X_train[date_col]
509
- X_train_tuned[panel_col] = X_train[panel_col]
510
-
511
- X_test_tuned = X_test[features_set]
512
- # X_test_tuned = pd.DataFrame(ss.transform(X), columns=X.columns)
513
- X_test_tuned[target_col] = X_test[target_col]
514
- X_test_tuned[date_col] = X_test[date_col]
515
- X_test_tuned[panel_col] = X_test[panel_col]
516
-
517
- else:
518
- X_train_tuned = X_train[features_set]
519
- # X_train_tuned = pd.DataFrame(ss.fit_transform(X_train_tuned), columns=X_train_tuned.columns)
520
-
521
- X_test_tuned = X_test[features_set]
522
- # X_test_tuned = pd.DataFrame(ss.transform(X_test_tuned), columns=X_test_tuned.columns)
523
-
524
- for flag in selected_options:
525
- # Spirnt4 - added target_col in flag name
526
- X_train_tuned[flag] = st.session_state["Flags"][
527
- flag + "__" + target_col
528
- ]["train"]
529
- X_test_tuned[flag] = st.session_state["Flags"][
530
- flag + "__" + target_col
531
- ]["test"]
532
-
533
- # test
534
- # X_train_tuned.to_csv("Test/X_train_tuned_flag.csv",index=False)
535
- # X_test_tuned.to_csv("Test/X_test_tuned_flag.csv",index=False)
536
-
537
- # print("()()"*20,flag, len(st.session_state['Flags'][flag]))
538
- if Trend:
539
- st.session_state["project_dct"]["model_tuning"][
540
- "trend_check"
541
- ] = True
542
- # Sprint3 - group by panel, calculate trend of each panel spearately. Add trend to new feature set
543
- if is_panel:
544
- newdata = pd.DataFrame()
545
- panel_wise_end_point_train = {}
546
- for panel, groupdf in X_train_tuned.groupby(panel_col):
547
- groupdf.sort_values(date_col, inplace=True)
548
- groupdf["Trend"] = np.arange(1, len(groupdf) + 1, 1)
549
- newdata = pd.concat([newdata, groupdf])
550
- panel_wise_end_point_train[panel] = len(groupdf)
551
- X_train_tuned = newdata.copy()
552
-
553
- test_newdata = pd.DataFrame()
554
- for panel, test_groupdf in X_test_tuned.groupby(panel_col):
555
- test_groupdf.sort_values(date_col, inplace=True)
556
- start = panel_wise_end_point_train[panel] + 1
557
- end = start + len(test_groupdf) # should be + 1? - Sprint4
558
- # print("??"*20, panel, len(test_groupdf), len(np.arange(start, end, 1)), start)
559
- test_groupdf["Trend"] = np.arange(start, end, 1)
560
- test_newdata = pd.concat([test_newdata, test_groupdf])
561
- X_test_tuned = test_newdata.copy()
562
-
563
- new_features = new_features + ["Trend"]
564
-
565
- else:
566
- X_train_tuned["Trend"] = np.arange(
567
- 1, len(X_train_tuned) + 1, 1
568
- )
569
- X_test_tuned["Trend"] = np.arange(
570
- len(X_train_tuned) + 1,
571
- len(X_train_tuned) + len(X_test_tuned) + 1,
572
- 1,
573
- )
574
- new_features = new_features + ["Trend"]
575
- else:
576
- st.session_state["project_dct"]["model_tuning"][
577
- "trend_check"
578
- ] = False
579
-
580
- if week_number:
581
- st.session_state["project_dct"]["model_tuning"][
582
- "week_num_check"
583
- ] = True
584
- # Sprint3 - create weeknumber from date column in xtrain tuned. add week num to new feature set
585
- if is_panel:
586
- X_train_tuned[date_col] = pd.to_datetime(
587
- X_train_tuned[date_col]
588
- )
589
- X_train_tuned["Week_number"] = X_train_tuned[
590
- date_col
591
- ].dt.day_of_week
592
- if X_train_tuned["Week_number"].nunique() == 1:
593
- st.write(
594
- "All dates in the data are of the same week day. Hence Week number can't be used."
595
- )
596
- else:
597
- X_test_tuned[date_col] = pd.to_datetime(
598
- X_test_tuned[date_col]
599
- )
600
- X_test_tuned["Week_number"] = X_test_tuned[
601
- date_col
602
- ].dt.day_of_week
603
- new_features = new_features + ["Week_number"]
604
-
605
- else:
606
- date = pd.to_datetime(date.values)
607
- X_train_tuned["Week_number"] = pd.to_datetime(
608
- X_train[date_col]
609
- ).dt.day_of_week
610
- X_test_tuned["Week_number"] = pd.to_datetime(
611
- X_test[date_col]
612
- ).dt.day_of_week
613
- new_features = new_features + ["Week_number"]
614
- else:
615
- st.session_state["project_dct"]["model_tuning"][
616
- "week_num_check"
617
- ] = False
618
-
619
- if sine_cosine:
620
- st.session_state["project_dct"]["model_tuning"][
621
- "sine_cosine_check"
622
- ] = True
623
- # Sprint3 - create panel wise sine cosine waves in xtrain tuned. add to new feature set
624
- if is_panel:
625
- new_features = new_features + ["sine_wave", "cosine_wave"]
626
- newdata = pd.DataFrame()
627
- newdata_test = pd.DataFrame()
628
- groups = X_train_tuned.groupby(panel_col)
629
- frequency = 2 * np.pi / 365 # Adjust the frequency as needed
630
-
631
- train_panel_wise_end_point = {}
632
- for panel, groupdf in groups:
633
- num_samples = len(groupdf)
634
- train_panel_wise_end_point[panel] = num_samples
635
- days_since_start = np.arange(num_samples)
636
- sine_wave = np.sin(frequency * days_since_start)
637
- cosine_wave = np.cos(frequency * days_since_start)
638
- sine_cosine_df = pd.DataFrame(
639
- {"sine_wave": sine_wave, "cosine_wave": cosine_wave}
640
- )
641
- assert len(sine_cosine_df) == len(groupdf)
642
- # groupdf = pd.concat([groupdf, sine_cosine_df], axis=1)
643
- groupdf["sine_wave"] = sine_wave
644
- groupdf["cosine_wave"] = cosine_wave
645
- newdata = pd.concat([newdata, groupdf])
646
-
647
- X_train_tuned = newdata.copy()
648
-
649
- test_groups = X_test_tuned.groupby(panel_col)
650
- for panel, test_groupdf in test_groups:
651
- num_samples = len(test_groupdf)
652
- start = train_panel_wise_end_point[panel]
653
- days_since_start = np.arange(start, start + num_samples, 1)
654
- # print("##", panel, num_samples, start, len(np.arange(start, start+num_samples, 1)))
655
- sine_wave = np.sin(frequency * days_since_start)
656
- cosine_wave = np.cos(frequency * days_since_start)
657
- sine_cosine_df = pd.DataFrame(
658
- {"sine_wave": sine_wave, "cosine_wave": cosine_wave}
659
- )
660
- assert len(sine_cosine_df) == len(test_groupdf)
661
- # groupdf = pd.concat([groupdf, sine_cosine_df], axis=1)
662
- test_groupdf["sine_wave"] = sine_wave
663
- test_groupdf["cosine_wave"] = cosine_wave
664
- newdata_test = pd.concat([newdata_test, test_groupdf])
665
-
666
- X_test_tuned = newdata_test.copy()
667
-
668
- else:
669
- new_features = new_features + ["sine_wave", "cosine_wave"]
670
-
671
- num_samples = len(X_train_tuned)
672
- frequency = 2 * np.pi / 365 # Adjust the frequency as needed
673
- days_since_start = np.arange(num_samples)
674
- sine_wave = np.sin(frequency * days_since_start)
675
- cosine_wave = np.cos(frequency * days_since_start)
676
- sine_cosine_df = pd.DataFrame(
677
- {"sine_wave": sine_wave, "cosine_wave": cosine_wave}
678
- )
679
- # Concatenate the sine and cosine waves with the scaled X DataFrame
680
- X_train_tuned = pd.concat(
681
- [X_train_tuned, sine_cosine_df], axis=1
682
- )
683
-
684
- test_num_samples = len(X_test_tuned)
685
- start = num_samples
686
- days_since_start = np.arange(
687
- start, start + test_num_samples, 1
688
- )
689
- sine_wave = np.sin(frequency * days_since_start)
690
- cosine_wave = np.cos(frequency * days_since_start)
691
- sine_cosine_df = pd.DataFrame(
692
- {"sine_wave": sine_wave, "cosine_wave": cosine_wave}
693
- )
694
- # Concatenate the sine and cosine waves with the scaled X DataFrame
695
- X_test_tuned = pd.concat(
696
- [X_test_tuned, sine_cosine_df], axis=1
697
- )
698
- else:
699
- st.session_state["project_dct"]["model_tuning"][
700
- "sine_cosine_check"
701
- ] = False
702
-
703
- # model
704
- if selected_options:
705
- new_features = new_features + selected_options
706
- if is_panel:
707
- inp_vars_str = " + ".join(new_features)
708
- new_features = list(set(new_features))
709
-
710
- md_str = target_col + " ~ " + inp_vars_str
711
- md_tuned = smf.mixedlm(
712
- md_str,
713
- data=X_train_tuned[[target_col] + new_features],
714
- groups=X_train_tuned[panel_col],
715
- )
716
- model_tuned = md_tuned.fit()
717
-
718
- # plot act v pred for original model and tuned model
719
- metrics_table, line, actual_vs_predicted_plot = (
720
- plot_actual_vs_predicted(
721
- X_train[date_col],
722
- y_train,
723
- model.fittedvalues,
724
- model,
725
- target_column=sel_target_col,
726
- is_panel=True,
727
- )
728
- )
729
- metrics_table_tuned, line, actual_vs_predicted_plot_tuned = (
730
- plot_actual_vs_predicted(
731
- X_train_tuned[date_col],
732
- X_train_tuned[target_col],
733
- model_tuned.fittedvalues,
734
- model_tuned,
735
- target_column=sel_target_col,
736
- is_panel=True,
737
- )
738
- )
739
-
740
- else:
741
- new_features = list(set(new_features))
742
- model_tuned = sm.OLS(y_train, X_train_tuned[new_features]).fit()
743
- metrics_table, line, actual_vs_predicted_plot = (
744
- plot_actual_vs_predicted(
745
- date[:130],
746
- y_train,
747
- model.predict(X_train[features_set]),
748
- model,
749
- target_column=sel_target_col,
750
- )
751
- )
752
- metrics_table_tuned, line, actual_vs_predicted_plot_tuned = (
753
- plot_actual_vs_predicted(
754
- date[:130],
755
- y_train,
756
- model_tuned.predict(X_train_tuned),
757
- model_tuned,
758
- target_column=sel_target_col,
759
- )
760
- )
761
-
762
- mape = np.round(metrics_table.iloc[0, 1], 2)
763
- r2 = np.round(metrics_table.iloc[1, 1], 2)
764
- adjr2 = np.round(metrics_table.iloc[2, 1], 2)
765
-
766
- mape_tuned = np.round(metrics_table_tuned.iloc[0, 1], 2)
767
- r2_tuned = np.round(metrics_table_tuned.iloc[1, 1], 2)
768
- adjr2_tuned = np.round(metrics_table_tuned.iloc[2, 1], 2)
769
-
770
- parameters_ = st.columns(3)
771
- with parameters_[0]:
772
- st.metric("R2", r2_tuned, np.round(r2_tuned - r2, 2))
773
- with parameters_[1]:
774
- st.metric(
775
- "Adjusted R2", adjr2_tuned, np.round(adjr2_tuned - adjr2, 2)
776
- )
777
- with parameters_[2]:
778
- st.metric(
779
- "MAPE", mape_tuned, np.round(mape_tuned - mape, 2), "inverse"
780
- )
781
- st.write(model_tuned.summary())
782
-
783
- X_train_tuned[date_col] = X_train[date_col]
784
- X_test_tuned[date_col] = X_test[date_col]
785
- X_train_tuned[target_col] = y_train
786
- X_test_tuned[target_col] = y_test
787
-
788
- st.header("2.2 Actual vs. Predicted Plot")
789
- # if is_panel:
790
- # metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date, y_train, model.predict(X_train),
791
- # model, target_column='Revenue',is_panel=True)
792
- # else:
793
- # metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(date, y_train, model.predict(X_train), model,target_column='Revenue')
794
- if is_panel:
795
- metrics_table, line, actual_vs_predicted_plot = (
796
- plot_actual_vs_predicted(
797
- X_train_tuned[date_col],
798
- X_train_tuned[target_col],
799
- model_tuned.fittedvalues,
800
- model_tuned,
801
- target_column=sel_target_col,
802
- is_panel=True,
803
- )
804
- )
805
- else:
806
- metrics_table, line, actual_vs_predicted_plot = (
807
- plot_actual_vs_predicted(
808
- X_train_tuned[date_col],
809
- X_train_tuned[target_col],
810
- model_tuned.predict(X_train_tuned[new_features]),
811
- model_tuned,
812
- target_column=sel_target_col,
813
- is_panel=False,
814
- )
815
- )
816
- # plot_actual_vs_predicted(X_train[date_col], y_train,
817
- # model.fittedvalues, model,
818
- # target_column='Revenue',
819
- # is_panel=is_panel)
820
-
821
- st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
822
-
823
- st.markdown("## 2.3 Residual Analysis")
824
- if is_panel:
825
- columns = st.columns(2)
826
- with columns[0]:
827
- fig = plot_residual_predicted(
828
- y_train, model_tuned.fittedvalues, X_train_tuned
829
- )
830
- st.plotly_chart(fig)
831
-
832
- with columns[1]:
833
- st.empty()
834
- fig = qqplot(y_train, model_tuned.fittedvalues)
835
- st.plotly_chart(fig)
836
-
837
- with columns[0]:
838
- fig = residual_distribution(y_train, model_tuned.fittedvalues)
839
- st.pyplot(fig)
840
- else:
841
- columns = st.columns(2)
842
- with columns[0]:
843
- fig = plot_residual_predicted(
844
- y_train,
845
- model_tuned.predict(X_train_tuned[new_features]),
846
- X_train,
847
- )
848
- st.plotly_chart(fig)
849
-
850
- with columns[1]:
851
- st.empty()
852
- fig = qqplot(
853
- y_train, model_tuned.predict(X_train_tuned[new_features])
854
- )
855
- st.plotly_chart(fig)
856
-
857
- with columns[0]:
858
- fig = residual_distribution(
859
- y_train, model_tuned.predict(X_train_tuned[new_features])
860
- )
861
- st.pyplot(fig)
862
-
863
- # st.session_state['is_tuned_model'][target_col] = True
864
- # Sprint4 - saved tuned model in a dict
865
- st.session_state["Model_Tuned"][sel_model + "__" + target_col] = {
866
- "Model_object": model_tuned,
867
- "feature_set": new_features,
868
- "X_train_tuned": X_train_tuned,
869
- "X_test_tuned": X_test_tuned,
870
- }
871
-
872
- # Pending
873
- # if st.session_state['build_tuned_model']==True:
874
- if st.session_state["Model_Tuned"] is not None:
875
- if st.checkbox(
876
- "Use this model to build response curves", key="save_model"
877
- ):
878
- # save_model = st.button('Use this model to build response curves', key='saved_tuned_model')
879
- # if save_model:
880
- st.session_state["is_tuned_model"][target_col] = True
881
- with open(
882
- os.path.join(
883
- st.session_state["project_path"], "tuned_model.pkl"
884
- ),
885
- "wb",
886
- ) as f:
887
- # pickle.dump(st.session_state['tuned_model'], f)
888
- pickle.dump(st.session_state["Model_Tuned"], f) # Sprint4
889
-
890
- st.session_state["project_dct"]["model_tuning"][
891
- "session_state_saved"
892
- ] = {}
893
- for key in [
894
- "bin_dict",
895
- "used_response_metrics",
896
- "is_tuned_model",
897
- "media_data",
898
- "X_test_spends",
899
- ]:
900
- st.session_state["project_dct"]["model_tuning"][
901
- "session_state_saved"
902
- ][key] = st.session_state[key]
903
-
904
- project_dct_path = os.path.join(
905
- st.session_state["project_path"], "project_dct.pkl"
906
- )
907
- with open(project_dct_path, "wb") as f:
908
- pickle.dump(st.session_state["project_dct"], f)
909
-
910
- update_db("5_Model_Tuning.py")
911
-
912
- st.success(sel_model + "__" + target_col + " Tuned saved!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/6_AI_Model_Results.py DELETED
@@ -1,728 +0,0 @@
1
- import plotly.express as px
2
- import numpy as np
3
- import plotly.graph_objects as go
4
- import streamlit as st
5
- import pandas as pd
6
- import statsmodels.api as sm
7
- from sklearn.metrics import mean_absolute_percentage_error
8
- import sys
9
- import os
10
- from utilities import set_header, load_local_css, load_authenticator
11
- import seaborn as sns
12
- import matplotlib.pyplot as plt
13
- import sweetviz as sv
14
- import tempfile
15
- from sklearn.preprocessing import MinMaxScaler
16
- from st_aggrid import AgGrid
17
- from st_aggrid import GridOptionsBuilder, GridUpdateMode
18
- from st_aggrid import GridOptionsBuilder
19
- import sys
20
- import re
21
- import pickle
22
- from sklearn.metrics import r2_score, mean_absolute_percentage_error
23
- from Data_prep_functions import plot_actual_vs_predicted
24
- import sqlite3
25
- from utilities import update_db
26
-
27
- sys.setrecursionlimit(10**6)
28
-
29
- original_stdout = sys.stdout
30
- sys.stdout = open("temp_stdout.txt", "w")
31
- sys.stdout.close()
32
- sys.stdout = original_stdout
33
-
34
- st.set_page_config(layout="wide")
35
- load_local_css("styles.css")
36
- set_header()
37
-
38
- # TODO :
39
- ## 1. Add non panel model support
40
- ## 2. EDA Function
41
-
42
- for k, v in st.session_state.items():
43
- if k not in ["logout", "login", "config"] and not k.startswith(
44
- "FormSubmitter"
45
- ):
46
- st.session_state[k] = v
47
-
48
- authenticator = st.session_state.get("authenticator")
49
- if authenticator is None:
50
- authenticator = load_authenticator()
51
-
52
- name, authentication_status, username = authenticator.login("Login", "main")
53
- auth_status = st.session_state.get("authentication_status")
54
-
55
- if auth_status == True:
56
- is_state_initiaized = st.session_state.get("initialized", False)
57
- if not is_state_initiaized:
58
- if "session_name" not in st.session_state:
59
- st.session_state["session_name"] = None
60
-
61
- if "project_dct" not in st.session_state:
62
- st.error("Please load a project from Home page")
63
- st.stop()
64
-
65
- conn = sqlite3.connect(
66
- r"DB/User.db", check_same_thread=False
67
- ) # connection with sql db
68
- c = conn.cursor()
69
-
70
- if not os.path.exists(
71
- os.path.join(st.session_state["project_path"], "tuned_model.pkl")
72
- ):
73
- st.error("Please save a tuned model")
74
- st.stop()
75
-
76
- if (
77
- "session_state_saved"
78
- in st.session_state["project_dct"]["model_tuning"].keys()
79
- and st.session_state["project_dct"]["model_tuning"][
80
- "session_state_saved"
81
- ]
82
- != []
83
- ):
84
- for key in ["used_response_metrics", "media_data", "bin_dict"]:
85
- if key not in st.session_state:
86
- st.session_state[key] = st.session_state["project_dct"][
87
- "model_tuning"
88
- ]["session_state_saved"][key]
89
- st.session_state["bin_dict"] = st.session_state["project_dct"][
90
- "model_build"
91
- ]["session_state_saved"]["bin_dict"]
92
-
93
- media_data = st.session_state["media_data"]
94
- panel_col = [
95
- col.lower()
96
- .replace(".", "_")
97
- .replace("@", "_")
98
- .replace(" ", "_")
99
- .replace("-", "")
100
- .replace(":", "")
101
- .replace("__", "_")
102
- for col in st.session_state["bin_dict"]["Panel Level 1"]
103
- ][
104
- 0
105
- ] # set the panel column
106
- is_panel = True if len(panel_col) > 0 else False
107
- date_col = "date"
108
-
109
- def plot_residual_predicted(actual, predicted, df_):
110
- df_["Residuals"] = actual - pd.Series(predicted)
111
- df_["StdResidual"] = (
112
- df_["Residuals"] - df_["Residuals"].mean()
113
- ) / df_["Residuals"].std()
114
-
115
- # Create a Plotly scatter plot
116
- fig = px.scatter(
117
- df_,
118
- x=predicted,
119
- y="StdResidual",
120
- opacity=0.5,
121
- color_discrete_sequence=["#11B6BD"],
122
- )
123
-
124
- # Add horizontal lines
125
- fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
126
- fig.add_hline(y=2, line_color="red")
127
- fig.add_hline(y=-2, line_color="red")
128
-
129
- fig.update_xaxes(title="Predicted")
130
- fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")
131
-
132
- # Set the same width and height for both figures
133
- fig.update_layout(
134
- title="Residuals over Predicted Values",
135
- autosize=False,
136
- width=600,
137
- height=400,
138
- )
139
-
140
- return fig
141
-
142
- def residual_distribution(actual, predicted):
143
- Residuals = actual - pd.Series(predicted)
144
-
145
- # Create a Seaborn distribution plot
146
- sns.set(style="whitegrid")
147
- plt.figure(figsize=(6, 4))
148
- sns.histplot(Residuals, kde=True, color="#11B6BD")
149
-
150
- plt.title(" Distribution of Residuals")
151
- plt.xlabel("Residuals")
152
- plt.ylabel("Probability Density")
153
-
154
- return plt
155
-
156
- def qqplot(actual, predicted):
157
- Residuals = actual - pd.Series(predicted)
158
- Residuals = pd.Series(Residuals)
159
- Resud_std = (Residuals - Residuals.mean()) / Residuals.std()
160
-
161
- # Create a QQ plot using Plotly with custom colors
162
- fig = go.Figure()
163
- fig.add_trace(
164
- go.Scatter(
165
- x=sm.ProbPlot(Resud_std).theoretical_quantiles,
166
- y=sm.ProbPlot(Resud_std).sample_quantiles,
167
- mode="markers",
168
- marker=dict(size=5, color="#11B6BD"),
169
- name="QQ Plot",
170
- )
171
- )
172
-
173
- # Add the 45-degree reference line
174
- diagonal_line = go.Scatter(
175
- x=[
176
- -2,
177
- 2,
178
- ], # Adjust the x values as needed to fit the range of your data
179
- y=[-2, 2], # Adjust the y values accordingly
180
- mode="lines",
181
- line=dict(color="red"), # Customize the line color and style
182
- name=" ",
183
- )
184
- fig.add_trace(diagonal_line)
185
-
186
- # Customize the layout
187
- fig.update_layout(
188
- title="QQ Plot of Residuals",
189
- title_x=0.5,
190
- autosize=False,
191
- width=600,
192
- height=400,
193
- xaxis_title="Theoretical Quantiles",
194
- yaxis_title="Sample Quantiles",
195
- )
196
-
197
- return fig
198
-
199
- def get_random_effects(media_data, panel_col, mdf):
200
- random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
201
- for i, market in enumerate(media_data[panel_col].unique()):
202
- print(i, end="\r")
203
- intercept = mdf.random_effects[market].values[0]
204
- random_eff_df.loc[i, "random_effect"] = intercept
205
- random_eff_df.loc[i, panel_col] = market
206
-
207
- return random_eff_df
208
-
209
- def mdf_predict(X_df, mdf, random_eff_df):
210
- X = X_df.copy()
211
- X = pd.merge(
212
- X,
213
- random_eff_df[[panel_col, "random_effect"]],
214
- on=panel_col,
215
- how="left",
216
- )
217
- X["pred_fixed_effect"] = mdf.predict(X)
218
-
219
- X["pred"] = X["pred_fixed_effect"] + X["random_effect"]
220
- X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True)
221
- return X
222
-
223
- def metrics_df_panel(model_dict):
224
- metrics_df = pd.DataFrame(
225
- columns=[
226
- "Model",
227
- "R2",
228
- "ADJR2",
229
- "Train Mape",
230
- "Test Mape",
231
- "Summary",
232
- "Model_object",
233
- ]
234
- )
235
- i = 0
236
- for key in model_dict.keys():
237
- target = key.split("__")[1]
238
- metrics_df.at[i, "Model"] = target
239
- y = model_dict[key]["X_train_tuned"][target]
240
-
241
- random_df = get_random_effects(
242
- media_data, panel_col, model_dict[key]["Model_object"]
243
- )
244
- pred = mdf_predict(
245
- model_dict[key]["X_train_tuned"],
246
- model_dict[key]["Model_object"],
247
- random_df,
248
- )["pred"]
249
-
250
- ytest = model_dict[key]["X_test_tuned"][target]
251
- predtest = mdf_predict(
252
- model_dict[key]["X_test_tuned"],
253
- model_dict[key]["Model_object"],
254
- random_df,
255
- )["pred"]
256
-
257
- metrics_df.at[i, "R2"] = r2_score(y, pred)
258
- metrics_df.at[i, "ADJR2"] = 1 - (1 - metrics_df.loc[i, "R2"]) * (
259
- len(y) - 1
260
- ) / (len(y) - len(model_dict[key]["feature_set"]) - 1)
261
- metrics_df.at[i, "Train Mape"] = mean_absolute_percentage_error(
262
- y, pred
263
- )
264
- metrics_df.at[i, "Test Mape"] = mean_absolute_percentage_error(
265
- ytest, predtest
266
- )
267
- metrics_df.at[i, "Summary"] = model_dict[key][
268
- "Model_object"
269
- ].summary()
270
- metrics_df.at[i, "Model_object"] = model_dict[key]["Model_object"]
271
- i += 1
272
- metrics_df = np.round(metrics_df, 2)
273
- return metrics_df
274
-
275
- with open(
276
- os.path.join(
277
- st.session_state["project_path"], "final_df_transformed.pkl"
278
- ),
279
- "rb",
280
- ) as f:
281
- data = pickle.load(f)
282
- transformed_data = data["final_df_transformed"]
283
- with open(
284
- os.path.join(st.session_state["project_path"], "data_import.pkl"), "rb"
285
- ) as f:
286
- data = pickle.load(f)
287
- st.session_state["bin_dict"] = data["bin_dict"]
288
- with open(
289
- os.path.join(st.session_state["project_path"], "tuned_model.pkl"), "rb"
290
- ) as file:
291
- tuned_model_dict = pickle.load(file)
292
- feature_set_dct = {
293
- key.split("__")[1]: key_dict["feature_set"]
294
- for key, key_dict in tuned_model_dict.items()
295
- }
296
-
297
- # """ the above part should be modified so that we are fetching features set from the saved model"""
298
-
299
- def contributions(X, model, target):
300
- X1 = X.copy()
301
- for j, col in enumerate(X1.columns):
302
- X1[col] = X1[col] * model.params.values[j]
303
-
304
- contributions = np.round(
305
- (X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2
306
- )
307
- contributions = (
308
- pd.DataFrame(contributions, columns=target)
309
- .reset_index()
310
- .rename(columns={"index": "Channel"})
311
- )
312
- contributions["Channel"] = [
313
- re.split(r"_imp|_cli", col)[0] for col in contributions["Channel"]
314
- ]
315
-
316
- return contributions
317
-
318
- if "contribution_df" not in st.session_state:
319
- st.session_state["contribution_df"] = None
320
-
321
- def contributions_panel(model_dict):
322
- media_data = st.session_state["media_data"]
323
- contribution_df = pd.DataFrame(columns=["Channel"])
324
- for key in model_dict.keys():
325
- best_feature_set = model_dict[key]["feature_set"]
326
- model = model_dict[key]["Model_object"]
327
- target = key.split("__")[1]
328
- X_train = model_dict[key]["X_train_tuned"]
329
- contri_df = pd.DataFrame()
330
-
331
- y = []
332
- y_pred = []
333
-
334
- random_eff_df = get_random_effects(media_data, panel_col, model)
335
- random_eff_df["fixed_effect"] = model.fe_params["Intercept"]
336
- random_eff_df["panel_effect"] = (
337
- random_eff_df["random_effect"] + random_eff_df["fixed_effect"]
338
- )
339
-
340
- coef_df = pd.DataFrame(model.fe_params)
341
- coef_df.reset_index(inplace=True)
342
- coef_df.columns = ["feature", "coef"]
343
-
344
- x_train_contribution = X_train.copy()
345
- x_train_contribution = mdf_predict(
346
- x_train_contribution, model, random_eff_df
347
- )
348
-
349
- x_train_contribution = pd.merge(
350
- x_train_contribution,
351
- random_eff_df[[panel_col, "panel_effect"]],
352
- on=panel_col,
353
- how="left",
354
- )
355
-
356
- for i in range(len(coef_df))[1:]:
357
- coef = coef_df.loc[i, "coef"]
358
- col = coef_df.loc[i, "feature"]
359
- x_train_contribution[str(col) + "_contr"] = (
360
- coef * x_train_contribution[col]
361
- )
362
-
363
- # x_train_contribution['sum_contributions'] = x_train_contribution.filter(regex="contr").sum(axis=1)
364
- # x_train_contribution['sum_contributions'] = x_train_contribution['sum_contributions'] + x_train_contribution[
365
- # 'panel_effect']
366
-
367
- base_cols = ["panel_effect"] + [
368
- c
369
- for c in x_train_contribution.filter(regex="contr").columns
370
- if c
371
- in [
372
- "Week_number_contr",
373
- "Trend_contr",
374
- "sine_wave_contr",
375
- "cosine_wave_contr",
376
- ]
377
- ]
378
- x_train_contribution["base_contr"] = x_train_contribution[
379
- base_cols
380
- ].sum(axis=1)
381
- x_train_contribution.drop(columns=base_cols, inplace=True)
382
- # x_train_contribution.to_csv("Test/smr_x_train_contribution.csv", index=False)
383
-
384
- contri_df = pd.DataFrame(
385
- x_train_contribution.filter(regex="contr").sum(axis=0)
386
- )
387
- contri_df.reset_index(inplace=True)
388
- contri_df.columns = ["Channel", target]
389
- contri_df["Channel"] = (
390
- contri_df["Channel"]
391
- .str.split("(_impres|_clicks)")
392
- .apply(lambda c: c[0])
393
- )
394
- contri_df[target] = (
395
- 100 * contri_df[target] / contri_df[target].sum()
396
- )
397
- contri_df["Channel"].replace("base_contr", "base", inplace=True)
398
- contribution_df = pd.merge(
399
- contribution_df, contri_df, on="Channel", how="outer"
400
- )
401
- # st.session_state["contribution_df"] = contributions_panel(tuned_model_dict)
402
- return contribution_df
403
-
404
- metrics_table = metrics_df_panel(tuned_model_dict)
405
-
406
- eda_columns = st.columns(2)
407
- with eda_columns[1]:
408
- eda = st.button(
409
- "Generate EDA Report",
410
- help="Click to generate a bivariate report for the selected response metric from the table below.",
411
- )
412
-
413
- # st.markdown('Model Metrics')
414
- st.title("Contribution Overview")
415
- options = st.session_state["used_response_metrics"]
416
- options = [
417
- opt.lower()
418
- .replace(" ", "_")
419
- .replace("-", "")
420
- .replace(":", "")
421
- .replace("__", "_")
422
- for opt in options
423
- ]
424
-
425
- default_options = (
426
- st.session_state["project_dct"]["saved_model_results"].get(
427
- "selected_options"
428
- )
429
- if st.session_state["project_dct"]["saved_model_results"].get(
430
- "selected_options"
431
- )
432
- is not None
433
- else [options[-1]]
434
- )
435
- for i in default_options:
436
- if i not in options:
437
- st.write(i)
438
- default_options.remove(i)
439
- contribution_selections = st.multiselect(
440
- "Select the Response Metrics to compare contributions",
441
- options,
442
- default=default_options,
443
- )
444
- trace_data = []
445
-
446
- st.session_state["contribution_df"] = contributions_panel(tuned_model_dict)
447
-
448
- for selection in contribution_selections:
449
-
450
- trace = go.Bar(
451
- x=st.session_state["contribution_df"]["Channel"],
452
- y=st.session_state["contribution_df"][selection],
453
- name=selection,
454
- text=np.round(st.session_state["contribution_df"][selection], 0)
455
- .astype(int)
456
- .astype(str)
457
- + "%",
458
- textposition="outside",
459
- )
460
- trace_data.append(trace)
461
-
462
- layout = go.Layout(
463
- title="Metrics Contribution by Channel",
464
- xaxis=dict(title="Channel Name"),
465
- yaxis=dict(title="Metrics Contribution"),
466
- barmode="group",
467
- )
468
- fig = go.Figure(data=trace_data, layout=layout)
469
- st.plotly_chart(fig, use_container_width=True)
470
-
471
- ############################################ Waterfall Chart ############################################
472
- # import plotly.graph_objects as go
473
-
474
- # # Initialize a Plotly figure
475
- # fig = go.Figure()
476
-
477
- # for selection in contribution_selections:
478
- # # Ensure y_values are numeric
479
- # y_values = st.session_state["contribution_df"][selection].values.astype(float)
480
-
481
- # # Generating text labels for each bar, ensuring operations are compatible with string formats
482
- # text_values = [f"{val}%" for val in np.round(y_values, 0).astype(int)]
483
-
484
- # fig.add_trace(
485
- # go.Waterfall(
486
- # name=selection,
487
- # orientation="v",
488
- # measure=["relative"]
489
- # * len(y_values), # Adjust if you have absolute values at certain points
490
- # x=st.session_state["contribution_df"]["Channel"].tolist(),
491
- # text=text_values,
492
- # textposition="outside",
493
- # y=y_values,
494
- # increasing={"marker": {"color": "green"}},
495
- # decreasing={"marker": {"color": "red"}},
496
- # totals={"marker": {"color": "blue"}},
497
- # )
498
- # )
499
-
500
- # fig.update_layout(
501
- # title="Metrics Contribution by Channel",
502
- # xaxis={"title": "Channel Name"},
503
- # yaxis={"title": "Metrics Contribution"},
504
- # height=600,
505
- # )
506
-
507
- # # Displaying the waterfall chart in Streamlit
508
- # st.plotly_chart(fig, use_container_width=True)
509
-
510
- import plotly.graph_objects as go
511
-
512
- # Initialize a Plotly figure
513
- fig = go.Figure()
514
-
515
- for selection in contribution_selections:
516
- # Ensure contributions are numeric
517
- contributions = (
518
- st.session_state["contribution_df"][selection]
519
- .values.astype(float)
520
- .tolist()
521
- )
522
- channel_names = st.session_state["contribution_df"]["Channel"].tolist()
523
-
524
- display_name, display_contribution, base_contribution = [], [], 0
525
- for channel_name, contribution in zip(channel_names, contributions):
526
- if channel_name != "const" and channel_name != "base":
527
- display_name.append(channel_name)
528
- display_contribution.append(contribution)
529
- else:
530
- base_contribution = contribution
531
-
532
- display_name = ["Base Sales"] + display_name
533
- display_contribution = [base_contribution] + display_contribution
534
-
535
- # Generating text labels for each bar, ensuring operations are compatible with string formats
536
- text_values = [
537
- f"{val}%" for val in np.round(display_contribution, 0).astype(int)
538
- ]
539
-
540
- fig.add_trace(
541
- go.Waterfall(
542
- orientation="v",
543
- measure=["relative"]
544
- * len(
545
- display_contribution
546
- ), # Adjust if you have absolute values at certain points
547
- x=display_name,
548
- text=text_values,
549
- textposition="outside",
550
- y=display_contribution,
551
- increasing={"marker": {"color": "green"}},
552
- decreasing={"marker": {"color": "red"}},
553
- totals={"marker": {"color": "blue"}},
554
- )
555
- )
556
-
557
- fig.update_layout(
558
- title="Metrics Contribution by Channel",
559
- xaxis={"title": "Channel Name"},
560
- yaxis={"title": "Metrics Contribution"},
561
- height=600,
562
- )
563
-
564
- # Displaying the waterfall chart in Streamlit
565
- st.plotly_chart(fig, use_container_width=True)
566
-
567
- ############################################ Waterfall Chart ############################################
568
-
569
- st.title("Analysis of Models Result")
570
- # st.markdown()
571
- previous_selection = st.session_state["project_dct"][
572
- "saved_model_results"
573
- ].get("model_grid_sel", [1])
574
- st.write(np.round(metrics_table, 2))
575
- gd_table = metrics_table.iloc[:, :-2]
576
-
577
- gd = GridOptionsBuilder.from_dataframe(gd_table)
578
- # gd.configure_pagination(enabled=True)
579
- gd.configure_selection(
580
- use_checkbox=True,
581
- selection_mode="single",
582
- pre_select_all_rows=False,
583
- pre_selected_rows=previous_selection,
584
- )
585
-
586
- gridoptions = gd.build()
587
- table = AgGrid(
588
- gd_table,
589
- gridOptions=gridoptions,
590
- fit_columns_on_grid_load=True,
591
- height=200,
592
- )
593
- # table=metrics_table.iloc[:,:-2]
594
- # table.insert(0, "Select", False)
595
- # selection_table=st.data_editor(table,column_config={"Select": st.column_config.CheckboxColumn(required=True)})
596
- if len(table.selected_rows) > 0:
597
- st.session_state["project_dct"]["saved_model_results"][
598
- "model_grid_sel"
599
- ] = table.selected_rows[0]["_selectedRowNodeInfo"]["nodeRowIndex"]
600
- if len(table.selected_rows) == 0:
601
- st.warning(
602
- "Click on the checkbox to view comprehensive results of the selected model."
603
- )
604
- st.stop()
605
- else:
606
- target_column = table.selected_rows[0]["Model"]
607
- feature_set = feature_set_dct[target_column]
608
-
609
- # with eda_columns[1]:
610
- # if eda:
611
- # def generate_report_with_target(channel_data, target_feature):
612
- # report = sv.analyze(
613
- # [channel_data, "Dataset"], target_feat=target_feature, verbose=False
614
- # )
615
- # temp_dir = tempfile.mkdtemp()
616
- # report_path = os.path.join(temp_dir, "report.html")
617
- # report.show_html(
618
- # filepath=report_path, open_browser=False
619
- # ) # Generate the report as an HTML file
620
- # return report_path
621
- #
622
- # report_data = transformed_data[feature_set]
623
- # report_data[target_column] = transformed_data[target_column]
624
- # report_file = generate_report_with_target(report_data, target_column)
625
- #
626
- # if os.path.exists(report_file):
627
- # with open(report_file, "rb") as f:
628
- # st.download_button(
629
- # label="Download EDA Report",
630
- # data=f.read(),
631
- # file_name="report.html",
632
- # mime="text/html",
633
- # )
634
- # else:
635
- # st.warning("Report generation failed. Unable to find the report file.")
636
-
637
- model = metrics_table[metrics_table["Model"] == target_column][
638
- "Model_object"
639
- ].iloc[0]
640
- target = metrics_table[metrics_table["Model"] == target_column][
641
- "Model"
642
- ].iloc[0]
643
- st.header("Model Summary")
644
- st.write(model.summary())
645
-
646
- sel_dict = tuned_model_dict[
647
- [k for k in tuned_model_dict.keys() if k.split("__")[1] == target][0]
648
- ]
649
- X_train = sel_dict["X_train_tuned"]
650
- y_train = X_train[target]
651
- random_effects = get_random_effects(media_data, panel_col, model)
652
- pred = mdf_predict(X_train, model, random_effects)["pred"]
653
-
654
- X_test = sel_dict["X_test_tuned"]
655
- y_test = X_test[target]
656
- predtest = mdf_predict(X_test, model, random_effects)["pred"]
657
- metrics_table_train, _, fig_train = plot_actual_vs_predicted(
658
- X_train[date_col],
659
- y_train,
660
- pred,
661
- model,
662
- target_column=target_column,
663
- flag=None,
664
- repeat_all_years=False,
665
- is_panel=is_panel,
666
- )
667
-
668
- metrics_table_test, _, fig_test = plot_actual_vs_predicted(
669
- X_test[date_col],
670
- y_test,
671
- predtest,
672
- model,
673
- target_column=target_column,
674
- flag=None,
675
- repeat_all_years=False,
676
- is_panel=is_panel,
677
- )
678
-
679
- metrics_table_train = metrics_table_train.set_index("Metric").transpose()
680
- metrics_table_train.index = ["Train"]
681
- metrics_table_test = metrics_table_test.set_index("Metric").transpose()
682
- metrics_table_test.index = ["test"]
683
- metrics_table = np.round(
684
- pd.concat([metrics_table_train, metrics_table_test]), 2
685
- )
686
-
687
- st.markdown("Result Overview")
688
- st.dataframe(np.round(metrics_table, 2), use_container_width=True)
689
-
690
- st.subheader("Actual vs Predicted Plot Train")
691
-
692
- st.plotly_chart(fig_train, use_container_width=True)
693
- st.subheader("Actual vs Predicted Plot Test")
694
- st.plotly_chart(fig_test, use_container_width=True)
695
-
696
- st.markdown("## Residual Analysis")
697
- columns = st.columns(2)
698
-
699
- Xtrain1 = X_train.copy()
700
- with columns[0]:
701
- fig = plot_residual_predicted(y_train, model.predict(Xtrain1), Xtrain1)
702
- st.plotly_chart(fig)
703
-
704
- with columns[1]:
705
- st.empty()
706
- fig = qqplot(y_train, model.predict(X_train))
707
- st.plotly_chart(fig)
708
-
709
- with columns[0]:
710
- fig = residual_distribution(y_train, model.predict(X_train))
711
- st.pyplot(fig)
712
-
713
- update_db("6_AI_Model_Result.py")
714
-
715
-
716
- elif auth_status == False:
717
- st.error("Username/Password is incorrect")
718
- try:
719
- username_forgot_pw, email_forgot_password, random_password = (
720
- authenticator.forgot_password("Forgot password")
721
- )
722
- if username_forgot_pw:
723
- st.success("New password sent securely")
724
- # Random password to be transferred to the user securely
725
- elif username_forgot_pw == False:
726
- st.error("Username not found")
727
- except Exception as e:
728
- st.error(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/7_Current_Media_Performance.py DELETED
@@ -1,573 +0,0 @@
1
- """
2
- MMO Build Sprint 3
3
- additions : contributions calculated using tuned Mixed LM model
4
- pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model
5
-
6
- MMO Build Sprint 4
7
- additions : response metrics selection
8
- pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model
9
- """
10
-
11
- import streamlit as st
12
- import pandas as pd
13
- from sklearn.preprocessing import MinMaxScaler
14
- import pickle
15
- import os
16
-
17
- from utilities_with_panel import load_local_css, set_header
18
- import yaml
19
- from yaml import SafeLoader
20
- import streamlit_authenticator as stauth
21
- import sqlite3
22
- from utilities import update_db
23
-
24
- st.set_page_config(layout="wide")
25
- load_local_css("styles.css")
26
- set_header()
27
- for k, v in st.session_state.items():
28
- # print(k, v)
29
- if k not in [
30
- "logout",
31
- "login",
32
- "config",
33
- "build_tuned_model",
34
- ] and not k.startswith("FormSubmitter"):
35
- st.session_state[k] = v
36
- with open("config.yaml") as file:
37
- config = yaml.load(file, Loader=SafeLoader)
38
- st.session_state["config"] = config
39
- authenticator = stauth.Authenticate(
40
- config["credentials"],
41
- config["cookie"]["name"],
42
- config["cookie"]["key"],
43
- config["cookie"]["expiry_days"],
44
- config["preauthorized"],
45
- )
46
- st.session_state["authenticator"] = authenticator
47
- name, authentication_status, username = authenticator.login("Login", "main")
48
- auth_status = st.session_state.get("authentication_status")
49
-
50
- if auth_status == True:
51
- authenticator.logout("Logout", "main")
52
- is_state_initiaized = st.session_state.get("initialized", False)
53
-
54
- if "project_dct" not in st.session_state:
55
- st.error("Please load a project from Home page")
56
- st.stop()
57
-
58
- conn = sqlite3.connect(
59
- r"DB/User.db", check_same_thread=False
60
- ) # connection with sql db
61
- c = conn.cursor()
62
-
63
- if not os.path.exists(
64
- os.path.join(st.session_state["project_path"], "tuned_model.pkl")
65
- ):
66
- st.error("Please save a tuned model")
67
- st.stop()
68
-
69
- if (
70
- "session_state_saved"
71
- in st.session_state["project_dct"]["model_tuning"].keys()
72
- and st.session_state["project_dct"]["model_tuning"][
73
- "session_state_saved"
74
- ]
75
- != []
76
- ):
77
- for key in [
78
- "used_response_metrics",
79
- "is_tuned_model",
80
- "media_data",
81
- "X_test_spends",
82
- ]:
83
- st.session_state[key] = st.session_state["project_dct"][
84
- "model_tuning"
85
- ]["session_state_saved"][key]
86
- elif (
87
- "session_state_saved"
88
- in st.session_state["project_dct"]["model_build"].keys()
89
- and st.session_state["project_dct"]["model_build"][
90
- "session_state_saved"
91
- ]
92
- != []
93
- ):
94
- for key in [
95
- "used_response_metrics",
96
- "date",
97
- "saved_model_names",
98
- "media_data",
99
- "X_test_spends",
100
- ]:
101
- st.session_state[key] = st.session_state["project_dct"][
102
- "model_build"
103
- ]["session_state_saved"][key]
104
- else:
105
- st.error("Please tune a model first")
106
- st.session_state["bin_dict"] = st.session_state["project_dct"][
107
- "model_build"
108
- ]["session_state_saved"]["bin_dict"]
109
- st.session_state["media_data"].columns = [
110
- c.lower() for c in st.session_state["media_data"].columns
111
- ]
112
-
113
- from utilities_with_panel import (
114
- overview_test_data_prep_panel,
115
- overview_test_data_prep_nonpanel,
116
- initialize_data,
117
- create_channel_summary,
118
- create_contribution_pie,
119
- create_contribuion_stacked_plot,
120
- create_channel_spends_sales_plot,
121
- format_numbers,
122
- channel_name_formating,
123
- )
124
-
125
- import plotly.graph_objects as go
126
- import streamlit_authenticator as stauth
127
- import yaml
128
- from yaml import SafeLoader
129
- import time
130
-
131
- def get_random_effects(media_data, panel_col, mdf):
132
- random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
133
- for i, market in enumerate(media_data[panel_col].unique()):
134
- print(i, end="\r")
135
- intercept = mdf.random_effects[market].values[0]
136
- random_eff_df.loc[i, "random_effect"] = intercept
137
- random_eff_df.loc[i, panel_col] = market
138
-
139
- return random_eff_df
140
-
141
- def process_train_and_test(train, test, features, panel_col, target_col):
142
- X1 = train[features]
143
-
144
- ss = MinMaxScaler()
145
- X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns)
146
-
147
- X1[panel_col] = train[panel_col]
148
- X1[target_col] = train[target_col]
149
-
150
- if test is not None:
151
- X2 = test[features]
152
- X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns)
153
- X2[panel_col] = test[panel_col]
154
- X2[target_col] = test[target_col]
155
- return X1, X2
156
- return X1
157
-
158
- def mdf_predict(X_df, mdf, random_eff_df):
159
- X = X_df.copy()
160
- X = pd.merge(
161
- X,
162
- random_eff_df[[panel_col, "random_effect"]],
163
- on=panel_col,
164
- how="left",
165
- )
166
- X["pred_fixed_effect"] = mdf.predict(X)
167
-
168
- X["pred"] = X["pred_fixed_effect"] + X["random_effect"]
169
- X.to_csv("Test/merged_df_contri.csv", index=False)
170
- X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True)
171
-
172
- return X
173
-
174
- # target='Revenue'
175
-
176
- # is_panel=False
177
- # is_panel = st.session_state['is_panel']
178
- panel_col = [
179
- col.lower()
180
- .replace(".", "_")
181
- .replace("@", "_")
182
- .replace(" ", "_")
183
- .replace("-", "")
184
- .replace(":", "")
185
- .replace("__", "_")
186
- for col in st.session_state["bin_dict"]["Panel Level 1"]
187
- ][
188
- 0
189
- ] # set the panel column
190
- is_panel = True if len(panel_col) > 0 else False
191
- date_col = "date"
192
-
193
- # Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
194
- if (
195
- "used_response_metrics" in st.session_state
196
- and st.session_state["used_response_metrics"] != []
197
- ):
198
- sel_target_col = st.selectbox(
199
- "Select the response metric",
200
- st.session_state["used_response_metrics"],
201
- )
202
- target_col = (
203
- sel_target_col.lower()
204
- .replace(" ", "_")
205
- .replace("-", "")
206
- .replace(":", "")
207
- .replace("__", "_")
208
- )
209
- else:
210
- sel_target_col = "Total Approved Accounts - Revenue"
211
- target_col = "total_approved_accounts_revenue"
212
-
213
- target = sel_target_col
214
-
215
- # Sprint4 - Look through all saved tuned models, only show saved models of the sel resp metric (target_col)
216
- # saved_models = st.session_state['saved_model_names']
217
- # Sprint4 - get the model obj of the selected model
218
- # st.write(sel_model_dict)
219
-
220
- # Sprint3 - Contribution
221
- if is_panel:
222
- # read tuned mixedLM model
223
- # if st.session_state["tuned_model"] is not None :
224
- if st.session_state["is_tuned_model"][target_col] == True: # Sprint4
225
- with open(
226
- os.path.join(
227
- st.session_state["project_path"], "tuned_model.pkl"
228
- ),
229
- "rb",
230
- ) as file:
231
- model_dict = pickle.load(file)
232
- saved_models = list(model_dict.keys())
233
- # st.write(saved_models)
234
- required_saved_models = [
235
- m.split("__")[0]
236
- for m in saved_models
237
- if m.split("__")[1] == target_col
238
- ]
239
- sel_model = st.selectbox(
240
- "Select the model to review", required_saved_models
241
- )
242
- sel_model_dict = model_dict[sel_model + "__" + target_col]
243
-
244
- model = sel_model_dict["Model_object"]
245
- X_train = sel_model_dict["X_train_tuned"]
246
- X_test = sel_model_dict["X_test_tuned"]
247
- best_feature_set = sel_model_dict["feature_set"]
248
-
249
- else: # if non tuned model to be used # Pending
250
- with open(
251
- os.path.join(
252
- st.session_state["project_path"], "best_models.pkl"
253
- ),
254
- "rb",
255
- ) as file:
256
- model_dict = pickle.load(file)
257
- # st.write(model_dict)
258
- saved_models = list(model_dict.keys())
259
- required_saved_models = [
260
- m.split("__")[0]
261
- for m in saved_models
262
- if m.split("__")[1] == target_col
263
- ]
264
- sel_model = st.selectbox(
265
- "Select the model to review", required_saved_models
266
- )
267
- sel_model_dict = model_dict[sel_model + "__" + target_col]
268
- # st.write(sel_model, sel_model_dict)
269
- model = sel_model_dict["Model_object"]
270
- X_train = sel_model_dict["X_train"]
271
- X_test = sel_model_dict["X_test"]
272
- best_feature_set = sel_model_dict["feature_set"]
273
-
274
- # Calculate contributions
275
-
276
- with open(
277
- os.path.join(st.session_state["project_path"], "data_import.pkl"),
278
- "rb",
279
- ) as f:
280
- data = pickle.load(f)
281
-
282
- # Accessing the loaded objects
283
- st.session_state["orig_media_data"] = data["final_df"]
284
-
285
- st.session_state["orig_media_data"].columns = [
286
- col.lower()
287
- .replace(".", "_")
288
- .replace("@", "_")
289
- .replace(" ", "_")
290
- .replace("-", "")
291
- .replace(":", "")
292
- .replace("__", "_")
293
- for col in st.session_state["orig_media_data"].columns
294
- ]
295
-
296
- media_data = st.session_state["media_data"]
297
-
298
- # st.session_state['orig_media_data']=st.session_state["media_data"]
299
-
300
- # st.write(media_data)
301
-
302
- contri_df = pd.DataFrame()
303
-
304
- y = []
305
- y_pred = []
306
-
307
- random_eff_df = get_random_effects(media_data, panel_col, model)
308
- random_eff_df["fixed_effect"] = model.fe_params["Intercept"]
309
- random_eff_df["panel_effect"] = (
310
- random_eff_df["random_effect"] + random_eff_df["fixed_effect"]
311
- )
312
- # random_eff_df.to_csv("Test/random_eff_df_contri.csv", index=False)
313
-
314
- coef_df = pd.DataFrame(model.fe_params)
315
- coef_df.reset_index(inplace=True)
316
- coef_df.columns = ["feature", "coef"]
317
-
318
- # coef_df.reset_index().to_csv("Test/coef_df_contri1.csv",index=False)
319
- # print(model.fe_params)
320
-
321
- x_train_contribution = X_train.copy()
322
- x_test_contribution = X_test.copy()
323
-
324
- # preprocessing not needed since X_train is already preprocessed
325
- # X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col)
326
- # x_train_contribution[best_feature_set] = X1[best_feature_set]
327
- # x_test_contribution[best_feature_set] = X2[best_feature_set]
328
-
329
- x_train_contribution = mdf_predict(
330
- x_train_contribution, model, random_eff_df
331
- )
332
- x_test_contribution = mdf_predict(
333
- x_test_contribution, model, random_eff_df
334
- )
335
-
336
- x_train_contribution = pd.merge(
337
- x_train_contribution,
338
- random_eff_df[[panel_col, "panel_effect"]],
339
- on=panel_col,
340
- how="left",
341
- )
342
- x_test_contribution = pd.merge(
343
- x_test_contribution,
344
- random_eff_df[[panel_col, "panel_effect"]],
345
- on=panel_col,
346
- how="left",
347
- )
348
-
349
- for i in range(len(coef_df))[1:]:
350
- coef = coef_df.loc[i, "coef"]
351
- col = coef_df.loc[i, "feature"]
352
- x_train_contribution[str(col) + "_contr"] = (
353
- coef * x_train_contribution[col]
354
- )
355
- x_test_contribution[str(col) + "_contr"] = (
356
- coef * x_train_contribution[col]
357
- )
358
-
359
- x_train_contribution["sum_contributions"] = (
360
- x_train_contribution.filter(regex="contr").sum(axis=1)
361
- )
362
- x_train_contribution["sum_contributions"] = (
363
- x_train_contribution["sum_contributions"]
364
- + x_train_contribution["panel_effect"]
365
- )
366
-
367
- x_test_contribution["sum_contributions"] = x_test_contribution.filter(
368
- regex="contr"
369
- ).sum(axis=1)
370
- x_test_contribution["sum_contributions"] = (
371
- x_test_contribution["sum_contributions"]
372
- + x_test_contribution["panel_effect"]
373
- )
374
-
375
- # # test
376
- x_train_contribution.to_csv(
377
- "Test/x_train_contribution.csv", index=False
378
- )
379
- x_test_contribution.to_csv("Test/x_test_contribution.csv", index=False)
380
- #
381
- # st.session_state['orig_media_data'].to_csv("Test/transformed_data.csv",index=False)
382
- # st.session_state['X_test_spends'].to_csv("Test/test_spends.csv",index=False)
383
- # # st.write(st.session_state['orig_media_data'].columns)
384
-
385
- # st.write(date_col,panel_col)
386
- # st.write(x_test_contribution)
387
-
388
- overview_test_data_prep_panel(
389
- x_test_contribution,
390
- st.session_state["orig_media_data"],
391
- st.session_state["X_test_spends"],
392
- date_col,
393
- panel_col,
394
- target_col,
395
- )
396
-
397
- else: # NON PANEL
398
- if st.session_state["is_tuned_model"][target_col] == True: # Sprint4
399
- with open(
400
- os.path.join(
401
- st.session_state["project_path"], "tuned_model.pkl"
402
- ),
403
- "rb",
404
- ) as file:
405
- model_dict = pickle.load(file)
406
- saved_models = list(model_dict.keys())
407
- required_saved_models = [
408
- m.split("__")[0]
409
- for m in saved_models
410
- if m.split("__")[1] == target_col
411
- ]
412
- sel_model = st.selectbox(
413
- "Select the model to review", required_saved_models
414
- )
415
- sel_model_dict = model_dict[sel_model + "__" + target_col]
416
-
417
- model = sel_model_dict["Model_object"]
418
- X_train = sel_model_dict["X_train_tuned"]
419
- X_test = sel_model_dict["X_test_tuned"]
420
- best_feature_set = sel_model_dict["feature_set"]
421
-
422
- else: # Sprint4
423
- with open(
424
- os.path.join(
425
- st.session_state["project_path"], "best_models.pkl"
426
- ),
427
- "rb",
428
- ) as file:
429
- model_dict = pickle.load(file)
430
- saved_models = list(model_dict.keys())
431
- required_saved_models = [
432
- m.split("__")[0]
433
- for m in saved_models
434
- if m.split("__")[1] == target_col
435
- ]
436
- sel_model = st.selectbox(
437
- "Select the model to review", required_saved_models
438
- )
439
- sel_model_dict = model_dict[sel_model + "__" + target_col]
440
-
441
- model = sel_model_dict["Model_object"]
442
- X_train = sel_model_dict["X_train"]
443
- X_test = sel_model_dict["X_test"]
444
- best_feature_set = sel_model_dict["feature_set"]
445
-
446
- x_train_contribution = X_train.copy()
447
- x_test_contribution = X_test.copy()
448
-
449
- x_train_contribution["pred"] = model.predict(
450
- x_train_contribution[best_feature_set]
451
- )
452
- x_test_contribution["pred"] = model.predict(
453
- x_test_contribution[best_feature_set]
454
- )
455
-
456
- for num, i in enumerate(model.params.values):
457
- col = best_feature_set[num]
458
- x_train_contribution[col + "_contr"] = X_train[col] * i
459
- x_test_contribution[col + "_contr"] = X_test[col] * i
460
-
461
- x_test_contribution.to_csv(
462
- "Test/x_test_contribution_non_panel.csv", index=False
463
- )
464
- overview_test_data_prep_nonpanel(
465
- x_test_contribution,
466
- st.session_state["orig_media_data"].copy(),
467
- st.session_state["X_test_spends"].copy(),
468
- date_col,
469
- target_col,
470
- )
471
- # for k, v in st.session_sta
472
- # te.items():
473
-
474
- # if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
475
- # st.session_state[k] = v
476
-
477
- # authenticator = st.session_state.get('authenticator')
478
-
479
- # if authenticator is None:
480
- # authenticator = load_authenticator()
481
-
482
- # name, authentication_status, username = authenticator.login('Login', 'main')
483
- # auth_status = st.session_state['authentication_status']
484
-
485
- # if auth_status:
486
- # authenticator.logout('Logout', 'main')
487
-
488
- # is_state_initiaized = st.session_state.get('initialized',False)
489
- # if not is_state_initiaized:
490
-
491
- initialize_data(target_col)
492
- scenario = st.session_state["scenario"]
493
- raw_df = st.session_state["raw_df"]
494
- st.header("Overview of previous spends")
495
-
496
- # st.write(scenario.actual_total_spends)
497
- # st.write(scenario.actual_total_sales)
498
- columns = st.columns((1, 1, 3))
499
-
500
- with columns[0]:
501
- st.metric(
502
- label="Spends",
503
- value=format_numbers(float(scenario.actual_total_spends)),
504
- )
505
- ###print(f"##################### {scenario.actual_total_sales} ##################")
506
- with columns[1]:
507
- st.metric(
508
- label=target,
509
- value=format_numbers(
510
- float(scenario.actual_total_sales), include_indicator=False
511
- ),
512
- )
513
-
514
- actual_summary_df = create_channel_summary(scenario)
515
- actual_summary_df["Channel"] = actual_summary_df["Channel"].apply(
516
- channel_name_formating
517
- )
518
-
519
- columns = st.columns((2, 1))
520
- with columns[0]:
521
- with st.expander("Channel wise overview"):
522
- st.markdown(
523
- actual_summary_df.style.set_table_styles(
524
- [
525
- {
526
- "selector": "th",
527
- "props": [("background-color", "#11B6BD")],
528
- },
529
- {
530
- "selector": "tr:nth-child(even)",
531
- "props": [("background-color", "#11B6BD")],
532
- },
533
- ]
534
- ).to_html(),
535
- unsafe_allow_html=True,
536
- )
537
-
538
- st.markdown("<hr>", unsafe_allow_html=True)
539
- ##############################
540
-
541
- st.plotly_chart(
542
- create_contribution_pie(scenario), use_container_width=True
543
- )
544
- st.markdown("<hr>", unsafe_allow_html=True)
545
-
546
- ################################3
547
- st.plotly_chart(
548
- create_contribuion_stacked_plot(scenario), use_container_width=True
549
- )
550
- st.markdown("<hr>", unsafe_allow_html=True)
551
- #######################################
552
-
553
- selected_channel_name = st.selectbox(
554
- "Channel",
555
- st.session_state["channels_list"] + ["non media"],
556
- format_func=channel_name_formating,
557
- )
558
- selected_channel = scenario.channels.get(selected_channel_name, None)
559
-
560
- st.plotly_chart(
561
- create_channel_spends_sales_plot(selected_channel),
562
- use_container_width=True,
563
- )
564
-
565
- st.markdown("<hr>", unsafe_allow_html=True)
566
-
567
- if st.checkbox("Save this session", key="save"):
568
- project_dct_path = os.path.join(
569
- st.session_state["session_path"], "project_dct.pkl"
570
- )
571
- with open(project_dct_path, "wb") as f:
572
- pickle.dump(st.session_state["project_dct"], f)
573
- update_db("7_Current_Media_Performance.py")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/8_Build_Response_Curves.py DELETED
@@ -1,596 +0,0 @@
1
- import streamlit as st
2
- import plotly.express as px
3
- import numpy as np
4
- import plotly.graph_objects as go
5
- from utilities import (
6
- channel_name_formating,
7
- load_authenticator,
8
- initialize_data,
9
- fetch_actual_data,
10
- )
11
- from sklearn.metrics import r2_score
12
- from collections import OrderedDict
13
- from classes import class_from_dict, class_to_dict
14
- import pickle
15
- import json
16
- import sqlite3
17
- from utilities import update_db
18
-
19
- for k, v in st.session_state.items():
20
- if k not in ["logout", "login", "config"] and not k.startswith(
21
- "FormSubmitter"
22
- ):
23
- st.session_state[k] = v
24
-
25
-
26
- def s_curve(x, K, b, a, x0):
27
- return K / (1 + b * np.exp(-a * (x - x0)))
28
-
29
-
30
- def save_scenario(scenario_name):
31
- """
32
- Save the current scenario with the mentioned name in the session state
33
-
34
- Parameters
35
- ----------
36
- scenario_name
37
- Name of the scenario to be saved
38
- """
39
- if "saved_scenarios" not in st.session_state:
40
- st.session_state = OrderedDict()
41
-
42
- # st.session_state['saved_scenarios'][scenario_name] = st.session_state['scenario'].save()
43
- st.session_state["saved_scenarios"][scenario_name] = class_to_dict(
44
- st.session_state["scenario"]
45
- )
46
- st.session_state["scenario_input"] = ""
47
- print(type(st.session_state["saved_scenarios"]))
48
- with open("../saved_scenarios.pkl", "wb") as f:
49
- pickle.dump(st.session_state["saved_scenarios"], f)
50
-
51
-
52
- def reset_curve_parameters(
53
- metrics=None, panel=None, selected_channel_name=None
54
- ):
55
- del st.session_state["K"]
56
- del st.session_state["b"]
57
- del st.session_state["a"]
58
- del st.session_state["x0"]
59
-
60
- if (
61
- metrics is not None
62
- and panel is not None
63
- and selected_channel_name is not None
64
- ):
65
- if f"{metrics}#@{panel}#@{selected_channel_name}" in list(
66
- st.session_state["update_rcs"].keys()
67
- ):
68
- del st.session_state["update_rcs"][
69
- f"{metrics}#@{panel}#@{selected_channel_name}"
70
- ]
71
-
72
-
73
- def update_response_curve(
74
- K_updated,
75
- b_updated,
76
- a_updated,
77
- x0_updated,
78
- metrics=None,
79
- panel=None,
80
- selected_channel_name=None,
81
- ):
82
- print(
83
- "[DEBUG] update_response_curves: ",
84
- st.session_state["project_dct"]["scenario_planner"].keys(),
85
- )
86
- st.session_state["project_dct"]["scenario_planner"][unique_key].channels[
87
- selected_channel_name
88
- ].response_curve_params = {
89
- "K": st.session_state["K"],
90
- "b": st.session_state["b"],
91
- "a": st.session_state["a"],
92
- "x0": st.session_state["x0"],
93
- }
94
-
95
- # if (
96
- # metrics is not None
97
- # and panel is not None
98
- # and selected_channel_name is not None
99
- # ):
100
- # st.session_state["update_rcs"][
101
- # f"{metrics}#@{panel}#@{selected_channel_name}"
102
- # ] = {
103
- # "K": K_updated,
104
- # "b": b_updated,
105
- # "a": a_updated,
106
- # "x0": x0_updated,
107
- # }
108
-
109
- # st.session_state["scenario"].channels[
110
- # selected_channel_name
111
- # ].response_curve_params = {
112
- # "K": K_updated,
113
- # "b": b_updated,
114
- # "a": a_updated,
115
- # "x0": x0_updated,
116
- # }
117
-
118
-
119
- # authenticator = st.session_state.get('authenticator')
120
- # if authenticator is None:
121
- # authenticator = load_authenticator()
122
-
123
- # name, authentication_status, username = authenticator.login('Login', 'main')
124
- # auth_status = st.session_state.get('authentication_status')
125
-
126
- # if auth_status == True:
127
- # is_state_initiaized = st.session_state.get('initialized',False)
128
- # if not is_state_initiaized:
129
- # print("Scenario page state reloaded")
130
-
131
- import pandas as pd
132
-
133
-
134
- @st.cache_resource(show_spinner=False)
135
- def panel_fetch(file_selected):
136
- raw_data_mmm_df = pd.read_excel(file_selected, sheet_name="RAW DATA MMM")
137
-
138
- if "Panel" in raw_data_mmm_df.columns:
139
- panel = list(set(raw_data_mmm_df["Panel"]))
140
- else:
141
- raw_data_mmm_df = None
142
- panel = None
143
-
144
- return panel
145
-
146
-
147
- import glob
148
- import os
149
-
150
-
151
- def get_excel_names(directory):
152
- # Create a list to hold the final parts of the filenames
153
- last_portions = []
154
-
155
- # Patterns to match Excel files (.xlsx and .xls) that contain @#
156
- patterns = [
157
- os.path.join(directory, "*@#*.xlsx"),
158
- os.path.join(directory, "*@#*.xls"),
159
- ]
160
-
161
- # Process each pattern
162
- for pattern in patterns:
163
- files = glob.glob(pattern)
164
-
165
- # Extracting the last portion after @# for each file
166
- for file in files:
167
- base_name = os.path.basename(file)
168
- last_portion = base_name.split("@#")[-1]
169
- last_portion = last_portion.replace(".xlsx", "").replace(
170
- ".xls", ""
171
- ) # Removing extensions
172
- last_portions.append(last_portion)
173
-
174
- return last_portions
175
-
176
-
177
- def name_formating(channel_name):
178
- # Replace underscores with spaces
179
- name_mod = channel_name.replace("_", " ")
180
-
181
- # Capitalize the first letter of each word
182
- name_mod = name_mod.title()
183
-
184
- return name_mod
185
-
186
-
187
- def fetch_panel_data():
188
- print("DEBUG etch_panel_data: running... ")
189
- file_selected = f"./metrics_level_data/Overview_data_test_panel@#{st.session_state['response_metrics_selectbox']}.xlsx"
190
- panel_selected = st.session_state["panel_selected_selectbox"]
191
- print(panel_selected)
192
- if panel_selected == "Aggregated":
193
- (
194
- st.session_state["actual_input_df"],
195
- st.session_state["actual_contribution_df"],
196
- ) = fetch_actual_data(panel=panel_selected, target_file=file_selected)
197
- else:
198
- (
199
- st.session_state["actual_input_df"],
200
- st.session_state["actual_contribution_df"],
201
- ) = fetch_actual_data(panel=panel_selected, target_file=file_selected)
202
-
203
- unique_key = f"{st.session_state['response_metrics_selectbox']}-{st.session_state['panel_selected_selectbox']}"
204
- print("unique_key")
205
- if unique_key not in st.session_state["project_dct"]["scenario_planner"]:
206
- if panel_selected == "Aggregated":
207
- initialize_data(
208
- panel=panel_selected,
209
- target_file=file_selected,
210
- updated_rcs={},
211
- metrics=metrics_selected,
212
- )
213
- panel = None
214
- else:
215
- initialize_data(
216
- panel=panel_selected,
217
- target_file=file_selected,
218
- updated_rcs={},
219
- metrics=metrics_selected,
220
- )
221
- st.session_state["project_dct"]["scenario_planner"][unique_key] = (
222
- st.session_state["scenario"]
223
- )
224
- # print(
225
- # "DEBUG etch_panel_data: ",
226
- # st.session_state["project_dct"]["scenario_planner"][
227
- # unique_key
228
- # ].keys(),
229
- # )
230
-
231
- else:
232
- st.session_state["scenario"] = st.session_state["project_dct"][
233
- "scenario_planner"
234
- ][unique_key]
235
- st.session_state["rcs"] = {}
236
- st.session_state["powers"] = {}
237
-
238
- for channel_name, _channel in st.session_state["project_dct"][
239
- "scenario_planner"
240
- ][unique_key].channels.items():
241
- st.session_state["rcs"][
242
- channel_name
243
- ] = _channel.response_curve_params
244
- st.session_state["powers"][channel_name] = _channel.power
245
-
246
- if "K" in st.session_state:
247
- del st.session_state["K"]
248
-
249
- if "b" in st.session_state:
250
- del st.session_state["b"]
251
-
252
- if "a" in st.session_state:
253
- del st.session_state["a"]
254
-
255
- if "x0" in st.session_state:
256
- del st.session_state["x0"]
257
-
258
-
259
- if "project_dct" not in st.session_state:
260
- st.error("Please load a project from home")
261
- st.stop()
262
-
263
- database_file = r"DB\User.db"
264
-
265
- conn = sqlite3.connect(
266
- database_file, check_same_thread=False
267
- ) # connection with sql db
268
- c = conn.cursor()
269
-
270
- st.subheader("Build Response Curves")
271
-
272
-
273
- if "update_rcs" not in st.session_state:
274
- st.session_state["update_rcs"] = {}
275
-
276
- st.session_state["first_time"] = True
277
-
278
- col1, col2, col3 = st.columns([1, 1, 1])
279
-
280
- directory = "metrics_level_data"
281
- metrics_list = get_excel_names(directory)
282
-
283
-
284
- metrics_selected = col1.selectbox(
285
- "Response Metrics",
286
- metrics_list,
287
- on_change=fetch_panel_data,
288
- format_func=name_formating,
289
- key="response_metrics_selectbox",
290
- )
291
-
292
-
293
- file_selected = (
294
- f"./metrics_level_data/Overview_data_test_panel@#{metrics_selected}.xlsx"
295
- )
296
-
297
- panel_list = panel_fetch(file_selected)
298
- final_panel_list = ["Aggregated"] + panel_list
299
-
300
- panel_selected = col3.selectbox(
301
- "Panel",
302
- final_panel_list,
303
- on_change=fetch_panel_data,
304
- key="panel_selected_selectbox",
305
- )
306
-
307
-
308
- is_state_initiaized = st.session_state.get("initialized_rcs", False)
309
- print(is_state_initiaized)
310
- if not is_state_initiaized:
311
- print("DEBUG.....", "Here")
312
- fetch_panel_data()
313
- # if panel_selected == "Aggregated":
314
- # initialize_data(panel=panel_selected, target_file=file_selected)
315
- # panel = None
316
- # else:
317
- # initialize_data(panel=panel_selected, target_file=file_selected)
318
-
319
- st.session_state["initialized_rcs"] = True
320
-
321
- # channels_list = st.session_state["channels_list"]
322
- unique_key = f"{st.session_state['response_metrics_selectbox']}-{st.session_state['panel_selected_selectbox']}"
323
- chanel_list_final = list(
324
- st.session_state["project_dct"]["scenario_planner"][
325
- unique_key
326
- ].channels.keys()
327
- ) + ["Others"]
328
-
329
-
330
- selected_channel_name = col2.selectbox(
331
- "Channel",
332
- chanel_list_final,
333
- format_func=channel_name_formating,
334
- on_change=reset_curve_parameters,
335
- key="selected_channel_name_selectbox",
336
- )
337
-
338
-
339
- rcs = st.session_state["rcs"]
340
-
341
- if "K" not in st.session_state:
342
- st.session_state["K"] = rcs[selected_channel_name]["K"]
343
-
344
- if "b" not in st.session_state:
345
- st.session_state["b"] = rcs[selected_channel_name]["b"]
346
-
347
-
348
- if "a" not in st.session_state:
349
- st.session_state["a"] = rcs[selected_channel_name]["a"]
350
-
351
- if "x0" not in st.session_state:
352
- st.session_state["x0"] = rcs[selected_channel_name]["x0"]
353
-
354
-
355
- x = st.session_state["actual_input_df"][selected_channel_name].values
356
- y = st.session_state["actual_contribution_df"][selected_channel_name].values
357
-
358
-
359
- power = np.ceil(np.log(x.max()) / np.log(10)) - 3
360
-
361
- print(f"DEBUG BUILD RCS: {selected_channel_name}")
362
- print(f"DEBUG BUILD RCS: K : {st.session_state['K']}")
363
- print(f"DEBUG BUILD RCS: b : {st.session_state['b']}")
364
- print(f"DEBUG BUILD RCS: a : {st.session_state['a']}")
365
- print(f"DEBUG BUILD RCS: x0: {st.session_state['x0']}")
366
-
367
- # fig = px.scatter(x, s_curve(x/10**power,
368
- # st.session_state['K'],
369
- # st.session_state['b'],
370
- # st.session_state['a'],
371
- # st.session_state['x0']))
372
-
373
- x_plot = np.linspace(0, 5 * max(x), 50)
374
-
375
- fig = px.scatter(x=x, y=y)
376
- fig.add_trace(
377
- go.Scatter(
378
- x=x_plot,
379
- y=s_curve(
380
- x_plot / 10**power,
381
- st.session_state["K"],
382
- st.session_state["b"],
383
- st.session_state["a"],
384
- st.session_state["x0"],
385
- ),
386
- line=dict(color="red"),
387
- name="Modified",
388
- ),
389
- )
390
-
391
- fig.add_trace(
392
- go.Scatter(
393
- x=x_plot,
394
- y=s_curve(
395
- x_plot / 10**power,
396
- rcs[selected_channel_name]["K"],
397
- rcs[selected_channel_name]["b"],
398
- rcs[selected_channel_name]["a"],
399
- rcs[selected_channel_name]["x0"],
400
- ),
401
- line=dict(color="rgba(0, 255, 0, 0.4)"),
402
- name="Actual",
403
- ),
404
- )
405
-
406
- fig.update_layout(title_text="Response Curve", showlegend=True)
407
- fig.update_annotations(font_size=10)
408
- fig.update_xaxes(title="Spends")
409
- fig.update_yaxes(title="Revenue")
410
-
411
- st.plotly_chart(fig, use_container_width=True)
412
-
413
- r2 = r2_score(
414
- y,
415
- s_curve(
416
- x / 10**power,
417
- st.session_state["K"],
418
- st.session_state["b"],
419
- st.session_state["a"],
420
- st.session_state["x0"],
421
- ),
422
- )
423
-
424
- r2_actual = r2_score(
425
- y,
426
- s_curve(
427
- x / 10**power,
428
- rcs[selected_channel_name]["K"],
429
- rcs[selected_channel_name]["b"],
430
- rcs[selected_channel_name]["a"],
431
- rcs[selected_channel_name]["x0"],
432
- ),
433
- )
434
-
435
- columns = st.columns((1, 1, 2))
436
- with columns[0]:
437
- st.metric("R2 Modified", round(r2, 2))
438
- with columns[1]:
439
- st.metric("R2 Actual", round(r2_actual, 2))
440
-
441
-
442
- st.markdown("#### Set Parameters", unsafe_allow_html=True)
443
- columns = st.columns(4)
444
-
445
- if "updated_parms" not in st.session_state:
446
- st.session_state["updated_parms"] = {
447
- "K_updated": 0,
448
- "b_updated": 0,
449
- "a_updated": 0,
450
- "x0_updated": 0,
451
- }
452
-
453
- with columns[0]:
454
- st.session_state["updated_parms"]["K_updated"] = st.number_input(
455
- "K", key="K", format="%0.5f"
456
- )
457
- with columns[1]:
458
- st.session_state["updated_parms"]["b_updated"] = st.number_input(
459
- "b", key="b", format="%0.5f"
460
- )
461
- with columns[2]:
462
- st.session_state["updated_parms"]["a_updated"] = st.number_input(
463
- "a", key="a", step=0.0001, format="%0.5f"
464
- )
465
- with columns[3]:
466
- st.session_state["updated_parms"]["x0_updated"] = st.number_input(
467
- "x0", key="x0", format="%0.5f"
468
- )
469
-
470
- # st.session_state["project_dct"]["scenario_planner"]["K_number_input"] = (
471
- # st.session_state["updated_parms"]["K_updated"]
472
- # )
473
- # st.session_state["project_dct"]["scenario_planner"]["b_number_input"] = (
474
- # st.session_state["updated_parms"]["b_updated"]
475
- # )
476
- # st.session_state["project_dct"]["scenario_planner"]["a_number_input"] = (
477
- # st.session_state["updated_parms"]["a_updated"]
478
- # )
479
- # st.session_state["project_dct"]["scenario_planner"]["x0_number_input"] = (
480
- # st.session_state["updated_parms"]["x0_updated"]
481
- # )
482
-
483
- update_col, reset_col = st.columns([1, 1])
484
- if update_col.button(
485
- "Update Parameters",
486
- on_click=update_response_curve,
487
- args=(
488
- st.session_state["updated_parms"]["K_updated"],
489
- st.session_state["updated_parms"]["b_updated"],
490
- st.session_state["updated_parms"]["a_updated"],
491
- st.session_state["updated_parms"]["x0_updated"],
492
- metrics_selected,
493
- panel_selected,
494
- selected_channel_name,
495
- ),
496
- use_container_width=True,
497
- ):
498
- st.session_state["rcs"][selected_channel_name]["K"] = st.session_state[
499
- "updated_parms"
500
- ]["K_updated"]
501
- st.session_state["rcs"][selected_channel_name]["b"] = st.session_state[
502
- "updated_parms"
503
- ]["b_updated"]
504
- st.session_state["rcs"][selected_channel_name]["a"] = st.session_state[
505
- "updated_parms"
506
- ]["a_updated"]
507
- st.session_state["rcs"][selected_channel_name]["x0"] = st.session_state[
508
- "updated_parms"
509
- ]["x0_updated"]
510
-
511
- reset_col.button(
512
- "Reset Parameters",
513
- on_click=reset_curve_parameters,
514
- args=(metrics_selected, panel_selected, selected_channel_name),
515
- use_container_width=True,
516
- )
517
-
518
- st.divider()
519
- save_col, down_col = st.columns([1, 1])
520
-
521
-
522
- with save_col:
523
- file_name = st.text_input(
524
- "rcs download file name",
525
- key="file_name_input",
526
- placeholder="File name",
527
- label_visibility="collapsed",
528
- )
529
- down_col.download_button(
530
- label="Download response curves",
531
- data=json.dumps(rcs),
532
- file_name=f"{file_name}.json",
533
- mime="application/json",
534
- disabled=len(file_name) == 0,
535
- use_container_width=True,
536
- )
537
-
538
-
539
- def s_curve_derivative(x, K, b, a, x0):
540
- # Derivative of the S-curve function
541
- return (
542
- a
543
- * b
544
- * K
545
- * np.exp(-a * (x - x0))
546
- / ((1 + b * np.exp(-a * (x - x0))) ** 2)
547
- )
548
-
549
-
550
- # Parameters of the S-curve
551
- K = st.session_state["K"]
552
- b = st.session_state["b"]
553
- a = st.session_state["a"]
554
- x0 = st.session_state["x0"]
555
-
556
- # # Optimized spend value obtained from the tool
557
- # optimized_spend = st.number_input(
558
- # "value of x"
559
- # ) # Replace this with your optimized spend value
560
-
561
- # # Calculate the slope at the optimized spend value
562
- # slope_at_optimized_spend = s_curve_derivative(optimized_spend, K, b, a, x0)
563
-
564
- # st.write("Slope ", slope_at_optimized_spend)
565
-
566
-
567
- # Initialize a list to hold our rows
568
- rows = []
569
-
570
- # Iterate over the dictionary
571
- for key, value in st.session_state["update_rcs"].items():
572
- # Split the key into its components
573
- metrics, panel, channel_name = key.split("#@")
574
- # Create a new row with the components and the values
575
- row = {
576
- "Metrics": name_formating(metrics),
577
- "Panel": name_formating(panel),
578
- "Channel Name": channel_name,
579
- "K": value["K"],
580
- "b": value["b"],
581
- "a": value["a"],
582
- "x0": value["x0"],
583
- }
584
- # Append the row to our list
585
- rows.append(row)
586
-
587
- # Convert the list of rows into a DataFrame
588
- updated_parms_df = pd.DataFrame(rows)
589
-
590
- if len(list(st.session_state["update_rcs"].keys())) > 0:
591
- st.markdown("#### Updated Parameters", unsafe_allow_html=True)
592
- st.dataframe(updated_parms_df, hide_index=True)
593
- else:
594
- st.info("No parameters are updated")
595
-
596
- update_db("8_Build_Response_Curves.py")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/9_Scenario_Planner.py DELETED
@@ -1,1712 +0,0 @@
1
- import streamlit as st
2
- from numerize.numerize import numerize
3
- import numpy as np
4
- from functools import partial
5
- from collections import OrderedDict
6
- from plotly.subplots import make_subplots
7
- import plotly.graph_objects as go
8
- from utilities import (
9
- format_numbers,
10
- load_local_css,
11
- set_header,
12
- initialize_data,
13
- load_authenticator,
14
- send_email,
15
- channel_name_formating,
16
- )
17
- from classes import class_from_dict, class_to_dict
18
- import pickle
19
- import streamlit_authenticator as stauth
20
- import yaml
21
- from yaml import SafeLoader
22
- import re
23
- import pandas as pd
24
- import plotly.express as px
25
- import logging
26
- from utilities import update_db
27
- import sqlite3
28
-
29
-
30
- st.set_page_config(layout="wide")
31
- load_local_css("styles.css")
32
- set_header()
33
-
34
- for k, v in st.session_state.items():
35
- if k not in ["logout", "login", "config"] and not k.startswith(
36
- "FormSubmitter"
37
- ):
38
- st.session_state[k] = v
39
- # ======================================================== #
40
- # ======================= Functions ====================== #
41
- # ======================================================== #
42
-
43
-
44
- def optimize(key, status_placeholder):
45
- """
46
- Optimize the spends for the sales
47
- """
48
-
49
- channel_list = [
50
- key
51
- for key, value in st.session_state["optimization_channels"].items()
52
- if value
53
- ]
54
-
55
- if len(channel_list) > 0:
56
- scenario = st.session_state["scenario"]
57
- if key.lower() == "media spends":
58
- with status_placeholder:
59
- with st.spinner("Optimizing"):
60
- result = st.session_state["scenario"].optimize(
61
- st.session_state["total_spends_change"], channel_list
62
- )
63
- # elif key.lower() == "revenue":
64
- else:
65
- with status_placeholder:
66
- with st.spinner("Optimizing"):
67
-
68
- result = st.session_state["scenario"].optimize_spends(
69
- st.session_state["total_sales_change"], channel_list
70
- )
71
- for channel_name, modified_spends in result:
72
-
73
- st.session_state[channel_name] = numerize(
74
- modified_spends
75
- * scenario.channels[channel_name].conversion_rate,
76
- 1,
77
- )
78
- prev_spends = (
79
- st.session_state["scenario"]
80
- .channels[channel_name]
81
- .actual_total_spends
82
- )
83
- st.session_state[f"{channel_name}_change"] = round(
84
- 100 * (modified_spends - prev_spends) / prev_spends, 2
85
- )
86
-
87
-
88
- def save_scenario(scenario_name):
89
- """
90
- Save the current scenario with the mentioned name in the session state
91
-
92
- Parameters
93
- ----------
94
- scenario_name
95
- Name of the scenario to be saved
96
- """
97
- if "saved_scenarios" not in st.session_state:
98
- st.session_state = OrderedDict()
99
-
100
- # st.session_state['saved_scenarios'][scenario_name] = st.session_state['scenario'].save()
101
- st.session_state["saved_scenarios"][scenario_name] = class_to_dict(
102
- st.session_state["scenario"]
103
- )
104
- st.session_state["scenario_input"] = ""
105
- # print(type(st.session_state['saved_scenarios']))
106
- with open("../saved_scenarios.pkl", "wb") as f:
107
- pickle.dump(st.session_state["saved_scenarios"], f)
108
-
109
-
110
- def update_sales_abs_slider():
111
- actual_sales = st.session_state["scenario"].actual_total_sales
112
- if validate_input(st.session_state["total_sales_change_abs_slider"]):
113
- modified_sales = extract_number_for_string(
114
- st.session_state["total_sales_change_abs_slider"]
115
- )
116
- st.session_state["total_sales_change"] = round(
117
- ((modified_sales / actual_sales) - 1) * 100
118
- )
119
- st.session_state["total_sales_change_abs"] = numerize(
120
- modified_sales, 1
121
- )
122
-
123
- st.session_state["project_dct"]["scenario_planner"][
124
- "total_sales_change"
125
- ] = st.session_state.total_sales_change
126
-
127
-
128
- def update_sales_abs():
129
- actual_sales = st.session_state["scenario"].actual_total_sales
130
- if validate_input(st.session_state["total_sales_change_abs"]):
131
- modified_sales = extract_number_for_string(
132
- st.session_state["total_sales_change_abs"]
133
- )
134
- st.session_state["total_sales_change"] = round(
135
- ((modified_sales / actual_sales) - 1) * 100
136
- )
137
- st.session_state["total_sales_change_abs_slider"] = numerize(
138
- modified_sales, 1
139
- )
140
-
141
-
142
- def update_sales():
143
- # print("DEBUG: running update_sales")
144
- # st.session_state["project_dct"]["scenario_planner"][
145
- # "total_sales_change"
146
- # ] = st.session_state.total_sales_change
147
- # st.session_state["total_spends_change"] = st.session_state[
148
- # "total_sales_change"
149
- # ]
150
-
151
- st.session_state["total_sales_change_abs"] = numerize(
152
- (1 + st.session_state["total_sales_change"] / 100)
153
- * st.session_state["scenario"].actual_total_sales,
154
- 1,
155
- )
156
- st.session_state["total_sales_change_abs_slider"] = numerize(
157
- (1 + st.session_state["total_sales_change"] / 100)
158
- * st.session_state["scenario"].actual_total_sales,
159
- 1,
160
- )
161
- # update_spends()
162
-
163
-
164
- def update_all_spends_abs_slider():
165
- actual_spends = st.session_state["scenario"].actual_total_spends
166
- if validate_input(st.session_state["total_spends_change_abs_slider"]):
167
- modified_spends = extract_number_for_string(
168
- st.session_state["total_spends_change_abs_slider"]
169
- )
170
- st.session_state["total_spends_change"] = round(
171
- ((modified_spends / actual_spends) - 1) * 100
172
- )
173
- st.session_state["total_spends_change_abs"] = numerize(
174
- modified_spends, 1
175
- )
176
-
177
- st.session_state["project_dct"]["scenario_planner"][
178
- "total_spends_change"
179
- ] = st.session_state.total_spends_change
180
-
181
- update_all_spends()
182
-
183
-
184
- # def update_all_spends_abs_slider():
185
- # actual_spends = _scenario.actual_total_spends
186
- # if validate_input(st.session_state["total_spends_change_abs_slider"]):
187
- # print("#" * 100)
188
- # print(st.session_state["total_spends_change_abs_slider"])
189
- # print("#" * 100)
190
-
191
- # modified_spends = extract_number_for_string(
192
- # st.session_state["total_spends_change_abs_slider"]
193
- # )
194
- # st.session_state["total_spends_change"] = (
195
- # (modified_spends / actual_spends) - 1
196
- # ) * 100
197
- # st.session_state["total_spends_change_abs"] = st.session_state[
198
- # "total_spends_change_abs_slider"
199
- # ]
200
-
201
- # update_all_spends()
202
-
203
-
204
- def update_all_spends_abs():
205
- print("DEBUG: ", "inside update_all_spends_abs")
206
- # print(st.session_state["total_spends_change_abs_slider_options"])
207
-
208
- actual_spends = st.session_state["scenario"].actual_total_spends
209
- if validate_input(st.session_state["total_spends_change_abs"]):
210
- modified_spends = extract_number_for_string(
211
- st.session_state["total_spends_change_abs"]
212
- )
213
- st.session_state["total_spends_change"] = (
214
- (modified_spends / actual_spends) - 1
215
- ) * 100
216
- st.session_state["total_spends_change_abs_slider"] = numerize(
217
- extract_number_for_string(
218
- st.session_state["total_spends_change_abs"]
219
- ),
220
- 1,
221
- )
222
-
223
- st.session_state["project_dct"]["scenario_planner"][
224
- "total_spends_change"
225
- ] = st.session_state.total_spends_change
226
-
227
- # print(
228
- # "DEBUG UPDATE_ALL_SPENDS_ABS: ",
229
- # st.session_state["total_spends_change"],
230
- # )
231
- update_all_spends()
232
-
233
-
234
- def update_spends():
235
- print("update_spends")
236
- st.session_state["total_spends_change_abs"] = numerize(
237
- (1 + st.session_state["total_spends_change"] / 100)
238
- * st.session_state["scenario"].actual_total_spends,
239
- 1,
240
- )
241
- st.session_state["total_spends_change_abs_slider"] = numerize(
242
- (1 + st.session_state["total_spends_change"] / 100)
243
- * st.session_state["scenario"].actual_total_spends,
244
- 1,
245
- )
246
-
247
- st.session_state["project_dct"]["scenario_planner"][
248
- "total_spends_change"
249
- ] = st.session_state.total_spends_change
250
-
251
- update_all_spends()
252
-
253
-
254
- def update_all_spends():
255
- """
256
- Updates spends for all the channels with the given overall spends change
257
- """
258
- percent_change = st.session_state["total_spends_change"]
259
- print("runs update_all")
260
- for channel_name in list(
261
- st.session_state["project_dct"]["scenario_planner"][
262
- unique_key
263
- ].channels.keys()
264
- ):
265
- st.session_state[f"{channel_name}_percent"] = percent_change
266
- channel = st.session_state["scenario"].channels[channel_name]
267
- current_spends = channel.actual_total_spends
268
- modified_spends = (1 + percent_change / 100) * current_spends
269
- st.session_state["scenario"].update(channel_name, modified_spends)
270
- st.session_state[channel_name] = numerize(
271
- modified_spends * channel.conversion_rate, 1
272
- )
273
- st.session_state[f"{channel_name}_change"] = percent_change
274
-
275
-
276
- def extract_number_for_string(string_input):
277
- string_input = string_input.upper()
278
- if string_input.endswith("K"):
279
- return float(string_input[:-1]) * 10**3
280
- elif string_input.endswith("M"):
281
- return float(string_input[:-1]) * 10**6
282
- elif string_input.endswith("B"):
283
- return float(string_input[:-1]) * 10**9
284
-
285
-
286
- def validate_input(string_input):
287
- pattern = r"\d+\.?\d*[K|M|B]$"
288
- match = re.match(pattern, string_input)
289
- if match is None:
290
- return False
291
- return True
292
-
293
-
294
- def update_data_by_percent(channel_name):
295
- prev_spends = (
296
- st.session_state["scenario"].channels[channel_name].actual_total_spends
297
- * st.session_state["scenario"].channels[channel_name].conversion_rate
298
- )
299
- modified_spends = prev_spends * (
300
- 1 + st.session_state[f"{channel_name}_percent"] / 100
301
- )
302
-
303
- st.session_state[channel_name] = numerize(modified_spends, 1)
304
-
305
- st.session_state["scenario"].update(
306
- channel_name,
307
- modified_spends
308
- / st.session_state["scenario"].channels[channel_name].conversion_rate,
309
- )
310
-
311
-
312
- def update_data(channel_name):
313
- """
314
- Updates the spends for the given channel
315
- """
316
- print("tuns update_Data")
317
- if validate_input(st.session_state[channel_name]):
318
- modified_spends = extract_number_for_string(
319
- st.session_state[channel_name]
320
- )
321
-
322
- prev_spends = (
323
- st.session_state["scenario"]
324
- .channels[channel_name]
325
- .actual_total_spends
326
- * st.session_state["scenario"]
327
- .channels[channel_name]
328
- .conversion_rate
329
- )
330
- st.session_state[f"{channel_name}_percent"] = round(
331
- 100 * (modified_spends - prev_spends) / prev_spends, 2
332
- )
333
- st.session_state["scenario"].update(
334
- channel_name,
335
- modified_spends
336
- / st.session_state["scenario"]
337
- .channels[channel_name]
338
- .conversion_rate,
339
- )
340
- # st.session_state['scenario'].update(channel_name, modified_spends)
341
- # else:
342
- # try:
343
- # modified_spends = float(st.session_state[channel_name])
344
- # prev_spends = st.session_state['scenario'].channels[channel_name].actual_total_spends * st.session_state['scenario'].channels[channel_name].conversion_rate
345
- # st.session_state[f'{channel_name}_change'] = round(100*(modified_spends - prev_spends) / prev_spends,2)
346
- # st.session_state['scenario'].update(channel_name, modified_spends/st.session_state['scenario'].channels[channel_name].conversion_rate)
347
- # st.session_state[f'{channel_name}'] = numerize(modified_spends,1)
348
- # except ValueError:
349
- # st.write('Invalid input')
350
-
351
-
352
- def select_channel_for_optimization(channel_name):
353
- """
354
- Marks the given channel for optimization
355
- """
356
- st.session_state["optimization_channels"][channel_name] = st.session_state[
357
- f"{channel_name}_selected"
358
- ]
359
-
360
-
361
- def select_all_channels_for_optimization():
362
- """
363
- Marks all the channel for optimization
364
- """
365
- # print(
366
- # "DEBUG: select_all_channels_for_opt",
367
- # st.session_state["optimze_all_channels"],
368
- # )
369
-
370
- for channel_name in st.session_state["optimization_channels"].keys():
371
- st.session_state[f"{channel_name}_selected"] = st.session_state[
372
- "optimze_all_channels"
373
- ]
374
- st.session_state["optimization_channels"][channel_name] = (
375
- st.session_state["optimze_all_channels"]
376
- )
377
- from pprint import pprint
378
-
379
-
380
- def update_penalty():
381
- """
382
- Updates the penalty flag for sales calculation
383
- """
384
- st.session_state["scenario"].update_penalty(
385
- st.session_state["apply_penalty"]
386
- )
387
-
388
-
389
def reset_optimization():
    """Clear per-channel optimization selections and stale sales widgets.

    Registered as the ``on_change`` callback of the metric/panel selectors,
    so it must tolerate widget keys that have not been created yet in this
    session (the sales slider keys only exist after the sales controls have
    rendered at least once).
    """
    print("DEBUG: ", "Running reset_optimization")
    scenario = st.session_state["project_dct"]["scenario_planner"][unique_key]
    for channel_name in list(scenario.channels.keys()):
        st.session_state[f"{channel_name}_selected"] = False
    st.session_state["optimze_all_channels"] = False
    st.session_state["initialized"] = False
    # pop() instead of del: a bare del raises KeyError when the key was never
    # created (e.g. the very first selector change in a fresh session).
    for stale_key in (
        "total_sales_change_abs_slider",
        "total_sales_change_abs",
        "total_sales_change",
    ):
        st.session_state.pop(stale_key, None)
403
-
404
-
405
def reset_scenario():
    """Reset the scenario for the current metric/panel to saved defaults.

    Drops the cached scenario object and all optimization/widget session
    state so the next Streamlit rerun re-initializes everything from disk.
    """
    print("[DEBUG]: reset_scenario")
    scenario = st.session_state["project_dct"]["scenario_planner"][unique_key]
    for channel_name in list(scenario.channels.keys()):
        st.session_state[f"{channel_name}_selected"] = False
    st.session_state["optimze_all_channels"] = False
    st.session_state["initialized"] = False

    st.session_state.pop("optimization_channels", None)

    # Drop the cached scenario for this metric/panel pair so the page
    # rebuilds it from the data files on rerun.
    st.session_state["project_dct"]["scenario_planner"].pop(
        f"{st.session_state['metric_selected']}-{st.session_state['panel_selected']}",
        None,
    )
    # pop() instead of del: these widget keys may not exist yet on a fresh
    # session, and a bare del would raise KeyError mid-callback.
    for stale_key in (
        "total_sales_change_abs_slider",
        "total_sales_change_abs",
        "total_sales_change",
    ):
        st.session_state.pop(stale_key, None)
451
-
452
-
453
def format_number(num):
    """Compactly format a number: millions as "X.XXM", thousands as "XK".

    Values below 1,000 are shown with two decimals.  Negative values are
    handled by formatting the magnitude and re-applying the sign (the
    previous thresholds only matched non-negative input, so e.g.
    -2,500,000 rendered as "-2500000.00").
    """
    sign = "-" if num < 0 else ""
    magnitude = abs(num)
    if magnitude >= 1_000_000:
        return f"{sign}{magnitude / 1_000_000:.2f}M"
    elif magnitude >= 1_000:
        return f"{sign}{magnitude / 1_000:.0f}K"
    else:
        return f"{num:.2f}"
460
-
461
-
462
def summary_plot(data, x, y, title, text_column):
    """Horizontal bar chart of *data*, one colored bar group per channel.

    Args:
        data: DataFrame containing a "Channel_name" column plus the columns
            named by *x*, *y* and *text_column*.
        x, y: column names for the horizontal/vertical axes.
        title: chart title.
        text_column: column rendered as the bar labels.

    Returns the plotly Figure.

    NOTE(review): ``data[text_column]`` is coerced to numeric *after* the
    figure is built, which mutates the caller's DataFrame — confirm this
    side effect is intended.
    """
    fig = px.bar(
        data,
        x=x,
        y=y,
        orientation="h",
        title=title,
        text=text_column,
        color="Channel_name",
    )

    # Convert text_column to numeric values (mutates the input DataFrame).
    data[text_column] = pd.to_numeric(data[text_column], errors="coerce")

    # Update the format of the displayed text based on magnitude
    # (SI-style "%{text:.2s}" abbreviation, e.g. 1.2M).
    fig.update_traces(
        texttemplate="%{text:.2s}",
        textposition="outside",
        hovertemplate="%{x:.2s}",
    )

    fig.update_layout(
        xaxis_title=x, yaxis_title="Channel Name", showlegend=False
    )
    return fig
487
-
488
-
489
def s_curve(x, K, b, a, x0):
    """Generalized logistic response curve evaluated at spend ``x``.

    K is the saturation ceiling, a the growth rate, x0 the midpoint shift
    and b a shape coefficient.  Accepts scalars or numpy arrays.
    """
    decay = np.exp(-a * (x - x0))
    return K / (1 + b * decay)
491
-
492
-
493
def find_segment_value(x, roi, mroi):
    """Return ``(start, end, left, right)`` spend values bounding ROI segments.

    The "green" segment is where both ROI and marginal ROI exceed 1; *left*
    and *right* are its first and last spend values, both falling back to
    ``x[0]`` when the segment is empty.  *start*/*end* are the first/last
    spend samples overall.

    The original computed the identical ``np.where`` mask twice; it is now
    evaluated once (behavior unchanged).
    """
    start_value = x[0]
    end_value = x[len(x) - 1]

    # Condition for green region: Both MROI and ROI > 1
    green_condition = (roi > 1) & (mroi > 1)
    green_indices = np.where(green_condition)[0]

    left_value = x[green_indices[0]] if green_indices.size > 0 else x[0]
    right_value = x[green_indices[-1]] if green_indices.size > 0 else x[0]

    return start_value, end_value, left_value, right_value
506
-
507
-
508
def calculate_rgba(
    start_value, end_value, left_value, right_value, current_channel_spends
):
    """Map the current spend to an RGBA color along the ROI segments.

    Yellow fades 0.8 -> 0.2 across [start, left], green fades 0.8 -> 0.2
    across (left, right], red rises 0.2 -> 0.8 across (right, end]; spends
    outside [start, end] return a fixed grey.  Alpha is clamped to
    [0.2, 0.8].

    Fix: a zero-width segment (e.g. ``left_value == start_value``, which
    ``find_segment_value`` produces when the green region starts at the
    first sample) used to raise ZeroDivisionError; the relative position
    is now defined as 0 for empty segments.
    """

    def _relative(pos, lo, hi):
        # Guard zero-width segments that would otherwise divide by zero.
        span = hi - lo
        return (pos - lo) / span if span else 0.0

    # Determine the color and alpha based on which segment the point is in.
    if start_value <= current_channel_spends <= left_value:
        color = "yellow"
        # Alpha decreases from start to end of the segment.
        alpha = 0.8 - (
            0.6 * _relative(current_channel_spends, start_value, left_value)
        )
    elif left_value < current_channel_spends <= right_value:
        color = "green"
        alpha = 0.8 - (
            0.6 * _relative(current_channel_spends, left_value, right_value)
        )
    elif right_value < current_channel_spends <= end_value:
        color = "red"
        # Alpha increases from start to end of the segment.
        alpha = 0.2 + (
            0.6 * _relative(current_channel_spends, right_value, end_value)
        )
    else:
        # Default case, if the spends are outside the defined ranges
        return "rgba(136, 136, 136, 0.5)"  # Grey for values outside the range

    # Ensure alpha is within the intended range in case of any overshoot.
    alpha = max(0.2, min(alpha, 0.8))

    # RGB components per segment color.
    color_codes = {
        "yellow": "255, 255, 0",
        "green": "0, 128, 0",
        "red": "255, 0, 0",
    }

    return f"rgba({color_codes[color]}, {alpha})"
558
-
559
-
560
def debug_temp(x_test, power, K, b, a, x0):
    """Diagnostic helper: print how many spend samples fall in three bins.

    The bin edges (2524 and 3377) are hard-coded ad-hoc values used while
    debugging one specific channel; the curve parameters (power, K, b, a,
    x0) are accepted but unused.  Side effect only (prints); returns None.
    """
    print("*" * 100)
    # Calculate the count of bins
    count_lower_bin = sum(1 for x in x_test if x <= 2524)
    count_center_bin = sum(1 for x in x_test if x > 2524 and x <= 3377)
    count_ = sum(1 for x in x_test if x > 3377)

    print(
        f"""
        lower : {count_lower_bin}
        center : {count_center_bin}
        upper : {count_}
        """
    )
574
-
575
-
576
# @st.cache
def plot_response_curves():
    """Build a grid of per-channel response curves (spend vs. target metric).

    For every channel in ``channels_list`` this plots the fitted s-curve,
    marks the currently simulated (spend, sales) point with dashed guide
    lines, and shades the background into yellow/green/red regions derived
    from ROI and marginal ROI (see ``find_segment_value``).

    Reads ``channels_list``, ``target`` and ``st.session_state`` ("scenario",
    "rcs") from the enclosing scope.  Returns a plotly Figure.
    """
    cols = 4
    # Enough rows to fit all channels at 4 subplots per row.
    rows = (
        len(channels_list) // cols
        if len(channels_list) % cols == 0
        else len(channels_list) // cols + 1
    )
    rcs = st.session_state["rcs"]
    shapes = []
    fig = make_subplots(rows=rows, cols=cols, subplot_titles=channels_list)
    for i in range(0, len(channels_list)):
        col = channels_list[i]
        x_actual = st.session_state["scenario"].channels[col].actual_spends
        # x_modified = st.session_state["scenario"].channels[col].modified_spends

        # Scaling exponent used when the curve was fitted: spends are divided
        # by 10**power before the s-curve is applied.
        power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3

        # Fitted response-curve parameters for this channel.
        K = rcs[col]["K"]
        b = rcs[col]["b"]
        a = rcs[col]["a"]
        x0 = rcs[col]["x0"]

        # Sweep total spends from 0 to 5x the actual total, 50 samples.
        x_plot = np.linspace(0, 5 * x_actual.sum(), 50)

        x, y, marginal_roi = [], [], []
        # Distribute each total-spend sample across weeks proportionally to
        # the actual weekly spend pattern.
        for x_p in x_plot:
            x.append(x_p * x_actual / x_actual.sum())

        for index in range(len(x_plot)):
            y.append(s_curve(x[index] / 10**power, K, b, a, x0))

        # Marginal ROI of the logistic curve: a*y*(1 - y/K); eps guards K == 0.
        for index in range(len(x_plot)):
            marginal_roi.append(
                a
                * y[index]
                * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
            )

        # Collapse the weekly dimension; convert spends to display currency
        # via the channel's conversion rate.
        x = (
            np.sum(x, axis=1)
            * st.session_state["scenario"].channels[col].conversion_rate
        )
        y = np.sum(y, axis=1)
        marginal_roi = (
            np.average(marginal_roi, axis=1)
            / st.session_state["scenario"].channels[col].conversion_rate
        )

        roi = y / np.maximum(x, np.finfo(float).eps)

        # Response-curve trace with ROI / marginal ROI in the hover data.
        fig.add_trace(
            go.Scatter(
                x=x,
                y=y,
                name=col,
                customdata=np.stack((roi, marginal_roi), axis=-1),
                hovertemplate="Spend:%{x:$.2s}<br>Sale:%{y:$.2s}<br>ROI:%{customdata[0]:.3f}<br>MROI:%{customdata[1]:.3f}",
                line=dict(color="blue"),
            ),
            row=1 + (i) // cols,
            col=i % cols + 1,
        )

        # Currently simulated operating point for this channel.
        x_optimal = (
            st.session_state["scenario"].channels[col].modified_total_spends
            * st.session_state["scenario"].channels[col].conversion_rate
        )
        y_optimal = (
            st.session_state["scenario"].channels[col].modified_total_sales
        )

        # if col == "Paid_social_others":
        #     debug_temp(x_optimal * x_actual / x_actual.sum(), power, K, b, a, x0)

        fig.add_trace(
            go.Scatter(
                x=[x_optimal],
                y=[y_optimal],
                name=col,
                legendgroup=col,
                showlegend=False,
                marker=dict(color=["black"]),
            ),
            row=1 + (i) // cols,
            col=i % cols + 1,
        )

        # Dashed guide lines from each axis to the operating point.
        shapes.append(
            go.layout.Shape(
                type="line",
                x0=0,
                y0=y_optimal,
                x1=x_optimal,
                y1=y_optimal,
                line_width=1,
                line_dash="dash",
                line_color="black",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        shapes.append(
            go.layout.Shape(
                type="line",
                x0=x_optimal,
                y0=0,
                x1=x_optimal,
                y1=y_optimal,
                line_width=1,
                line_dash="dash",
                line_color="black",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        start_value, end_value, left_value, right_value = find_segment_value(
            x,
            roi,
            marginal_roi,
        )

        # Adding background colors
        y_max = y.max() * 1.3  # 30% extra space above the max

        # Yellow region
        shapes.append(
            go.layout.Shape(
                type="rect",
                x0=start_value,
                y0=0,
                x1=left_value,
                y1=y_max,
                line=dict(width=0),
                fillcolor="rgba(255, 255, 0, 0.3)",
                layer="below",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        # Green region
        shapes.append(
            go.layout.Shape(
                type="rect",
                x0=left_value,
                y0=0,
                x1=right_value,
                y1=y_max,
                line=dict(width=0),
                fillcolor="rgba(0, 255, 0, 0.3)",
                layer="below",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        # Red region
        shapes.append(
            go.layout.Shape(
                type="rect",
                x0=right_value,
                y0=0,
                x1=end_value,
                y1=y_max,
                line=dict(width=0),
                fillcolor="rgba(255, 0, 0, 0.3)",
                layer="below",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

    fig.update_layout(
        # height=1000,
        # width=1000,
        title_text=f"Response Curves (X: Spends Vs Y: {target})",
        showlegend=False,
        shapes=shapes,
    )
    fig.update_annotations(font_size=10)
    # fig.update_xaxes(title="Spends")
    # fig.update_yaxes(title=target)
    fig.update_yaxes(
        gridcolor="rgba(136, 136, 136, 0.5)", gridwidth=0.5, griddash="dash"
    )

    return fig
766
-
767
-
768
- # ======================================================== #
769
- # ==================== HTML Components =================== #
770
- # ======================================================== #
771
-
772
-
773
def generate_spending_header(heading):
    """Render a styled column header inside the spends table."""
    markup = f"""<h2 class="spends-header">{heading}</h2>"""
    return st.markdown(markup, unsafe_allow_html=True)
777
-
778
-
779
def save_checkpoint():
    """Persist the in-memory project dict to <project_path>/project_dct.pkl.

    Any failure is surfaced as a toast rather than crashing the page.
    """
    project_dct_path = os.path.join(
        st.session_state["project_path"], "project_dct.pkl"
    )

    try:
        # Serialize to memory first so an unpicklable object fails *before*
        # the file is opened for writing — this avoids truncating an
        # existing good checkpoint on disk.
        pickle.dumps(st.session_state["project_dct"])
        with open(project_dct_path, "wb") as f:
            pickle.dump(st.session_state["project_dct"], f)
    except Exception:
        # with warning_placeholder:
        st.toast("Unknown Issue, please reload the page.")
791
-
792
-
793
def reset_checkpoint():
    """Wipe all saved scenario-planner state and persist the empty dict."""
    st.session_state["project_dct"]["scenario_planner"] = {}
    save_checkpoint()
796
-
797
-
798
- # ======================================================== #
799
- # =================== Session variables ================== #
800
- # ======================================================== #
801
-
802
# ---- Authentication bootstrap -------------------------------------------
# Load credentials / cookie settings from config.yaml and render the login
# widget before any page content.
with open("config.yaml") as file:
    config = yaml.load(file, Loader=SafeLoader)
    st.session_state["config"] = config

authenticator = stauth.Authenticate(
    config["credentials"],
    config["cookie"]["name"],
    config["cookie"]["key"],
    config["cookie"]["expiry_days"],
    config["preauthorized"],
)
st.session_state["authenticator"] = authenticator
name, authentication_status, username = authenticator.login("Login", "main")
# The gated page body below keys off the session-state copy of the status.
auth_status = st.session_state.get("authentication_status")
816
-
817
- import os
818
- import glob
819
-
820
-
821
def get_excel_names(directory):
    """Collect the metric names encoded after "@#" in Excel filenames.

    Scans *directory* for ``*@#*.xlsx`` and ``*@#*.xls`` files and returns
    the portion of each base name following the last "@#", with the Excel
    extension stripped.  Returns an empty list when nothing matches.
    """
    metric_names = []

    # .xlsx files first, then .xls — same order the original patterns used.
    for extension in ("xlsx", "xls"):
        pattern = os.path.join(directory, f"*@#*.{extension}")
        for file_path in glob.glob(pattern):
            stem = os.path.basename(file_path).split("@#")[-1]
            stem = stem.replace(".xlsx", "").replace(".xls", "")
            metric_names.append(stem)

    return metric_names
845
-
846
-
847
def name_formating(channel_name):
    """Convert a snake_case identifier into a Title Case display name."""
    # Underscores become spaces, then each word is capitalized.
    return channel_name.replace("_", " ").title()
855
-
856
-
857
@st.cache_resource(show_spinner=False)
def panel_fetch(file_selected):
    """Return the distinct "Panel" values from the "RAW DATA MMM" sheet.

    Returns None when the sheet has no "Panel" column.  Cached per file
    path via st.cache_resource.  NOTE: the list is built from a set, so
    its order is not deterministic between runs.
    """
    raw_data_mmm_df = pd.read_excel(file_selected, sheet_name="RAW DATA MMM")

    if "Panel" in raw_data_mmm_df.columns:
        panel = list(set(raw_data_mmm_df["Panel"]))
    else:
        raw_data_mmm_df = None  # nothing else needed from the frame
        panel = None

    return panel
868
-
869
-
870
# ---- Authenticated page body --------------------------------------------
if auth_status is True:
    authenticator.logout("Logout", "main")

    # The whole page requires a loaded project; bail out early otherwise.
    if "project_dct" not in st.session_state:
        st.error("Please load a project from home")
        st.stop()

    # NOTE(review): Windows-style relative path; also `sqlite3` does not
    # appear among this file's visible imports — confirm it is in scope.
    database_file = r"DB\User.db"

    conn = sqlite3.connect(
        database_file, check_same_thread=False
    )  # connection with sql db
    c = conn.cursor()

    with st.sidebar:
        st.button("Save checkpoint", on_click=save_checkpoint)
        st.button("Reset Checkpoint", on_click=reset_checkpoint)

    warning_placeholder = st.empty()
    st.header("Scenario Planner")

    # st.subheader("Simulation")
    col1, col2 = st.columns([1, 1])

    # Get metric and panel from last saved state
    if "last_saved_metric" not in st.session_state:
        st.session_state["last_saved_metric"] = st.session_state[
            "project_dct"
        ]["scenario_planner"].get("metric_selected", 0)

    if "last_saved_panel" not in st.session_state:
        st.session_state["last_saved_panel"] = st.session_state["project_dct"][
            "scenario_planner"
        ].get("panel_selected", 0)

    # Response Metrics — one entry per "@#<metric>" Excel file on disk.
    directory = "metrics_level_data"
    metrics_list = get_excel_names(directory)
    metrics_selected = col1.selectbox(
        "Response Metrics",
        metrics_list,
        format_func=name_formating,
        index=st.session_state["last_saved_metric"],
        on_change=reset_optimization,
        key="metric_selected",
    )

    # Target (display name of the selected metric).
    target = name_formating(metrics_selected)

    file_selected = f"./metrics_level_data/Overview_data_test_panel@#{metrics_selected}.xlsx"
    st.session_state["file_selected"] = file_selected

    # Panel List
    panel_list = panel_fetch(file_selected)
    panel_list_final = ["Aggregated"] + panel_list

    # Panel Selected
    panel_selected = col2.selectbox(
        "Panel",
        panel_list_final,
        on_change=reset_optimization,
        key="panel_selected",
        index=st.session_state["last_saved_panel"],
    )

    # Cache key for the (metric, panel) pair used throughout this page.
    unique_key = f"{st.session_state['metric_selected']}-{st.session_state['panel_selected']}"

    # Optional user-tweaked response-curve parameters from another page.
    if "update_rcs" in st.session_state:
        updated_rcs = st.session_state["update_rcs"]
    else:
        updated_rcs = None
949
-
950
    # First visit for this (metric, panel): build the scenario from the data
    # files and cache it in the project dict.  Otherwise restore the cached
    # scenario and rebuild widget / response-curve session state from it.
    if unique_key not in st.session_state["project_dct"]["scenario_planner"]:
        if panel_selected == "Aggregated":
            initialize_data(
                panel=panel_selected,
                target_file=file_selected,
                updated_rcs=updated_rcs,
                metrics=metrics_selected,
            )
            panel = None
        else:
            initialize_data(
                panel=panel_selected,
                target_file=file_selected,
                updated_rcs=updated_rcs,
                metrics=metrics_selected,
            )
        st.session_state["project_dct"]["scenario_planner"][unique_key] = (
            st.session_state["scenario"]
        )

    else:
        st.session_state["scenario"] = st.session_state["project_dct"][
            "scenario_planner"
        ][unique_key]
        st.session_state["rcs"] = {}
        st.session_state["powers"] = {}
        if "optimization_channels" not in st.session_state:
            st.session_state["optimization_channels"] = {}

        # Seed the per-channel spend text inputs and curve parameters from
        # the restored scenario.
        for channel_name, _channel in st.session_state["project_dct"][
            "scenario_planner"
        ][unique_key].channels.items():
            st.session_state[channel_name] = numerize(
                _channel.modified_total_spends, 1
            )
            st.session_state["rcs"][
                channel_name
            ] = _channel.response_curve_params
            st.session_state["powers"][channel_name] = _channel.power
            if channel_name not in st.session_state["optimization_channels"]:
                st.session_state["optimization_channels"][channel_name] = False

    if "first_time" not in st.session_state:
        st.session_state["first_time"] = True
        st.session_state["first_run_scenario"] = True

    # Check if state is initialized (kept for reference; value unused below).
    is_state_initiaized = st.session_state.get("initialized", False)

    # Channels List
    channels_list = list(
        st.session_state["project_dct"]["scenario_planner"][
            unique_key
        ].channels.keys()
    )
1025
- )
1026
-
1027
- # ======================================================== #
1028
- # ========================== UI ========================== #
1029
- # ======================================================== #
1030
-
1031
    # ---- Header: Actual vs Simulated totals -----------------------------
    main_header = st.columns((2, 2))
    sub_header = st.columns((1, 1, 1, 1))
    # _scenario = st.session_state["scenario"]

    # Percentage change widgets are recomputed from the scenario each rerun;
    # the sales-side keys are only seeded when absent so user edits persist.
    st.session_state.total_spends_change = round(
        (
            st.session_state["scenario"].modified_total_spends
            / st.session_state["scenario"].actual_total_spends
            - 1
        )
        * 100
    )

    if "total_sales_change" not in st.session_state:
        st.session_state.total_sales_change = round(
            (
                st.session_state["scenario"].modified_total_sales
                / st.session_state["scenario"].actual_total_sales
                - 1
            )
            * 100
        )

    st.session_state["total_spends_change_abs"] = numerize(
        st.session_state["scenario"].modified_total_spends,
        1,
    )
    if "total_sales_change_abs" not in st.session_state:
        st.session_state["total_sales_change_abs"] = numerize(
            st.session_state["scenario"].modified_total_sales,
            1,
        )

    # if "total_spends_change_abs_slider" not in st.session_state:
    st.session_state.total_spends_change_abs_slider = numerize(
        st.session_state["scenario"].modified_total_spends, 1
    )

    if "total_sales_change_abs_slider" not in st.session_state:
        st.session_state.total_sales_change_abs_slider = numerize(
            st.session_state["scenario"].actual_total_sales, 1
        )

    # Validation flags toggled by the update_* callbacks.
    st.session_state["allow_sales_update"] = True

    st.session_state["allow_spends_update"] = True

    with main_header[0]:
        st.subheader("Actual")

    with main_header[-1]:
        st.subheader("Simulated")

    # Actual spends / sales on the left, simulated (with deltas) on the right.
    with sub_header[0]:
        st.metric(
            label="Spends",
            value=format_numbers(
                st.session_state["scenario"].actual_total_spends
            ),
        )

    with sub_header[1]:
        st.metric(
            label=target,
            value=format_numbers(
                float(st.session_state["scenario"].actual_total_sales),
                include_indicator=False,
            ),
        )

    with sub_header[2]:
        st.metric(
            label="Spends",
            value=format_numbers(
                st.session_state["scenario"].modified_total_spends
            ),
            delta=numerize(st.session_state["scenario"].delta_spends, 1),
        )

    with sub_header[3]:
        st.metric(
            label=target,
            value=format_numbers(
                float(st.session_state["scenario"].modified_total_sales),
                include_indicator=False,
            ),
            delta=numerize(st.session_state["scenario"].delta_sales, 1),
        )
1122
-
1123
    # ---- Channel Spends Simulator: global controls ----------------------
    with st.expander("Channel Spends Simulator", expanded=True):
        _columns1 = st.columns((2, 2, 1, 1))
        with _columns1[0]:
            # Choose whether optimization targets total spends or the metric.
            optimization_selection = st.selectbox(
                "Optimize",
                options=["Media Spends", target],
                key="optimization_key_value",
            )

        with _columns1[1]:
            st.markdown("#")
            st.checkbox(
                label="Optimize all Channels",
                key="optimze_all_channels",
                on_change=select_all_channels_for_optimization,
            )

        with _columns1[2]:
            st.markdown("#")
            # Placeholder so the Optimize button can be rendered later,
            # after status_placeholder exists (it is passed as an arg).
            optimize_placeholder = st.empty()

        with _columns1[3]:
            st.markdown("#")
            st.button(
                "Reset",
                on_click=reset_scenario,
                # args=(panel_selected, file_selected, updated_rcs),
                use_container_width=True,
            )

        # Absolute / percent / slider inputs for whichever quantity is being
        # optimized (spends vs. target metric).
        _columns2 = st.columns((2, 2, 2))
        if st.session_state["optimization_key_value"] == "Media Spends":

            with _columns2[0]:
                spend_input = st.text_input(
                    "Absolute",
                    key="total_spends_change_abs",
                    on_change=update_all_spends_abs,
                )

            with _columns2[1]:
                st.number_input(
                    "Percent Change",
                    key="total_spends_change",
                    min_value=-50,
                    max_value=50,
                    step=1,
                    on_change=update_spends,
                )

            with _columns2[2]:
                scenario = st.session_state["project_dct"]["scenario_planner"][
                    unique_key
                ]
                # Slider spans 50%–150% of actual spends in 10k steps.
                min_value = round(scenario.actual_total_spends * 0.5)
                max_value = round(scenario.actual_total_spends * 1.5)
                st.session_state["total_spends_change_abs_slider_options"] = [
                    numerize(value, 1)
                    for value in range(min_value, max_value + 1, int(1e4))
                ]

                st.select_slider(
                    "Absolute Slider",
                    options=st.session_state[
                        "total_spends_change_abs_slider_options"
                    ],
                    key="total_spends_change_abs_slider",
                    on_change=update_all_spends_abs_slider,
                )

        elif st.session_state["optimization_key_value"] == target:

            with _columns2[0]:
                sales_input = st.text_input(
                    "Absolute",
                    key="total_sales_change_abs",
                    on_change=update_sales_abs,
                )

            with _columns2[1]:
                st.number_input(
                    "Percent Change",
                    key="total_sales_change",
                    min_value=-50,
                    max_value=50,
                    step=1,
                    on_change=update_sales,
                )

            with _columns2[2]:
                # Slider spans 50%–150% of actual sales in 100k steps.
                min_value = round(
                    st.session_state["scenario"].actual_total_sales * 0.5
                )
                max_value = round(
                    st.session_state["scenario"].actual_total_sales * 1.5
                )
                st.session_state["total_sales_change_abs_slider_options"] = [
                    numerize(value, 1)
                    for value in range(min_value, max_value + 1, int(1e5))
                ]

                st.select_slider(
                    "Absolute Slider",
                    options=st.session_state[
                        "total_sales_change_abs_slider_options"
                    ],
                    key="total_sales_change_abs_slider",
                    on_change=update_sales_abs_slider,
                )

        # Surface validation failures from the update callbacks.
        if (
            not st.session_state["allow_sales_update"]
            and optimization_selection == target
        ):
            st.warning("Invalid Input")

        if (
            not st.session_state["allow_spends_update"]
            and optimization_selection == "Media Spends"
        ):
            st.warning("Invalid Input")

        status_placeholder = st.empty()

        optimize_placeholder.button(
            "Optimize",
            on_click=optimize,
            args=(
                st.session_state["optimization_key_value"],
                status_placeholder,
            ),
            use_container_width=True,
        )
1279
-
1280
        # ---- Spends table column headers --------------------------------
        st.markdown(
            """<hr class="spends-heading-seperator">""", unsafe_allow_html=True
        )
        _columns = st.columns((2.5, 2, 1.5, 1.5, 1))
        with _columns[0]:
            generate_spending_header("Channel")
        with _columns[1]:
            generate_spending_header("Spends Input")
        with _columns[2]:
            generate_spending_header("Spends")
        with _columns[3]:
            generate_spending_header(target)
        with _columns[4]:
            generate_spending_header("Optimize")

        st.markdown(
            """<hr class="spends-heading-seperator">""", unsafe_allow_html=True
        )
1298
-
1299
        # ---- Per-channel rows: inputs, metrics and ROI color bin --------
        # NOTE: key "acutual_predicted" is misspelled but used consistently
        # across the app — do not rename without updating all readers.
        if "acutual_predicted" not in st.session_state:
            st.session_state["acutual_predicted"] = {
                "Channel_name": [],
                "Actual_spend": [],
                "Optimized_spend": [],
                "Delta": [],
            }
        for i, channel_name in enumerate(channels_list):
            _channel_class = st.session_state["scenario"].channels[
                channel_name
            ]

            # Percent widget always mirrors the scenario state.
            st.session_state[f"{channel_name}_percent"] = round(
                (
                    _channel_class.modified_total_spends
                    / _channel_class.actual_total_spends
                    - 1
                )
                * 100
            )

            _columns = st.columns((2.5, 1.5, 1.5, 1.5, 1))
            with _columns[0]:
                st.write(channel_name_formating(channel_name))
                # Filled at the end of the iteration with the ROI color box.
                bin_placeholder = st.container()

            with _columns[1]:
                # Per-channel spend bounds (percent offsets around actual).
                channel_bounds = _channel_class.bounds
                channel_spends = float(_channel_class.actual_total_spends)
                min_value = float(
                    (1 + channel_bounds[0] / 100) * channel_spends
                )
                max_value = float(
                    (1 + channel_bounds[1] / 100) * channel_spends
                )
                spend_input = st.text_input(
                    channel_name,
                    key=channel_name,
                    label_visibility="collapsed",
                    on_change=partial(update_data, channel_name),
                )
                if not validate_input(spend_input):
                    st.error("Invalid input")

                channel_name_current = f"{channel_name}_change"

                st.number_input(
                    "Percent Change",
                    key=f"{channel_name}_percent",
                    step=1,
                    on_change=partial(update_data_by_percent, channel_name),
                )

            with _columns[2]:
                # Spends column: modified vs actual, in display currency.
                current_channel_spends = float(
                    _channel_class.modified_total_spends
                    * _channel_class.conversion_rate
                )
                actual_channel_spends = float(
                    _channel_class.actual_total_spends
                    * _channel_class.conversion_rate
                )
                spends_delta = float(
                    _channel_class.delta_spends
                    * _channel_class.conversion_rate
                )
                # Accumulate the rows used by the summary/download views.
                st.session_state["acutual_predicted"]["Channel_name"].append(
                    channel_name
                )
                st.session_state["acutual_predicted"]["Actual_spend"].append(
                    actual_channel_spends
                )
                st.session_state["acutual_predicted"][
                    "Optimized_spend"
                ].append(current_channel_spends)
                st.session_state["acutual_predicted"]["Delta"].append(
                    spends_delta
                )
                ## REMOVE
                st.metric(
                    "Spends",
                    format_numbers(current_channel_spends),
                    delta=numerize(spends_delta, 1),
                    label_visibility="collapsed",
                )

            with _columns[3]:
                # Target-metric column.
                current_channel_sales = float(
                    _channel_class.modified_total_sales
                )
                actual_channel_sales = float(_channel_class.actual_total_sales)
                sales_delta = float(_channel_class.delta_sales)
                st.metric(
                    target,
                    format_numbers(
                        current_channel_sales, include_indicator=False
                    ),
                    delta=numerize(sales_delta, 1),
                    label_visibility="collapsed",
                )

            with _columns[4]:

                st.checkbox(
                    label="select for optimization",
                    key=f"{channel_name}_selected",
                    value=False,
                    on_change=partial(
                        select_channel_for_optimization, channel_name
                    ),
                    label_visibility="collapsed",
                )

            st.markdown(
                """<hr class="spends-child-seperator">""",
                unsafe_allow_html=True,
            )

            # ---- ROI color bin for this channel -------------------------
            # Re-evaluate the response curve at 200 spend samples plus the
            # current spend point, then classify the current point into the
            # yellow/green/red ROI segments.
            col = channels_list[i]
            x_actual = st.session_state["scenario"].channels[col].actual_spends
            x_modified = (
                st.session_state["scenario"].channels[col].modified_spends
            )

            x_total = x_modified.sum()
            power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3

            # Prefer user-updated curve parameters when available.
            updated_rcs_key = (
                f"{metrics_selected}#@{panel_selected}#@{channel_name}"
            )

            if updated_rcs and updated_rcs_key in list(updated_rcs.keys()):
                K = updated_rcs[updated_rcs_key]["K"]
                b = updated_rcs[updated_rcs_key]["b"]
                a = updated_rcs[updated_rcs_key]["a"]
                x0 = updated_rcs[updated_rcs_key]["x0"]
            else:
                K = st.session_state["rcs"][col]["K"]
                b = st.session_state["rcs"][col]["b"]
                a = st.session_state["rcs"][col]["a"]
                x0 = st.session_state["rcs"][col]["x0"]

            x_plot = np.linspace(0, 5 * x_actual.sum(), 200)

            # Append current_channel_spends to the end of x_plot
            x_plot = np.append(x_plot, current_channel_spends)

            x, y, marginal_roi = [], [], []
            for x_p in x_plot:
                x.append(x_p * x_actual / x_actual.sum())

            for index in range(len(x_plot)):
                y.append(s_curve(x[index] / 10**power, K, b, a, x0))

            for index in range(len(x_plot)):
                marginal_roi.append(
                    a
                    * y[index]
                    * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
                )

            x = (
                np.sum(x, axis=1)
                * st.session_state["scenario"].channels[col].conversion_rate
            )
            y = np.sum(y, axis=1)
            marginal_roi = (
                np.average(marginal_roi, axis=1)
                / st.session_state["scenario"].channels[col].conversion_rate
            )

            roi = y / np.maximum(x, np.finfo(float).eps)

            # Last sample is the appended current-spend point.
            roi_current, marginal_roi_current = roi[-1], marginal_roi[-1]
            x, y, roi, marginal_roi = (
                x[:-1],
                y[:-1],
                roi[:-1],
                marginal_roi[:-1],
            )  # Drop data for current spends

            start_value, end_value, left_value, right_value = (
                find_segment_value(
                    x,
                    roi,
                    marginal_roi,
                )
            )

            rgba = calculate_rgba(
                start_value,
                end_value,
                left_value,
                right_value,
                current_channel_spends,
            )

            with bin_placeholder:
                st.markdown(
                    f"""
                    <div style="
                        border-radius: 12px;
                        background-color: {rgba};
                        padding: 10px;
                        text-align: center;
                        color: #006EC0;
                        ">
                        <p style="margin: 0; font-size: 20px;">ROI: {round(roi_current,1)}</p>
                        <p style="margin: 0; font-size: 20px;">Marginal ROI: {round(marginal_roi_current,1)}</p>
                    </div>
                    """,
                    unsafe_allow_html=True,
                )

        # Keep the cached copy under the legacy "scenario" key in sync.
        st.session_state["project_dct"]["scenario_planner"]["scenario"] = (
            st.session_state["scenario"]
        )
1529
-
1530
- with st.expander("See Response Curves", expanded=True):
1531
- fig = plot_response_curves()
1532
- st.plotly_chart(fig, use_container_width=True)
1533
-
1534
def update_optimization_bounds(channel_name, bound_type):
    """Sync one channel's optimization bound from its widget state.

    Streamlit ``on_change`` callback for the per-channel lower/upper bound
    number inputs. Reads the widget value out of ``st.session_state`` and
    writes it into ``bounds[0]`` (lower) or ``bounds[1]`` (upper) of the
    channel on the scenario stored under the enclosing-scope ``unique_key``.

    Args:
        channel_name: Key of the channel whose bound is being edited.
        bound_type: ``"lower"`` selects index 0; anything else selects
            index 1 (the upper bound), matching the widget key suffix.
    """
    if bound_type == "lower":
        index, update_key = 0, f"{channel_name}_b_lower"
    else:
        index, update_key = 1, f"{channel_name}_b_upper"
    # NOTE(review): `unique_key` comes from the enclosing scope — confirm
    # it is defined before this callback can fire.
    scenario = st.session_state["project_dct"]["scenario_planner"][unique_key]
    scenario.channels[channel_name].bounds[index] = st.session_state[update_key]
1544
-
1545
def update_optimization_bounds_all(bound_type):
    """Apply the "all channels" bound widget value to every channel.

    Streamlit ``on_change`` callback for the global lower/upper bound
    number inputs: copies the widget value into ``bounds[0]`` (lower) or
    ``bounds[1]`` (upper) of every channel in the scenario stored under
    the enclosing-scope ``unique_key``.

    Args:
        bound_type: ``"lower"`` selects index 0 and the ``all_b_lower``
            widget; anything else selects index 1 and ``all_b_upper``.
    """
    index = 0 if bound_type == "lower" else 1
    # Fixed: these were f-strings with no placeholders (needless f-prefix).
    update_key = "all_b_lower" if bound_type == "lower" else "all_b_upper"
    new_bound = st.session_state[update_key]
    # Iterate .values() directly — the channel name was never used.
    channels = st.session_state["project_dct"]["scenario_planner"][
        unique_key
    ].channels
    for _channel in channels.values():
        _channel.bounds[index] = new_bound
1555
-
1556
- with st.expander("Optimization setup"):
1557
- bounds_placeholder = st.container()
1558
- with bounds_placeholder:
1559
- st.subheader("Optimization Bounds")
1560
- with st.container():
1561
- bounds_columns = st.columns((1, 0.35, 0.35, 1))
1562
- with bounds_columns[0]:
1563
- st.write("##")
1564
- st.write("Update all channels")
1565
-
1566
- with bounds_columns[1]:
1567
- st.number_input(
1568
- "Lower",
1569
- min_value=-100,
1570
- max_value=500,
1571
- key=f"all_b_lower",
1572
- # label_visibility="hidden",
1573
- on_change=update_optimization_bounds_all,
1574
- args=("lower",),
1575
- step=5,
1576
- value=-10,
1577
- )
1578
-
1579
- with bounds_columns[2]:
1580
- st.number_input(
1581
- "Higher",
1582
- value=10,
1583
- min_value=-100,
1584
- max_value=500,
1585
- key=f"all_b_upper",
1586
- # label_visibility="hidden",
1587
- on_change=update_optimization_bounds_all,
1588
- args=("upper",),
1589
- step=5,
1590
- )
1591
- st.divider()
1592
-
1593
- st.write("#### Channel wise bounds")
1594
- # st.divider()
1595
- # bounds_columns = st.columns((1, 0.35, 0.35, 1))
1596
-
1597
- # with bounds_columns[0]:
1598
- # st.write("Channel")
1599
- # with bounds_columns[1]:
1600
- # st.write("Lower")
1601
- # with bounds_columns[2]:
1602
- # st.write("Upper")
1603
- # st.divider()
1604
-
1605
- for channel_name, _channel in st.session_state["project_dct"][
1606
- "scenario_planner"
1607
- ][unique_key].channels.items():
1608
- st.session_state[f"{channel_name}_b_lower"] = _channel.bounds[0]
1609
- st.session_state[f"{channel_name}_b_upper"] = _channel.bounds[1]
1610
- with bounds_placeholder:
1611
- with st.container():
1612
- bounds_columns = st.columns((1, 0.35, 0.35, 1))
1613
- with bounds_columns[0]:
1614
- st.write("##")
1615
- st.write(channel_name)
1616
- with bounds_columns[1]:
1617
- st.number_input(
1618
- "Lower",
1619
- min_value=-100,
1620
- max_value=500,
1621
- key=f"{channel_name}_b_lower",
1622
- label_visibility="hidden",
1623
- on_change=update_optimization_bounds,
1624
- args=(
1625
- channel_name,
1626
- "lower",
1627
- ),
1628
- )
1629
-
1630
- with bounds_columns[2]:
1631
- st.number_input(
1632
- "Higher",
1633
- min_value=-100,
1634
- max_value=500,
1635
- key=f"{channel_name}_b_upper",
1636
- label_visibility="hidden",
1637
- on_change=update_optimization_bounds,
1638
- args=(
1639
- channel_name,
1640
- "upper",
1641
- ),
1642
- )
1643
-
1644
- st.divider()
1645
- _columns = st.columns(2)
1646
- with _columns[0]:
1647
- st.subheader("Save Scenario")
1648
- scenario_name = st.text_input(
1649
- "Scenario name",
1650
- key="scenario_input",
1651
- placeholder="Scenario name",
1652
- label_visibility="collapsed",
1653
- )
1654
- st.button(
1655
- "Save",
1656
- on_click=lambda: save_scenario(scenario_name),
1657
- disabled=len(st.session_state["scenario_input"]) == 0,
1658
- )
1659
-
1660
- summary_df = pd.DataFrame(st.session_state["acutual_predicted"])
1661
- summary_df.drop_duplicates(
1662
- subset="Channel_name", keep="last", inplace=True
1663
- )
1664
-
1665
- summary_df_sorted = summary_df.sort_values(by="Delta", ascending=False)
1666
- summary_df_sorted["Delta_percent"] = np.round(
1667
- (
1668
- (
1669
- summary_df_sorted["Optimized_spend"]
1670
- / summary_df_sorted["Actual_spend"]
1671
- )
1672
- - 1
1673
- )
1674
- * 100,
1675
- 2,
1676
- )
1677
-
1678
- with open("summary_df.pkl", "wb") as f:
1679
- pickle.dump(summary_df_sorted, f)
1680
- # st.dataframe(summary_df_sorted)
1681
- # ___columns=st.columns(3)
1682
- # with ___columns[2]:
1683
- # fig=summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent')
1684
- # st.plotly_chart(fig,use_container_width=True)
1685
- # with ___columns[0]:
1686
- # fig=summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend')
1687
- # st.plotly_chart(fig,use_container_width=True)
1688
- # with ___columns[1]:
1689
- # fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend')
1690
- # st.plotly_chart(fig,use_container_width=True)
1691
-
1692
- elif auth_status == False:
1693
- st.error("Username/Password is incorrect")
1694
-
1695
- if auth_status != True:
1696
- try:
1697
- username_forgot_pw, email_forgot_password, random_password = (
1698
- authenticator.forgot_password("Forgot password")
1699
- )
1700
- if username_forgot_pw:
1701
- st.session_state["config"]["credentials"]["usernames"][
1702
- username_forgot_pw
1703
- ]["password"] = stauth.Hasher([random_password]).generate()[0]
1704
- send_email(email_forgot_password, random_password)
1705
- st.success("New password sent securely")
1706
- # Random password to be transferred to user securely
1707
- elif username_forgot_pw == False:
1708
- st.error("Username not found")
1709
- except Exception as e:
1710
- st.error(e)
1711
-
1712
- update_db("9_Scenario_Planner.py")