import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import pycaret
import streamlit as st
from streamlit_option_menu import option_menu
import PIL
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont

def main():
    st.set_page_config(layout="wide")

    hide_streamlit_style = """
                <style>
                #MainMenu {visibility: hidden;}
                footer {visibility: hidden;}
                </style>
                """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)

    with st.sidebar:
        image = Image.open('itaca_logo.png')
        st.image(image, width=150) #,use_column_width=True)
        page = option_menu(menu_title='Menu',
                        menu_icon="robot",
                        options=["Clustering Analysis",
                                    "Anomaly Detection"],
                        icons=["chat-dots",
                                "key"],
                        default_index=0
                        )

        # Additional section below the option menu
        # st.markdown("---")  # Add a separator line
        st.header("Settings")
        
        num_lines = st.text_input("% of lines to be processed:", value=100)
        graph_select = st.checkbox("Show Graphics", value= True)
        feat_imp_select = st.checkbox("Feature Importance", value= False)

        # Define the options for the dropdown list
        numclusters = [2, 3, 4, 5, 6]
        selected_clusters = st.slider("Choose a number of clusters", min_value=2, max_value=10, value=4)
        
        p_remove_multicollinearity = st.checkbox("Remove Multicollinearity", value=False)
        p_multicollinearity_threshold = st.slider("Choose multicollinearity thresholds", min_value=0.0, max_value=1.0, value=0.9)
        # p_remove_outliers = st.checkbox("Remove Outliers", value=False)
        # p_outliers_method = st.selectbox ("Choose an Outlier Method", ["iforest", "ee", "lof"])
        p_transformation = st.checkbox("Choose Power Transform", value = False)
        p_normalize = st.checkbox("Choose Normalize", value = False)
        p_pca = st.checkbox("Choose PCA", value = False)
        p_pca_method = st.selectbox ("Choose a PCA Method", ["linear", "kernel", "incremental"])

    st.title('ITACA Insurance Core AI Module')

    #col1, col2 = st.columns(2)

    if page == "Clustering Analysis":
        #with col1:
        st.header('Clustering Analysis')

        st.write(
            """
            """
        )
        # import pycaret unsupervised models
        from pycaret.clustering import setup, create_model, assign_model, pull, plot_model
        # import ClusteringExperiment
        from pycaret.clustering import ClusteringExperiment

        # Display the list of CSV files
        directory = "./"
        all_files = os.listdir(directory)
        # Filter files to only include CSV files
        csv_files = [file for file in all_files if file.endswith(".csv")]
        # Select a CSV file from the list
        selected_csv = st.selectbox("Select a CSV file from the list", ["None"] + csv_files)

        # Upload the CSV file
        uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
        
        # Define the unsupervised model
        clusteringmodel = ['kmeans', 'ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics', 'birch']
        selected_model = st.selectbox("Choose a clustering model", clusteringmodel)

        # Read and display the CSV file
        if selected_csv != "None" or uploaded_file is not None:
            if uploaded_file:
                try:
                    delimiter = ','
                    insurance_claims = pd.read_csv (uploaded_file, sep=delimiter)
                except ValueError:
                    delimiter = '|'
                    insurance_claims = pd.read_csv (uploaded_file, sep=delimiter, encoding='latin-1')
            else:
                insurance_claims = pd.read_csv(selected_csv)

            num_rows = int(insurance_claims.shape[0]*int(num_lines)/100)
            insurance_claims_reduced = insurance_claims.head(num_rows)
            st.write("Rows to be processed: " + str(num_rows))

            all_columns = insurance_claims_reduced.columns.tolist()
            selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
            insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
        
        with st.expander("Inference Description", expanded=True):
            insurance_claims_reduced.describe().T

        with st.expander("Head Map", expanded=True):
            cat_col = insurance_claims_reduced.select_dtypes(include=['object']).columns
            num_col = insurance_claims_reduced.select_dtypes(exclude=['object']).columns

            # insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
            # Calculate the correlation matrix
            corr_matrix = insurance_claims_reduced[num_col].corr()
            # Create a Matplotlib figure
            fig, ax = plt.subplots(figsize=(12, 8))
            # Create a heatmap using seaborn
            #st.header("Heat Map")
            sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
            # Set the title for the heatmap
            ax.set_title('Correlation Heatmap')
            # Display the heatmap in Streamlit
            st.pyplot(fig)

        if st.button("Prediction"):
            #insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
            
            s = setup(insurance_claims_reduced, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
                    # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method, 
                    transformation=p_transformation, 
                    normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
            exp_clustering = ClusteringExperiment()
            # init setup on exp
            exp_clustering.setup(insurance_claims_reduced, session_id = 123)

            with st.spinner("Analyzing..."):
                #with col2:
                #st.markdown("<br><br><br><br>", unsafe_allow_html=True)
                # train kmeans model
                cluster_model = create_model(selected_model, num_clusters = selected_clusters)

                cluster_model_2 = assign_model(cluster_model)
                # Calculate summary statistics for each cluster
                cluster_summary = cluster_model_2.groupby('Cluster').agg(['count', 'mean', 'median', 'min', 'max', 
                                                                            'std', 'var', 'sum', ('quantile_25', lambda x: x.quantile(0.25)), 
                                                                            ('quantile_75', lambda x: x.quantile(0.75)), 'skew'])
                
                with st.expander("Cluster Summary", expanded=False):
                    #st.header("Cluster Summary")
                    cluster_summary

                with st.expander("Model Assign", expanded=False):
                    #st.header("Assign Model")
                    cluster_model_2

                # all_metrics = get_metrics()
                # all_metrics

                with st.expander("Clustering Metrics", expanded=False):
                    #st.header("Clustering Metrics")
                    cluster_results = pull()
                    cluster_results

                with st.expander("Clustering Plots", expanded=False):
                    if graph_select:
                        #st.header("Clustering Plots")
                        # plot pca cluster plot 
                        plot_model(cluster_model, plot = 'cluster', display_format = 'streamlit')
                        
                        if selected_model != 'ap':
                            plot_model(cluster_model, plot = 'tsne', display_format = 'streamlit')
                        
                        if selected_model not in ('ap', 'meanshift', 'dbscan', 'optics'):
                            plot_model(cluster_model, plot = 'elbow', display_format = 'streamlit')
                        
                        if selected_model not in ('ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics'):
                            plot_model(cluster_model, plot = 'silhouette', display_format = 'streamlit')
                        
                        if selected_model not in ('ap', 'sc', 'hclust', 'dbscan', 'optics', 'birch'):
                            plot_model(cluster_model, plot = 'distance', display_format = 'streamlit')
                        
                        if selected_model != 'ap':
                            plot_model(cluster_model, plot = 'distribution', display_format = 'streamlit')  

                with st.expander("Feature Importance", expanded=False):
                    # Create a Classification Model to extract feature importance
                    if graph_select and feat_imp_select:
                        #st.header("Feature Importance")
                        from pycaret.classification import setup, create_model, get_config
                        s = setup(cluster_model_2, target = 'Cluster')
                        lr = create_model('lr')
                        
                        # this is how you can recreate the table
                        feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
                        # sort by feature importance value and filter top 10
                        feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
                        # Display the filtered table in Streamlit
                        # st.dataframe(feat_imp)
                        # Display the filtered table as a bar chart in Streamlit
                        st.bar_chart(feat_imp.set_index('Feature'))

    elif page == "Anomaly Detection":
        #with col1:
        st.header('Anomaly Detection')

        st.write(
            """
            """
        )

        # import pycaret anomaly
        from pycaret.anomaly import setup, create_model, assign_model, pull, plot_model
        # import AnomalyExperiment
        from pycaret.anomaly import AnomalyExperiment

        # Display the list of CSV files
        directory = "./"
        all_files = os.listdir(directory)
        # Filter files to only include CSV files
        csv_files = [file for file in all_files if file.endswith(".csv")]
        # Select a CSV file from the list
        selected_csv = st.selectbox("Select a CSV file from the list", ["None"] + csv_files)
        
        # Upload the CSV file
        uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

        # Define the unsupervised model
        anomalymodel = ['abod', 'cluster', 'cof', 'iforest', 'histogram', 'knn', 'lof', 'svm', 'pca', 'mcd', 'sod', 'sos']
        selected_model = st.selectbox("Choose an anomaly model", anomalymodel)

        # Read and display the CSV file
        if selected_csv != "None" or uploaded_file is not None:
            if uploaded_file:
                try:
                    delimiter = ','
                    insurance_claims = pd.read_csv (uploaded_file, sep=delimiter)
                except ValueError:
                    delimiter = '|'
                    insurance_claims = pd.read_csv (uploaded_file, sep=delimiter, encoding='latin-1')
            else:
                insurance_claims = pd.read_csv(selected_csv)

            num_rows = int(insurance_claims.shape[0]*int(num_lines)/100)
            insurance_claims_reduced = insurance_claims.head(num_rows)
            st.write("Rows to be processed: " + str(num_rows))

            all_columns = insurance_claims_reduced.columns.tolist()
            selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
            insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()

            with st.expander("Inference Description", expanded=True):
                insurance_claims_reduced.describe().T

            with st.expander("Head Map", expanded=True):
                cat_col = insurance_claims_reduced.select_dtypes(include=['object']).columns
                num_col = insurance_claims_reduced.select_dtypes(exclude=['object']).columns

                # insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
                # Calculate the correlation matrix
                corr_matrix = insurance_claims_reduced[num_col].corr()
                # Create a Matplotlib figure
                fig, ax = plt.subplots(figsize=(12, 8))
                # Create a heatmap using seaborn
                #st.header("Heat Map")
                sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
                # Set the title for the heatmap
                ax.set_title('Correlation Heatmap')
                # Display the heatmap in Streamlit
                st.pyplot(fig)
            
            if st.button("Prediction"):
            
                s = setup(insurance_claims_reduced, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
                            # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method, 
                            transformation=p_transformation, 
                            normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)

                exp_anomaly = AnomalyExperiment()
                # init setup on exp
                exp_anomaly.setup(insurance_claims_reduced, session_id = 123)
            
                with st.spinner("Analyzing..."):
                    #with col2:
                    #st.markdown("<br><br><br><br>", unsafe_allow_html=True)
                    # train model
                    anomaly_model = create_model(selected_model)

                    with st.expander("Assign Model", expanded=False):
                        #st.header("Assign Model")
                        anomaly_model_2 = assign_model(anomaly_model)
                        anomaly_model_2

                    with st.expander("Anomaly Metrics", expanded=False):
                        #st.header("Anomaly Metrics")
                        anomaly_results = pull()
                        anomaly_results

                    with st.expander("Anomaly Plots", expanded=False):
                        if graph_select:
                            # plot
                            #st.header("Anomaly Plots")
                            plot_model(anomaly_model, plot = 'tsne', display_format = 'streamlit')
                            plot_model(anomaly_model, plot = 'umap', display_format = 'streamlit')

                    with st.expander("Feature Importance", expanded=False):
                        if graph_select and feat_imp_select:
                            # Create a Classification Model to extract feature importance
                            #st.header("Feature Importance")
                            from pycaret.classification import setup, create_model, get_config
                            s = setup(anomaly_model_2, target = 'Anomaly')
                            lr = create_model('lr')
                            # this is how you can recreate the table
                            feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
                            # sort by feature importance value and filter top 10
                            feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
                            # Display the filtered table in Streamlit
                            # st.dataframe(feat_imp)
                            # Display the filtered table as a bar chart in Streamlit
                            st.bar_chart(feat_imp.set_index('Feature'))
try:
    main()
except Exception as e:
    st.sidebar.error(f"An error occurred: {e}")