# Source: Hugging Face Space dperales/Fraud_Detection_Pycaret
# Space status at capture time: Runtime error
# File size: 6,569 Bytes

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pycaret
import streamlit as st
from streamlit_option_menu import option_menu
import PIL
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont

# Inline stylesheet that hides the "#MainMenu" element and the page footer;
# it is injected as raw HTML, hence unsafe_allow_html below.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
st.markdown(
    body=hide_streamlit_style,
    unsafe_allow_html=True,
)

# Sidebar: logo on top, then the navigation menu whose selection (`page`)
# decides which section of the app is rendered further down.
with st.sidebar:
    logo = Image.open('./itaca_logo.png')
    st.image(logo, use_column_width=True)

    menu_entries = ["Clustering Analysis", "Anomaly Detection"]
    menu_entry_icons = ["chat-dots", "key"]
    page = option_menu(
        menu_title='Menu',
        menu_icon="robot",
        options=menu_entries,
        icons=menu_entry_icons,
        default_index=0,
    )

# Main-area title shown above whichever page is selected.
st.title('ITACA Insurance Core AI Module')

# Model ids offered in each page's dropdown; they are passed verbatim to the
# experiment's create_model().
_CLUSTERING_MODELS = ['kmeans', 'ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics', 'birch']
_ANOMALY_MODELS = ['abod', 'cluster', 'cof', 'iforest', 'histogram', 'knn', 'lof', 'svm', 'pca', 'mcd', 'sod', 'sos']

# Clustering plots in display order, each paired with the model ids for which
# the plot is skipped (presumably because those estimators do not support the
# plot -- confirm against the PyCaret plot_model documentation).
_CLUSTER_PLOT_SKIPS = (
    ('cluster', ()),
    ('tsne', ('ap',)),
    ('elbow', ('ap', 'meanshift', 'dbscan', 'optics')),
    ('silhouette', ('ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics')),
    ('distance', ('ap', 'sc', 'hclust', 'dbscan', 'optics', 'birch')),
    ('distribution', ('ap',)),
)


def _pick_data_source(directory="./"):
    """Render the two CSV inputs and return ``(selected_csv, uploaded_file)``.

    ``selected_csv`` is the name of a ``.csv`` file found in *directory*, or
    the string ``"None"`` when nothing was picked. ``uploaded_file`` is the
    value returned by ``st.file_uploader`` (``None`` when nothing is uploaded).
    """
    csv_files = [name for name in os.listdir(directory) if name.endswith(".csv")]
    selected_csv = st.selectbox("Select a CSV file from the list", ["None"] + csv_files)
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    return selected_csv, uploaded_file


def _read_claims_csv(uploaded_file, selected_csv, directory="./"):
    """Load the claims table, preferring the upload over the local file.

    An upload is first parsed as comma-separated; if pandas raises a
    ``ValueError`` (its parser and decoding errors derive from it), the upload
    is re-read as ``'|'``-separated latin-1 text.

    Returns:
        A ``pandas.DataFrame`` with the file contents.
    """
    if uploaded_file is not None:
        try:
            return pd.read_csv(uploaded_file, sep=',')
        except ValueError:
            # The failed attempt already advanced the upload's read position;
            # rewind so the fallback parse sees the file from its first byte.
            uploaded_file.seek(0)
            return pd.read_csv(uploaded_file, sep='|', encoding='latin-1')
    # Join with the listing directory so the read does not depend on it
    # happening to be the current working directory.
    return pd.read_csv(os.path.join(directory, selected_csv))


def _render_clustering_page():
    """Render the clustering page: inputs, then results once "Prediction" is clicked."""
    # Imported here so only the selected page pays for loading its PyCaret module.
    from pycaret.clustering import ClusteringExperiment

    st.header('Clustering Analysis')
    # Empty placeholder kept where the page's introductory text would go.
    st.write("\n        ")

    selected_csv, uploaded_file = _pick_data_source()
    selected_model = st.selectbox("Choose a clustering model", _CLUSTERING_MODELS)
    selected_clusters = st.slider("Choose a number of clusters", min_value=2, max_value=10, value=4)

    if selected_csv == "None" and uploaded_file is None:
        return  # no data chosen yet

    insurance_claims = _read_claims_csv(uploaded_file, selected_csv)

    if not st.button("Prediction"):
        return

    with st.spinner("Analyzing..."):
        # A single experiment, set up only when a prediction is requested,
        # instead of on every script rerun.
        exp = ClusteringExperiment()
        exp.setup(insurance_claims, session_id=123)

        cluster_model = exp.create_model(selected_model, num_clusters=selected_clusters)

        # Explicit st.write calls rather than bare-expression "magic" output.
        st.write(exp.assign_model(cluster_model))
        st.write(exp.get_metrics())
        st.write(exp.pull())

        for plot_name, skipped_models in _CLUSTER_PLOT_SKIPS:
            if selected_model not in skipped_models:
                exp.plot_model(cluster_model, plot=plot_name, display_format='streamlit')


def _render_anomaly_page():
    """Render the anomaly page: inputs, then results once "Prediction" is clicked."""
    # Imported here so only the selected page pays for loading its PyCaret module.
    from pycaret.anomaly import AnomalyExperiment

    st.header('Anomaly Detection')
    # Empty placeholder kept where the page's introductory text would go.
    st.write("\n        ")

    selected_csv, uploaded_file = _pick_data_source()
    selected_model = st.selectbox("Choose an anomaly model", _ANOMALY_MODELS)

    if selected_csv == "None" and uploaded_file is None:
        return  # no data chosen yet

    insurance_claims = _read_claims_csv(uploaded_file, selected_csv)

    if not st.button("Prediction"):
        return

    with st.spinner("Analyzing..."):
        # A single experiment, set up only when a prediction is requested,
        # instead of on every script rerun.
        exp = AnomalyExperiment()
        exp.setup(insurance_claims, session_id=123)

        anomaly_model = exp.create_model(selected_model)

        # Explicit st.write calls rather than bare-expression "magic" output.
        st.write(exp.assign_model(anomaly_model))
        st.write(exp.pull())

        exp.plot_model(anomaly_model, plot='tsne', display_format='streamlit')
        exp.plot_model(anomaly_model, plot='umap', display_format='streamlit')


# Dispatch on the sidebar selection.
if page == "Clustering Analysis":
    _render_clustering_page()
elif page == "Anomaly Detection":
    _render_anomaly_page()