Spaces:

KwameOO
/

Insurance_Claim_Prediction

Running

App Files Files Community

Insurance_Claim_Prediction / app.py

KwameOO

Update app.py

434dc4c over 1 year ago

raw

history blame contribute delete

8.62 kB

	# Load key libraries and modules
	import streamlit as st
	import os
	import pickle
	import numpy as np
	import pandas as pd
	from xgboost import XGBClassifier


	# Configure the page (title and initial sidebar state)
	st.set_page_config(
	    page_title="Insurance Prediction App",
	    initial_sidebar_state="auto",
	)

	# Main heading of the app
	st.title("Insurance Claim Prediction with Machine Learning")

	# ---- Importing and creating other key elements items
	# Function to import the Machine Learning toolkit
	@st.cache_resource()
	def load_ml_toolkit(relative_path):
	    """
	    Unpickle and return the Machine Learning toolkit stored at ``relative_path``.

	    Args:
	        relative_path (string): File path of the pickled ML toolkit.

	    Returns:
	        object: Whatever object the pickle file contains (the Machine Learning
	        items in this case).

	    Note:
	        ``pickle.load`` can execute arbitrary code, so only point this at
	        files that ship with the app.
	    """
	    with open(relative_path, "rb") as toolkit_file:
	        return pickle.load(toolkit_file)


	# Function to load the dataset
	@st.cache_data()
	def load_data(relative_path):
	    """
	    Read the CSV file at ``relative_path`` into a DataFrame.

	    The result is cached with ``st.cache_data``, Streamlit's cache for
	    serialisable data such as DataFrames: each caller receives its own copy,
	    so one session cannot mutate the frame seen by another.
	    ``st.cache_resource`` is intended for shared objects such as models and
	    would hand every caller the same mutable instance.

	    Args:
	        relative_path (string): The relative path to the CSV file to be loaded.

	    Returns:
	        DataFrame: The contents of the CSV file at the path provided.
	    """
	    return pd.read_csv(relative_path)

	# ----- Loading the key components
	# Function to load the XGBoost model
	@st.cache_resource()
	def _load_xgb_model(model_path):
	    """
	    Build an XGBClassifier and restore its state from the file at ``model_path``.

	    Cached with ``st.cache_resource`` so the model file is read once instead
	    of on every rerun of the script.

	    Args:
	        model_path (string): The relative path to the saved XGBoost model.

	    Returns:
	        XGBClassifier: The classifier with the saved model loaded into it.
	    """
	    classifier = XGBClassifier()
	    classifier.load_model(model_path)
	    return classifier


	# Loading the base dataframe
	rpath = r"data/train_data.csv"
	df_loaded = load_data(rpath)

	# Loading the toolkit
	loaded_toolkit = load_ml_toolkit(r"src/Streamlit_toolkit")

	# Start an empty list of prediction results the first time the session runs
	if "results" not in st.session_state:
	    st.session_state["results"] = []

	# Instantiating the preprocessing elements of the Machine Learning Toolkit
	scaler = loaded_toolkit["scaler"]
	encoder = loaded_toolkit["encoder"]

	# The model comes from its own JSON file rather than from the toolkit; a
	# lookup of loaded_toolkit["model"] here would be overwritten immediately.
	model = _load_xgb_model(r"src/xgb_model.json")


	# Define app sections: header, dataset preview, and inputs/outputs
	header, dataset, features_and_output = (st.container() for _ in range(3))

	# Instantiate a form to receive inputs
	form = st.form(key="Information", clear_on_submit=True)

	# Structure the header section: introduction text followed by a divider
	with header:
	    st.write("This app is built of a machine learning model to predict the whether or not a building will have an insurance claim over a period based on given variables for which you will make inputs (see the input section below). The model was trained based on the DSN Insurance Prediction dataset.")
	    st.write("---")

	# Structure the sidebar: a heading plus a description of every dataset column
	with st.sidebar:
	    st.header("Information on Columns")
	    st.markdown("""
	- Customer Id: Identification number for the Policy holder
	- YearOfObservation: year of observation for the insured policy
	- Insured_Period: duration of insurance policy in Olusola Insurance. (Ex: Full year insurance, Policy Duration = 1; 6 months = 0.5
	- Residential: is the building a residential building or not
	- Building_Painted: is the building painted or not (N-Painted, V-Not Painted)
	- Building_Fenced: is the building fence or not (N-Fenced, V-Not Fenced)
	- Garden: building has garden or not (V-has garden; O-no garden)
	- Settlement: Area where the building is located. (R- rural area; U- urban area)
	- Building Dimension: Size of the insured building in m2
	- Building_Type: The type of building (Type 1, 2, 3, 4)
	- Date_of_Occupancy: date building was first occupied
	- NumberOfWindows: number of windows in the building
	- Geo Code: Geographical Code of the Insured building
	- Claim: target variable. (0: no claim, 1: at least one claim over insured period).
	""")

	# Structure the dataset section: optional preview of the first rows, then a divider
	# NOTE(review): the original indentation was lost; the sidebar hint is assumed
	# to belong to the preview branch - confirm against the deployed file.
	with dataset:
	    show_preview = st.checkbox("Preview the dataset")
	    if show_preview:
	        st.write(df_loaded.head())
	        st.write("Take a look at the sidebar for more information on the columns")
	    st.write("---")

	# Every variable the app collects, in the order used for the input frame
	expected_inputs = [
	    "YearOfObservation",
	    "Insured_Period",
	    "Residential",
	    "Building_Painted",
	    "Building_Fenced",
	    "Garden",
	    "Settlement",
	    "Building Dimension",
	    "Building_Type",
	    "Date_of_Occupancy",
	    "NumberOfWindows",
	]

	# Features that go through the encoder
	categoricals = [
	    "Building_Painted",
	    "Building_Fenced",
	    "Garden",
	    "Settlement",
	]

	# Features that go through the scaler: every expected input that is not categorical
	cols_to_scale = [column for column in expected_inputs if column not in categoricals]


	# Structure the features and output section
	with features_and_output:
	    features_and_output.subheader("Inputs")
	    features_and_output.write("This section captures the inputs to be used in predictions...")

	# Design the input section.
	# The columns are created on the form itself. A widget added through a
	# container's own method belongs to that container, so columns taken from
	# `features_and_output` leave every input outside the form even inside a
	# `with form:` block: each change reruns the script and `clear_on_submit`
	# never resets the inputs.
	left_col, right_col = form.columns(2)

	# First set of inputs (the closing ** makes the label render in bold)
	left_col.markdown("**Inputs Set 1:**")
	YearOfObservation = left_col.number_input("Select a year:", min_value=2012, step=1)
	Insured_Period = left_col.selectbox("Insured Period (Full year or half-year):", options=[0.5, 1])
	Residential = left_col.radio("Is the building residential (1) or not (0):", options=[0, 1], horizontal=True)
	Building_Painted = left_col.selectbox("Is the building painted (N) or not(V):", options=["N", "V"])
	Building_Fenced = left_col.selectbox("Is the building fenced (N) or not(V):", options=["N", "V"])

	# Second set of inputs
	right_col.markdown("Inputs Set 2")
	Garden = right_col.radio("Does the building have a garden (V) or not (O):", options=["V", "O"], horizontal=True)
	Settlement = right_col.radio("Is the building situated in a rural (R) or urban (U) area?:", options=["R", "U"], horizontal=True)
	Building_Dimension = right_col.number_input("What is the size of the insured building (m2)?", min_value=1, value=1)
	Building_Type = right_col.selectbox("What type of building is it?", options=[1, 2, 3, 4])
	Date_of_Occupancy = right_col.number_input("On what date was the building first occupied?", min_value=1545, value=1970)
	NumberOfWindows = right_col.select_slider("How many windows does the building have?", options=range(1, 11))

	# Submit button
	submitted = form.form_submit_button(label="Submit")

	# Process inputs from user
	# NOTE(review): the original indentation was lost; everything below is assumed
	# to run inside `if submitted:` / `with features_and_output:` (`display` and
	# `result` only exist after a submission) - confirm against the deployed file.
	if submitted:
	    with features_and_output:
	        # Inputs formatting: one single-element list per feature, i.e. one row
	        input_dict = {
	            "YearOfObservation": [YearOfObservation],
	            "Insured_Period": [Insured_Period],
	            "Residential": [Residential],
	            "Building_Painted": [Building_Painted],
	            "Building_Fenced": [Building_Fenced],
	            "Garden": [Garden],
	            "Settlement": [Settlement],
	            "Building Dimension": [Building_Dimension],
	            "Building_Type": [Building_Type],
	            "Date_of_Occupancy": [Date_of_Occupancy],
	            "NumberOfWindows": [NumberOfWindows],
	        }

	        # Converting the input into a dataframe
	        input_data = pd.DataFrame.from_dict(input_dict)

	        # Encode the categorical columns, naming the new columns after the encoder's output features
	        encoded_test_categoricals = pd.DataFrame(
	            encoder.transform(input_data[categoricals]),
	            columns=encoder.get_feature_names_out().tolist(),
	        )

	        # Add the encoded categoricals to the DataFrame and drop the original columns
	        input_data = input_data.join(encoded_test_categoricals).drop(columns=categoricals)

	        # Scale the numeric columns
	        input_data[cols_to_scale] = scaler.transform(input_data[cols_to_scale])

	        # Make the prediction and keep it next to the features it was made from
	        model_output = model.predict(input_data)
	        input_data["Prediction"] = model_output

	        if model_output[0] == 0:
	            display = "The building does not have a claim over the insured period."
	        else:
	            display = "The building has a claim over the insured period."

	        # Adding the prediction to previous predictions. Every stored frame has
	        # the single index label 0, so the index is rebuilt on concatenation to
	        # give each prediction its own row number.
	        st.session_state["results"].append(input_data)
	        result = pd.concat(st.session_state["results"], ignore_index=True)

	        # Displaying prediction results
	        st.success(f"Prediction: {display}")

	        # Expander to display previous predictions
	        previous_output = st.expander("Review previous predictions")
	        previous_output.dataframe(result, use_container_width=True)