Update app.py
Browse files
app.py
CHANGED
@@ -18,13 +18,31 @@ rf_model_path = hf_hub_download(repo_id="tajuarAkash/Health_Insurance_Fraud_dete
|
|
18 |
rf_model = joblib.load(rf_model_path)
|
19 |
|
20 |
# Preprocessing for the user inputs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
# Preprocessing for the user inputs
|
22 |
def preprocess_input(input_data, method="ml"):
|
23 |
if method == "ml":
|
24 |
# For Random Forest prediction, apply necessary transformations like scaling or encoding.
|
25 |
-
|
26 |
# Convert ClaimDate to ordinal (number of days since a particular date)
|
27 |
-
input_data['ClaimDate'] = pd.to_datetime(input_data['ClaimDate']).
|
28 |
|
29 |
# Wrap each feature value in a list to create a valid DataFrame
|
30 |
input_df = pd.DataFrame({
|
@@ -51,6 +69,7 @@ def preprocess_input(input_data, method="ml"):
|
|
51 |
input_scaled = scaler.fit_transform(input_df) # Scaling the data
|
52 |
|
53 |
return input_scaled
|
|
|
54 |
elif method == "nlp":
|
55 |
# For NLP-based prediction, concatenate features into a single paragraph
|
56 |
claim_date = input_data['ClaimDate']
|
@@ -80,6 +99,77 @@ def preprocess_input(input_data, method="ml"):
|
|
80 |
return inputs
|
81 |
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
|
85 |
# Title and description for the app
|
|
|
18 |
rf_model = joblib.load(rf_model_path)
|
19 |
|
20 |
# Preprocessing for the user inputs
|
21 |
+
# Preprocessing for the user inputs
|
22 |
+
import streamlit as st
|
23 |
+
import joblib
|
24 |
+
import numpy as np
|
25 |
+
import pandas as pd
|
26 |
+
from sklearn.preprocessing import StandardScaler
|
27 |
+
from huggingface_hub import hf_hub_download
|
28 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
29 |
+
import torch
|
30 |
+
|
31 |
+
# --- NLP model -------------------------------------------------------------
# Sequence-classification model and its matching tokenizer, both resolved
# from the same Hugging Face repository name (swap in your own model path).
nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_NLP"
nlp_tokenizer = AutoTokenizer.from_pretrained(nlp_model_name)
nlp_model = AutoModelForSequenceClassification.from_pretrained(nlp_model_name)

# --- ML model --------------------------------------------------------------
# The Random Forest is stored as a joblib artifact in a Hugging Face repo:
# download the file first, then deserialize it.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point this at a repository you trust.
rf_model_path = hf_hub_download(
    repo_id="tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest",
    filename="random_forest_model.joblib",
)
rf_model = joblib.load(rf_model_path)
|
39 |
+
|
40 |
# Preprocessing for the user inputs
|
41 |
def preprocess_input(input_data, method="ml"):
|
42 |
if method == "ml":
|
43 |
# For Random Forest prediction, apply necessary transformations like scaling or encoding.
|
|
|
44 |
# Convert ClaimDate to ordinal (number of days since a particular date)
|
45 |
+
input_data['ClaimDate'] = pd.to_datetime(input_data['ClaimDate']).dt.toordinal()
|
46 |
|
47 |
# Wrap each feature value in a list to create a valid DataFrame
|
48 |
input_df = pd.DataFrame({
|
|
|
69 |
input_scaled = scaler.fit_transform(input_df) # Scaling the data
|
70 |
|
71 |
return input_scaled
|
72 |
+
|
73 |
elif method == "nlp":
|
74 |
# For NLP-based prediction, concatenate features into a single paragraph
|
75 |
claim_date = input_data['ClaimDate']
|
|
|
99 |
return inputs
|
100 |
|
101 |
|
102 |
+
# ---------------------------------------------------------------------------
# Streamlit front end: collect the claim details from the user, then run the
# selected model (Random Forest or NLP classifier) when "Predict" is pressed.
# ---------------------------------------------------------------------------

# Title and description for the app
st.title("Insurance Claim Fraud Detection")
st.write("""
This app predicts whether an insurance claim is fraudulent or legitimate based on user input.
You can choose between **ML-based prediction** or **NLP-based prediction**.
""")

# Radio buttons to select the prediction method
prediction_method = st.radio(
    "Choose Prediction Method",
    ("ML Prediction", "NLP Prediction"),
)

# Input fields for the user (these should match your model features).
# ClaimLegitimacy is deliberately not collected: it is the target to predict.
claim_date = st.date_input("Enter the claim date")
claim_amount = st.number_input("Enter the claim amount", min_value=0)
patient_age = st.number_input("Enter the patient's age", min_value=0)
patient_income = st.number_input("Enter the patient's income", min_value=0)
patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"])
provider_specialty = st.text_input("Enter the provider specialty")
claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"])
patient_marital_status = st.text_input("Enter the marital status")
patient_employment_status = st.text_input("Enter the employment status")
provider_location = st.text_input("Enter the provider location")
claim_type = st.text_input("Enter the claim type")
claim_submission_method = st.text_input("Enter the claim submission method")

# Run a prediction only when the user presses the button
if st.button('Predict'):
    # Keys are the feature names looked up by preprocess_input.
    input_data = {
        "ClaimDate": claim_date,
        "ClaimAmount": claim_amount,
        "PatientAge": patient_age,
        "PatientIncome": patient_income,
        "PatientGender": patient_gender,
        "ProviderSpecialty": provider_specialty,
        "ClaimStatus": claim_status,
        "PatientMaritalStatus": patient_marital_status,
        "PatientEmploymentStatus": patient_employment_status,
        "ProviderLocation": provider_location,
        "ClaimType": claim_type,
        "ClaimSubmissionMethod": claim_submission_method,
    }

    # Preprocess the input data based on the selected method
    if prediction_method == "ML Prediction":
        input_scaled = preprocess_input(input_data, method="ml")

        # Get the prediction from the ML model (Random Forest)
        prediction = rf_model.predict(input_scaled)

        # predict() returns one label per input row; exactly one claim is
        # submitted here, so read that label explicitly instead of relying
        # on the truth value of an array comparison.
        if prediction[0] == 1:
            st.write("This claim is predicted to be **fraudulent** (ML model).")
        else:
            st.write("This claim is predicted to be **legitimate** (ML model).")

    elif prediction_method == "NLP Prediction":
        inputs = preprocess_input(input_data, method="nlp")

        # Get the prediction from the NLP model; gradients are not needed
        # for inference, so they are disabled.
        with torch.no_grad():
            logits = nlp_model(**inputs).logits
            predicted_class = torch.argmax(logits, dim=-1).item()

        # Class index 1 is reported as fraud, any other index as legitimate.
        if predicted_class == 1:
            st.write("This claim is predicted to be **fraudulent** (NLP model).")
        else:
            st.write("This claim is predicted to be **legitimate** (NLP model).")
|
170 |
+
|
171 |
+
|
172 |
+
|
173 |
|
174 |
|
175 |
# Title and description for the app
|