# Streamlit app: health insurance claim fraud detection with an NLP model and a Random Forest model
import streamlit as st
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Load the NLP model (a BERT-style sequence classifier) from the Hugging Face Hub for fraud prediction
nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_NLP"  # replace with your own model repo if needed
nlp_tokenizer = AutoTokenizer.from_pretrained(nlp_model_name)
nlp_model = AutoModelForSequenceClassification.from_pretrained(nlp_model_name)
# Load the Random Forest model for ML-based prediction
rf_model_path = hf_hub_download(repo_id="tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest", filename="random_forest_model.joblib")
rf_model = joblib.load(rf_model_path)
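# --- Optional sketch (assumption, not wired into the app): cache model loading ---
# Wrapping the loads above in st.cache_resource keeps the heavy model objects in
# memory across Streamlit reruns instead of reloading them on every interaction.
# The helper name below is illustrative only.
@st.cache_resource
def load_models():
    tokenizer = AutoTokenizer.from_pretrained(nlp_model_name)
    model = AutoModelForSequenceClassification.from_pretrained(nlp_model_name)
    rf_path = hf_hub_download(
        repo_id="tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest",
        filename="random_forest_model.joblib",
    )
    return tokenizer, model, joblib.load(rf_path)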
# Preprocessing for the user inputs
def preprocess_input(input_data, method="ml"):
    if method == "ml":
        # For Random Forest prediction, apply the same transformations used at
        # training time (encoding and scaling).
        # Convert ClaimDate to an ordinal (number of days since a fixed reference date)
        input_data['ClaimDate'] = pd.Timestamp(input_data['ClaimDate']).toordinal()
        # Wrap each feature value in a list to create a valid one-row DataFrame
        input_df = pd.DataFrame({
            'ClaimDate': [input_data['ClaimDate']],  # now an ordinal value
            'ClaimAmount': [input_data['ClaimAmount']],
            'PatientAge': [input_data['PatientAge']],
            'PatientIncome': [input_data['PatientIncome']],
            'PatientGender': [input_data['PatientGender']],
            'ProviderSpecialty': [input_data['ProviderSpecialty']],
            'ClaimStatus': [input_data['ClaimStatus']],
            'PatientMaritalStatus': [input_data['PatientMaritalStatus']],
            'PatientEmploymentStatus': [input_data['PatientEmploymentStatus']],
            'ProviderLocation': [input_data['ProviderLocation']],
            'ClaimType': [input_data['ClaimType']],
            'ClaimSubmissionMethod': [input_data['ClaimSubmissionMethod']],
        })
        # Encode the categorical fields (same encodings as in training).
        # The remaining free-text columns would also need the encoders used at
        # training time; see the note after this function about reusing the
        # fitted scaler instead of fitting a new one on a single row.
        input_df['PatientGender'] = input_df['PatientGender'].apply(lambda x: 1 if x == 'Male' else 0)
        claim_status_mapping = {"Denied": 0, "Pending": 1, "Approved": 2}
        input_df['ClaimStatus'] = input_df['ClaimStatus'].map(claim_status_mapping)
        scaler = StandardScaler()
        input_scaled = scaler.fit_transform(input_df)  # scale the one-row input
        return input_scaled
    elif method == "nlp":
        # For NLP-based prediction, concatenate the features into a single paragraph
        claim_date = input_data['ClaimDate']
        claim_amount = input_data['ClaimAmount']
        patient_age = input_data['PatientAge']
        patient_gender = input_data['PatientGender']
        provider_specialty = input_data['ProviderSpecialty']
        claim_status = input_data['ClaimStatus']
        patient_income = input_data['PatientIncome']
        patient_marital_status = input_data['PatientMaritalStatus']
        patient_employment_status = input_data['PatientEmploymentStatus']
        provider_location = input_data['ProviderLocation']
        claim_type = input_data['ClaimType']
        claim_submission_method = input_data['ClaimSubmissionMethod']
        # Build a descriptive sentence (paragraph) from the input data.
        # ClaimLegitimacy is the prediction target, so it is not included here.
        input_text = f"The claim date is {claim_date}, with a claim amount of {claim_amount}. " \
                     f"The patient is {patient_age} years old, and their gender is {patient_gender}. " \
                     f"The provider specialty is {provider_specialty}. The claim status is {claim_status}. " \
                     f"The patient's income is {patient_income}, marital status is {patient_marital_status}, " \
                     f"and employment status is {patient_employment_status}. The provider location is {provider_location}. " \
                     f"The claim type is {claim_type}, and the claim submission method is {claim_submission_method}."
        # Tokenize the input text for the NLP model
        inputs = nlp_tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        return inputs
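# NOTE (assumption, not part of the original project): fitting a fresh StandardScaler
# on a single row, as done above, just centres that one row; in practice the scaler
# (and any categorical encoders) fitted during training would be persisted and
# reloaded here, for example (the file name below is hypothetical):
#
#     joblib.dump(scaler, "scaler.joblib")       # at training time
#     scaler = joblib.load("scaler.joblib")      # in this app
#     input_scaled = scaler.transform(input_df)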
# Title and description for the app
st.title("Insurance Claim Fraud Detection")
st.write("""
This app predicts whether an insurance claim is fraudulent or legitimate based on user input.
You can choose between **ML-based prediction** and **NLP-based prediction**.
""")
# Buttons to select prediction method
prediction_method = st.radio("Choose Prediction Method", ("ML Prediction", "NLP Prediction"))
# Input fields for the user (these should match your model features)
claim_date = st.date_input("Enter the claim date")
claim_amount = st.number_input("Enter the claim amount", min_value=0)
patient_age = st.number_input("Enter the patient's age", min_value=0)
patient_income = st.number_input("Enter the patient's income", min_value=0)
patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"])
provider_specialty = st.text_input("Enter the provider specialty")
claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"])
patient_marital_status = st.text_input("Enter the marital status")
patient_employment_status = st.text_input("Enter the employment status")
provider_location = st.text_input("Enter the provider location")
claim_type = st.text_input("Enter the claim type")
claim_submission_method = st.text_input("Enter the claim submission method")
# ClaimLegitimacy is excluded from input (it’s the target that we want to predict)
# claim_legitimacy = st.selectbox("Claim legitimacy", ["Fraud", "Legitimate"])
# Create a button to trigger prediction
if st.button('Predict'):
    input_data = {
        "ClaimDate": claim_date,
        "ClaimAmount": claim_amount,
        "PatientAge": patient_age,
        "PatientIncome": patient_income,
        "PatientGender": patient_gender,
        "ProviderSpecialty": provider_specialty,
        "ClaimStatus": claim_status,
        "PatientMaritalStatus": patient_marital_status,
        "PatientEmploymentStatus": patient_employment_status,
        "ProviderLocation": provider_location,
        "ClaimType": claim_type,
        "ClaimSubmissionMethod": claim_submission_method,
        # "ClaimLegitimacy" is omitted: it is the target we want to predict
    }
    # Preprocess the input data based on the selected method
    if prediction_method == "ML Prediction":
        input_scaled = preprocess_input(input_data, method="ml")
        # Get the prediction from the ML model (Random Forest)
        prediction = rf_model.predict(input_scaled)
        if prediction[0] == 1:
            st.write("This claim is predicted to be **fraudulent** (ML model).")
        else:
            st.write("This claim is predicted to be **legitimate** (ML model).")
    elif prediction_method == "NLP Prediction":
        inputs = preprocess_input(input_data, method="nlp")
        # Get the prediction from the NLP model (BERT)
        with torch.no_grad():
            logits = nlp_model(**inputs).logits
        predicted_class = torch.argmax(logits, dim=-1).item()
        if predicted_class == 1:
            st.write("This claim is predicted to be **fraudulent** (NLP model).")
        else:
            st.write("This claim is predicted to be **legitimate** (NLP model).")