|
|
|
import streamlit as st |
|
import joblib |
|
import numpy as np |
|
import pandas as pd |
|
from sklearn.preprocessing import StandardScaler |
|
from huggingface_hub import hf_hub_download |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import torch |
|
|
|
|
|
# Streamlit re-executes this script from top to bottom on every widget
# interaction. The loaders below are wrapped in st.cache_resource so the
# tokenizer, transformer and random-forest model are created once per server
# process instead of on every rerun.


@st.cache_resource
def _load_nlp_components(model_name):
    """Return the (tokenizer, model) pair for the given Hugging Face repo id."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model


@st.cache_resource
def _load_rf_model(repo_id, filename):
    """Download the serialized model file and return (local_path, model)."""
    local_path = hf_hub_download(repo_id=repo_id, filename=filename)
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only point this at a repository you trust.
    return local_path, joblib.load(local_path)


# Transformer-based (NLP) classifier and its tokenizer.
nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_NLP"
nlp_tokenizer, nlp_model = _load_nlp_components(nlp_model_name)

# Random-forest (tabular) classifier.
rf_model_path, rf_model = _load_rf_model(
    "tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest",
    "random_forest_model.joblib",
)
|
|
|
|
|
|
|
import streamlit as st |
|
import joblib |
|
import numpy as np |
|
import pandas as pd |
|
from sklearn.preprocessing import StandardScaler |
|
from huggingface_hub import hf_hub_download |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import torch |
|
|
|
|
|
# Streamlit re-executes this script from top to bottom on every widget
# interaction. The loaders below are wrapped in st.cache_resource so the
# tokenizer, transformer and random-forest model are created once per server
# process instead of on every rerun.


@st.cache_resource
def _load_nlp_components(model_name):
    """Return the (tokenizer, model) pair for the given Hugging Face repo id."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model


@st.cache_resource
def _load_rf_model(repo_id, filename):
    """Download the serialized model file and return (local_path, model)."""
    local_path = hf_hub_download(repo_id=repo_id, filename=filename)
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only point this at a repository you trust.
    return local_path, joblib.load(local_path)


# Transformer-based (NLP) classifier and its tokenizer.
nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_NLP"
nlp_tokenizer, nlp_model = _load_nlp_components(nlp_model_name)

# Random-forest (tabular) classifier.
rf_model_path, rf_model = _load_rf_model(
    "tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest",
    "random_forest_model.joblib",
)
|
|
|
|
|
def preprocess_input(input_data, method="ml"):
    """Convert the raw claim form values into the input expected by a model.

    Args:
        input_data: Mapping of claim field names to the values entered in the
            form. Keys read: 'ClaimDate', 'ClaimAmount', 'PatientAge',
            'PatientIncome', 'PatientGender', 'ProviderSpecialty',
            'ClaimStatus', 'PatientMaritalStatus', 'PatientEmploymentStatus',
            'ProviderLocation', 'ClaimType', 'ClaimSubmissionMethod' and,
            optionally (NLP path only), 'ClaimLegitimacy'. The mapping is not
            modified.
        method: "ml" builds the scaled one-row feature matrix for the
            random-forest model; "nlp" builds the tokenized sentence for the
            transformer model.

    Returns:
        For "ml", the array produced by ``StandardScaler.fit_transform`` on
        the one-row feature frame. For "nlp", the output of ``nlp_tokenizer``
        (PyTorch tensors, truncated to 512 tokens).

    Raises:
        KeyError: If a required field is missing from ``input_data``.
        ValueError: If ``method`` is neither "ml" nor "nlp".
    """
    if method == "ml":
        # Column order is kept exactly as the frame was originally assembled.
        feature_columns = (
            'ClaimDate',
            'ClaimAmount',
            'PatientAge',
            'PatientIncome',
            'PatientGender',
            'ProviderSpecialty',
            'ClaimStatus',
            'PatientMaritalStatus',
            'PatientEmploymentStatus',
            'ProviderLocation',
            'ClaimType',
            'ClaimSubmissionMethod',
        )
        row = {column: [input_data[column]] for column in feature_columns}

        # pd.to_datetime() on a single date returns a Timestamp, which has no
        # `.dt` accessor (that exists only on Series), so call toordinal()
        # on it directly. The ordinal goes into the local row rather than
        # back into `input_data`, leaving the caller's dict untouched.
        row['ClaimDate'] = [pd.to_datetime(input_data['ClaimDate']).toordinal()]

        input_df = pd.DataFrame(row)

        # Binary-encode gender: 'Male' -> 1, anything else -> 0.
        input_df['PatientGender'] = (input_df['PatientGender'] == 'Male').astype(int)

        # Ordinal-encode the claim status; unknown statuses become NaN.
        claim_status_mapping = {"Denied": 0, "Pending": 1, "Approved": 2}
        input_df['ClaimStatus'] = input_df['ClaimStatus'].map(claim_status_mapping)

        # NOTE(review): the free-text columns (ProviderSpecialty,
        # PatientMaritalStatus, PatientEmploymentStatus, ProviderLocation,
        # ClaimType, ClaimSubmissionMethod) reach the scaler un-encoded;
        # StandardScaler expects numeric data, so this likely fails for
        # non-numeric text. Apply the encoders used at training time.
        # NOTE(review): the scaler is fitted on this single row, so the mean
        # it subtracts is the row itself and the numeric features come out
        # as 0. TODO: load and reuse the scaler fitted on the training data.
        scaler = StandardScaler()
        return scaler.fit_transform(input_df)

    if method == "nlp":
        input_text = (
            f"The claim date is {input_data['ClaimDate']}, "
            f"with a claim amount of {input_data['ClaimAmount']}. "
            f"The patient is {input_data['PatientAge']} years old, "
            f"and their gender is {input_data['PatientGender']}. "
            f"The provider specialty is {input_data['ProviderSpecialty']}. "
            f"The claim status is {input_data['ClaimStatus']}. "
            f"The patient's income is {input_data['PatientIncome']}, "
            f"marital status is {input_data['PatientMaritalStatus']}, "
            f"and employment status is {input_data['PatientEmploymentStatus']}. "
            f"The provider location is {input_data['ProviderLocation']}. "
            f"The claim type is {input_data['ClaimType']}, "
            f"and the claim submission method is {input_data['ClaimSubmissionMethod']}. "
        )

        # 'ClaimLegitimacy' is optional: not every caller collects it, and
        # requiring it raised KeyError for those callers.
        # NOTE(review): this field looks like the label being predicted;
        # confirm that feeding it to the model at inference is intended.
        if 'ClaimLegitimacy' in input_data:
            input_text += f"Claim legitimacy: {input_data['ClaimLegitimacy']}."
        else:
            input_text = input_text.rstrip()

        return nlp_tokenizer(
            input_text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )

    # Fail loudly instead of silently returning None for a mistyped method.
    raise ValueError(f"Unsupported method {method!r}; expected 'ml' or 'nlp'.")
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Streamlit page: claim form without a "Claim legitimacy" field.
#
# Every widget gets an explicit `key`. Streamlit derives a widget's identity
# from its type and arguments, and this script creates widgets with the same
# labels and arguments more than once, which otherwise triggers a
# duplicate-widget error.
# ---------------------------------------------------------------------------
st.title("Insurance Claim Fraud Detection")
st.write("""
This app predicts whether an insurance claim is fraudulent or legitimate based on user input.
You can choose between **ML-based prediction** or **NLP-based prediction**.
""")

# Which model to use for the prediction.
prediction_method = st.radio(
    "Choose Prediction Method",
    ("ML Prediction", "NLP Prediction"),
    key="v1_prediction_method",
)

# Claim details entered by the user.
claim_date = st.date_input("Enter the claim date", key="v1_claim_date")
claim_amount = st.number_input("Enter the claim amount", min_value=0, key="v1_claim_amount")
patient_age = st.number_input("Enter the patient's age", min_value=0, key="v1_patient_age")
patient_income = st.number_input("Enter the patient's income", min_value=0, key="v1_patient_income")
patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"], key="v1_patient_gender")
provider_specialty = st.text_input("Enter the provider specialty", key="v1_provider_specialty")
claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"], key="v1_claim_status")
patient_marital_status = st.text_input("Enter the marital status", key="v1_patient_marital_status")
patient_employment_status = st.text_input("Enter the employment status", key="v1_patient_employment_status")
provider_location = st.text_input("Enter the provider location", key="v1_provider_location")
claim_type = st.text_input("Enter the claim type", key="v1_claim_type")
claim_submission_method = st.text_input("Enter the claim submission method", key="v1_claim_submission_method")

if st.button('Predict', key="v1_predict"):
    # NOTE(review): this form does not collect 'ClaimLegitimacy', which the
    # NLP text template in preprocess_input reads. Verify that the NLP path
    # tolerates the missing key.
    input_data = {
        "ClaimDate": claim_date,
        "ClaimAmount": claim_amount,
        "PatientAge": patient_age,
        "PatientIncome": patient_income,
        "PatientGender": patient_gender,
        "ProviderSpecialty": provider_specialty,
        "ClaimStatus": claim_status,
        "PatientMaritalStatus": patient_marital_status,
        "PatientEmploymentStatus": patient_employment_status,
        "ProviderLocation": provider_location,
        "ClaimType": claim_type,
        "ClaimSubmissionMethod": claim_submission_method,
    }

    if prediction_method == "ML Prediction":
        input_scaled = preprocess_input(input_data, method="ml")
        prediction = rf_model.predict(input_scaled)

        # predict() returns one label per input row; a single row was
        # passed, so read that element rather than testing the whole array.
        if prediction[0] == 1:
            st.write("This claim is predicted to be **fraudulent** (ML model).")
        else:
            st.write("This claim is predicted to be **legitimate** (ML model).")

    elif prediction_method == "NLP Prediction":
        inputs = preprocess_input(input_data, method="nlp")

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = nlp_model(**inputs).logits
            predicted_class = torch.argmax(logits, dim=-1).item()

        if predicted_class == 1:
            st.write("This claim is predicted to be **fraudulent** (NLP model).")
        else:
            st.write("This claim is predicted to be **legitimate** (NLP model).")
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Streamlit page: claim form including the "Claim legitimacy" field.
#
# Every widget gets an explicit `key`. Streamlit derives a widget's identity
# from its type and arguments, and this script creates widgets with the same
# labels and arguments more than once, which otherwise triggers a
# duplicate-widget error.
# ---------------------------------------------------------------------------
st.title("Insurance Claim Fraud Detection")
st.write("""
This app predicts whether an insurance claim is fraudulent or legitimate based on user input.
You can choose between **ML-based prediction** or **NLP-based prediction**.
""")

# Which model to use for the prediction.
prediction_method = st.radio(
    "Choose Prediction Method",
    ("ML Prediction", "NLP Prediction"),
    key="v2_prediction_method",
)

# Claim details entered by the user.
claim_date = st.date_input("Enter the claim date", key="v2_claim_date")
claim_amount = st.number_input("Enter the claim amount", min_value=0, key="v2_claim_amount")
patient_age = st.number_input("Enter the patient's age", min_value=0, key="v2_patient_age")
patient_income = st.number_input("Enter the patient's income", min_value=0, key="v2_patient_income")
patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"], key="v2_patient_gender")
provider_specialty = st.text_input("Enter the provider specialty", key="v2_provider_specialty")
claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"], key="v2_claim_status")
patient_marital_status = st.text_input("Enter the marital status", key="v2_patient_marital_status")
patient_employment_status = st.text_input("Enter the employment status", key="v2_patient_employment_status")
provider_location = st.text_input("Enter the provider location", key="v2_provider_location")
claim_type = st.text_input("Enter the claim type", key="v2_claim_type")
claim_submission_method = st.text_input("Enter the claim submission method", key="v2_claim_submission_method")
# NOTE(review): this asks the user for the very outcome the models predict,
# and preprocess_input embeds it in the NLP prompt. Confirm this is intended
# and not label leakage carried over from training.
claim_legitimacy = st.selectbox("Claim legitimacy", ["Fraud", "Legitimate"], key="v2_claim_legitimacy")

if st.button('Predict', key="v2_predict"):
    input_data = {
        "ClaimDate": claim_date,
        "ClaimAmount": claim_amount,
        "PatientAge": patient_age,
        "PatientIncome": patient_income,
        "PatientGender": patient_gender,
        "ProviderSpecialty": provider_specialty,
        "ClaimStatus": claim_status,
        "PatientMaritalStatus": patient_marital_status,
        "PatientEmploymentStatus": patient_employment_status,
        "ProviderLocation": provider_location,
        "ClaimType": claim_type,
        "ClaimSubmissionMethod": claim_submission_method,
        "ClaimLegitimacy": claim_legitimacy,
    }

    if prediction_method == "ML Prediction":
        input_scaled = preprocess_input(input_data, method="ml")
        prediction = rf_model.predict(input_scaled)

        # predict() returns one label per input row; a single row was
        # passed, so read that element rather than testing the whole array.
        if prediction[0] == 1:
            st.write("This claim is predicted to be **fraudulent** (ML model).")
        else:
            st.write("This claim is predicted to be **legitimate** (ML model).")

    elif prediction_method == "NLP Prediction":
        inputs = preprocess_input(input_data, method="nlp")

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = nlp_model(**inputs).logits
            predicted_class = torch.argmax(logits, dim=-1).item()

        if predicted_class == 1:
            st.write("This claim is predicted to be **fraudulent** (NLP model).")
        else:
            st.write("This claim is predicted to be **legitimate** (NLP model).")
|
|