Spaces:

tajuarAkash
/

Heath_Insurance_Fraud_Prediction

Sleeping

App Files Community

tajuarAkash committed on Feb 11

Commit

2393287

verified ·

1 Parent(s): c20ddd8

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -2

app.py CHANGED Viewed

@@ -18,13 +18,31 @@ rf_model_path = hf_hub_download(repo_id="tajuarAkash/Health_Insurance_Fraud_dete
 rf_model = joblib.load(rf_model_path)
 # Preprocessing for the user inputs
 # Preprocessing for the user inputs
 def preprocess_input(input_data, method="ml"):
     if method == "ml":
         # For Random Forest prediction, apply necessary transformations like scaling or encoding.
         # Convert ClaimDate to ordinal (number of days since a particular date)
-        input_data['ClaimDate'] = pd.to_datetime(input_data['ClaimDate']).apply(lambda date: date.toordinal())
         # Wrap each feature value in a list to create a valid DataFrame
         input_df = pd.DataFrame({
@@ -51,6 +69,7 @@ def preprocess_input(input_data, method="ml"):
         input_scaled = scaler.fit_transform(input_df)  # Scaling the data
         return input_scaled
     elif method == "nlp":
         # For NLP-based prediction, concatenate features into a single paragraph
         claim_date = input_data['ClaimDate']
@@ -80,6 +99,77 @@ def preprocess_input(input_data, method="ml"):
         return inputs
 # Title and description for the app

 rf_model = joblib.load(rf_model_path)
 # Preprocessing for the user inputs
+# Preprocessing for the user inputs
+import streamlit as st
+import joblib
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from huggingface_hub import hf_hub_download
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+# Load your NLP model (Hugging Face model) for fraud prediction using BERT (or similar NLP model)
+nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_NLP"  # replace with your Hugging Face model path
+nlp_tokenizer = AutoTokenizer.from_pretrained(nlp_model_name)
+nlp_model = AutoModelForSequenceClassification.from_pretrained(nlp_model_name)
+# Load the Random Forest model for ML-based prediction
+rf_model_path = hf_hub_download(repo_id="tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest", filename="random_forest_model.joblib")
+rf_model = joblib.load(rf_model_path)
 # Preprocessing for the user inputs
 def preprocess_input(input_data, method="ml"):
     if method == "ml":
         # For Random Forest prediction, apply necessary transformations like scaling or encoding.
         # Convert ClaimDate to ordinal (number of days since a particular date)
+        input_data['ClaimDate'] = pd.to_datetime(input_data['ClaimDate']).dt.toordinal()
         # Wrap each feature value in a list to create a valid DataFrame
         input_df = pd.DataFrame({
         input_scaled = scaler.fit_transform(input_df)  # Scaling the data
         return input_scaled
     elif method == "nlp":
         # For NLP-based prediction, concatenate features into a single paragraph
         claim_date = input_data['ClaimDate']
         return inputs
+# Title and description for the app
+st.title("Insurance Claim Fraud Detection")
+st.write("""
+This app predicts whether an insurance claim is fraudulent or legitimate based on user input.
+You can choose between **ML-based prediction** or **NLP-based prediction**.
+""")
+# Radio selector for choosing the prediction method
+prediction_method = st.radio("Choose Prediction Method", ("ML Prediction", "NLP Prediction"))
+# Input fields for the user (these should match your model features)
+claim_date = st.date_input("Enter the claim date")
+claim_amount = st.number_input("Enter the claim amount", min_value=0)
+patient_age = st.number_input("Enter the patient's age", min_value=0)
+patient_income = st.number_input("Enter the patient's income", min_value=0)
+patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"])
+provider_specialty = st.text_input("Enter the provider specialty")
+claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"])
+patient_marital_status = st.text_input("Enter the marital status")
+patient_employment_status = st.text_input("Enter the employment status")
+provider_location = st.text_input("Enter the provider location")
+claim_type = st.text_input("Enter the claim type")
+claim_submission_method = st.text_input("Enter the claim submission method")
+# ClaimLegitimacy is excluded from input (it’s the target that we want to predict)
+# claim_legitimacy = st.selectbox("Claim legitimacy", ["Fraud", "Legitimate"])
+# Create a button to trigger prediction
+if st.button('Predict'):
+    input_data = {
+        "ClaimDate": claim_date,
+        "ClaimAmount": claim_amount,
+        "PatientAge": patient_age,
+        "PatientIncome": patient_income,
+        "PatientGender": patient_gender,
+        "ProviderSpecialty": provider_specialty,
+        "ClaimStatus": claim_status,
+        "PatientMaritalStatus": patient_marital_status,
+        "PatientEmploymentStatus": patient_employment_status,
+        "ProviderLocation": provider_location,
+        "ClaimType": claim_type,
+        "ClaimSubmissionMethod": claim_submission_method,
+        # "ClaimLegitimacy": claim_legitimacy,  # Removed since it's the target we want to predict
+    }
+    # Preprocess the input data based on the selected method
+    if prediction_method == "ML Prediction":
+        input_scaled = preprocess_input(input_data, method="ml")
+        # Get the prediction from the ML model (Random Forest)
+        prediction = rf_model.predict(input_scaled)
+        if prediction == 1:
+            st.write("This claim is predicted to be **fraudulent** (ML model).")
+        else:
+            st.write("This claim is predicted to be **legitimate** (ML model).")
+    elif prediction_method == "NLP Prediction":
+        inputs = preprocess_input(input_data, method="nlp")
+        # Get the prediction from the NLP model (BERT)
+        with torch.no_grad():
+            logits = nlp_model(**inputs).logits
+        predicted_class = torch.argmax(logits, dim=-1).item()
+        if predicted_class == 1:
+            st.write("This claim is predicted to be **fraudulent** (NLP model).")
+        else:
+            st.write("This claim is predicted to be **legitimate** (NLP model).")
 # Title and description for the app