tajuarAkash committed on
Commit
2393287
·
verified ·
1 Parent(s): c20ddd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -2
app.py CHANGED
@@ -18,13 +18,31 @@ rf_model_path = hf_hub_download(repo_id="tajuarAkash/Health_Insurance_Fraud_dete
18
  rf_model = joblib.load(rf_model_path)
19
 
20
  # Preprocessing for the user inputs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # Preprocessing for the user inputs
22
  def preprocess_input(input_data, method="ml"):
23
  if method == "ml":
24
  # For Random Forest prediction, apply necessary transformations like scaling or encoding.
25
-
26
  # Convert ClaimDate to ordinal (number of days since a particular date)
27
- input_data['ClaimDate'] = pd.to_datetime(input_data['ClaimDate']).apply(lambda date: date.toordinal())
28
 
29
  # Wrap each feature value in a list to create a valid DataFrame
30
  input_df = pd.DataFrame({
@@ -51,6 +69,7 @@ def preprocess_input(input_data, method="ml"):
51
  input_scaled = scaler.fit_transform(input_df) # Scaling the data
52
 
53
  return input_scaled
 
54
  elif method == "nlp":
55
  # For NLP-based prediction, concatenate features into a single paragraph
56
  claim_date = input_data['ClaimDate']
@@ -80,6 +99,77 @@ def preprocess_input(input_data, method="ml"):
80
  return inputs
81
 
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
 
85
  # Title and description for the app
 
18
  rf_model = joblib.load(rf_model_path)
19
 
20
  # Preprocessing for the user inputs
21
+ # Imports for the Streamlit UI, model loading, and preprocessing
22
+ import streamlit as st
23
+ import joblib
24
+ import numpy as np
25
+ import pandas as pd
26
+ from sklearn.preprocessing import StandardScaler
27
+ from huggingface_hub import hf_hub_download
28
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
29
+ import torch
30
+
31
+ # Load your NLP model (Hugging Face model) for fraud prediction using BERT (or similar NLP model)
32
+ nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_NLP" # replace with your Hugging Face model path
33
+ nlp_tokenizer = AutoTokenizer.from_pretrained(nlp_model_name)
34
+ nlp_model = AutoModelForSequenceClassification.from_pretrained(nlp_model_name)
35
+
36
+ # Load the Random Forest model for ML-based prediction
37
+ rf_model_path = hf_hub_download(repo_id="tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest", filename="random_forest_model.joblib")
38
+ rf_model = joblib.load(rf_model_path)
39
+
40
  # Preprocessing for the user inputs
41
  def preprocess_input(input_data, method="ml"):
42
  if method == "ml":
43
  # For Random Forest prediction, apply necessary transformations like scaling or encoding.
 
44
  # Convert ClaimDate to ordinal (number of days since a particular date)
45
+ input_data['ClaimDate'] = pd.to_datetime(input_data['ClaimDate']).dt.toordinal()
46
 
47
  # Wrap each feature value in a list to create a valid DataFrame
48
  input_df = pd.DataFrame({
 
69
  input_scaled = scaler.fit_transform(input_df) # Scaling the data. NOTE(review): this fits the scaler on the single input row; if scaler is a StandardScaler every feature becomes 0 -- presumably the scaler fitted at training time should be loaded and only transform() called here; confirm.
70
 
71
  return input_scaled
72
+
73
  elif method == "nlp":
74
  # For NLP-based prediction, concatenate features into a single paragraph
75
  claim_date = input_data['ClaimDate']
 
99
  return inputs
100
 
101
 
102
+ # Title and description for the app
103
+ st.title("Insurance Claim Fraud Detection")
104
+ st.write("""
105
+ This app predicts whether an insurance claim is fraudulent or legitimate based on user input.
106
+ You can choose between **ML-based prediction** or **NLP-based prediction**.
107
+ """)
108
+
109
+ # Buttons to select prediction method
110
+ prediction_method = st.radio("Choose Prediction Method", ("ML Prediction", "NLP Prediction"))
111
+
112
+ # Input fields for the user (these should match your model features)
113
+ claim_date = st.date_input("Enter the claim date")
114
+ claim_amount = st.number_input("Enter the claim amount", min_value=0)
115
+ patient_age = st.number_input("Enter the patient's age", min_value=0)
116
+ patient_income = st.number_input("Enter the patient's income", min_value=0)
117
+ patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"])
118
+ provider_specialty = st.text_input("Enter the provider specialty")
119
+ claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"])
120
+ patient_marital_status = st.text_input("Enter the marital status")
121
+ patient_employment_status = st.text_input("Enter the employment status")
122
+ provider_location = st.text_input("Enter the provider location")
123
+ claim_type = st.text_input("Enter the claim type")
124
+ claim_submission_method = st.text_input("Enter the claim submission method")
125
+ # ClaimLegitimacy is excluded from input (it’s the target that we want to predict)
126
+ # claim_legitimacy = st.selectbox("Claim legitimacy", ["Fraud", "Legitimate"])
127
+
128
+ # Create a button to trigger prediction
129
+ if st.button('Predict'):
130
+ input_data = {
131
+ "ClaimDate": claim_date,
132
+ "ClaimAmount": claim_amount,
133
+ "PatientAge": patient_age,
134
+ "PatientIncome": patient_income,
135
+ "PatientGender": patient_gender,
136
+ "ProviderSpecialty": provider_specialty,
137
+ "ClaimStatus": claim_status,
138
+ "PatientMaritalStatus": patient_marital_status,
139
+ "PatientEmploymentStatus": patient_employment_status,
140
+ "ProviderLocation": provider_location,
141
+ "ClaimType": claim_type,
142
+ "ClaimSubmissionMethod": claim_submission_method,
143
+ # "ClaimLegitimacy": claim_legitimacy, # Removed since it's the target we want to predict
144
+ }
145
+
146
+ # Preprocess the input data based on the selected method
147
+ if prediction_method == "ML Prediction":
148
+ input_scaled = preprocess_input(input_data, method="ml")
149
+
150
+ # Get the prediction from the ML model (Random Forest)
151
+ prediction = rf_model.predict(input_scaled)
152
+
153
+ if prediction == 1:
154
+ st.write("This claim is predicted to be **fraudulent** (ML model).")
155
+ else:
156
+ st.write("This claim is predicted to be **legitimate** (ML model).")
157
+
158
+ elif prediction_method == "NLP Prediction":
159
+ inputs = preprocess_input(input_data, method="nlp")
160
+
161
+ # Get the prediction from the NLP model (BERT)
162
+ with torch.no_grad():
163
+ logits = nlp_model(**inputs).logits
164
+ predicted_class = torch.argmax(logits, dim=-1).item()
165
+
166
+ if predicted_class == 1:
167
+ st.write("This claim is predicted to be **fraudulent** (NLP model).")
168
+ else:
169
+ st.write("This claim is predicted to be **legitimate** (NLP model).")
170
+
171
+
172
+
173
 
174
 
175
  # Title and description for the app