Update app.py
app.py CHANGED
@@ -8,7 +8,7 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 
 # Load your NLP model (Hugging Face model) for fraud prediction using BERT (or similar NLP model)
-nlp_model_name = "tajuarAkash/
+nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest"  # replace with your Hugging Face model path
 nlp_tokenizer = AutoTokenizer.from_pretrained(nlp_model_name)
 nlp_model = AutoModelForSequenceClassification.from_pretrained(nlp_model_name)
 
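For a quick check of the new model path, a minimal load-and-inspect sketch; it assumes the repo id taken from this diff actually resolves to a checkpoint that AutoModelForSequenceClassification can load (the repo name mentions a Random Forest, so this is worth verifying), and the printed label inspection is only illustrative:

```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Repo id copied from the diff above; assumed to host a sequence-classification checkpoint.
nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest"
nlp_tokenizer = AutoTokenizer.from_pretrained(nlp_model_name)
nlp_model = AutoModelForSequenceClassification.from_pretrained(nlp_model_name)
nlp_model.eval()  # inference only, matching the torch.no_grad() usage later in app.py

# Inspect the classification head so the fraud/legitimate class indices are known.
print(nlp_model.config.num_labels)  # expected: 2 for a binary fraud classifier
print(nlp_model.config.id2label)    # index -> label name mapping, if the repo defines one
```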
@@ -33,10 +33,34 @@ def preprocess_input(input_data, method="ml"):
         return input_scaled
 
     elif method == "nlp":
-        # For NLP-based prediction,
-
+        # For NLP-based prediction, concatenate features into a single paragraph
+        claim_date = input_data['ClaimDate']
+        claim_amount = input_data['ClaimAmount']
+        patient_age = input_data['PatientAge']
+        patient_gender = input_data['PatientGender']
+        provider_specialty = input_data['ProviderSpecialty']
+        claim_status = input_data['ClaimStatus']
+        patient_income = input_data['PatientIncome']
+        patient_marital_status = input_data['PatientMaritalStatus']
+        patient_employment_status = input_data['PatientEmploymentStatus']
+        provider_location = input_data['ProviderLocation']
+        claim_type = input_data['ClaimType']
+        claim_submission_method = input_data['ClaimSubmissionMethod']
+
+        # Create a sentence (paragraph) using the input data
+        input_text = f"The claim date is {claim_date}, with a claim amount of {claim_amount}. " \
+                     f"The patient is {patient_age} years old, and their gender is {patient_gender}. " \
+                     f"The provider specialty is {provider_specialty}. The claim status is {claim_status}. " \
+                     f"The patient's income is {patient_income}, marital status is {patient_marital_status}, " \
+                     f"and employment status is {patient_employment_status}. The provider location is {provider_location}. " \
+                     f"The claim type is {claim_type}, and the claim submission method is {claim_submission_method}. " \
+                     f"Claim legitimacy: {input_data['ClaimLegitimacy']}."
+
+        # Tokenize the input text for NLP
+        inputs = nlp_tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
         return inputs
 
+
 # Title and description for the app
 st.title("Insurance Claim Fraud Detection")
 st.write("""
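To exercise the new "nlp" branch outside Streamlit, a small usage sketch; it assumes `preprocess_input` from app.py is in scope, the dictionary keys mirror the ones the function reads, and the sample values are made up:

```python
sample_claim = {
    "ClaimDate": "2024-01-15",
    "ClaimAmount": 2500,
    "PatientAge": 42,
    "PatientGender": "Female",
    "ProviderSpecialty": "Cardiology",
    "ClaimStatus": "Pending",
    "PatientIncome": 55000,
    "PatientMaritalStatus": "Married",
    "PatientEmploymentStatus": "Employed",
    "ProviderLocation": "Chicago",
    "ClaimType": "Inpatient",
    "ClaimSubmissionMethod": "Online",
    "ClaimLegitimacy": "Legitimate",
}

# Returns the tokenizer's BatchEncoding with input_ids / attention_mask tensors.
inputs = preprocess_input(sample_claim, method="nlp")
print(inputs["input_ids"].shape)  # (1, sequence_length), truncated to at most 512 tokens
```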
@@ -48,26 +72,41 @@ You can choose between **ML-based prediction** or **NLP-based prediction**.
 prediction_method = st.radio("Choose Prediction Method", ("ML Prediction", "NLP Prediction"))
 
 # Input fields for the user (these should match your model features)
+claim_date = st.date_input("Enter the claim date")
 claim_amount = st.number_input("Enter the claim amount", min_value=0)
 patient_age = st.number_input("Enter the patient's age", min_value=0)
 patient_income = st.number_input("Enter the patient's income", min_value=0)
 patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"])
+provider_specialty = st.text_input("Enter the provider specialty")
 claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"])
-
+patient_marital_status = st.text_input("Enter the marital status")
+patient_employment_status = st.text_input("Enter the employment status")
+provider_location = st.text_input("Enter the provider location")
+claim_type = st.text_input("Enter the claim type")
+claim_submission_method = st.text_input("Enter the claim submission method")
+claim_legitimacy = st.selectbox("Claim legitimacy", ["Fraud", "Legitimate"])
 
-#
+# Create a button to trigger prediction
 if st.button('Predict'):
-
-
-
-
-
-
-
-
+    input_data = {
+        "ClaimDate": claim_date,
+        "ClaimAmount": claim_amount,
+        "PatientAge": patient_age,
+        "PatientIncome": patient_income,
+        "PatientGender": patient_gender,
+        "ProviderSpecialty": provider_specialty,
+        "ClaimStatus": claim_status,
+        "PatientMaritalStatus": patient_marital_status,
+        "PatientEmploymentStatus": patient_employment_status,
+        "ProviderLocation": provider_location,
+        "ClaimType": claim_type,
+        "ClaimSubmissionMethod": claim_submission_method,
+        "ClaimLegitimacy": claim_legitimacy,
+    }
 
-
-
+    # Preprocess the input data based on the selected method
+    if prediction_method == "ML Prediction":
+        input_scaled = preprocess_input(input_data, method="ml")
 
         # Get the prediction from the ML model (Random Forest)
         prediction = rf_model.predict(input_scaled)
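The "ml" branch of `preprocess_input` is not part of this diff (only its `return input_scaled` appears as context), so the following is a hypothetical sketch of what such a branch often looks like, assuming a scaler fitted at training time named `scaler` and only the numeric fields collected above; none of these names come from the commit:

```python
import numpy as np

def preprocess_input_ml(input_data, scaler):
    """Hypothetical "ml" branch: arrange the numeric features in the order the
    Random Forest was trained on, then apply the fitted scaler."""
    features = np.array([[
        input_data["ClaimAmount"],
        input_data["PatientAge"],
        input_data["PatientIncome"],
        # Categorical fields (gender, claim status, ...) would need the same
        # encoding used during training; omitted here because it is unknown.
    ]], dtype=float)
    return scaler.transform(features)
```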
@@ -78,12 +117,7 @@ if st.button('Predict'):
         st.write("This claim is predicted to be **legitimate** (ML model).")
 
     elif prediction_method == "NLP Prediction":
-
-            "generated_sentence_without_gemini": claim_text,  # Text input for NLP
-        }
-
-        # Preprocess the input data for NLP
-        inputs = preprocess_input(input_data_nlp, method="nlp")
+        inputs = preprocess_input(input_data, method="nlp")
 
         # Get the prediction from the NLP model (BERT)
         with torch.no_grad():
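The hunk ends at `with torch.no_grad():`, so the NLP branch's result handling is not visible here; a hedged continuation sketch, assuming class index 1 means fraud (check `nlp_model.config.id2label`) and mirroring the wording of the ML branch messages:

```python
# Continuation of the NLP branch shown above; everything below is an assumption,
# not part of this commit.
with torch.no_grad():
    logits = nlp_model(**inputs).logits  # shape: (1, num_labels)
predicted_class = int(torch.argmax(logits, dim=-1).item())

if predicted_class == 1:  # assumed fraud index
    st.write("This claim is predicted to be **fraudulent** (NLP model).")
else:
    st.write("This claim is predicted to be **legitimate** (NLP model).")
```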