Update app.py
app.py CHANGED
@@ -8,7 +8,7 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 
 # Load your NLP model (Hugging Face model) for fraud prediction using BERT (or similar NLP model)
-nlp_model_name = "tajuarAkash/
+nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest"  # replace with your Hugging Face model path
 nlp_tokenizer = AutoTokenizer.from_pretrained(nlp_model_name)
 nlp_model = AutoModelForSequenceClassification.from_pretrained(nlp_model_name)
 
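For a quick check of the new model path, a minimal load-and-inspect sketch; it assumes the repo id taken from this diff actually resolves to a checkpoint that AutoModelForSequenceClassification can load (the repo name mentions a Random Forest, so this is worth verifying), and the printed label inspection is only illustrative:

```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Repo id copied from the diff above; assumed to host a sequence-classification checkpoint.
nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest"
nlp_tokenizer = AutoTokenizer.from_pretrained(nlp_model_name)
nlp_model = AutoModelForSequenceClassification.from_pretrained(nlp_model_name)
nlp_model.eval()  # inference only, matching the torch.no_grad() usage later in app.py

# Inspect the classification head so the fraud/legitimate class indices are known.
print(nlp_model.config.num_labels)  # expected: 2 for a binary fraud classifier
print(nlp_model.config.id2label)    # index -> label name mapping, if the repo defines one
```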
@@ -33,10 +33,34 @@ def preprocess_input(input_data, method="ml"):
         return input_scaled
 
     elif method == "nlp":
-        # For NLP-based prediction,
-
+        # For NLP-based prediction, concatenate features into a single paragraph
+        claim_date = input_data['ClaimDate']
+        claim_amount = input_data['ClaimAmount']
+        patient_age = input_data['PatientAge']
+        patient_gender = input_data['PatientGender']
+        provider_specialty = input_data['ProviderSpecialty']
+        claim_status = input_data['ClaimStatus']
+        patient_income = input_data['PatientIncome']
+        patient_marital_status = input_data['PatientMaritalStatus']
+        patient_employment_status = input_data['PatientEmploymentStatus']
+        provider_location = input_data['ProviderLocation']
+        claim_type = input_data['ClaimType']
+        claim_submission_method = input_data['ClaimSubmissionMethod']
+
+        # Create a sentence (paragraph) using the input data
+        input_text = f"The claim date is {claim_date}, with a claim amount of {claim_amount}. " \
+                     f"The patient is {patient_age} years old, and their gender is {patient_gender}. " \
+                     f"The provider specialty is {provider_specialty}. The claim status is {claim_status}. " \
+                     f"The patient's income is {patient_income}, marital status is {patient_marital_status}, " \
+                     f"and employment status is {patient_employment_status}. The provider location is {provider_location}. " \
+                     f"The claim type is {claim_type}, and the claim submission method is {claim_submission_method}. " \
+                     f"Claim legitimacy: {input_data['ClaimLegitimacy']}."
+
+        # Tokenize the input text for NLP
+        inputs = nlp_tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
         return inputs
 
+
 # Title and description for the app
 st.title("Insurance Claim Fraud Detection")
 st.write("""
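To exercise the new "nlp" branch outside Streamlit, a small usage sketch; it assumes `preprocess_input` from app.py is in scope, the dictionary keys mirror the ones the function reads, and the sample values are made up:

```python
sample_claim = {
    "ClaimDate": "2024-01-15",
    "ClaimAmount": 2500,
    "PatientAge": 42,
    "PatientGender": "Female",
    "ProviderSpecialty": "Cardiology",
    "ClaimStatus": "Pending",
    "PatientIncome": 55000,
    "PatientMaritalStatus": "Married",
    "PatientEmploymentStatus": "Employed",
    "ProviderLocation": "Chicago",
    "ClaimType": "Inpatient",
    "ClaimSubmissionMethod": "Online",
    "ClaimLegitimacy": "Legitimate",
}

# Returns the tokenizer's BatchEncoding with input_ids / attention_mask tensors.
inputs = preprocess_input(sample_claim, method="nlp")
print(inputs["input_ids"].shape)  # (1, sequence_length), truncated to at most 512 tokens
```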
@@ -48,26 +72,41 @@ You can choose between **ML-based prediction** or **NLP-based prediction**.
 prediction_method = st.radio("Choose Prediction Method", ("ML Prediction", "NLP Prediction"))
 
 # Input fields for the user (these should match your model features)
+claim_date = st.date_input("Enter the claim date")
 claim_amount = st.number_input("Enter the claim amount", min_value=0)
 patient_age = st.number_input("Enter the patient's age", min_value=0)
 patient_income = st.number_input("Enter the patient's income", min_value=0)
 patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"])
+provider_specialty = st.text_input("Enter the provider specialty")
 claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"])
-
+patient_marital_status = st.text_input("Enter the marital status")
+patient_employment_status = st.text_input("Enter the employment status")
+provider_location = st.text_input("Enter the provider location")
+claim_type = st.text_input("Enter the claim type")
+claim_submission_method = st.text_input("Enter the claim submission method")
+claim_legitimacy = st.selectbox("Claim legitimacy", ["Fraud", "Legitimate"])
 
-#
+# Create a button to trigger prediction
 if st.button('Predict'):
-
-
-
-
-
-
-
-
+    input_data = {
+        "ClaimDate": claim_date,
+        "ClaimAmount": claim_amount,
+        "PatientAge": patient_age,
+        "PatientIncome": patient_income,
+        "PatientGender": patient_gender,
+        "ProviderSpecialty": provider_specialty,
+        "ClaimStatus": claim_status,
+        "PatientMaritalStatus": patient_marital_status,
+        "PatientEmploymentStatus": patient_employment_status,
+        "ProviderLocation": provider_location,
+        "ClaimType": claim_type,
+        "ClaimSubmissionMethod": claim_submission_method,
+        "ClaimLegitimacy": claim_legitimacy,
+    }
 
-
-
+    # Preprocess the input data based on the selected method
+    if prediction_method == "ML Prediction":
+        input_scaled = preprocess_input(input_data, method="ml")
 
         # Get the prediction from the ML model (Random Forest)
         prediction = rf_model.predict(input_scaled)
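The "ml" branch of `preprocess_input` is not part of this diff (only its `return input_scaled` appears as context), so the following is a hypothetical sketch of what such a branch often looks like, assuming a scaler fitted at training time named `scaler` and only the numeric fields collected above; none of these names come from the commit:

```python
import numpy as np

def preprocess_input_ml(input_data, scaler):
    """Hypothetical "ml" branch: arrange the numeric features in the order the
    Random Forest was trained on, then apply the fitted scaler."""
    features = np.array([[
        input_data["ClaimAmount"],
        input_data["PatientAge"],
        input_data["PatientIncome"],
        # Categorical fields (gender, claim status, ...) would need the same
        # encoding used during training; omitted here because it is unknown.
    ]], dtype=float)
    return scaler.transform(features)
```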
@@ -78,12 +117,7 @@ if st.button('Predict'):
         st.write("This claim is predicted to be **legitimate** (ML model).")
 
     elif prediction_method == "NLP Prediction":
-
-            "generated_sentence_without_gemini": claim_text,  # Text input for NLP
-        }
-
-        # Preprocess the input data for NLP
-        inputs = preprocess_input(input_data_nlp, method="nlp")
+        inputs = preprocess_input(input_data, method="nlp")
 
         # Get the prediction from the NLP model (BERT)
         with torch.no_grad():
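The hunk ends at `with torch.no_grad():`, so the NLP branch's result handling is not visible here; a hedged continuation sketch, assuming class index 1 means fraud (check `nlp_model.config.id2label`) and mirroring the wording of the ML branch messages:

```python
# Continuation of the NLP branch shown above; everything below is an assumption,
# not part of this commit.
with torch.no_grad():
    logits = nlp_model(**inputs).logits  # shape: (1, num_labels)
predicted_class = int(torch.argmax(logits, dim=-1).item())

if predicted_class == 1:  # assumed fraud index
    st.write("This claim is predicted to be **fraudulent** (NLP model).")
else:
    st.write("This claim is predicted to be **legitimate** (NLP model).")
```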