File size: 6,367 Bytes
cff8376
 
 
 
4ed9a17
cff8376
4ed9a17
cff8376
 
4ed9a17
 
cff8376
4ed9a17
 
 
 
 
 
 
cff8376
4ed9a17
 
 
cff8376
4ed9a17
 
 
 
cff8376
4ed9a17
cff8376
4ed9a17
 
cff8376
4ed9a17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eceb2da
 
 
 
cff8376
4ed9a17
 
cff8376
4ed9a17
 
 
 
 
 
cff8376
4ed9a17
 
 
 
 
cff8376
4ed9a17
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import streamlit as st
import pandas as pd
import joblib
import json
import numpy as np
from datetime import datetime
from sklearn.neighbors import LocalOutlierFactor


# ✅ Tabs for Application & Model Evaluation
# `app` hosts the interactive anomaly checker; `model_eval` hosts the static
# evaluation write-up. Both names are used as context managers further down.
app, model_eval = st.tabs(["Application", "📊 Model Evaluation"])

# ---------------- APPLICATION TAB ---------------- #
# Collects one transaction's attributes from the user, encodes them into the
# numeric feature row the models expect, and reports whether the selected
# model flags the transaction as anomalous.
with app:
    # Load trained models.
    # NOTE: joblib artifacts are pickle-based, so loading them can execute
    # arbitrary code; only ship/load files produced by a trusted training run.
    # NOTE(review): these loads happen every time this script body executes;
    # consider caching them if load time becomes noticeable.
    iso_forest = joblib.load("isolation_forest_model.pkl")
    one_class_svm = joblib.load("one_class_svm_model.pkl")
    lof_model = joblib.load("local_outlier_factor_model.pkl")  # LOF model trained earlier
    lof_threshold = joblib.load("lof_threshold.pkl")  # Precomputed threshold for LOF

    # Load location mapping (keys are offered in the select box, values are
    # used as the encoded "Location" feature). Explicit UTF-8 so non-ASCII
    # location names decode the same way on every platform.
    with open("location_mapping.json", "r", encoding="utf-8") as f:
        location_mapping = json.load(f)

    # Manual mapping for categorical variables
    transaction_type_mapping = {"Debit": 0, "Credit": 1}
    channel_mapping = {"ATM": 0, "Online": 1, "Branch": 2}

    st.title("Anomaly Detection for Bank Transactions")

    # Sidebar for model selection
    model_choice = st.sidebar.radio(
        "Select Anomaly Detection Model",
        ["Isolation Forest", "One-Class SVM", "Local Outlier Factor"],
    )

    # User inputs
    date = st.date_input("Select Transaction Date")
    time = st.time_input("Select Transaction Time")
    location = st.selectbox("Select Location", options=list(location_mapping.keys()))
    transaction_type = st.radio("Transaction Type", options=["Debit", "Credit"])
    channel = st.radio("Transaction Channel", options=["ATM", "Online", "Branch"])
    transaction_duration = st.slider("Transaction Duration (seconds)", min_value=0, max_value=600, value=30)
    login_attempts = st.number_input("Login Attempts", min_value=0)
    transaction_amount = st.number_input("Transaction Amount", min_value=0.0, format="%.2f")

    if st.button("Check for Anomaly"):
        # Day of the week as Monday=0 ... Sunday=6. date.weekday() yields
        # exactly these codes and, unlike strftime('%A') plus a lookup of
        # English day names, does not depend on the process locale.
        day_of_week = date.weekday()

        # Convert time to total seconds since midnight (seconds included so
        # the value really is "seconds since midnight").
        total_seconds = time.hour * 3600 + time.minute * 60 + time.second

        # Convert categorical values to numeric
        location_encoded = location_mapping.get(location, -1)  # Default to -1 if not found
        transaction_type_encoded = transaction_type_mapping[transaction_type]
        channel_encoded = channel_mapping[channel]

        # Ensure the order of features matches training
        # NOTE(review): column names/order are assumed to mirror the training
        # frame -- confirm against the training pipeline.
        input_data = pd.DataFrame([[
            transaction_type_encoded, location_encoded, channel_encoded, total_seconds,
            transaction_duration, login_attempts, day_of_week, transaction_amount,
        ]], columns=[
            "TransactionType", "Location", "Channel", "Time",
            "TransactionDuration", "LoginAttempts", "DayOfWeek", "TransactionAmount",
        ])

        if model_choice == "Local Outlier Factor":
            # Mean distance from the input row to its nearest neighbours in
            # the fitted LOF model, compared with the precomputed threshold.
            distances, _ = lof_model.kneighbors(input_data)
            is_anomalous = np.mean(distances) > lof_threshold
        else:
            # Isolation Forest and One-Class SVM share the same contract here:
            # a prediction of -1 is treated as an anomaly.
            selected_model = iso_forest if model_choice == "Isolation Forest" else one_class_svm
            is_anomalous = selected_model.predict(input_data)[0] == -1

        anomaly_label = "Anomalous" if is_anomalous else "Normal"

        # Display result
        st.write(f"### The transaction is: **{anomaly_label}**")


# ---------------- MODEL EVALUATION TAB ---------------- #
# Static write-up: dataset attribution, a per-model anomaly-count chart with
# short notes (chosen via a sidebar radio), the shared silhouette-score chart,
# and a closing comparison paragraph.
with model_eval:
    st.header("Model Evaluation")
    st.write("The Anomaly Detection model was trained to classify bank transactions as 'Anomalous' or 'Normal'. The dataset was taken from Kaggle.")
    st.write("Dataset by Vala Khorasani : [Kaggle Link](https://www.kaggle.com/datasets/valakhorasani/bank-transaction-dataset-for-fraud-detection)")

    # Evaluation content per model:
    # (image file, image caption, section heading, bullet notes).
    # The dict's insertion order also defines the order of the radio options,
    # so the options and the content shown for them cannot drift apart.
    evaluation_content = {
        "Isolation Forest": (
            "Anomaly_IF_counts.png",
            "Anomaly Counts - Isolation Forest",
            "### 📌 Isolation Forest Performance",
            (
                "- Detects anomalies based on random sub-sampling of data.",
                "- **Lower False Positives** in structured transaction data.",
            ),
        ),
        "One-Class SVM": (
            "Anomaly_OCSVM_counts.png",
            "Anomaly Counts - One-Class SVM",
            "### 📌 One-Class SVM Performance",
            (
                "- Uses a hyperplane to separate normal from anomalous data.",
                "- **Better suited for small datasets** but may be computationally expensive.",
            ),
        ),
        "Local Outlier Factor": (
            "Anomaly_LOF_counts.png",
            "Anomaly Counts - Local Outlier Factor",
            "### 📌 Local Outlier Factor (LOF) Performance",
            (
                "- Uses density-based analysis to detect anomalies.",
                "- **Best for identifying local anomalies**, but requires careful tuning of `k-neighbors`.",
            ),
        ),
    }

    # Sidebar to choose which model's evaluation to display
    eval_model_choice = st.sidebar.radio("Select Model for Evaluation", list(evaluation_content))

    # Display evaluation metrics based on selected model
    # NOTE(review): `use_column_width` is kept as in the original; newer
    # Streamlit releases may prefer a different sizing argument -- confirm
    # against the installed version before changing it.
    image_file, image_caption, heading, notes = evaluation_content[eval_model_choice]
    st.image(image_file, caption=image_caption, use_column_width=True)
    st.write(heading)
    for note in notes:
        st.write(note)

    st.image("silhouette_scores.png", caption="Silhouette Scores for All Models", use_column_width=True)

    st.header("Comparison")
    st.write("OCSVM (One-Class SVM) is likely overfitting or being too aggressive in marking transactions as anomalies. Its silhouette score of 0.00 suggests poor cluster structure, meaning it does not effectively separate normal vs. anomalous transactions. LOF (Local Outlier Factor) is the best-performing model because it has the highest silhouette score (0.16), indicating it maintains a clear distinction between anomalies and normal transactions. Isolation Forest (IF) is a close second, performing slightly worse than LOF but still better than OCSVM.")