Spaces:
Running
Running
Commit
·
4ed9a17
1
Parent(s):
4e7f4de
Added more Models
Browse files
- Anomaly_IF_counts.png +0 -0
- Anomaly_LOF_counts.png +0 -0
- Anomaly_OCSVM_counts.png +0 -0
- app.py +100 -42
- boxplot_transaction_amount.png +0 -0
- cleaned_transactions.csv +0 -0
- local_outlier_factor_model.pkl +3 -0
- lof_threshold.pkl +3 -0
- main.ipynb +0 -0
- one_class_svm_model.pkl +3 -0
- silhouette_scores.png +0 -0
Anomaly_IF_counts.png
ADDED
![]() |
Anomaly_LOF_counts.png
ADDED
![]() |
Anomaly_OCSVM_counts.png
ADDED
![]() |
app.py
CHANGED
@@ -2,58 +2,116 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import joblib
|
4 |
import json
|
|
|
5 |
from datetime import datetime
|
|
|
6 |
|
7 |
-
# Load trained model
|
8 |
-
iso_forest = joblib.load("isolation_forest_model.pkl")
|
9 |
|
10 |
-
#
|
11 |
-
|
12 |
-
location_mapping = json.load(f)
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
18 |
|
19 |
-
|
|
|
|
|
20 |
|
|
|
|
|
|
|
|
|
21 |
|
|
|
22 |
|
23 |
-
#
|
24 |
-
|
25 |
-
time = st.time_input("Select Transaction Time")
|
26 |
-
location = st.selectbox("Select Location", options=list(location_mapping.keys()))
|
27 |
-
transaction_type = st.radio("Transaction Type", options=["Debit", "Credit"])
|
28 |
-
channel = st.radio("Transaction Channel", options=["ATM", "Online", "Branch"])
|
29 |
-
transaction_duration = st.slider("Transaction Duration (seconds)", min_value=0, max_value=600, value=30)
|
30 |
-
login_attempts = st.number_input("Login Attempts", min_value=0)
|
31 |
-
transaction_amount = st.number_input("Transaction Amount", min_value=0.0, format="%.2f")
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
-
#
|
38 |
-
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
"TransactionType", "Location", "Channel", "Time",
|
51 |
-
"TransactionDuration", "LoginAttempts", "DayOfWeek", "TransactionAmount" # <-- Corrected order
|
52 |
-
])
|
53 |
-
|
54 |
-
# Predict anomaly
|
55 |
-
prediction = iso_forest.predict(input_data)[0]
|
56 |
-
anomaly_label = "Anomalous" if prediction == -1 else "Normal"
|
57 |
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import pandas as pd
|
3 |
import joblib
|
4 |
import json
|
5 |
+
import numpy as np
|
6 |
from datetime import datetime
|
7 |
+
from sklearn.neighbors import LocalOutlierFactor
|
8 |
|
|
|
|
|
9 |
|
10 |
+
# Tabs for Application & Model Evaluation
#
# Streamlit front end for the bank-transaction anomaly detectors: one tab
# scores a single user-entered transaction with the model selected in the
# sidebar, the other tab shows pre-rendered evaluation plots and commentary.
#
# NOTE(review): the leading "π" in several labels/headers below looks like a
# mis-encoded emoji. The strings are reproduced unchanged because the intended
# glyph cannot be recovered from here -- confirm against the original file.
app, model_eval = st.tabs(["Application", "π Model Evaluation"])

# ---------------- APPLICATION TAB ---------------- #
with app:
    # Load trained models. joblib.load unpickles arbitrary objects, so these
    # artifacts must only ever come from a trusted source.
    iso_forest = joblib.load("isolation_forest_model.pkl")
    one_class_svm = joblib.load("one_class_svm_model.pkl")
    lof_model = joblib.load("local_outlier_factor_model.pkl")  # LOF model trained earlier
    lof_threshold = joblib.load("lof_threshold.pkl")  # Precomputed threshold for LOF

    # Load location mapping. The encoding is explicit so that non-ASCII
    # location names decode identically regardless of the host's default.
    with open("location_mapping.json", "r", encoding="utf-8") as f:
        location_mapping = json.load(f)

    # Manual mapping for categorical variables
    transaction_type_mapping = {"Debit": 0, "Credit": 1}
    channel_mapping = {"ATM": 0, "Online": 1, "Branch": 2}

    st.title("Anomaly Detection for Bank Transactions")

    # Sidebar for model selection
    model_choice = st.sidebar.radio(
        "Select Anomaly Detection Model",
        ["Isolation Forest", "One-Class SVM", "Local Outlier Factor"],
    )

    # User inputs
    transaction_date = st.date_input("Select Transaction Date")
    transaction_time = st.time_input("Select Transaction Time")
    location = st.selectbox("Select Location", options=list(location_mapping.keys()))
    transaction_type = st.radio("Transaction Type", options=["Debit", "Credit"])
    channel = st.radio("Transaction Channel", options=["ATM", "Online", "Branch"])
    transaction_duration = st.slider(
        "Transaction Duration (seconds)", min_value=0, max_value=600, value=30
    )
    login_attempts = st.number_input("Login Attempts", min_value=0)
    transaction_amount = st.number_input("Transaction Amount", min_value=0.0, format="%.2f")

    if st.button("Check for Anomaly"):
        # Day of week as Monday=0 .. Sunday=6. date.weekday() yields exactly
        # that encoding and, unlike strftime('%A'), does not depend on the
        # process locale (a non-English locale would produce day names that
        # an English-keyed lookup table cannot resolve).
        day_of_week = transaction_date.weekday()

        # Convert time to total seconds since midnight (seconds included).
        total_seconds = (
            transaction_time.hour * 3600
            + transaction_time.minute * 60
            + transaction_time.second
        )

        # Convert categorical values to numeric
        location_encoded = location_mapping.get(location, -1)  # Default to -1 if not found
        transaction_type_encoded = transaction_type_mapping[transaction_type]
        channel_encoded = channel_mapping[channel]

        # Single-row frame; the column order below is the one the models are
        # fed, so it has to match the order used at training time.
        # NOTE(review): the training-time order is not visible here -- confirm.
        input_data = pd.DataFrame([[
            transaction_type_encoded, location_encoded, channel_encoded, total_seconds,
            transaction_duration, login_attempts, day_of_week, transaction_amount,
        ]], columns=[
            "TransactionType", "Location", "Channel", "Time",
            "TransactionDuration", "LoginAttempts", "DayOfWeek", "TransactionAmount",
        ])

        if model_choice == "Local Outlier Factor":
            # Mean distance from the input row to the neighbours returned by
            # the fitted model, compared against the stored threshold.
            # NOTE(review): assumes lof_threshold is a scalar on the same
            # distance scale -- confirm against how it was computed.
            distances, _ = lof_model.kneighbors(input_data)
            avg_distance = np.mean(distances)
            is_anomalous = avg_distance > lof_threshold
        else:
            # Isolation Forest and One-Class SVM share the predict()
            # convention used here: -1 marks an anomaly.
            model = iso_forest if model_choice == "Isolation Forest" else one_class_svm
            is_anomalous = model.predict(input_data)[0] == -1

        # Bound on every path, so the result line below can never hit an
        # unassigned name.
        anomaly_label = "Anomalous" if is_anomalous else "Normal"

        # Display result
        st.write(f"### The transaction is: **{anomaly_label}**")


# ---------------- MODEL EVALUATION TAB ---------------- #
with model_eval:
    st.header("π Model Performance Metrics")

    # Sidebar to choose which model's evaluation to display
    eval_model_choice = st.sidebar.radio(
        "Select Model for Evaluation",
        ["Isolation Forest", "One-Class SVM", "Local Outlier Factor"],
    )

    # Display evaluation metrics based on selected model
    if eval_model_choice == "Isolation Forest":
        st.image("Anomaly_IF_counts.png", caption="Anomaly Counts - Isolation Forest", use_column_width=True)
        st.write("### π Isolation Forest Performance")
        st.write("- Detects anomalies based on random sub-sampling of data.")
        st.write("- **Lower False Positives** in structured transaction data.")

    elif eval_model_choice == "One-Class SVM":
        st.image("Anomaly_OCSVM_counts.png", caption="Anomaly Counts - One-Class SVM", use_column_width=True)
        st.write("### π One-Class SVM Performance")
        st.write("- Uses a hyperplane to separate normal from anomalous data.")
        st.write("- **Better suited for small datasets** but may be computationally expensive.")

    elif eval_model_choice == "Local Outlier Factor":
        st.image("Anomaly_LOF_counts.png", caption="Anomaly Counts - Local Outlier Factor", use_column_width=True)
        st.write("### π Local Outlier Factor (LOF) Performance")
        st.write("- Uses density-based analysis to detect anomalies.")
        st.write("- **Best for identifying local anomalies**, but requires careful tuning of `k-neighbors`.")

    # Shown for every selection: one plot comparing all three models.
    st.image("silhouette_scores.png", caption="Silhouette Scores for All Models", use_column_width=True)

    st.header("Comparison")
    st.write("OCSVM (One-Class SVM) is likely overfitting or being too aggressive in marking transactions as anomalies. Its silhouette score of 0.00 suggests poor cluster structure, meaning it does not effectively separate normal vs. anomalous transactions. LOF (Local Outlier Factor) is the best-performing model because it has the highest silhouette score (0.16), indicating it maintains a clear distinction between anomalies and normal transactions. Isolation Forest (IF) is a close second, performing slightly worse than LOF but still better than OCSVM.")
|
116 |
+
|
117 |
+
|
boxplot_transaction_amount.png
ADDED
![]() |
cleaned_transactions.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
local_outlier_factor_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55b4d234fd78b07e802b61b9bb3068e689292f69d3c01e720e89d2c659eb2cdd
|
3 |
+
size 806085
|
lof_threshold.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2368780ca28a9edb8eddf0fd571b650efa2623d358a7a0e183f1ee12a16945c7
|
3 |
+
size 116
|
main.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
one_class_svm_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0d2d2ac14a8d63c7718450c87429541c614e87f9e4bba136596c3a0f438b49e
|
3 |
+
size 212863
|
silhouette_scores.png
ADDED
![]() |