CristopherWVSU committed on
Commit
4ed9a17
·
1 Parent(s): 4e7f4de

Added more Models

Browse files
Anomaly_IF_counts.png ADDED
Anomaly_LOF_counts.png ADDED
Anomaly_OCSVM_counts.png ADDED
app.py CHANGED
@@ -2,58 +2,116 @@ import streamlit as st
2
  import pandas as pd
3
  import joblib
4
  import json
 
5
  from datetime import datetime
 
6
 
7
- # Load trained model
8
- iso_forest = joblib.load("isolation_forest_model.pkl")
9
 
10
- # Load location mapping
11
- with open("location_mapping.json", "r") as f:
12
- location_mapping = json.load(f)
13
 
14
- # Manual mapping for categorical variables
15
- transaction_type_mapping = {"Debit": 0, "Credit": 1}
16
- channel_mapping = {"ATM": 0, "Online": 1, "Branch": 2}
17
- day_of_week_mapping = {"Monday": 0, "Tuesday": 1, "Wednesday": 2, "Thursday": 3, "Friday": 4, "Saturday": 5, "Sunday": 6}
 
 
 
18
 
19
- st.title("Anomaly Detection for Bank Transactions")
 
 
20
 
 
 
 
 
21
 
 
22
 
23
- # User inputs
24
- date = st.date_input("Select Transaction Date")
25
- time = st.time_input("Select Transaction Time")
26
- location = st.selectbox("Select Location", options=list(location_mapping.keys()))
27
- transaction_type = st.radio("Transaction Type", options=["Debit", "Credit"])
28
- channel = st.radio("Transaction Channel", options=["ATM", "Online", "Branch"])
29
- transaction_duration = st.slider("Transaction Duration (seconds)", min_value=0, max_value=600, value=30)
30
- login_attempts = st.number_input("Login Attempts", min_value=0)
31
- transaction_amount = st.number_input("Transaction Amount", min_value=0.0, format="%.2f")
32
 
33
- if st.button("Check for Anomaly"):
34
- # Convert date to day of the week
35
- day_of_week = day_of_week_mapping[date.strftime('%A')]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- # Convert time to total seconds since midnight
38
- total_seconds = time.hour * 3600 + time.minute * 60
39
 
40
- # Convert categorical values to numeric
41
- location_encoded = location_mapping.get(location, -1) # Default to -1 if not found
42
- transaction_type_encoded = transaction_type_mapping[transaction_type]
43
- channel_encoded = channel_mapping[channel]
 
 
44
 
45
- # Ensure the order of features matches training
46
- input_data = pd.DataFrame([[
47
- transaction_type_encoded, location_encoded, channel_encoded, total_seconds,
48
- transaction_duration, login_attempts, day_of_week, transaction_amount # <-- Corrected order
49
- ]], columns=[
50
- "TransactionType", "Location", "Channel", "Time",
51
- "TransactionDuration", "LoginAttempts", "DayOfWeek", "TransactionAmount" # <-- Corrected order
52
- ])
53
-
54
- # Predict anomaly
55
- prediction = iso_forest.predict(input_data)[0]
56
- anomaly_label = "Anomalous" if prediction == -1 else "Normal"
57
 
58
- # Display result
59
- st.write(f"### The transaction is: **{anomaly_label}**")
 
 
 
 
 
 
 
 
 
 
 
2
  import pandas as pd
3
  import joblib
4
  import json
5
+ import numpy as np
6
  from datetime import datetime
7
+ from sklearn.neighbors import LocalOutlierFactor
8
 
 
 
9
 
10
+ # ✅ Tabs for Application & Model Evaluation
11
+ app, model_eval = st.tabs(["Application", "πŸ“Š Model Evaluation"])
 
12
 
13
+ # ---------------- APPLICATION TAB ---------------- #
14
+ with app:
15
+ # Load trained models
16
+ iso_forest = joblib.load("isolation_forest_model.pkl")
17
+ one_class_svm = joblib.load("one_class_svm_model.pkl")
18
+ lof_model = joblib.load("local_outlier_factor_model.pkl") # LOF model trained earlier
19
+ lof_threshold = joblib.load("lof_threshold.pkl") # Precomputed threshold for LOF
20
 
21
+ # Load location mapping
22
+ with open("location_mapping.json", "r") as f:
23
+ location_mapping = json.load(f)
24
 
25
+ # Manual mapping for categorical variables
26
+ transaction_type_mapping = {"Debit": 0, "Credit": 1}
27
+ channel_mapping = {"ATM": 0, "Online": 1, "Branch": 2}
28
+ day_of_week_mapping = {"Monday": 0, "Tuesday": 1, "Wednesday": 2, "Thursday": 3, "Friday": 4, "Saturday": 5, "Sunday": 6}
29
 
30
+ st.title("Anomaly Detection for Bank Transactions")
31
 
32
+ # Sidebar for model selection
33
+ model_choice = st.sidebar.radio("Select Anomaly Detection Model", ["Isolation Forest", "One-Class SVM", "Local Outlier Factor"])
 
 
 
 
 
 
 
34
 
35
+ # User inputs
36
+ date = st.date_input("Select Transaction Date")
37
+ time = st.time_input("Select Transaction Time")
38
+ location = st.selectbox("Select Location", options=list(location_mapping.keys()))
39
+ transaction_type = st.radio("Transaction Type", options=["Debit", "Credit"])
40
+ channel = st.radio("Transaction Channel", options=["ATM", "Online", "Branch"])
41
+ transaction_duration = st.slider("Transaction Duration (seconds)", min_value=0, max_value=600, value=30)
42
+ login_attempts = st.number_input("Login Attempts", min_value=0)
43
+ transaction_amount = st.number_input("Transaction Amount", min_value=0.0, format="%.2f")
44
+
45
+ if st.button("Check for Anomaly"):
46
+ # Convert date to day of the week
47
+ day_of_week = day_of_week_mapping[date.strftime('%A')]
48
+
49
+ # Convert time to total seconds since midnight
50
+ total_seconds = time.hour * 3600 + time.minute * 60
51
+
52
+ # Convert categorical values to numeric
53
+ location_encoded = location_mapping.get(location, -1) # Default to -1 if not found
54
+ transaction_type_encoded = transaction_type_mapping[transaction_type]
55
+ channel_encoded = channel_mapping[channel]
56
+
57
+ # Ensure the order of features matches training
58
+ input_data = pd.DataFrame([[
59
+ transaction_type_encoded, location_encoded, channel_encoded, total_seconds,
60
+ transaction_duration, login_attempts, day_of_week, transaction_amount
61
+ ]], columns=[
62
+ "TransactionType", "Location", "Channel", "Time",
63
+ "TransactionDuration", "LoginAttempts", "DayOfWeek", "TransactionAmount"
64
+ ])
65
+
66
+ if model_choice == "Isolation Forest":
67
+ prediction = iso_forest.predict(input_data)[0]
68
+ anomaly_label = "Anomalous" if prediction == -1 else "Normal"
69
+
70
+ elif model_choice == "One-Class SVM":
71
+ prediction = one_class_svm.predict(input_data)[0]
72
+ anomaly_label = "Anomalous" if prediction == -1 else "Normal"
73
+
74
+ elif model_choice == "Local Outlier Factor":
75
+ # Get the distance of input_data from the neighbors
76
+ distances, _ = lof_model.kneighbors(input_data)
77
+ avg_distance = np.mean(distances)
78
+
79
+ # Compare with the LOF threshold
80
+ anomaly_label = "Anomalous" if avg_distance > lof_threshold else "Normal"
81
+
82
+ # Display result
83
+ st.write(f"### The transaction is: **{anomaly_label}**")
84
+
85
+
86
+ # ---------------- MODEL EVALUATION TAB ---------------- #
87
+ with model_eval:
88
+ st.header("πŸ“Š Model Performance Metrics")
89
 
90
+ # Sidebar to choose which model's evaluation to display
91
+ eval_model_choice = st.sidebar.radio("Select Model for Evaluation", ["Isolation Forest", "One-Class SVM", "Local Outlier Factor"])
92
 
93
+ # Display evaluation metrics based on selected model
94
+ if eval_model_choice == "Isolation Forest":
95
+ st.image("Anomaly_IF_counts.png", caption="Anomaly Counts - Isolation Forest", use_column_width=True)
96
+ st.write("### πŸ“Œ Isolation Forest Performance")
97
+ st.write("- Detects anomalies based on random sub-sampling of data.")
98
+ st.write("- **Lower False Positives** in structured transaction data.")
99
 
100
+ elif eval_model_choice == "One-Class SVM":
101
+ st.image("Anomaly_OCSVM_counts.png", caption="Anomaly Counts - One-Class SVM", use_column_width=True)
102
+ st.write("### πŸ“Œ One-Class SVM Performance")
103
+ st.write("- Uses a hyperplane to separate normal from anomalous data.")
104
+ st.write("- **Better suited for small datasets** but may be computationally expensive.")
 
 
 
 
 
 
 
105
 
106
+ elif eval_model_choice == "Local Outlier Factor":
107
+ st.image("Anomaly_LOF_counts.png", caption="Anomaly Counts - Local Outlier Factor", use_column_width=True)
108
+ st.write("### πŸ“Œ Local Outlier Factor (LOF) Performance")
109
+ st.write("- Uses density-based analysis to detect anomalies.")
110
+ st.write("- **Best for identifying local anomalies**, but requires careful tuning of `k-neighbors`.")
111
+
112
+ st.image("silhouette_scores.png", caption="Silhouette Scores for All Models", use_column_width=True)
113
+
114
+ st.header("Comparison")
115
+ st.write("OCSVM (One-Class SVM) is likely overfitting or being too aggressive in marking transactions as anomalies. Its silhouette score of 0.00 suggests poor cluster structure, meaning it does not effectively separate normal vs. anomalous transactions. LOF (Local Outlier Factor) is the best-performing model because it has the highest silhouette score (0.16), indicating it maintains a clear distinction between anomalies and normal transactions. Isolation Forest (IF) is a close second, performing slightly worse than LOF but still better than OCSVM.")
116
+
117
+
boxplot_transaction_amount.png ADDED
cleaned_transactions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
local_outlier_factor_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55b4d234fd78b07e802b61b9bb3068e689292f69d3c01e720e89d2c659eb2cdd
3
+ size 806085
lof_threshold.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2368780ca28a9edb8eddf0fd571b650efa2623d358a7a0e183f1ee12a16945c7
3
+ size 116
main.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
one_class_svm_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0d2d2ac14a8d63c7718450c87429541c614e87f9e4bba136596c3a0f438b49e
3
+ size 212863
silhouette_scores.png ADDED