CristopherWVSU committed on
Commit
cff8376
·
1 Parent(s): b86941d

Initial Push

Browse files
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import joblib
4
+ import json
5
+ from datetime import datetime
6
+
7
+ # Load trained model
8
+ iso_forest = joblib.load("isolation_forest_model.pkl")
9
+
10
+ # Load location mapping
11
+ with open("location_mapping.json", "r") as f:
12
+ location_mapping = json.load(f)
13
+
14
+ # Manual mapping for categorical variables
15
+ transaction_type_mapping = {"Debit": 0, "Credit": 1}
16
+ channel_mapping = {"ATM": 0, "Online": 1, "Branch": 2}
17
+ day_of_week_mapping = {"Monday": 0, "Tuesday": 1, "Wednesday": 2, "Thursday": 3, "Friday": 4, "Saturday": 5, "Sunday": 6}
18
+
19
+ st.title("Anomaly Detection for Bank Transactions")
20
+
21
+
22
+
23
+ # User inputs
24
+ date = st.date_input("Select Transaction Date")
25
+ time = st.time_input("Select Transaction Time")
26
+ location = st.selectbox("Select Location", options=list(location_mapping.keys()))
27
+ transaction_type = st.radio("Transaction Type", options=["Debit", "Credit"])
28
+ channel = st.radio("Transaction Channel", options=["ATM", "Online", "Branch"])
29
+ transaction_duration = st.slider("Transaction Duration (seconds)", min_value=0, max_value=600, value=30)
30
+ login_attempts = st.number_input("Login Attempts", min_value=0)
31
+ transaction_amount = st.number_input("Transaction Amount", min_value=0.0, format="%.2f")
32
+
33
+ if st.button("Check for Anomaly"):
34
+ # Convert date to day of the week
35
+ day_of_week = day_of_week_mapping[date.strftime('%A')]
36
+
37
+ # Convert time to total seconds since midnight
38
+ total_seconds = time.hour * 3600 + time.minute * 60
39
+
40
+ # Convert categorical values to numeric
41
+ location_encoded = location_mapping.get(location, -1) # Default to -1 if not found
42
+ transaction_type_encoded = transaction_type_mapping[transaction_type]
43
+ channel_encoded = channel_mapping[channel]
44
+
45
+ # Ensure the order of features matches training
46
+ input_data = pd.DataFrame([[
47
+ transaction_type_encoded, location_encoded, channel_encoded, total_seconds,
48
+ transaction_duration, login_attempts, day_of_week, transaction_amount # <-- Corrected order
49
+ ]], columns=[
50
+ "TransactionType", "Location", "Channel", "Time",
51
+ "TransactionDuration", "LoginAttempts", "DayOfWeek", "TransactionAmount" # <-- Corrected order
52
+ ])
53
+
54
+ # Predict anomaly
55
+ prediction = iso_forest.predict(input_data)[0]
56
+ anomaly_label = "Anomalous" if prediction == -1 else "Normal"
57
+
58
+ # Display result
59
+ st.write(f"### The transaction is: **{anomaly_label}**")
bank_transactions_data_2.csv ADDED
The diff for this file is too large to render. See raw diff
 
cleaned_transactions.csv ADDED
The diff for this file is too large to render. See raw diff
 
correlation_matrix.png ADDED
isolation_forest_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d06ad59646c63b0d8d88c3a64149e475c4dcb7daba8a3518d2ef339e7e7fd83d
3
+ size 1607544
location_mapping.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Albuquerque": 0,
3
+ "Atlanta": 1,
4
+ "Austin": 2,
5
+ "Baltimore": 3,
6
+ "Boston": 4,
7
+ "Charlotte": 5,
8
+ "Chicago": 6,
9
+ "Colorado Springs": 7,
10
+ "Columbus": 8,
11
+ "Dallas": 9,
12
+ "Denver": 10,
13
+ "Detroit": 11,
14
+ "El Paso": 12,
15
+ "Fort Worth": 13,
16
+ "Fresno": 14,
17
+ "Houston": 15,
18
+ "Indianapolis": 16,
19
+ "Jacksonville": 17,
20
+ "Kansas City": 18,
21
+ "Las Vegas": 19,
22
+ "Los Angeles": 20,
23
+ "Louisville": 21,
24
+ "Memphis": 22,
25
+ "Mesa": 23,
26
+ "Miami": 24,
27
+ "Milwaukee": 25,
28
+ "Nashville": 26,
29
+ "New York": 27,
30
+ "Oklahoma City": 28,
31
+ "Omaha": 29,
32
+ "Philadelphia": 30,
33
+ "Phoenix": 31,
34
+ "Portland": 32,
35
+ "Raleigh": 33,
36
+ "Sacramento": 34,
37
+ "San Antonio": 35,
38
+ "San Diego": 36,
39
+ "San Francisco": 37,
40
+ "San Jose": 38,
41
+ "Seattle": 39,
42
+ "Tucson": 40,
43
+ "Virginia Beach": 41,
44
+ "Washington": 42
45
+ }
main.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ joblib
4
+ scikit-learn