Markndrei committed on
Commit
818bac1
·
verified ·
1 Parent(s): f460ec4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -76
app.py CHANGED
@@ -1,85 +1,172 @@
1
  import streamlit as st
 
2
  import pandas as pd
 
3
  import numpy as np
4
- from sklearn.ensemble import RandomForestClassifier
 
 
 
 
5
  from sklearn.model_selection import train_test_split
6
- from sklearn.metrics import accuracy_score, classification_report
7
- from datasets import load_dataset
8
 
9
- # Load dataset from Hugging Face
10
- dataset = load_dataset("Nooha/cc_fraud_detection_dataset", split="train")
11
- df = pd.DataFrame(dataset)
12
-
13
- # Select relevant features and target variable
14
- X = df[['Amount', 'Time', 'V1', 'V2', 'V3']]
15
- y = df['Class']
16
-
17
- # Split dataset into training and testing sets
18
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
19
-
20
- # Train a RandomForestClassifier model
21
- model = RandomForestClassifier(n_estimators=100, random_state=42)
22
- model.fit(X_train, y_train)
23
-
24
- y_pred = model.predict(X_test)
25
-
26
- # Model Performance Metrics
27
- accuracy = accuracy_score(y_test, y_pred)
28
- class_report_df = pd.DataFrame(classification_report(y_test, y_pred, output_dict=True)).transpose()
29
-
30
- # Application Title
31
- st.title('๐Ÿ’ณ Credit Card Fraud Detection System')
32
-
33
- st.markdown(
34
- """
35
- ## ๐Ÿ“– Introduction
36
- Welcome to the **Credit Card Fraud Detection System**! This tool analyzes credit card transactions to detect fraudulent activity using a **Random Forest model**.
37
- """
38
- )
39
-
40
- # Tab Structure
41
- tab1, tab2, tab3 = st.tabs(['๐Ÿ“Š Dataset Preview', '๐Ÿ“ˆ Model Performance', '๐Ÿ” Fraud Prediction'])
42
-
43
- # Dataset Preview
44
- with tab1:
45
- st.markdown(
46
- """
47
- ## ๐Ÿ“Š Dataset Preview
48
- Below is a sample of the credit card transaction dataset used for fraud detection.
49
- """
50
- )
51
- st.dataframe(df.head())
52
-
53
- # Model Performance
54
- with tab2:
55
- st.markdown(
56
- """
57
- ## ๐Ÿ“ˆ Model Performance
58
- - **Accuracy:** Measures overall model performance.
59
- - **Classification Report:** Precision, recall, and F1-score breakdown.
60
- """
61
- )
62
 
63
- st.write(f"**๐Ÿ“Œ Model Accuracy:** {accuracy:.2%}")
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- st.markdown("### ๐Ÿ“‹ Classification Report")
66
- st.dataframe(class_report_df)
67
-
68
- # Fraud Prediction
69
- with tab3:
70
- st.markdown("""
71
- ## ๐Ÿ” Fraud Prediction
72
- Enter transaction details below to predict if it's fraudulent.
73
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- amount_input = st.number_input("๐Ÿ’ต Transaction Amount", min_value=0.0, value=100.0, step=1.0)
76
- time_input = st.number_input("โณ Transaction Time", min_value=0.0, value=50000.0, step=1000.0)
77
- v1_input = st.number_input("๐Ÿ”ข Feature V1", value=0.0, step=0.1)
78
- v2_input = st.number_input("๐Ÿ”ข Feature V2", value=0.0, step=0.1)
79
- v3_input = st.number_input("๐Ÿ”ข Feature V3", value=0.0, step=0.1)
80
-
81
- if st.button("๐Ÿ”Ž Predict Fraud"):
82
- input_data = np.array([[amount_input, time_input, v1_input, v2_input, v3_input]])
83
- prediction = model.predict(input_data)[0]
84
- result = "๐Ÿšจ Fraudulent" if prediction == 1 else "โœ… Legitimate"
85
- st.success(f"### ๐ŸŽฏ Prediction: **{result}**")
 
1
  import streamlit as st
2
+ from datasets import load_dataset
3
  import pandas as pd
4
+ import joblib
5
  import numpy as np
6
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
7
+ import matplotlib.pyplot as plt
8
+ import seaborn as sns
9
+ import altair as alt
10
+ from sklearn.preprocessing import StandardScaler
11
  from sklearn.model_selection import train_test_split
 
 
12
 
13
+ # Cache the dataset and model to avoid reloading on every visit
14
@st.cache_data
def load_data():
    """Fetch the fraud dataset and return its ``train`` split as a DataFrame.

    The result is memoized by ``st.cache_data`` so the download/conversion is
    not repeated on every script rerun. A ``Class`` column, when present, is
    renamed to ``is_fraud``; other columns are left as they are.

    Returns:
        pandas.DataFrame: the ``train`` split of
        ``Nooha/cc_fraud_detection_dataset``.
    """
    splits = load_dataset("Nooha/cc_fraud_detection_dataset")
    return pd.DataFrame(splits['train']).rename(columns={'Class': 'is_fraud'})
20
+
21
@st.cache_resource
def load_model():
    """Deserialize and return the object stored in ``cc_fraud_model.pkl``.

    Wrapped in ``st.cache_resource`` so the file is read once and the same
    object is reused across reruns.
    """
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code -- only ship artifacts from a trusted source.
    model_artifact = joblib.load("cc_fraud_model.pkl")
    return model_artifact
24
+
25
@st.cache_resource
def load_scaler():
    """Deserialize and return the object stored in ``cc_fraud_scaler.pkl``.

    Wrapped in ``st.cache_resource`` so the file is read once and the same
    object is reused across reruns.
    """
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code -- only ship artifacts from a trusted source.
    scaler_artifact = joblib.load("cc_fraud_scaler.pkl")
    return scaler_artifact
28
+
29
+ # Feature explanations
30
# Human-readable explanation (with example values) for each known feature,
# keyed by dataset column name.
feature_info = {
    "city_pop": (
        "City Population - The number of residents in the city where the "
        "transaction took place. Example: 5000, 250000, 1000000."
    ),
    "cc_num": (
        "Credit Card Number (Anonymized) - A unique identifier for the "
        "credit card used. Example: 1234567890123456, 9876543210987654."
    ),
    "unix_time": (
        "Transaction Timestamp in Unix Time - Represents the time since "
        "January 1, 1970. Example: 1625097600 (2021-07-01 00:00:00 UTC)."
    ),
    "amt": (
        "Transaction Amount - The amount spent in the transaction. "
        "Example: 5.99, 100.50, 999.99."
    ),
    "acct_num": (
        "Account Number (Anonymized) - A unique identifier for the linked "
        "bank account. Example: 1122334455, 9988776655."
    ),
    "zip": (
        "Zip Code of Transaction Location - The postal code where the "
        "transaction occurred. Example: 10001 (NY), 94105 (SF)."
    ),
}
38
+
39
def get_random_choices(df, feature, num_choices=5):
    """Sample distinct, non-null values from one column of ``df``.

    Args:
        df: DataFrame to draw values from.
        feature: Name of the column to sample.
        num_choices: Maximum number of distinct values to return.

    Returns:
        list: up to ``num_choices`` distinct values from ``df[feature]`` in
        random order. Fewer are returned when the column has fewer distinct
        non-null values, and ``[]`` when it has none.
    """
    unique_values = df[feature].dropna().unique()
    # Sampling without replacement raises ValueError if asked for more items
    # than exist, so clamp the request to the size of the pool.
    sample_size = min(num_choices, len(unique_values))
    if sample_size == 0:
        return []
    return np.random.choice(unique_values, sample_size, replace=False).tolist()
41
+
42
def _render_intro():
    """Render the three collapsed expanders that introduce the application."""
    with st.expander("๐Ÿ” **About This Application**", expanded=False):
        st.markdown("""
        This application is designed to help you detect fraudulent credit card transactions using machine learning. ๐Ÿš€
        It uses the **Nooha/cc_fraud_detection_dataset** from Hugging Face, which contains anonymized credit card transactions.
        """)

    with st.expander("โš ๏ธ **Why Fraud Detection Matters**", expanded=False):
        st.markdown("""
        ๐Ÿ’ฐ Credit card fraud is a significant issue in the financial industry, costing billions of dollars annually.
        Detecting fraudulent transactions in real-time is crucial to prevent financial losses and protect customers. ๐Ÿ”
        This app demonstrates how machine learning can be used to identify suspicious transactions.
        """)

    with st.expander("โš™๏ธ **How It Works**", expanded=False):
        st.markdown("""
        ๐Ÿ›  **Features of this application:**
        1. ๐Ÿ“Š **Dataset Preview**: Explore the dataset used to train the model.
        2. ๐Ÿ“ˆ **Model Performance**: Evaluate the performance of the trained model using accuracy, classification reports, and a confusion matrix.
        3. ๐Ÿ”Ž **Test Prediction**: Input transaction details and get real-time predictions on whether the transaction is fraudulent or legitimate.

        โœ… Let's get started!
        """)


def _render_dataset_tab(df):
    """Render the dataset preview, headline counts and class-balance chart."""
    st.header("๐Ÿ“„ Dataset Overview")
    col1, col2 = st.columns(2)
    with col1:
        st.dataframe(df.head(20))
    with col2:
        st.metric("๐Ÿ›’ Total Transactions", f"{len(df):,}")
        st.metric("๐Ÿšจ Fraudulent Transactions", f"{df['is_fraud'].sum():,} ({df['is_fraud'].mean() * 100:.2f}%)")

    # Aggregate before charting so only one row per class is sent to the
    # browser instead of the whole transaction table.
    class_counts = (
        df['is_fraud']
        .value_counts()
        .rename_axis('is_fraud')
        .reset_index(name='count')
    )
    chart = alt.Chart(class_counts).mark_bar().encode(
        x=alt.X('is_fraud:O', title='Fraud Status'),
        y=alt.Y('count:Q', title='Count'),
        color=alt.Color('is_fraud:N', scale=alt.Scale(domain=[0, 1], range=['green', 'red']))
    )
    st.altair_chart(chart, use_container_width=True)


def _render_performance_tab(model, X_test_scaled, y_test):
    """Render accuracy, classification report and confusion matrix."""
    st.header("๐Ÿ“Š Model Performance")
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    st.metric("๐ŸŽฏ Model Accuracy", f"{accuracy:.4f}")

    class_labels = ['Not Fraud', 'Fraud']
    report_dict = classification_report(y_test, y_pred, target_names=class_labels, output_dict=True)
    report_df = pd.DataFrame(report_dict).T.round(3)
    st.dataframe(report_df.style.format("{:.3f}"))

    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots()
    # Draw on the axes created above rather than pyplot's implicit "current"
    # axes, so the plot cannot land on a stale figure from an earlier rerun.
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels, ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # matplotlib does not accumulate one open figure per rerun.
    plt.close(fig)


def _render_prediction_tab(df, feature_columns, model, scaler):
    """Render the interactive form that scores one user-defined transaction.

    Args:
        df: Full dataset; candidate input values are sampled from it.
        feature_columns: Model input column names, in the order the scaler
            and model are given them.
        model: Object exposing ``predict`` and ``predict_proba``.
        scaler: Object exposing ``transform``.
    """
    st.header("๐Ÿ” Fraud Prediction")
    st.markdown("๐Ÿ’ก Select transaction details below.")

    # Define feature descriptions
    feature_descriptions = {
        "acct_num": "๐Ÿ“Œ **Account Number** - Unique identifier for the transaction account.",
        "amt": "๐Ÿ’ฐ **Transaction Amount** - The total amount involved in the transaction.",
        "unix_time": "โณ **Unix Timestamp** - The time when the transaction occurred (in Unix format).",
        "zip": "๐Ÿ“ฎ **ZIP Code** - Postal code for the transaction location.",
        "city_pop": "๐ŸŒ† **City Population** - The number of residents in the city where the transaction took place.",
        "cc_num": "๐Ÿ’ณ **Credit Card Number** - Anonymized credit card number used for the transaction."
    }

    available_features = list(feature_columns)

    # Feature selection UI
    selected_features = st.multiselect("๐ŸŽ›๏ธ Select Features to Use", available_features, default=available_features[:3])

    # Display descriptions of selected features
    for feature in selected_features:
        st.markdown(feature_descriptions.get(feature, "โ„น๏ธ No description available for this feature."))

    # Every model column starts at 0; the selected ones are overwritten below.
    # Building from feature_columns keeps the column order passed to the scaler.
    input_data = dict.fromkeys(available_features, 0)

    # Sample the candidate values once per session. Re-sampling on every rerun
    # would change the selectbox options whenever the user interacts with the
    # page (including clicking the predict button) and reset their selection.
    if "fraud_feature_choices" not in st.session_state:
        st.session_state["fraud_feature_choices"] = {}
    cached_choices = st.session_state["fraud_feature_choices"]

    col1, col2 = st.columns(2)
    for i, feature in enumerate(selected_features):
        if feature not in cached_choices:
            cached_choices[feature] = get_random_choices(df, feature)
        with (col1 if i % 2 == 0 else col2):
            input_data[feature] = st.selectbox(f"๐Ÿ”ข {feature}", cached_choices[feature])

    if st.button("๐Ÿš€ Predict Fraudulence"):
        input_df = pd.DataFrame([input_data])
        input_scaled = scaler.transform(input_df)
        prediction = model.predict(input_scaled)
        confidence = model.predict_proba(input_scaled)[0]

        st.subheader("๐Ÿง Prediction Result")
        if prediction[0] == 1:
            st.toast("๐Ÿšจ Fraudulent Transaction Detected! ๐Ÿ”ด", icon='โš ๏ธ')
            st.error("This transaction is likely fraudulent.")
        else:
            st.toast("โœ… Legitimate Transaction ๐ŸŸข", icon='โœ”๏ธ')
            st.success("This transaction appears legitimate.")

        st.progress(int(max(confidence) * 100))
        st.write(f"๐ŸŽฏ **Confidence:** {max(confidence) * 100:.2f}%")


def main():
    """Build the Streamlit page: intro, data/model loading and the three tabs.

    Loads the dataset, model and scaler through the cached loaders, holds out
    20% of the numeric columns (``random_state=42``) for evaluation, and then
    renders the dataset, performance and prediction tabs.
    """
    st.title("๐Ÿ’ณ Credit Card Fraud Detection Application")
    _render_intro()

    df = load_data()
    model = load_model()
    scaler = load_scaler()

    numeric_df = df.select_dtypes(include=['number'])
    X = numeric_df.drop(columns=['is_fraud'])
    y = numeric_df['is_fraud']

    # Only the held-out part is used (for the metrics tab); the training part
    # of the split is discarded.
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_test_scaled = scaler.transform(X_test)

    tab1, tab2, tab3 = st.tabs(["๐Ÿ“„ Dataset Preview", "๐Ÿ“Š Model Performance", "๐Ÿ” Fraud Prediction"])

    with tab1:
        _render_dataset_tab(df)

    with tab2:
        _render_performance_tab(model, X_test_scaled, y_test)

    with tab3:
        _render_prediction_tab(df, X.columns.tolist(), model, scaler)


if __name__ == "__main__":
    main()