Spaces:

Markndrei
/

fraud_detection_model

Running

App Files Files Community

Markndrei committed on Mar 3

Commit

f460ec4

verified ·

1 Parent(s): 256cd18

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -85

app.py CHANGED Viewed

@@ -1,85 +1,85 @@
-import pandas as pd
-import numpy as np
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score, classification_report
-import streamlit as st
-import altair as alt
-try:
-    # Load the data
-    df = pd.read_csv("fraud_data.csv")
-    # Prepare the data for the model
-    X = df[['TransactionAmount', 'CustomerAge', 'TransactionFrequency']]
-    y = df['IsFraud']
-except FileNotFoundError:
-    st.write("Error: Data file not found.")
-    st.stop()
-except Exception as e:
-    st.write(f"An error occurred: {e}")
-    st.stop()
-# Split the data into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-# Create and train a Random Forest Classifier model
-model = RandomForestClassifier(n_estimators=100, random_state=42)
-model.fit(X_train, y_train)
-# Make predictions on the testing set
-y_pred = model.predict(X_test)
-# Evaluate the model's performance
-accuracy = accuracy_score(y_test, y_pred)
-report = classification_report(y_test, y_pred, output_dict=True)
-# Create a Streamlit app
-st.title("Fraud Detection System")
-# Create tabs
-tab1, tab2, tab3 = st.tabs(["Data Visualization", "Model Performance", "Fraud Prediction"])
-# Tab 1: Data Visualization
-with tab1:
-    st.write("### Fraud Data")
-    st.write(df)
-    # Scatter plot
-    st.write("### Scatter Plot of Features")
-    for col in ['TransactionAmount', 'CustomerAge', 'TransactionFrequency']:
-        st.write(f"**{col} vs Fraudulent Transactions**")
-        st.altair_chart(
-            alt.Chart(df).mark_circle().encode(
-                x=col,
-                y='IsFraud',
-                tooltip=[col, 'IsFraud']
-            ).interactive(),
-            use_container_width=True
-        )
-# Tab 2: Model Performance
-with tab2:
-    st.write("### Model Performance")
-    st.write(f"Accuracy: {accuracy:.2f}")
-    st.write("Classification Report:")
-    st.json(report)
-# Tab 3: Fraud Prediction
-with tab3:
-    st.write("### Predict Fraudulent Transactions")
-    amount_input = st.number_input("Transaction Amount", min_value=1.0, value=100.0, step=1.0)
-    age_input = st.number_input("Customer Age", min_value=18, value=30, step=1)
-    frequency_input = st.slider("Transaction Frequency (past month)", min_value=1, max_value=100, value=5, step=1)
-    if st.button("Predict"):
-        # Create input array for prediction
-        input_data = [[amount_input, age_input, frequency_input]]
-        # Make prediction
-        prediction = model.predict(input_data)[0]
-        result = "Fraudulent" if prediction == 1 else "Legitimate"
-        st.write(f"### Prediction: {result}")

+import streamlit as st
+import pandas as pd
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score, classification_report
+from datasets import load_dataset
+# Load dataset from Hugging Face
+dataset = load_dataset("Nooha/cc_fraud_detection_dataset", split="train")
+df = pd.DataFrame(dataset)
+# Select relevant features and target variable
+X = df[['Amount', 'Time', 'V1', 'V2', 'V3']]
+y = df['Class']
+# Split dataset into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Train a RandomForestClassifier model
+model = RandomForestClassifier(n_estimators=100, random_state=42)
+model.fit(X_train, y_train)
+y_pred = model.predict(X_test)
+# Model Performance Metrics
+accuracy = accuracy_score(y_test, y_pred)
+class_report_df = pd.DataFrame(classification_report(y_test, y_pred, output_dict=True)).transpose()
+# Application Title
+st.title('💳 Credit Card Fraud Detection System')
+st.markdown(
+    """
+    ## 📖 Introduction
+    Welcome to the **Credit Card Fraud Detection System**! This tool analyzes credit card transactions to detect fraudulent activity using a **Random Forest model**.
+    """
+)
+# Tab Structure
+tab1, tab2, tab3 = st.tabs(['📊 Dataset Preview', '📈 Model Performance', '🔍 Fraud Prediction'])
+# Dataset Preview
+with tab1:
+    st.markdown(
+        """
+        ## 📊 Dataset Preview
+        Below is a sample of the credit card transaction dataset used for fraud detection.
+        """
+    )
+    st.dataframe(df.head())
+# Model Performance
+with tab2:
+    st.markdown(
+        """
+        ## 📈 Model Performance
+        - **Accuracy:** Measures overall model performance.
+        - **Classification Report:** Precision, recall, and F1-score breakdown.
+        """
+    )
+    st.write(f"**📌 Model Accuracy:** {accuracy:.2%}")
+    st.markdown("### 📋 Classification Report")
+    st.dataframe(class_report_df)
+# Fraud Prediction
+with tab3:
+    st.markdown("""
+        ## 🔍 Fraud Prediction
+        Enter transaction details below to predict if it's fraudulent.
+        """)
+    amount_input = st.number_input("💵 Transaction Amount", min_value=0.0, value=100.0, step=1.0)
+    time_input = st.number_input("⏳ Transaction Time", min_value=0.0, value=50000.0, step=1000.0)
+    v1_input = st.number_input("🔢 Feature V1", value=0.0, step=0.1)
+    v2_input = st.number_input("🔢 Feature V2", value=0.0, step=0.1)
+    v3_input = st.number_input("🔢 Feature V3", value=0.0, step=0.1)
+    if st.button("🔎 Predict Fraud"):
+        input_data = np.array([[amount_input, time_input, v1_input, v2_input, v3_input]])
+        prediction = model.predict(input_data)[0]
+        result = "🚨 Fraudulent" if prediction == 1 else "✅ Legitimate"
+        st.success(f"### 🎯 Prediction: **{result}**")