File size: 2,800 Bytes
256cd18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import streamlit as st
import altair as alt

# Load the transaction data and split it into model features (X) and the
# label (y). Every failure is reported in the app and the script run is
# halted with st.stop(), so the code below only runs once df, X and y exist.
try:
    # Only the file read sits inside the try, so I/O and parsing problems
    # are kept apart from the schema problems checked further down.
    df = pd.read_csv("fraud_data.csv")

except FileNotFoundError:
    st.error("Error: Data file not found.")
    st.stop()

except Exception as e:
    # Last-resort handler at the script boundary (e.g. an unreadable or
    # malformed CSV): show the reason instead of a raw traceback.
    st.error(f"An error occurred: {e}")
    st.stop()

# Check the schema up front so a missing column is reported by name rather
# than as a pandas KeyError message.
_required_columns = ['TransactionAmount', 'CustomerAge', 'TransactionFrequency', 'IsFraud']
_missing_columns = [name for name in _required_columns if name not in df.columns]
if _missing_columns:
    st.error(f"An error occurred: missing column(s) {', '.join(_missing_columns)}")
    st.stop()

# A file with a header but no rows cannot be split into train/test sets.
if df.empty:
    st.error("An error occurred: the data file contains no rows.")
    st.stop()

# Prepare the data for the model
X = df[['TransactionAmount', 'CustomerAge', 'TransactionFrequency']]
y = df['IsFraud']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
# NOTE(review): these statements sit at module level, so the split and the
# model fit are repeated each time the script executes.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest and fit it on the training portion.
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Summarise performance: overall accuracy plus the per-class report as a dict.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    output_dict=True,
)

# Page heading for the app.
st.title("Fraud Detection System")

# One tab per section of the app, unpacked in the order the labels are listed.
tab1, tab2, tab3 = st.tabs(
    [
        "Data Visualization",
        "Model Performance",
        "Fraud Prediction",
    ]
)

# Tab 1: the loaded table followed by one scatter chart per feature.
with tab1:
    st.write("### Fraud Data")
    st.write(df)

    st.write("### Scatter Plot of Features")
    for feature in ['TransactionAmount', 'CustomerAge', 'TransactionFrequency']:
        st.write(f"**{feature} vs Fraudulent Transactions**")

        # Feature value on the x axis, fraud label on the y axis; the
        # tooltip shows both fields for the hovered point.
        points = alt.Chart(df).mark_circle()
        scatter = points.encode(
            x=feature,
            y='IsFraud',
            tooltip=[feature, 'IsFraud'],
        )

        st.altair_chart(scatter.interactive(), use_container_width=True)

# Tab 2: metrics computed on the held-out test split.
with tab2:
    st.write("### Model Performance")

    # Accuracy is shown rounded to two decimal places.
    accuracy_line = f"Accuracy: {accuracy:.2f}"
    st.write(accuracy_line)

    # The classification report is a dict, rendered as JSON.
    st.write("Classification Report:")
    st.json(report)

# Tab 3: collect one transaction's values from the user and classify it
# with the trained model when the button is pressed.
with tab3:
    st.write("### Predict Fraudulent Transactions")
    amount_input = st.number_input("Transaction Amount", min_value=1.0, value=100.0, step=1.0)
    age_input = st.number_input("Customer Age", min_value=18, value=30, step=1)
    frequency_input = st.slider("Transaction Frequency (past month)", min_value=1, max_value=100, value=5, step=1)

    if st.button("Predict"):
        # Build a one-row frame carrying the same column labels the model
        # was fitted on. A bare nested list has no feature names, which
        # makes scikit-learn warn that X lacks valid feature names and
        # leaves the value order unchecked against the training columns.
        # The values are listed in training-column order: amount, age,
        # frequency.
        input_data = pd.DataFrame(
            [[amount_input, age_input, frequency_input]],
            columns=X_train.columns,
        )

        # Make prediction (a label of 1 is treated as fraud)
        prediction = model.predict(input_data)[0]
        result = "Fraudulent" if prediction == 1 else "Legitimate"

        st.write(f"### Prediction: {result}")