"""Streamlit app: train a random-forest fraud classifier and explore it.

The script reads ``fraud_data.csv``, fits a ``RandomForestClassifier`` on
three numeric features, and renders three tabs: the raw data with scatter
plots, hold-out performance metrics, and an interactive single-transaction
prediction form.
"""

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import streamlit as st
import altair as alt

DATA_PATH = "fraud_data.csv"
# Single source of truth for the model inputs; the column order here is the
# order used for training, for the charts, and for the prediction form.
FEATURE_COLUMNS = ['TransactionAmount', 'CustomerAge', 'TransactionFrequency']
TARGET_COLUMN = 'IsFraud'


@st.cache_data
def load_data(path: str) -> pd.DataFrame:
    """Read the CSV at *path* into a DataFrame.

    Cached because Streamlit re-executes the whole script on every widget
    interaction; without the cache the file would be re-read each time.

    Raises:
        FileNotFoundError: if *path* does not exist (propagated from pandas).
    """
    return pd.read_csv(path)


@st.cache_resource
def train_and_evaluate(features: pd.DataFrame, target: pd.Series) -> tuple:
    """Fit the classifier and score it on a 20% hold-out split.

    Cached so the forest is fit once per distinct dataset instead of on
    every script rerun.

    Args:
        features: the model input columns.
        target: the label column aligned with *features*.

    Returns:
        A ``(model, accuracy, report)`` tuple: the fitted
        ``RandomForestClassifier``, the hold-out accuracy, and the
        ``classification_report`` as a dict.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    holdout_accuracy = accuracy_score(y_test, y_pred)
    holdout_report = classification_report(y_test, y_pred, output_dict=True)
    return classifier, holdout_accuracy, holdout_report


try:
    # Load the data
    df = load_data(DATA_PATH)

    # Prepare the data for the model
    X = df[FEATURE_COLUMNS]
    y = df[TARGET_COLUMN]
except FileNotFoundError:
    st.error("Error: Data file not found.")
    st.stop()
except Exception as e:
    # Top-level boundary of the app: surface any other load/schema problem
    # (e.g. a missing column) to the user instead of a raw traceback.
    st.error(f"An error occurred: {e}")
    st.stop()

# Train (or fetch the cached) model and its hold-out metrics
model, accuracy, report = train_and_evaluate(X, y)

# Create a Streamlit app
st.title("Fraud Detection System")

# Create tabs
tab1, tab2, tab3 = st.tabs(
    ["Data Visualization", "Model Performance", "Fraud Prediction"]
)

# Tab 1: Data Visualization
with tab1:
    st.write("### Fraud Data")
    st.write(df)

    # Scatter plot of each feature against the fraud label
    st.write("### Scatter Plot of Features")
    for col in FEATURE_COLUMNS:
        st.write(f"**{col} vs Fraudulent Transactions**")
        st.altair_chart(
            alt.Chart(df).mark_circle().encode(
                x=col,
                y=TARGET_COLUMN,
                tooltip=[col, TARGET_COLUMN],
            ).interactive(),
            use_container_width=True,
        )

# Tab 2: Model Performance
with tab2:
    st.write("### Model Performance")
    st.write(f"Accuracy: {accuracy:.2f}")
    st.write("Classification Report:")
    st.json(report)

# Tab 3: Fraud Prediction
with tab3:
    st.write("### Predict Fraudulent Transactions")
    amount_input = st.number_input(
        "Transaction Amount", min_value=1.0, value=100.0, step=1.0
    )
    age_input = st.number_input("Customer Age", min_value=18, value=30, step=1)
    frequency_input = st.slider(
        "Transaction Frequency (past month)",
        min_value=1,
        max_value=100,
        value=5,
        step=1,
    )

    if st.button("Predict"):
        # Build a one-row frame with the training column names; the model was
        # fit on a named DataFrame, and a bare list would make scikit-learn
        # warn about missing feature names.
        input_data = pd.DataFrame(
            [[amount_input, age_input, frequency_input]],
            columns=FEATURE_COLUMNS,
        )

        # Make prediction
        prediction = model.predict(input_data)[0]
        result = "Fraudulent" if prediction == 1 else "Legitimate"
        st.write(f"### Prediction: {result}")