File size: 4,070 Bytes
3adc61e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Page-wide CSS: dark background, rounded blue buttons, an accent-coloured
# `.title` class, and a rule intended to keep the tab bar sticky at the top.
_PAGE_CSS = """
<style>
body {
background-color: #1E1E1E;
color: #FFFFFF;
font-family: 'Arial', sans-serif;
}
.stButton>button {
background-color: #4A90E2;
color: #FFFFFF;
border-radius: 15px;
padding: 12px 24px;
font-size: 16px;
font-weight: bold;
}
.title {
color: #64FFDA;
text-shadow: 1px 1px #FF4C4C;
}
.stTabs [data-testid="stHorizontalBlock"] {
position: sticky;
top: 0;
background-color: #1E1E1E;
z-index: 10;
}
</style>
"""
# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# App heading followed by a tagline styled with the `.title` CSS class
st.title("๐ฒ Telco Customer Churn Prediction")
st.markdown(
    "<h2 class='title'>Predict whether a customer will churn! ๐</h2>",
    unsafe_allow_html=True,
)
# Load the data, preprocess it and train the model.
# Streamlit re-executes this whole script on every widget interaction, so the
# expensive steps (CSV parsing and SVM fitting) live in cached helpers; they
# only do real work the first time they see a given input.
file_path = 'WA_Fn-UseC_-Telco-Customer-Churn.csv'


@st.cache_data
def load_churn_data(csv_path):
    """Return the cleaned churn table read from *csv_path*.

    Only the ``tenure``, ``MonthlyCharges``, ``TotalCharges`` and ``Churn``
    columns are kept. Rows with a missing cell (including cells that contain
    a single space) are dropped, ``TotalCharges`` is converted to a numeric
    column and ``Churn`` is encoded as 1 for ``'Yes'`` and 0 otherwise.

    The result is cached on *csv_path*; edits to the file on disk are not
    picked up until the cache is cleared.

    Raises:
        FileNotFoundError: if *csv_path* does not exist.
        KeyError: if one of the four expected columns is absent.
        ValueError: if a remaining ``TotalCharges`` cell is not numeric.
    """
    table = pd.read_csv(csv_path)
    table = table[['tenure', 'MonthlyCharges', 'TotalCharges', 'Churn']]
    # A cell holding a single space is treated as a missing value.
    table = table.replace(" ", np.nan).dropna()
    # assign() builds a new frame instead of writing into the dropna() result,
    # which avoids chained-assignment warnings.
    return table.assign(
        TotalCharges=pd.to_numeric(table['TotalCharges']),
        Churn=(table['Churn'] == 'Yes').astype(int),
    )


@st.cache_resource
def fit_churn_model(train_features, train_labels):
    """Fit a StandardScaler and a linear SVC on the training split.

    Args:
        train_features: unscaled training feature table.
        train_labels: 0/1 churn labels aligned with *train_features*.

    Returns:
        A ``(scaler, model)`` tuple of the fitted objects. They are shared
        between reruns, so callers must not mutate them.
    """
    fitted_scaler = StandardScaler()
    scaled_features = fitted_scaler.fit_transform(train_features)
    # probability=True enables predict_proba (used by the prediction tab).
    classifier = SVC(kernel='linear', probability=True, random_state=42)
    classifier.fit(scaled_features, train_labels)
    return fitted_scaler, classifier


try:
    df = load_churn_data(file_path)
except FileNotFoundError:
    # Show a readable message instead of a raw traceback, then halt the run.
    st.error(f"Dataset file not found: {file_path}")
    st.stop()

# Define features and target
X = df.drop('Churn', axis=1)
y = df['Churn']

# Split data; the fixed seed yields the same split on every rerun, which
# keeps the cache key of fit_churn_model stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit scaler + SVM once, then scale both splits with the fitted scaler
scaler, model = fit_churn_model(X_train, y_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Hold-out predictions used by the performance tab
y_pred = model.predict(X_test)
# Top-level navigation: one tab per section of the app
tab_labels = ["๐ Dataset", "๐ Visualization", "๐ฎ Prediction"]
tab1, tab2, tab3 = st.tabs(tab_labels)

# Dataset tab: show the first rows of the working table
with tab1:
    st.write("### ๐ Dataset Preview")
    preview_rows = df.head()
    st.dataframe(preview_rows)
# Visualization tab: hold-out accuracy and confusion matrix
with tab2:
    # Overall accuracy on the test split
    accuracy = accuracy_score(y_test, y_pred)
    st.write("### ๐ฅ Model Performance")
    # NOTE(review): this literal was split across two lines by a mis-encoded
    # glyph (a syntax error); restored as a check mark - confirm against the
    # original file.
    st.write(f"**✅ Model Accuracy:** {accuracy:.2f}")

    # Per-class breakdown of correct and incorrect predictions
    st.write("### ๐ Performance Breakdown")
    conf_matrix = confusion_matrix(y_test, y_pred)
    st.write("Confusion Matrix:")
    fig, ax = plt.subplots()
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='coolwarm', ax=ax)
    # confusion_matrix puts true labels on rows and predictions on columns.
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    st.pyplot(fig)
    # pyplot keeps every figure alive until closed; without this, one figure
    # would leak per script rerun.
    plt.close(fig)
# Prediction tab: score a hypothetical customer chosen with the sliders
with tab3:
    st.write("### ๐ฎ Predict Customer Churn")
    st.markdown("Adjust the stats below to simulate a customer scenario!")
    tenure = st.slider("Customer Tenure (Months)", min_value=0, max_value=72, value=12)
    monthly_charges = st.slider("Monthly Charges ($)", min_value=0, max_value=200, value=50)
    total_charges = st.slider("Total Charges ($)", min_value=0, max_value=10000, value=600)

    if st.button("โจ Predict Churn"):
        # The scaler was fitted on a DataFrame, so pass the same column names
        # (in training order) rather than a bare list, which triggers a
        # feature-name warning.
        customer = pd.DataFrame(
            [[tenure, monthly_charges, total_charges]],
            columns=['tenure', 'MonthlyCharges', 'TotalCharges'],
        )
        input_data = scaler.transform(customer)

        # Map each probability to its class label explicitly instead of
        # assuming the column order of predict_proba.
        prediction_proba = model.predict_proba(input_data)[0]
        proba_by_class = dict(zip(model.classes_, prediction_proba))
        stay_proba = proba_by_class[0]
        churn_proba = proba_by_class[1]

        # For an SVC, predict() and predict_proba() can disagree, which could
        # show a "confidence" below 0.5 for the announced outcome. Deriving
        # the label from the probabilities keeps the message, the confidence
        # and the chart consistent with each other.
        prediction = 1 if churn_proba > stay_proba else 0

        st.subheader("๐ฎ Prediction Result")
        if prediction == 1:
            st.error("๐จ Customer is likely to CHURN!")
        else:
            # NOTE(review): this literal was split across two lines by a
            # mis-encoded glyph (a syntax error); restored as a check mark -
            # confirm against the original file.
            st.success("✅ Customer is likely to STAY.")
        st.write(f"Confidence: {max(stay_proba, churn_proba):.2f}")

        # Churn/Stay Bar Chart
        st.write("### ๐ Churn Probability Breakdown")
        fig, ax = plt.subplots()
        ax.bar(["Stay", "Churn"], [stay_proba, churn_proba], color=["#64FFDA", "#FF4C4C"])
        ax.set_ylim(0, 1)
        ax.set_ylabel("Probability")
        ax.set_title("Customer Churn Probability")
        st.pyplot(fig)
        # Release the figure so repeated clicks do not accumulate open figures.
        plt.close(fig)
|