# Provenance: commit 3adc61e ("initial commit") by Michael Rey
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Global look-and-feel for the app: dark page colours, rounded blue buttons,
# the `.title` heading class, and a sticky rule aimed at the tab strip.
# The stylesheet lives in a named constant so the injection call stays short.
_PAGE_CSS = """
<style>
body {
background-color: #1E1E1E;
color: #FFFFFF;
font-family: 'Arial', sans-serif;
}
.stButton>button {
background-color: #4A90E2;
color: #FFFFFF;
border-radius: 15px;
padding: 12px 24px;
font-size: 16px;
font-weight: bold;
}
.title {
color: #64FFDA;
text-shadow: 1px 1px #FF4C4C;
}
.stTabs [data-testid="stHorizontalBlock"] {
position: sticky;
top: 0;
background-color: #1E1E1E;
z-index: 10;
}
</style>
"""

# unsafe_allow_html is required, otherwise the <style> tag is escaped as text.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# Page header: app title followed by a tagline rendered with the `.title`
# CSS class. Emoji are written as real Unicode characters (the previous
# literals were mis-decoded byte sequences).
st.title("📲 Telco Customer Churn Prediction")
st.markdown("<h2 class='title'>Predict whether a customer will churn! 🚀</h2>", unsafe_allow_html=True)
# Load dataset
file_path = 'WA_Fn-UseC_-Telco-Customer-Churn.csv'


@st.cache_data
def load_churn_data(path):
    """Read the churn CSV and return the cleaned modelling frame.

    Keeps only ``tenure``, ``MonthlyCharges``, ``TotalCharges`` and
    ``Churn``; drops every row in which one of them is missing or is a
    single-space placeholder; converts ``TotalCharges`` to a numeric dtype;
    encodes ``Churn`` as 1 for ``'Yes'`` and 0 for any other value.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached to avoid re-reading and re-cleaning the file.

    Args:
        path: Location of the CSV file.

    Returns:
        A ``pandas.DataFrame`` with the four columns listed above.
    """
    data = pd.read_csv(path)
    data = data[['tenure', 'MonthlyCharges', 'TotalCharges', 'Churn']]
    data = data.replace(" ", np.nan).dropna()
    data['TotalCharges'] = pd.to_numeric(data['TotalCharges'])
    # Vectorised equivalent of mapping 'Yes' -> 1 and everything else -> 0.
    data['Churn'] = (data['Churn'] == 'Yes').astype(int)
    return data


@st.cache_resource
def train_churn_model(features, target):
    """Split, scale and fit the linear SVM, once per distinct input.

    Without caching, the fit (including the extra work that
    ``probability=True`` requests) would be repeated on every script rerun,
    i.e. on every slider move or button click.

    Args:
        features: Feature frame (everything except ``Churn``).
        target: The ``Churn`` series aligned with ``features``.

    Returns:
        ``(X_train, X_test, y_train, y_test, scaler, model, y_pred)``.
        The objects are shared between reruns and must be treated as
        read-only by callers.
    """
    X_tr, X_te, y_tr, y_te = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    # Fit the scaler on the training split only, then reuse it for the test
    # split so no test-set statistics leak into training.
    fitted_scaler = StandardScaler()
    X_tr = fitted_scaler.fit_transform(X_tr)
    X_te = fitted_scaler.transform(X_te)
    svm = SVC(kernel='linear', probability=True, random_state=42)
    svm.fit(X_tr, y_tr)
    return X_tr, X_te, y_tr, y_te, fitted_scaler, svm, svm.predict(X_te)


# Preprocess data and train model (cached across Streamlit reruns)
df = load_churn_data(file_path)
# Define features and target
X = df.drop('Churn', axis=1)
y = df['Churn']
# Split, scale, train and predict on the held-out split
X_train, X_test, y_train, y_test, scaler, model, y_pred = train_churn_model(X, y)
# Top Tabs Navigation: one tab per app section. Labels use real emoji
# characters (the previous literals were mis-decoded byte sequences).
tab1, tab2, tab3 = st.tabs(["📊 Dataset", "📈 Visualization", "🔮 Prediction"])
# Dataset Section: show the first rows of the cleaned modelling frame.
with tab1:
    st.write("### 📊 Dataset Preview")
    st.dataframe(df.head())
# Visualization Section: held-out accuracy and confusion matrix.
with tab2:
    # Display model performance
    accuracy = accuracy_score(y_test, y_pred)
    st.write("### 🔥 Model Performance")
    st.write(f"**✅ Model Accuracy:** {accuracy:.2f}")

    # Visualizing performance
    st.write("### 📊 Performance Breakdown")
    conf_matrix = confusion_matrix(y_test, y_pred)
    st.write("Confusion Matrix:")
    fig, ax = plt.subplots()
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='coolwarm', ax=ax)
    # confusion_matrix puts true classes on rows and predictions on columns.
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; the script reruns
    # on each interaction, so close explicitly to avoid accumulating figures.
    plt.close(fig)
# Prediction Section: score a hypothetical customer chosen via sliders.
with tab3:
    st.write("### 🔮 Predict Customer Churn")
    st.markdown("Adjust the stats below to simulate a customer scenario!")

    tenure = st.slider("Customer Tenure (Months)", min_value=0, max_value=72, value=12)
    monthly_charges = st.slider("Monthly Charges ($)", min_value=0, max_value=200, value=50)
    total_charges = st.slider("Total Charges ($)", min_value=0, max_value=10000, value=600)

    if st.button("✨ Predict Churn"):
        # The scaler was fitted on a DataFrame, so pass the same column
        # names (in the same order) to avoid sklearn's feature-name warning.
        customer = pd.DataFrame(
            [[tenure, monthly_charges, total_charges]],
            columns=['tenure', 'MonthlyCharges', 'TotalCharges'],
        )
        input_data = scaler.transform(customer)
        prediction = model.predict(input_data)[0]
        prediction_proba = model.predict_proba(input_data)[0]

        st.subheader("🔮 Prediction Result")
        # Use a real if/else: a bare conditional expression statement can be
        # picked up by Streamlit "magic", which then also renders the value
        # the expression returns.
        if prediction == 1:
            st.error("🚨 Customer is likely to CHURN!")
        else:
            st.success("✅ Customer is likely to STAY.")
        # NOTE(review): for SVC, predict_proba is calibrated separately from
        # predict, so this value may occasionally fall below 0.5 — confirm
        # that is acceptable for a "confidence" readout.
        st.write(f"Confidence: {prediction_proba[prediction]:.2f}")

        # Churn/Stay Bar Chart
        st.write("### 📊 Churn Probability Breakdown")
        fig, ax = plt.subplots()
        ax.bar(["Stay", "Churn"], [prediction_proba[0], prediction_proba[1]], color=["#64FFDA", "#FF4C4C"])
        ax.set_ylim(0, 1)
        ax.set_ylabel("Probability")
        ax.set_title("Customer Churn Probability")
        st.pyplot(fig)
        # Close the figure so repeated predictions do not leak figures.
        plt.close(fig)