# NOTE(review): the lines below were Hugging Face Spaces page chrome captured
# by the scrape (status text, file size, commit hashes, line-number gutter) --
# they are not part of the program. Kept as comments so the file parses.
# Spaces: Sleeping / Sleeping
# File size: 4,997 Bytes
# Commits: 96ef6c7 9fb0d26 96ef6c7
# (line-number gutter 1..139 omitted)
import streamlit as st
from datasets import load_dataset
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from huggingface_hub import hf_hub_download
# --- Data & model loading ---
# Streamlit re-executes the whole script on every user interaction, so both
# the dataset fetch and the model download/unpickle are cached as resources:
# they run once per server process instead of once per rerun.

@st.cache_resource
def _load_churn_dataset():
    """Load the customer-churn training split from the Hugging Face Hub."""
    return load_dataset("louiecerv/customer_churn")["train"]


@st.cache_resource
def _load_churn_model():
    """Download (with Hub-side caching) and unpickle the churn model.

    NOTE(review): ``pickle.load`` executes arbitrary code from the artifact;
    acceptable only because the repo is first-party/trusted.
    """
    repo_id = "louiecerv/churn_prediction_model"
    filename = "churn_prediction_model.pkl"
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    with open(model_path, "rb") as f:
        return pickle.load(f)


# Module-level names preserved for any external importers.
dataset = _load_churn_dataset()
model = _load_churn_model()
def main():
    """Render the Streamlit churn-prediction UI.

    Three sections: dataset exploration (sample, size, churn distribution),
    feature importance from the model's linear coefficients, and an
    interactive single-customer prediction form.

    Uses module globals: ``dataset`` (HF dataset) and ``model`` (fitted
    classifier exposing ``coef_``, ``predict`` and ``predict_proba``).
    """
    st.title("Customer Churn Prediction App")

    # Mojibake in the original ("π€", "π", ...) repaired with plausible
    # emoji -- presumably these were 🤗/📊/🧠/🚀 in the author's source.
    about = """## 🤗 Hugging Face for Machine Learning

This app showcases the power of **Hugging Face** for ML applications:
- 📊 **Datasets**: Easily access and share datasets.
- 🧠 **Models**: Download and use pre-trained models or upload your own.
- 🚀 **Spaces**: Deploy your app effortlessly.

🔍 **Explore Hugging Face** and build your own ML-powered projects!

---
🚀 *Developed with Streamlit & Hugging Face 🤗*

**Created by: Louie F. Cervantes, M.Eng. (Information Engineering) (c) 2025 West Visayas State University**
"""
    with st.expander("About this app"):  # typo fix: was "thiss"
        st.markdown(about)

    # --- Dataset Exploration ---
    st.header("Dataset Exploration")
    st.write("Let's explore the customer churn dataset:")

    # Convert once and reuse; the original called dataset.to_pandas() three
    # separate times (sample view, split, countplot).
    df = dataset.to_pandas()

    st.subheader("Dataset Sample")
    st.write(df.head())

    # Split reproducibly; only the train side is used here (feature names
    # and coefficients plot). test_data/y_train kept for parity/extension.
    train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
    X_train = train_data.drop('churn', axis=1)
    y_train = train_data['churn']

    st.write(f"**Dataset Size:** {len(dataset)} rows")

    # Churn class balance, relabelled from 0/1 to readable names.
    st.subheader("Churn Distribution")
    fig, ax = plt.subplots()
    sns.countplot(x='churn', data=df, ax=ax)
    ax.set_xticks([0, 1])  # fix tick locations before relabelling
    ax.set_xticklabels(["No Churn", "Churn"])
    st.pyplot(fig)

    # --- Feature Importance ---
    st.header("Feature Importance")
    st.write("Understanding which features contribute most to churn prediction:")
    # Linear-model coefficients serve as importances.
    # assumes model is a binary linear classifier exposing coef_ -- TODO confirm
    feature_importance = pd.DataFrame({'Feature': X_train.columns,
                                       'Importance': model.coef_[0]})
    feature_importance = feature_importance.sort_values(by='Importance',
                                                        ascending=False)
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='Importance', y='Feature', data=feature_importance, ax=ax)
    st.pyplot(fig)

    # --- Churn Prediction ---
    st.header("Predict Customer Churn")
    st.write("Enter customer data to predict churn:")

    # BUG FIX: the original also rendered a "Churn" selectbox -- asking the
    # user for the prediction *target* -- and never used the value. Removed.
    tenure = st.number_input("Tenure (months)", min_value=1, step=1)
    monthly_charges = st.number_input("Monthly Charges", min_value=0.0, step=0.01)
    total_charges = st.number_input("Total Charges", min_value=0.0, step=0.01)
    contract = st.selectbox("Contract", ['Month-to-month', 'One year', 'Two year'])
    internet_service = st.selectbox("Internet Service", ['DSL', 'Fiber optic', 'No'])

    # Manual one-hot encoding matching a drop-first scheme: 'Month-to-month'
    # and 'DSL' are the implied baselines (all dummies zero).
    input_features = {
        'tenure': tenure,
        'monthly_charges': monthly_charges,
        'total_charges': total_charges,
        'contract_One year': int(contract == 'One year'),
        'contract_Two year': int(contract == 'Two year'),
        'internet_service_Fiber optic': int(internet_service == 'Fiber optic'),
        'internet_service_No': int(internet_service == 'No')
    }
    input_df = pd.DataFrame([input_features])

    # Reorder columns to exactly match the training feature order.
    input_df = input_df[X_train.columns]

    if st.button("Predict Churn"):
        prediction = model.predict(input_df)[0]
        probability = model.predict_proba(input_df)[0][1]  # P(class 1) = P(churn)
        if prediction == 1:
            st.write("This customer is likely to **churn**.")
        else:
            st.write("This customer is likely to **stay**.")
        st.write(f"Churn Probability: {probability:.2f}")

    # --- Hugging Face Explanation ---
    st.header("Hugging Face for Machine Learning")
    st.write(
        """
        This app showcases the power of Hugging Face for building ML applications.

        - **Datasets:** Easily access and share datasets.
        - **Models:** Download and use pre-trained models or upload your own.
        - **Spaces:** Deploy your app with a simple interface.

        Explore Hugging Face and build your own amazing ML projects!
        """
    )
# Entry point. BUG FIX: the original ended with a stray " |" (scrape
# residue) after main(), which is a syntax error; removed.
if __name__ == "__main__":
    main()