File size: 4,997 Bytes
96ef6c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fb0d26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96ef6c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import streamlit as st
from datasets import load_dataset
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from huggingface_hub import hf_hub_download

# Streamlit re-executes this script from the top on every widget interaction.
# The loads below hit the network / disk, so they are wrapped in cached helpers
# that run once per server process instead of once per rerun.


@st.cache_resource(show_spinner=False)
def _load_churn_dataset():
    """Return the "train" split of the ``louiecerv/customer_churn`` dataset."""
    return load_dataset("louiecerv/customer_churn")["train"]


@st.cache_resource(show_spinner=False)
def _download_model_file(repo, name):
    """Fetch ``name`` from the Hub repository ``repo`` and return its local path."""
    return hf_hub_download(repo_id=repo, filename=name)


@st.cache_resource(show_spinner=False)
def _load_pickled_model(path):
    """Unpickle and return the model stored at ``path``."""
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only point this at a model repository you trust.
    with open(path, "rb") as f:
        return pickle.load(f)


# Load the dataset
dataset = _load_churn_dataset()

# Define repository details
repo_id = "louiecerv/churn_prediction_model"
filename = "churn_prediction_model.pkl"

# Download and cache the model automatically
model_path = _download_model_file(repo_id, filename)

# Load the model
model = _load_pickled_model(model_path)

def main():
    """Render the Customer Churn Prediction Streamlit page.

    Uses the module-level ``dataset`` and ``model`` and draws, top to bottom:
    an "about" expander, a dataset sample with its row count and churn
    distribution plot, a bar chart of the model coefficients, a form that
    collects customer attributes and predicts churn on button press, and a
    closing note about Hugging Face.
    """
    # Streamlit app
    st.title("Customer Churn Prediction App")

    about = """## 🤗 Hugging Face for Machine Learning

This app showcases the power of **Hugging Face** for ML applications:
- 📂 **Datasets**: Easily access and share datasets.
- 🧠 **Models**: Download and use pre-trained models or upload your own.
- 🌍 **Spaces**: Deploy your app effortlessly.

👉 **Explore Hugging Face** and build your own ML-powered projects!

---
🚀 *Developed with Streamlit & Hugging Face 🤗*

**Created by: Louie F. Cervantes, M.Eng. (Information Engineering) (c) 2025 West Visayas State University**
"""
    with st.expander("About this app"):
        st.markdown(about)

    # --- Dataset Exploration ---
    st.header("Dataset Exploration")
    st.write("Let's explore the customer churn dataset:")

    # Convert to pandas once and reuse the frame for every view below.
    df = dataset.to_pandas()

    # Display dataset information
    st.subheader("Dataset Sample")
    st.write(df.head())

    # Show dataset size
    st.write(f"**Dataset Size:** {len(dataset)} rows")

    # Visualize churn distribution
    st.subheader("Churn Distribution")
    fig, ax = plt.subplots()
    sns.countplot(x='churn', data=df, ax=ax)
    ax.set_xticks([0, 1])  # Set tick locations
    ax.set_xticklabels(["No Churn", "Churn"])
    st.pyplot(fig)
    # Close the figure: the script reruns on every interaction and pyplot
    # otherwise keeps every figure it ever created alive.
    plt.close(fig)

    # Feature columns are every dataset column except the target. When the
    # model recorded the names it was fitted on (scikit-learn sets
    # ``feature_names_in_`` for DataFrame input), prefer those so labels and
    # column order follow the model rather than the dataset.
    dataset_features = df.drop(columns='churn').columns
    feature_names = list(getattr(model, "feature_names_in_", dataset_features))

    # --- Feature Importance ---
    st.header("Feature Importance")
    st.write("Understanding which features contribute most to churn prediction:")

    # Get feature importances (coefficients)
    feature_importance = pd.DataFrame({'Feature': feature_names,
                                       'Importance': model.coef_[0]})
    feature_importance = feature_importance.sort_values(by='Importance', ascending=False)

    # Plot feature importances
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='Importance', y='Feature', data=feature_importance, ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    # --- Churn Prediction ---
    st.header("Predict Customer Churn")
    st.write("Enter customer data to predict churn:")

    # Input features (the churn label itself is what gets predicted, so it is
    # not requested from the user).
    tenure = st.number_input("Tenure (months)", min_value=1, step=1)
    monthly_charges = st.number_input("Monthly Charges", min_value=0.0, step=0.01)
    total_charges = st.number_input("Total Charges", min_value=0.0, step=0.01)
    contract = st.selectbox("Contract", ['Month-to-month', 'One year', 'Two year'])
    internet_service = st.selectbox("Internet Service", ['DSL', 'Fiber optic', 'No'])

    # Preprocess input features: categorical choices become 0/1 indicator
    # columns; the first option of each selectbox is the all-zeros baseline.
    input_features = {
        'tenure': tenure,
        'monthly_charges': monthly_charges,
        'total_charges': total_charges,
        'contract_One year': int(contract == 'One year'),
        'contract_Two year': int(contract == 'Two year'),
        'internet_service_Fiber optic': int(internet_service == 'Fiber optic'),
        'internet_service_No': int(internet_service == 'No'),
    }
    input_df = pd.DataFrame([input_features])

    # Ensure feature names match those used during training. Report a
    # mismatch in the page instead of crashing the whole script with KeyError.
    missing = [name for name in feature_names if name not in input_df.columns]
    if missing:
        st.error(f"Cannot build the model input; missing features: {missing}")
    else:
        input_df = input_df[feature_names]  # Reorder columns to match train data

        # Make prediction
        if st.button("Predict Churn"):
            prediction = model.predict(input_df)[0]
            probability = model.predict_proba(input_df)[0][1]  # Probability of churn
            if prediction == 1:
                st.write("This customer is likely to **churn**.")
            else:
                st.write("This customer is likely to **stay**.")
            st.write(f"Churn Probability: {probability:.2f}")

    # --- Hugging Face Explanation ---
    st.header("Hugging Face for Machine Learning")
    st.write(
        """
        This app showcases the power of Hugging Face for building ML applications.
        - **Datasets:** Easily access and share datasets.
        - **Models:** Download and use pre-trained models or upload your own.
        - **Spaces:** Deploy your app with a simple interface.
        
        Explore Hugging Face and build your own amazing ML projects!
        """
    )

# Render the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()