Spaces:

louiecerv
/

customer_churn

Sleeping

App Files Files Community

louiecerv commited on Jan 25

Commit

96ef6c7

1 Parent(s): f913295

sync with remote

Browse files

Files changed (3) hide show

app.py +124 -0
churn_prediction_model.pkl +3 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import streamlit as st
+from datasets import load_dataset
+import pandas as pd
+import pickle
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.model_selection import train_test_split
+from huggingface_hub import hf_hub_download
+# Load the dataset
+dataset = load_dataset("louiecerv/customer_churn")["train"]
+# Define repository details
+repo_id = "louiecerv/churn_prediction_model"
+filename = "churn_prediction_model.pkl"
+# Download and cache the model automatically
+model_path = hf_hub_download(repo_id=repo_id, filename=filename)
+# Load the model
+with open(model_path, "rb") as f:
+    model = pickle.load(f)
+def main():
+    # Streamlit app
+    st.title("Customer Churn Prediction App")
+    st.write("This app demonstrates how to use Hugging Face Datasets and Models for a "
+            "real-world classification task: predicting customer churn.")
+    # --- Dataset Exploration ---
+    st.header("Dataset Exploration")
+    st.write("Let's explore the customer churn dataset:")
+    # Display dataset information
+    st.subheader("Dataset Sample")
+    st.write(dataset.to_pandas().head())
+    # Split data
+    # Convert dataset to Pandas DataFrame
+    df = dataset.to_pandas()
+    train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
+    # Define X_train and y_train
+    X_train = train_data.drop('churn', axis=1)
+    y_train = train_data['churn']
+    # Show dataset size
+    st.write(f"**Dataset Size:** {len(dataset)} rows")
+    # Visualize churn distribution
+    st.subheader("Churn Distribution")
+    fig, ax = plt.subplots()
+    sns.countplot(x='churn', data=dataset.to_pandas(), ax=ax)
+    ax.set_xticks([0, 1])  # Set tick locations
+    ax.set_xticklabels(["No Churn", "Churn"])
+    st.pyplot(fig)
+    # --- Feature Importance ---
+    st.header("Feature Importance")
+    st.write("Understanding which features contribute most to churn prediction:")
+    # Get feature importances (coefficients)
+    feature_importance = pd.DataFrame({'Feature': X_train.columns,
+                                    'Importance': model.coef_[0]})
+    feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
+    # Plot feature importances
+    fig, ax = plt.subplots(figsize=(10, 6))
+    sns.barplot(x='Importance', y='Feature', data=feature_importance, ax=ax)
+    st.pyplot(fig)
+    # --- Churn Prediction ---
+    st.header("Predict Customer Churn")
+    st.write("Enter customer data to predict churn:")
+    # Input features
+    tenure = st.number_input("Tenure (months)", min_value=1, step=1)
+    monthly_charges = st.number_input("Monthly Charges", min_value=0.0, step=0.01)
+    total_charges = st.number_input("Total Charges", min_value=0.0, step=0.01)
+    churn = st.selectbox("Churn", ['Yes', 'No'])
+    contract = st.selectbox("Contract", ['Month-to-month', 'One year', 'Two year'])
+    internet_service = st.selectbox("Internet Service", ['DSL', 'Fiber optic', 'No'])
+    # Preprocess input features
+    input_features = {
+        'tenure': tenure,
+        'monthly_charges': monthly_charges,
+        'total_charges': total_charges,
+        'contract_One year': int(contract == 'One year'),
+        'contract_Two year': int(contract == 'Two year'),
+        'internet_service_Fiber optic': int(internet_service == 'Fiber optic'),
+        'internet_service_No': int(internet_service == 'No')
+    }
+    input_df = pd.DataFrame([input_features])
+    # Ensure feature names match those used during training
+    train_columns = X_train.columns
+    input_df = input_df[train_columns]  # Reorder columns to match train data
+    # Make prediction
+    if st.button("Predict Churn"):
+        prediction = model.predict(input_df)[0]
+        probability = model.predict_proba(input_df)[0][1]  # Probability of churn
+        if prediction == 1:
+            st.write("This customer is likely to **churn**.")
+        else:
+            st.write("This customer is likely to **stay**.")
+        st.write(f"Churn Probability: {probability:.2f}")
+    # --- Hugging Face Explanation ---
+    st.header("Hugging Face for Machine Learning")
+    st.write(
+        """
+        This app showcases the power of Hugging Face for building ML applications.
+        - **Datasets:** Easily access and share datasets.
+        - **Models:** Download and use pre-trained models or upload your own.
+        - **Spaces:** Deploy your app with a simple interface.
+        Explore Hugging Face and build your own amazing ML projects!
+        """
+    )
+if __name__ == "__main__":
+    main()

churn_prediction_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a98e018dad9148716364367ffdf9cdb9f95071a1fdae22cb0aed6439517a7305
+size 996

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+datasets
+pandas
+matplotlib
+seaborn
+scikit-learn