import streamlit as st
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from mlxtend.frequent_patterns import apriori, association_rules

# Title of the app
st.title("Wardrobe Wonders: Customer Analytics")

# File uploader for Excel or CSV file
uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])

if uploaded_file is not None:
    # Load data from the uploaded file (.xlsx reading requires the openpyxl package)
    if uploaded_file.name.endswith('.xlsx'):
        data = pd.read_excel(uploaded_file)
    else:
        data = pd.read_csv(uploaded_file)

    # Check for required fields; report every missing column at once
    required_fields = ['Customer_ID', 'Gender', 'Purchase_Category', 'Purchase_Frequency', 'Age', 'Income']
    missing_fields = [field for field in required_fields if field not in data.columns]
    if missing_fields:
        st.error(f"Missing required field(s): {', '.join(missing_fields)}")
        st.stop()
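
    # Optional addition: preview the first rows so the user can confirm the upload
    st.dataframe(data.head())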

    # One-hot encode categorical features for the predictive model
    data_encoded = pd.get_dummies(data, columns=['Gender', 'Purchase_Category'], drop_first=True)

    # The data has no explicit return label, so derive a placeholder target:
    # treat customers with Purchase_Frequency > 2 as returning. Swap in a real
    # label here if one is available.
    data_encoded['Return'] = (data['Purchase_Frequency'] > 2).astype(int)

    # Features and target. Purchase_Frequency is excluded from the features
    # because the placeholder target is derived directly from it, which would
    # otherwise leak the answer into the model.
    X = data_encoded.drop(columns=['Customer_ID', 'Return', 'Purchase_Frequency'])
    X = X.select_dtypes(include=[np.number, bool])  # guard against stray non-numeric columns
    y = data_encoded['Return']  # return status as the target variable

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the logistic regression model
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
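
    # Illustrative addition: report accuracy on the held-out split so the
    # train/test split above is actually used; a quick sanity check, not a
    # full evaluation.
    test_accuracy = model.score(X_test, y_test)
    st.sidebar.caption(f"Held-out accuracy: {test_accuracy:.2%}")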

    # Sidebar for model selection
    model_choice = st.sidebar.selectbox("Select Analysis Type", ['Association Rules', 'Customers Who Will Return'])

    if model_choice == 'Association Rules':
        st.header("Association Rules")
        st.write("Select a product to see its top associated items.")

        # Build a customer-by-category basket matrix from purchase frequencies
        # (unstack already leaves Customer_ID as the index)
        basket = data.groupby(['Customer_ID', 'Purchase_Category'])['Purchase_Frequency'].sum().unstack().fillna(0)
        basket = basket > 0  # boolean: did the customer ever buy in this category?

        # Apply the Apriori algorithm
        frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)  # Lower support to capture more itemsets
        
        # Generate association rules
        rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
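
        # Guard (added): with sparse data apriori may find no frequent itemsets,
        # leaving no rules to display below.
        if rules.empty:
            st.warning("No association rules found; try lowering min_support.")
            st.stop()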

        # Convert frozensets to readable strings; join multi-item sets instead of
        # silently keeping only their first element
        rules['antecedents'] = rules['antecedents'].apply(lambda x: ', '.join(sorted(x)))
        rules['consequents'] = rules['consequents'].apply(lambda x: ', '.join(sorted(x)))
        
        # Display unique rules
        unique_rules = rules.drop_duplicates(subset=['antecedents', 'consequents'])

        # Product selection for association rule analysis
        selected_product = st.selectbox("Select Product", unique_rules['antecedents'].unique())
        
        # Get top associated items
        top_associations = unique_rules[unique_rules['antecedents'] == selected_product].nlargest(5, 'lift')
        
        # Display top associations in a customer-friendly format
        if not top_associations.empty:
            st.write(f"Top associations for **{selected_product}**:")
            for _, row in top_associations.iterrows():
                st.write(f"- **{row['consequents']}**: Support = {row['support']:.2f}, Confidence = {row['confidence']:.2f}, Lift = {row['lift']:.2f}")
        else:
            st.write("No associations found for the selected product.")
    
    elif model_choice == 'Customers Who Will Return':

        st.header("Customers Predicted to Return")
        
        # Make predictions for all customers. Note that this scores rows the
        # model was trained on, so it describes the fitted rule rather than a
        # held-out forecast.
        predictions = model.predict(X)
        
        # Add predictions to the original data
        data['Predicted_Return'] = predictions
        
        # Filter customers who will return
        customers_will_return = data[data['Predicted_Return'] == 1]
        
        # Group by Customer_ID and aggregate other fields (you can customize this as needed)
        customers_will_return = customers_will_return.groupby('Customer_ID').agg({
            'Age': 'first',  # or 'mean', depending on your preference
            'Gender': 'first',
            'Income': 'first',
            'Purchase_Frequency': 'sum'  # Aggregate if there are multiple entries
        }).reset_index()
        
        # Display the customers who will return
        st.dataframe(customers_will_return)
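
        # Optional addition: let the user export the predicted-return list
        csv_bytes = customers_will_return.to_csv(index=False).encode('utf-8')
        st.download_button(
            "Download as CSV",
            data=csv_bytes,
            file_name="customers_predicted_to_return.csv",
            mime="text/csv",
        )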