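# Wardrobe Wonders: a Streamlit app that explores customer purchase data with
# association-rule mining (Apriori) and a logistic-regression return predictor.
# Run locally with `streamlit run app.py` (assuming this script is saved as app.py).
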
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from mlxtend.frequent_patterns import apriori, association_rules

# Title of the app
st.title("Wardrobe Wonders: Customer Analytics")

# File uploader for Excel or CSV file
uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])
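# Illustrative input layout (hypothetical example values) matching the required fields checked below:
# Customer_ID,Gender,Purchase_Category,Purchase_Frequency,Age,Income
# C001,Female,Dresses,3,29,52000
# C002,Male,Shoes,1,41,68000
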
if uploaded_file is not None:
    # Load data from the uploaded file
    if uploaded_file.name.endswith('.xlsx'):
        data = pd.read_excel(uploaded_file)
    else:
        data = pd.read_csv(uploaded_file)

    # Check for required fields
    required_fields = ['Customer_ID', 'Gender', 'Purchase_Category', 'Purchase_Frequency', 'Age', 'Income']
    for field in required_fields:
        if field not in data.columns:
            st.error(f"Missing required field: {field}")
            st.stop()

    # One-hot encode categorical features for the predictive model
    data_encoded = pd.get_dummies(data, columns=['Gender', 'Purchase_Category'], drop_first=True)

    # Assuming the 'Return' label can be derived from a condition, like Purchase_Frequency > 2
    data_encoded['Return'] = (data['Purchase_Frequency'] > 2).astype(int)  # Example condition for return status

    X = data_encoded.drop(columns=['Customer_ID', 'Return'])  # Features
    y = data_encoded['Return']  # Return as the target variable

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the logistic regression model
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
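    # Note: X_test / y_test are held out above but not used further; a quick
    # sanity check such as model.score(X_test, y_test) could be surfaced with
    # st.write() here to report held-out accuracy if desired.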
    # Sidebar for model selection
    model_choice = st.sidebar.selectbox("Select Analysis Type", ['Association Rules', 'Customers Who Will Return'])
    if model_choice == 'Association Rules':
        st.header("Association Rules")
        st.write("Select a product to see its top associated items.")

        # Create a customer-by-category basket from the purchase categories
        basket = data.groupby(['Customer_ID', 'Purchase_Category'])['Purchase_Frequency'].sum().unstack().reset_index().fillna(0)
        basket = basket.set_index('Customer_ID')
        basket = basket > 0  # Convert to boolean purchase indicators

        # Apply the Apriori algorithm (low support to capture more itemsets)
        frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)

        # Generate association rules; lift > 1 means items co-occur more often than expected by chance
        rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

        # Convert frozensets to comma-separated strings for display
        rules['antecedents'] = rules['antecedents'].apply(lambda x: ', '.join(sorted(x)))
        rules['consequents'] = rules['consequents'].apply(lambda x: ', '.join(sorted(x)))

        # Keep unique antecedent/consequent pairs
        unique_rules = rules.drop_duplicates(subset=['antecedents', 'consequents'])

        # Product selection for association rule analysis
        selected_product = st.selectbox("Select Product", unique_rules['antecedents'].unique())

        # Get the top associated items by lift
        top_associations = unique_rules[unique_rules['antecedents'] == selected_product].nlargest(5, 'lift')

        # Display top associations in a customer-friendly format
        if not top_associations.empty:
            st.write(f"Top associations for **{selected_product}**:")
            for index, row in top_associations.iterrows():
                st.write(f"- **{row['consequents']}**: Support = {row['support']:.2f}, Confidence = {row['confidence']:.2f}, Lift = {row['lift']:.2f}")
        else:
            st.write("No associations found for the selected product.")
    elif model_choice == 'Customers Who Will Return':
        st.header("Customers Predicted to Return")

        # Make predictions for all customers
        predictions = model.predict(X)

        # Add predictions to the original data
        data['Predicted_Return'] = predictions

        # Filter customers predicted to return
        customers_will_return = data[data['Predicted_Return'] == 1]

        # Group by Customer_ID and aggregate the other fields (customize as needed)
        customers_will_return = customers_will_return.groupby('Customer_ID').agg({
            'Age': 'first',  # or 'mean', depending on your preference
            'Gender': 'first',
            'Income': 'first',
            'Purchase_Frequency': 'sum'  # Aggregate if there are multiple entries
        }).reset_index()

        # Display the customers predicted to return
        st.dataframe(customers_will_return)