# Hugging Face commit header: mednow -- "Upload app.py" (commit c857a68, verified)
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from mlxtend.frequent_patterns import apriori, association_rules
# Wardrobe Wonders: a Streamlit page offering two analyses of an uploaded
# customer table -- association rules between purchase categories, and a
# logistic-regression estimate of which customers will return.

REQUIRED_FIELDS = ['Customer_ID', 'Gender', 'Purchase_Category', 'Purchase_Frequency', 'Age', 'Income']
CATEGORICAL_FEATURES = ['Gender', 'Purchase_Category']
NUMERIC_FEATURES = ['Age', 'Income']
# Placeholder definition of a "returning" customer: Purchase_Frequency above this value.
RETURN_THRESHOLD = 2
RULE_COLUMNS = ['antecedents', 'consequents', 'support', 'confidence', 'lift']


def _load_table(upload) -> pd.DataFrame:
    """Read the upload as Excel when its name ends in .xlsx, otherwise as CSV."""
    # Compare case-insensitively so "DATA.XLSX" is not handed to the CSV parser.
    if upload.name.lower().endswith('.xlsx'):
        return pd.read_excel(upload)
    return pd.read_csv(upload)


def _itemset_label(items) -> str:
    """Render an itemset as a stable, comma-separated label listing every item."""
    return ', '.join(sorted(str(item) for item in items))


def _mine_rules(frame: pd.DataFrame) -> pd.DataFrame:
    """Return association rules (lift >= 1) between purchase categories.

    Each customer is one basket; a category belongs to the basket when the
    customer's summed Purchase_Frequency for it is positive. The returned
    frame has string ``antecedents``/``consequents`` labels. When there is
    nothing to mine, an empty frame with the RULE_COLUMNS columns is returned.
    """
    totals = (
        frame.groupby(['Customer_ID', 'Purchase_Category'])['Purchase_Frequency']
        .sum()
        .unstack()
        .fillna(0)
    )
    # Boolean basket: accepted by apriori and avoids the deprecated DataFrame.applymap.
    basket = totals > 0
    if basket.empty:
        return pd.DataFrame(columns=RULE_COLUMNS)

    # Low support threshold so rarely co-purchased categories still surface.
    frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)
    if frequent_itemsets.empty:
        # association_rules raises ValueError on an empty itemset table.
        return pd.DataFrame(columns=RULE_COLUMNS)

    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
    # Keep *every* item of each frozenset; taking only the first element
    # would mislabel multi-item rules and make distinct rules look identical.
    rules = rules.assign(
        antecedents=rules['antecedents'].map(_itemset_label),
        consequents=rules['consequents'].map(_itemset_label),
    )
    return rules.drop_duplicates(subset=['antecedents', 'consequents'])


def _fit_return_model(frame: pd.DataFrame):
    """Fit the return classifier and return ``(model, features, holdout_accuracy)``.

    The target is ``Purchase_Frequency > RETURN_THRESHOLD``. Features are
    limited to Age, Income and the one-hot encoded Gender/Purchase_Category:
    Purchase_Frequency is deliberately excluded because the target is computed
    from it (target leakage), and unrelated extra columns in the upload are
    ignored so they cannot break the fit.

    Raises:
        ValueError: from pandas/scikit-learn when the features are not numeric,
            contain missing values, there are too few rows to split, or the
            training split contains a single class.
    """
    features = pd.get_dummies(
        frame[NUMERIC_FEATURES + CATEGORICAL_FEATURES],
        columns=CATEGORICAL_FEATURES,
        drop_first=True,
    ).astype(float)
    target = (frame['Purchase_Frequency'] > RETURN_THRESHOLD).astype(int)

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    return model, features, model.score(X_test, y_test)


# Title of the app
st.title("Wardrobe Wonders: Customer Analytics")

# File uploader for Excel or CSV file
uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])

if uploaded_file is not None:
    data = _load_table(uploaded_file)

    # Report every missing column at once rather than stopping at the first.
    missing_fields = [field for field in REQUIRED_FIELDS if field not in data.columns]
    if missing_fields:
        for field in missing_fields:
            st.error(f"Missing required field: {field}")
        st.stop()

    # Sidebar for analysis selection
    model_choice = st.sidebar.selectbox("Select Analysis Type", ['Association Rules', 'Customers Who Will Return'])

    if model_choice == 'Association Rules':
        st.header("Association Rules")
        st.write("Select a product to see its top associated items.")

        unique_rules = _mine_rules(data)

        # Product selection for association rule analysis
        selected_product = st.selectbox("Select Product", unique_rules['antecedents'].unique())

        matches = unique_rules[unique_rules['antecedents'] == selected_product]
        # nlargest rejects the object-dtype columns of an empty rule table,
        # so only rank when there are real matches.
        top_associations = matches if matches.empty else matches.nlargest(5, 'lift')

        # Display top associations in a customer-friendly format
        if not top_associations.empty:
            st.write(f"Top associations for **{selected_product}**:")
            for _, row in top_associations.iterrows():
                st.write(f"- **{row['consequents']}**: Support = {row['support']:.2f}, Confidence = {row['confidence']:.2f}, Lift = {row['lift']:.2f}")
        else:
            st.write("No associations found for the selected product.")

    elif model_choice == 'Customers Who Will Return':
        st.header("Customers Predicted to Return")

        # Train only when this view is selected, so a dataset the model cannot
        # handle does not take down the association-rules view as well.
        try:
            model, features, holdout_accuracy = _fit_return_model(data)
        except ValueError as exc:
            st.error(f"Could not train the return model: {exc}")
            st.stop()

        st.write(f"Hold-out accuracy: {holdout_accuracy:.2f}")

        # Score every row, then keep the rows predicted to return.
        scored = data.assign(Predicted_Return=model.predict(features))
        customers_will_return = scored[scored['Predicted_Return'] == 1]

        # Collapse to one row per customer; frequencies are summed across rows.
        customers_will_return = customers_will_return.groupby('Customer_ID').agg({
            'Age': 'first',
            'Gender': 'first',
            'Income': 'first',
            'Purchase_Frequency': 'sum',
        }).reset_index()

        # Display the customers who will return
        st.dataframe(customers_will_return)