"""Wardrobe Wonders: a Streamlit app for customer purchase analytics.

The user uploads an Excel or CSV file and picks one of two analyses from the
sidebar:

* "Association Rules" - Apriori association rules between purchase
  categories, browsable by antecedent.
* "Customers Who Will Return" - a logistic-regression model that flags the
  customers predicted to return.
"""

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from mlxtend.frequent_patterns import apriori, association_rules

# Columns the uploaded table must contain for either analysis to run.
REQUIRED_FIELDS = [
    'Customer_ID',
    'Gender',
    'Purchase_Category',
    'Purchase_Frequency',
    'Age',
    'Income',
]

# A row is labelled "will return" when Purchase_Frequency exceeds this value.
RETURN_FREQUENCY_THRESHOLD = 2

# Low support on purpose, so that more itemsets are captured.
MIN_SUPPORT = 0.01

# Number of rules shown for the selected antecedent.
TOP_N_ASSOCIATIONS = 5


def _load_table(uploaded_file) -> pd.DataFrame:
    """Read the uploaded file as Excel when it ends in .xlsx, else as CSV."""
    # Compare in lower case so that "DATA.XLSX" is not parsed as CSV.
    if uploaded_file.name.lower().endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    return pd.read_csv(uploaded_file)


def _format_itemset(itemset) -> str:
    """Render every item of an itemset as one comma-separated label.

    Sorting gives a stable label regardless of the set's iteration order, and
    ``str`` keeps this working when category values are not strings.
    """
    return ", ".join(sorted(str(item) for item in itemset))


def _build_basket(data: pd.DataFrame) -> pd.DataFrame:
    """Return a boolean Customer_ID x Purchase_Category purchase matrix.

    A cell is True when the customer's summed Purchase_Frequency for that
    category is greater than zero.
    """
    totals = data.groupby(['Customer_ID', 'Purchase_Category'])['Purchase_Frequency'].sum()
    # Customer/category pairs that never occur are filled with 0 -> False.
    return totals.unstack(fill_value=0) > 0


def _mine_rules(basket: pd.DataFrame):
    """Run Apriori on *basket* and return display-ready association rules.

    Returns None when no itemset reaches MIN_SUPPORT, so that the caller can
    show a message instead of passing an empty table to association_rules.
    """
    frequent_itemsets = apriori(basket, min_support=MIN_SUPPORT, use_colnames=True)
    if frequent_itemsets.empty:
        return None

    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

    # Keep the whole itemset in the label: taking only the first element
    # would show a multi-item rule as if it were a single-item rule.
    rules['antecedents'] = rules['antecedents'].apply(_format_itemset)
    rules['consequents'] = rules['consequents'].apply(_format_itemset)
    return rules.drop_duplicates(subset=['antecedents', 'consequents'])


def _show_association_rules(data: pd.DataFrame) -> None:
    """Render the association-rules view for the uploaded data."""
    st.header("Association Rules")
    st.write("Select a product to see its top associated items.")

    rules = _mine_rules(_build_basket(data))
    if rules is None or rules.empty:
        st.write("No association rules could be derived from the uploaded data.")
        return

    selected_product = st.selectbox("Select Product", rules['antecedents'].unique())
    top_associations = rules[rules['antecedents'] == selected_product].nlargest(
        TOP_N_ASSOCIATIONS, 'lift'
    )

    if top_associations.empty:
        st.write("No associations found for the selected product.")
        return

    st.write(f"Top associations for **{selected_product}**:")
    for _, row in top_associations.iterrows():
        st.write(f"- **{row['consequents']}**: Support = {row['support']:.2f}, Confidence = {row['confidence']:.2f}, Lift = {row['lift']:.2f}")


def _build_model_inputs(data: pd.DataFrame):
    """Return ``(features, target)`` for the return-prediction model.

    Gender and Purchase_Category are one-hot encoded (first level dropped);
    every other column except Customer_ID is passed through as a feature.
    The target is 1 when Purchase_Frequency > RETURN_FREQUENCY_THRESHOLD.
    """
    encoded = pd.get_dummies(data, columns=['Gender', 'Purchase_Category'], drop_first=True)
    target = (data['Purchase_Frequency'] > RETURN_FREQUENCY_THRESHOLD).astype(int)

    # NOTE(review): Purchase_Frequency stays in the feature set although the
    # target is derived from it, so the model can learn the label directly.
    # Kept as-is because the label rule is an example condition - confirm the
    # intended target before relying on these predictions.
    # A pre-existing 'Return' column is never used as a feature.
    features = encoded.drop(columns=['Customer_ID', 'Return'], errors='ignore')
    return features, target


def _show_returning_customers(data: pd.DataFrame) -> None:
    """Train the return model and list the customers predicted to return."""
    st.header("Customers Predicted to Return")

    features, target = _build_model_inputs(data)
    X_train, _X_test, y_train, _y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # LogisticRegression cannot be fitted on a single class; report that
    # instead of crashing the app.
    if y_train.nunique() < 2:
        st.warning(
            "Cannot train the return model: the training data contains only "
            "one outcome class."
        )
        return

    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    # Score every row; assign() leaves the uploaded frame untouched.
    scored = data.assign(Predicted_Return=model.predict(features))
    predicted_to_return = scored[scored['Predicted_Return'] == 1]

    # Collapse multiple rows per customer into a single summary row.
    customers_will_return = predicted_to_return.groupby('Customer_ID').agg({
        'Age': 'first',
        'Gender': 'first',
        'Income': 'first',
        'Purchase_Frequency': 'sum',
    }).reset_index()

    st.dataframe(customers_will_return)


def main() -> None:
    """Render the page: upload, validate, then dispatch to the chosen view."""
    st.title("Wardrobe Wonders: Customer Analytics")

    uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])
    if uploaded_file is None:
        return

    data = _load_table(uploaded_file)

    # Report every missing column at once rather than only the first one.
    missing_fields = [field for field in REQUIRED_FIELDS if field not in data.columns]
    if missing_fields:
        for field in missing_fields:
            st.error(f"Missing required field: {field}")
        st.stop()

    model_choice = st.sidebar.selectbox(
        "Select Analysis Type", ['Association Rules', 'Customers Who Will Return']
    )

    # The model is trained only inside the view that needs it, so a training
    # problem cannot take down the association-rules view.
    if model_choice == 'Association Rules':
        _show_association_rules(data)
    elif model_choice == 'Customers Who Will Return':
        _show_returning_customers(data)


# Streamlit executes this file top to bottom on every rerun, so the page is
# rendered unconditionally, as in the original flat script.
main()