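# Wardrobe Wonders: a Streamlit app that explores customer purchase data with
# association-rule mining (Apriori) and a logistic-regression return predictor.
# Run locally with `streamlit run app.py` (assuming this script is saved as app.py).
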
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from mlxtend.frequent_patterns import apriori, association_rules

# Title of the app
st.title("Wardrobe Wonders: Customer Analytics")

# File uploader for Excel or CSV file
uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])
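# Illustrative input layout (hypothetical example values) matching the required fields checked below:
# Customer_ID,Gender,Purchase_Category,Purchase_Frequency,Age,Income
# C001,Female,Dresses,3,29,52000
# C002,Male,Shoes,1,41,68000
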
if uploaded_file is not None:
    # Load data from the uploaded file
    if uploaded_file.name.endswith('.xlsx'):
        data = pd.read_excel(uploaded_file)
    else:
        data = pd.read_csv(uploaded_file)

    # Check for required fields
    required_fields = ['Customer_ID', 'Gender', 'Purchase_Category', 'Purchase_Frequency', 'Age', 'Income']
    for field in required_fields:
        if field not in data.columns:
            st.error(f"Missing required field: {field}")
            st.stop()

    # One-hot encode categorical features for the predictive model
    data_encoded = pd.get_dummies(data, columns=['Gender', 'Purchase_Category'], drop_first=True)

    # Assuming the 'Return' label can be derived from a condition, like Purchase_Frequency > 2
    data_encoded['Return'] = (data['Purchase_Frequency'] > 2).astype(int)  # Example condition for return status

    X = data_encoded.drop(columns=['Customer_ID', 'Return'])  # Features
    y = data_encoded['Return']  # Return as the target variable

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the logistic regression model
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
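    # Note: X_test / y_test are held out above but not used further; a quick
    # sanity check such as model.score(X_test, y_test) could be surfaced with
    # st.write() here to report held-out accuracy if desired.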
    # Sidebar for model selection
    model_choice = st.sidebar.selectbox("Select Analysis Type", ['Association Rules', 'Customers Who Will Return'])
    if model_choice == 'Association Rules':
        st.header("Association Rules")
        st.write("Select a product to see its top associated items.")

        # Create a customer-by-category basket from the purchase categories
        basket = data.groupby(['Customer_ID', 'Purchase_Category'])['Purchase_Frequency'].sum().unstack().reset_index().fillna(0)
        basket = basket.set_index('Customer_ID')
        basket = basket > 0  # Convert to boolean purchase indicators

        # Apply the Apriori algorithm (low support to capture more itemsets)
        frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)

        # Generate association rules; lift > 1 means items co-occur more often than expected by chance
        rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

        # Convert frozensets to comma-separated strings for display
        rules['antecedents'] = rules['antecedents'].apply(lambda x: ', '.join(sorted(x)))
        rules['consequents'] = rules['consequents'].apply(lambda x: ', '.join(sorted(x)))

        # Keep unique antecedent/consequent pairs
        unique_rules = rules.drop_duplicates(subset=['antecedents', 'consequents'])

        # Product selection for association rule analysis
        selected_product = st.selectbox("Select Product", unique_rules['antecedents'].unique())

        # Get the top associated items by lift
        top_associations = unique_rules[unique_rules['antecedents'] == selected_product].nlargest(5, 'lift')

        # Display top associations in a customer-friendly format
        if not top_associations.empty:
            st.write(f"Top associations for **{selected_product}**:")
            for index, row in top_associations.iterrows():
                st.write(f"- **{row['consequents']}**: Support = {row['support']:.2f}, Confidence = {row['confidence']:.2f}, Lift = {row['lift']:.2f}")
        else:
            st.write("No associations found for the selected product.")
    elif model_choice == 'Customers Who Will Return':
        st.header("Customers Predicted to Return")

        # Make predictions for all customers
        predictions = model.predict(X)

        # Add predictions to the original data
        data['Predicted_Return'] = predictions

        # Filter customers predicted to return
        customers_will_return = data[data['Predicted_Return'] == 1]

        # Group by Customer_ID and aggregate the other fields (customize as needed)
        customers_will_return = customers_will_return.groupby('Customer_ID').agg({
            'Age': 'first',  # or 'mean', depending on your preference
            'Gender': 'first',
            'Income': 'first',
            'Purchase_Frequency': 'sum'  # Aggregate if there are multiple entries
        }).reset_index()

        # Display the customers predicted to return
        st.dataframe(customers_will_return)