"""Item-based collaborative-filtering recommender over a purchase history sheet.

At import time the module loads the transaction sheet, builds a
customer x product purchase-count matrix and the product-to-product cosine
similarity matrix. The two public functions then answer per-customer queries
against those module-level structures.
"""

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse

# Source workbook and sheet holding one row per purchase.
DATA_FILE = 'datasetsample.xlsx'
SHEET_NAME = 'Transaction History'

# Load the purchase history and parse Purchase_Date as datetime.
purchase_history = pd.read_excel(
    DATA_FILE,
    sheet_name=SHEET_NAME,
    parse_dates=['Purchase_Date'],
)

# Customer ids are compared as strings everywhere below.
purchase_history['Customer_Id'] = purchase_history['Customer_Id'].astype(str)

# Product_Id -> Category lookup (if a product appears with several
# categories, the last one encountered wins).
product_categories = (
    purchase_history[['Product_Id', 'Category']]
    .drop_duplicates()
    .set_index('Product_Id')['Category']
    .to_dict()
)

# Rows: customers, columns: products, values: number of purchases.
purchase_counts = (
    purchase_history
    .groupby(['Customer_Id', 'Product_Id'])
    .size()
    .unstack(fill_value=0)
)

# Sparse copy of the count matrix, used for row extraction and similarity.
sparse_purchase_counts = sparse.csr_matrix(purchase_counts)

# Product x product cosine similarity (transpose so products are the rows).
cosine_similarities = cosine_similarity(sparse_purchase_counts.T)


def _summarize_purchases(user_id, purchased_items):
    """Return one summary dict per purchased product for ``user_id``."""
    user_purchases = purchase_history[purchase_history['Customer_Id'] == user_id]
    # Aggregate once per product instead of re-filtering the frame per item.
    per_product = user_purchases.groupby('Product_Id').agg(
        total_amount=('Amount (In Dollars)', 'sum'),
        last_purchase=('Purchase_Date', 'max'),
    )
    return [
        {
            'product_id': item,
            'category': product_categories.get(item, 'Unknown'),
            'total_amount': per_product.at[item, 'total_amount'],
            'last_purchase': pd.to_datetime(per_product.at[item, 'last_purchase']),
        }
        for item in purchased_items
    ]


def _rank_unpurchased(user_history, n):
    """Return column indices of the ``n`` best-scoring unpurchased products."""
    scores = np.asarray(cosine_similarities.dot(user_history), dtype=float)
    already_bought = user_history > 0
    # -inf (rather than 0) guarantees purchased products sort after every
    # candidate, so they can never take a slot in the top-n slice.
    scores[already_bought] = -np.inf
    candidate_count = int((~already_bought).sum())
    limit = min(n, candidate_count)
    return np.argsort(scores)[::-1][:limit]


def get_customer_items_and_recommendations(user_id, n=5):
    """
    Get both purchased items and recommendations for a user

    Parameters:
    user_id (str): The customer ID; converted with ``str()`` before lookup
    n (int): Maximum number of recommendations to return; negative values
        are treated as 0

    Returns:
    tuple: (purchased_items_info, recommended_items_info)
        purchased_items_info is a list of dicts with keys 'product_id',
        'category', 'total_amount' and 'last_purchase'.
        recommended_items_info is a list of dicts with keys 'product_id'
        and 'category', ordered from highest to lowest similarity score and
        never containing a product the customer already bought.
        Both lists are empty when the customer is not in the data.
    """
    user_id = str(user_id)
    n = max(int(n), 0)

    # Unknown customer: nothing to report.
    if user_id not in purchase_counts.index:
        return [], []

    purchased_items = list(purchase_counts.columns[purchase_counts.loc[user_id] > 0])
    purchased_items_info = _summarize_purchases(user_id, purchased_items)

    # Dense purchase-count vector for this customer, aligned with the
    # columns of purchase_counts.
    user_idx = purchase_counts.index.get_loc(user_id)
    user_history = sparse_purchase_counts[user_idx].toarray().flatten()

    recommended_indices = _rank_unpurchased(user_history, n)
    recommended_items = list(purchase_counts.columns[recommended_indices])

    recommended_items_info = [
        {
            'product_id': item,
            'category': product_categories.get(item, 'Unknown'),
        }
        for item in recommended_items
    ]

    return purchased_items_info, recommended_items_info


def display_customer_profile(user_id, n=5):
    """
    Display customer's purchase history and recommendations with categories

    Parameters:
    user_id (str): The customer ID as string
    n (int): Number of recommendations to return
    """
    purchased_items_info, recommended_items_info = (
        get_customer_items_and_recommendations(user_id, n)
    )

    print(f"\nCustomer Profile for {user_id}")
    print("-" * 70)

    print("\nPurchase History:")
    if purchased_items_info:
        print(f"{'Product ID':<15} {'Category':<20} {'Total Amount':>12} {'Last Purchase'}")
        print("-" * 70)
        for item in purchased_items_info:
            last_purchase = item['last_purchase']
            # A missing date (NaT) cannot be formatted with strftime.
            if pd.notna(last_purchase):
                last_purchase_text = last_purchase.strftime('%Y-%m-%d')
            else:
                last_purchase_text = 'N/A'
            print(f"{item['product_id']:<15} {item['category']:<20} ${item['total_amount']:>11.2f} {last_purchase_text}")
    else:
        print("No purchase history found")

    print("\nRecommended Items:")
    if recommended_items_info:
        print(f"{'Product ID':<15} {'Category':<20}")
        print("-" * 35)
        for item in recommended_items_info:
            print(f"{item['product_id']:<15} {item['category']:<20}")
    else:
        print("No recommendations available")
    print("-" * 70)


if __name__ == "__main__":
    # Example usage:
    customer_id = 'CUST2025A'  # Replace with your actual customer ID
    display_customer_profile(customer_id)