"""Streamlit app: explore a shopping dataset and predict coupon usage.

The script reads ``file.csv``, label-encodes its categorical columns, trains a
decision tree on a fixed set of numeric features to predict ``Coupon_Status``,
saves the model to ``decision_tree_model.pkl`` and renders three tabs
(dataset summary, visualizations, interactive prediction).

Streamlit re-executes this whole script on every widget interaction, so
everything below runs again on each rerun.
"""
import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

st.title("🛒 Online Shopping Recommendation System")

# Load dataset
csv_path = "file.csv"
df = pd.read_csv(csv_path)

# Remove rows with missing values BEFORE encoding. Encoding first would turn a
# missing categorical value into an ordinary integer class (or make the
# encoder fail on mixed str/float input), so the later dropna() could never
# remove those rows.
df = df.dropna()
if df.empty:
    st.error("No complete rows left after removing missing values.")
    st.stop()

# Handle categorical columns by encoding them; the fitted encoders are kept so
# encoded integers can be mapped back to their original labels.
label_encoders = {}
for col in df.select_dtypes(include=["object"]).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Select relevant features
features = [
    "Avg_Price",
    "Delivery_Charges",
    "Discount_pct",
    "Online_Spend",
    "Offline_Spend",
    "Tenure_Months",
]
target = "Coupon_Status"

X = df[features]
y = df[target]

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train Decision Tree model. The seed is fixed so that the tree (and therefore
# the predictions) does not change between reruns of the script.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Save the model
joblib.dump(model, "decision_tree_model.pkl")

# Streamlit app with three tabs
tab1, tab2, tab3 = st.tabs(
    ["📊 Dataset & Summary", "📈 Data Visualization", "🔍 Prediction"]
)

# Tab 1: Dataset & Summary
with tab1:
    st.subheader("Dataset Overview")
    st.write(df)  # Show full dataset

    st.write("### Summary Statistics")
    st.write(df.describe())

# Tab 2: Data Visualization
# Each figure is closed right after rendering: pyplot keeps every figure alive
# in global state, and the script reruns on every interaction.
with tab2:
    st.subheader("📊 Correlation Matrix")
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(df.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    st.subheader("📊 Pairplot")
    pairplot_fig = sns.pairplot(df[features])
    # pairplot returns a PairGrid; hand its underlying Figure to Streamlit.
    st.pyplot(pairplot_fig.figure)
    plt.close(pairplot_fig.figure)

    st.subheader("📊 Feature Importance (Decision Tree)")
    feature_importance_fig, ax = plt.subplots()
    feature_importances = pd.Series(model.feature_importances_, index=features)
    feature_importances.nlargest(6).plot(kind="barh", ax=ax)
    st.pyplot(feature_importance_fig)
    plt.close(feature_importance_fig)

# Tab 3: Prediction
with tab3:
    st.subheader("🔍 Make a Prediction")

    # User inputs
    avg_price = st.number_input(
        "Average Price of Product", min_value=0.0, step=1.0
    )
    delivery_charges = st.number_input(
        "Delivery Charges", min_value=0.0, step=0.5
    )
    discount_pct = st.number_input(
        "Discount Percentage", min_value=0.0, max_value=100.0, step=1.0
    )
    online_spend = st.number_input("Online Spend", min_value=0.0, step=10.0)
    offline_spend = st.number_input("Offline Spend", min_value=0.0, step=10.0)
    tenure = st.number_input("Tenure in Months", min_value=0, step=1)

    if st.button("Predict Coupon Usage"):
        # Use the model trained above in this same run; it is the object that
        # was just written to disk, so re-loading the pickle is unnecessary.
        # Build the row with the training column names so each value is
        # matched to its feature by name.
        input_row = pd.DataFrame(
            [
                [
                    avg_price,
                    delivery_charges,
                    discount_pct,
                    online_spend,
                    offline_spend,
                    tenure,
                ]
            ],
            columns=features,
        )

        # Make prediction
        prediction = model.predict(input_row)

        # Display result
        # NOTE(review): if Coupon_Status is a text column it was label-encoded
        # above, so class 1 is whichever label sorts second -- confirm that
        # this really is the "coupon used" class for this dataset.
        if prediction[0] == 1:
            st.success("The customer is likely to use the coupon! 🎉")
        else:
            st.warning("The customer may not use the coupon.")