Spaces:

Gillie2004
/

Online_Shopping_Recommendation_System_using_Decision_Trees

Running

App Files Files Community

3v324v23 committed on Mar 3

Commit

26e2233

1 Parent(s): fe2ce5b

final

Browse files

Files changed (4) hide show

app.py +91 -0
decision_tree_model.pkl +3 -0
file.csv +0 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import streamlit as st
+import pandas as pd
+import joblib
+import seaborn as sns
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.preprocessing import LabelEncoder
+# Streamlit app: trains a decision tree on file.csv to predict Coupon_Status
+# and exposes the data, a few plots and a prediction form in three tabs.
+# Streamlit re-executes this whole script on every widget interaction, so
+# everything below runs again on each rerun.
+st.title("🛒 Online Shopping Recommendation System")
+# Load dataset
+csv_path = "file.csv"
+df = pd.read_csv(csv_path)
+# Columns fed to the model and the column it predicts
+features = ['Avg_Price', 'Delivery_Charges', 'Discount_pct', 'Online_Spend', 'Offline_Spend', 'Tenure_Months']
+target = 'Coupon_Status'
+# Remove rows with missing values BEFORE encoding: once a text column has been
+# label-encoded it no longer holds NaN, so a later dropna() cannot remove
+# rows whose text value was missing.
+df = df.dropna()
+# Encode every text column as integer codes; the fitted encoders are kept so
+# the codes can be mapped back to the original labels if needed.
+label_encoders = {}
+for col in df.select_dtypes(include=['object']).columns:
+    le = LabelEncoder()
+    df[col] = le.fit_transform(df[col])
+    label_encoders[col] = le
+X = df[features]
+y = df[target]
+# Split data into train and test sets (the test split is currently unused)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Train Decision Tree model; a fixed seed keeps the tree identical across
+# Streamlit reruns, matching the seeded split above.
+model = DecisionTreeClassifier(random_state=42)
+model.fit(X_train, y_train)
+# Save the model so it is also available outside this app
+joblib.dump(model, "decision_tree_model.pkl")
+# Streamlit app with three tabs
+tab1, tab2, tab3 = st.tabs(["📊 Dataset & Summary", "📈 Data Visualization", "🔍 Prediction"])
+# Tab 1: Dataset & Summary
+with tab1:
+    st.subheader("Dataset Overview")
+    st.write(df)  # Show full (cleaned and encoded) dataset
+    st.write("### Summary Statistics")
+    st.write(df.describe())
+# Tab 2: Data Visualization
+with tab2:
+    st.subheader("📊 Correlation Matrix")
+    corr_fig, corr_ax = plt.subplots(figsize=(10, 6))
+    sns.heatmap(df.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=corr_ax)
+    st.pyplot(corr_fig)
+    # Close each figure after rendering; otherwise figures pile up in
+    # matplotlib's global registry on every rerun.
+    plt.close(corr_fig)
+    st.subheader("📊 Pairplot")
+    pair_grid = sns.pairplot(df[features])
+    # pairplot returns a PairGrid; hand st.pyplot the underlying figure.
+    st.pyplot(pair_grid.figure)
+    plt.close(pair_grid.figure)
+    st.subheader("📊 Feature Importance (Decision Tree)")
+    feature_importance_fig, importance_ax = plt.subplots()
+    feature_importances = pd.Series(model.feature_importances_, index=features)
+    # Show all features, largest first, without hard-coding how many exist.
+    feature_importances.nlargest(len(features)).plot(kind='barh', ax=importance_ax)
+    st.pyplot(feature_importance_fig)
+    plt.close(feature_importance_fig)
+# Tab 3: Prediction
+with tab3:
+    st.subheader("🔍 Make a Prediction")
+    # User inputs, collected in the same order as `features`
+    avg_price = st.number_input("Average Price of Product", min_value=0.0, step=1.0)
+    delivery_charges = st.number_input("Delivery Charges", min_value=0.0, step=0.5)
+    discount_pct = st.number_input("Discount Percentage", min_value=0.0, max_value=100.0, step=1.0)
+    online_spend = st.number_input("Online Spend", min_value=0.0, step=10.0)
+    offline_spend = st.number_input("Offline Spend", min_value=0.0, step=10.0)
+    tenure = st.number_input("Tenure in Months", min_value=0, step=1)
+    if st.button("Predict Coupon Usage"):
+        # Use the model trained above directly; re-reading the pickle that was
+        # written a moment ago would only add a disk round-trip.
+        # Build a one-row frame with the training column names so the input
+        # lines up with the features the tree was fitted on.
+        user_input = pd.DataFrame(
+            [[avg_price, delivery_charges, discount_pct, online_spend, offline_spend, tenure]],
+            columns=features,
+        )
+        prediction = model.predict(user_input)
+        # Display result
+        # NOTE(review): if Coupon_Status is a text column it was label-encoded
+        # above, and LabelEncoder numbers labels in sorted order, so code 1 is
+        # whichever label sorts second. Confirm against file.csv that this is
+        # really the "coupon used" class (label_encoders[target].classes_).
+        if prediction[0] == 1:
+            st.success("The customer is likely to use the coupon! 🎉")
+        else:
+            st.warning("The customer may not use the coupon.")

decision_tree_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3762309125ef413b35393051171c87e08b80b1746b89fbb4f6a68a70ed2de28
+size 3289825

file.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+pandas
+joblib
+seaborn
+matplotlib
+scikit-learn