import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier


st.title("π Online Shopping Recommendation System")

# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
csv_path = "file.csv"

# Drop incomplete rows BEFORE label-encoding. If encoding ran first, a missing
# value in a text column would be turned into an ordinary integer class (or
# make the encoder raise, depending on the scikit-learn version), and a later
# dropna() could no longer remove that row. The copy gives us an independent
# frame so the column assignments below never write into a view.
df = pd.read_csv(csv_path).dropna().copy()

# ---------------------------------------------------------------------------
# Categorical encoding
# ---------------------------------------------------------------------------
# Every object-dtype column is replaced by integer codes. The fitted encoder
# is kept per column so codes can be mapped back to the original labels.
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# ---------------------------------------------------------------------------
# Feature / target selection
# ---------------------------------------------------------------------------
features = [
    'Avg_Price',
    'Delivery_Charges',
    'Discount_pct',
    'Online_Spend',
    'Offline_Spend',
    'Tenure_Months',
]
target = 'Coupon_Status'

X = df[features]
y = df[target]

# 80/20 split with a fixed seed so the training subset is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ---------------------------------------------------------------------------
# Model training and persistence
# ---------------------------------------------------------------------------
# Seed the tree as well: without it, tie-breaking between equally good splits
# is random, so the fitted model could differ between runs even though the
# split itself is seeded.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Persist the fitted model next to the script.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# load/fit/dump sequence runs on every rerun — consider caching if it is slow.
joblib.dump(model, "decision_tree_model.pkl")

# Top-level navigation: one tab each for the data, the charts and the
# prediction form.
tab_labels = ["π Dataset & Summary", "π Data Visualization", "π Prediction"]
tab1, tab2, tab3 = st.tabs(tab_labels)

# Tab 1: the processed DataFrame followed by its describe() summary.
with tab1:
    st.subheader("Dataset Overview")
    st.write(df)

    st.write("### Summary Statistics")
    summary_stats = df.describe()
    st.write(summary_stats)

# Tab 2: correlation heatmap, pairplot of the model features, and the fitted
# tree's feature importances.
with tab2:
    st.subheader("π Correlation Matrix")
    fig, ax = plt.subplots(figsize=(10, 6))
    # Restrict to numeric/bool columns explicitly: DataFrame.corr() raises on
    # non-numeric columns in recent pandas versions.
    numeric_df = df.select_dtypes(include=["number", "bool"])
    sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    st.pyplot(fig)
    # Close each figure once rendered; pyplot keeps every figure it created
    # alive, so they would otherwise pile up across script reruns.
    plt.close(fig)

    st.subheader("π Pairplot")
    pairplot_fig = sns.pairplot(df[features])
    # sns.pairplot returns a PairGrid; hand Streamlit the underlying Figure.
    st.pyplot(pairplot_fig.figure)
    plt.close(pairplot_fig.figure)

    st.subheader("π Feature Importance (Decision Tree)")
    feature_importance_fig, ax = plt.subplots()
    feature_importances = pd.Series(model.feature_importances_, index=features)
    # Show every feature, largest first, without hard-coding the count.
    feature_importances.nlargest(len(features)).plot(kind='barh', ax=ax)
    st.pyplot(feature_importance_fig)
    plt.close(feature_importance_fig)

# Tab 3: collect one value per model feature and predict the coupon status.
with tab3:
    st.subheader("π Make a Prediction")

    # One input widget per entry in `features`, in the same order.
    avg_price = st.number_input("Average Price of Product", min_value=0.0, step=1.0)
    delivery_charges = st.number_input("Delivery Charges", min_value=0.0, step=0.5)
    discount_pct = st.number_input("Discount Percentage", min_value=0.0, max_value=100.0, step=1.0)
    online_spend = st.number_input("Online Spend", min_value=0.0, step=10.0)
    offline_spend = st.number_input("Offline Spend", min_value=0.0, step=10.0)
    tenure = st.number_input("Tenure in Months", min_value=0, step=1)

    if st.button("Predict Coupon Usage"):
        # Build a one-row DataFrame with the training column names. The model
        # was fitted on a DataFrame, so this keeps the feature order explicit
        # and avoids scikit-learn's "X does not have valid feature names"
        # warning that a bare nested list triggers.
        input_row = pd.DataFrame(
            [[avg_price, delivery_charges, discount_pct, online_spend, offline_spend, tenure]],
            columns=features,
        )

        # Use the in-memory model fitted earlier in this run; it is the same
        # object that was just written to decision_tree_model.pkl, so reloading
        # the pickle here is unnecessary.
        prediction = model.predict(input_row)

        # When the target column was label-encoded, also show the original
        # label so the numeric class code is not the only thing displayed.
        target_encoder = label_encoders.get(target)
        if target_encoder is not None:
            predicted_label = target_encoder.inverse_transform(prediction)[0]
            st.write(f"Predicted {target}: {predicted_label}")

        # NOTE(review): LabelEncoder assigns codes in sorted label order, so
        # code 1 is simply whichever label sorts second. Confirm against the
        # data that this is really the "coupon used" class.
        if prediction[0] == 1:
            st.success("The customer is likely to use the coupon! π")
        else:
            st.warning("The customer may not use the coupon.")
|