Spaces:

slickdata
/

sales_prediction

Sleeping

App Files Files Community

slickdata committed on Jul 10, 2023

Commit

2520f96

1 Parent(s): fafefa5

upload other files

Browse files

Files changed (7) hide show

.gitattributes +1 -0
Best_model.joblib +3 -0
R2data.csv +3 -0
explore_page.py +8 -0
predict_page.py +113 -0
reg_notebook.ipynb +643 -0
requirements .txt +12 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+R2data.csv filter=lfs diff=lfs merge=lfs -text

Best_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0bb07da03faa9a032414ed0f3fbc7a9842e6267d1e7e32b5ea6564478d12c68d
+size 466109

R2data.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28cb9d18ea37db2a26cc7689b1c46d40d959555c35c55924b08317fdb0b37591
+size 30441540

explore_page.py ADDED Viewed

	@@ -0,0 +1,8 @@

+import streamlit as st
+def show_explore_page():
+    st.title("Favorita Stores Sales Explore Predict Page")

predict_page.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import streamlit as st
+import pickle
+import numpy as np
+import pandas as pd
+from sklearn.impute import SimpleImputer
+from xgboost import XGBRegressor
+from sklearn.preprocessing import LabelEncoder
+from sklearn.preprocessing import StandardScaler
+import joblib
+# Load the numerical imputer
+#num_imputer = joblib.load("numerical_imputer.joblib")
+# Load the categorical imputer
+#cat_imputer = joblib.load("categorical_imputer.joblib")
+# Load the scaler
+#scaler = joblib.load("scaler.joblib")
+# Load the label encoder for 'family' feature
+#le_family = joblib.load("le_family.joblib")
+# Load the label encoder for 'holiday_type' feature
+#le_holiday_type = joblib.load("le_holiday_type.joblib")
+# Load the label encoder for 'city' feature
+#le_city = joblib.load("le_city.joblib")
+# Load the final model once, when this module is imported. The training
+# notebook in this commit dumps a fitted sklearn Pipeline (preprocessor +
+# XGBRegressor) under this file name, which is why no separate
+# imputer/scaler/encoder artifacts are loaded here.
+regressor = joblib.load("Best_model.joblib")
+#@st.cache_resource()
+def show_predict_page():
+     # Add a title and subtitle
+    st.write("<center><h1>Predicting Sales App</h1></center>", unsafe_allow_html=True)
+    # Add a subtitle or description
+    st.write("This app predict sales by the using machine learning, based on certain input parameters. Simply enter the required information and click 'Predict' to get a sales prediction!")
+    st.subheader("Enter the following details to predict sales")
+    input_data = {
+        'store_nbr': st.slider("store_nbr", step=1, min_value=0, max_value=54),
+        'onpromotion': st.number_input("onpromotion, 0 - 800", min_value=0, max_value=800),
+        'transactions': st.number_input("Number of Transactions, 0 - 10000", min_value=0, max_value=10000),
+        'oil_price': st.number_input("oil_price, 1 - 200", step=1, min_value=0, max_value=200),
+        'cluster': st.slider("cluster", step=1, min_value=0, max_value=17),
+        'day': st.slider("day", 1, 31, 1),
+        'year': st.selectbox("year", [1970]),
+        'month': st.slider("month", 1, 12, 1),
+        #'dayofmonth': st.slider("dayofmonth", 1, 31, 1),
+        #'dayofweek': st.slider("dayofweek, 0=Sun and 6=Sat", step=1, min_value=1, max_value=6),
+        'family': st.selectbox("products", ['AUTOMOTIVE', 'Personal Care', 'Beverages', 'STATIONERY', 'Food', 'CLEANING', 'HARDWARE', 'Home and Kitchen', 'Clothing', 'PET SUPPLIES', 'ELECTRONICS']),
+        'holiday_type': st.selectbox("holiday_type", ['Workday', 'holiday']),
+        'city': st.selectbox("City", ['Salinas', 'Quito', 'Cayambe', 'Latacunga', 'Riobamba', 'Ibarra', 'Santo Domingo', 'Guaranda', 'Ambato', 'Guayaquil', 'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Cuenca', 'Loja', 'Machala', 'Esmeraldas', 'El Carmen', 'Libertad', 'Manta', 'Puyo'])
+    }
+# Create a button to make a prediction
+    if st.button("Predict", key="predict_button", help="Click to make a prediction."):
+        # Convert the input data to a pandas DataFrame
+        input_df = pd.DataFrame([input_data])
+# Selecting categorical and numerical columns separately
+#        cat_columns = [col for col in input_df.columns if input_df[col].dtype == 'object']
+#        num_columns = [col for col in input_df.columns if input_df[col].dtype != 'object']
+# Apply the imputers
+#        input_df_imputed_cat = cat_imputer.transform(input_df[cat_columns])
+#        input_df_imputed_num = num_imputer.transform(input_df[num_columns])
+# Convert the NumPy arrays to DataFrames
+#        input_df_imputed_cat = pd.DataFrame(input_df_imputed_cat, columns=cat_columns)
+#        input_df_imputed_num = pd.DataFrame(input_df_imputed_num, columns=num_columns)
+# Scale the numerical columns
+#        input_df_scaled = scaler.transform(input_df_imputed_num)
+#        input_scaled_df = pd.DataFrame(input_df_scaled , columns = num_columns)
+#        input_df_imputed  = pd.concat([input_df_imputed_cat, input_scaled_df], axis=1)
+ # Encode the categorical columns
+        # Encode the categorical columns
+#        input_df_imputed['family'] = le_family.transform(input_df_imputed['family'])
+#        input_df_imputed['holiday_type'] = le_holiday_type.transform(input_df_imputed['holiday_type'])
+#        input_df_imputed['city'] = le_city.transform(input_df_imputed['city'])
+        #input_encoded_df = pd.DataFrame(encoder.transform(input_df_imputed_cat))
+        #input_encoded_df.columns = input_encoded_df.columns.astype(str)
+#joining the cat encoded and num scaled
+#        final_df = input_df_imputed
+# Make a prediction
+        prediction = round(regressor.predict(input_df)[0], 2)
+# Display the prediction
+     #st.write(f"The predicted sales are: {prediction}.")
+# Display the prediction
+        st.subheader("Sales Prediction")
+        st.write("The predicted sales for the company is:", prediction)

reg_notebook.ipynb ADDED Viewed

	@@ -0,0 +1,643 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data handling\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "# Visualization (Matplotlib, Plotly, Seaborn, etc. )\n",
+    "import matplotlib.pyplot as plt\n",
+    "# EDA (pandas-profiling, etc. )\n",
+    "...\n",
+    "\n",
+    "# Feature Processing (Scikit-learn processing, etc. )\n",
+    "from sklearn.metrics import mean_squared_error, mean_squared_log_error\n",
+    "\n",
+    "# Machine Learning (Scikit-learn Estimators, Catboost, LightGBM, etc. )\n",
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "from sklearn.ensemble import ExtraTreesRegressor\n",
+    "from xgboost import XGBRegressor\n",
+    "from sklearn.ensemble import GradientBoostingRegressor\n",
+    "from sklearn.preprocessing import OneHotEncoder\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.compose import ColumnTransformer\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Other packages\n",
+    "from joblib import dump\n",
+    "import os\n",
+    "import pickle\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>store_nbr</th>\n",
+       "      <th>family</th>\n",
+       "      <th>sales</th>\n",
+       "      <th>onpromotion</th>\n",
+       "      <th>transactions</th>\n",
+       "      <th>holiday_type</th>\n",
+       "      <th>oil_price</th>\n",
+       "      <th>city</th>\n",
+       "      <th>cluster</th>\n",
+       "      <th>day</th>\n",
+       "      <th>year</th>\n",
+       "      <th>month</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>date</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1970-01-01 00:00:00.000002013</th>\n",
+       "      <td>25</td>\n",
+       "      <td>AUTOMOTIVE</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>770</td>\n",
+       "      <td>Workday</td>\n",
+       "      <td>93.14</td>\n",
+       "      <td>Salinas</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1970</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1970-01-01 00:00:00.000002013</th>\n",
+       "      <td>25</td>\n",
+       "      <td>Personal Care</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>770</td>\n",
+       "      <td>Workday</td>\n",
+       "      <td>93.14</td>\n",
+       "      <td>Salinas</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1970</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1970-01-01 00:00:00.000002013</th>\n",
+       "      <td>25</td>\n",
+       "      <td>Personal Care</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>770</td>\n",
+       "      <td>Workday</td>\n",
+       "      <td>93.14</td>\n",
+       "      <td>Salinas</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1970</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1970-01-01 00:00:00.000002013</th>\n",
+       "      <td>25</td>\n",
+       "      <td>Beverages</td>\n",
+       "      <td>810.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>770</td>\n",
+       "      <td>Workday</td>\n",
+       "      <td>93.14</td>\n",
+       "      <td>Salinas</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1970</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1970-01-01 00:00:00.000002013</th>\n",
+       "      <td>25</td>\n",
+       "      <td>STATIONERY</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>770</td>\n",
+       "      <td>Workday</td>\n",
+       "      <td>93.14</td>\n",
+       "      <td>Salinas</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1970</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                               store_nbr         family  sales  onpromotion  \\\n",
+       "date                                                                          \n",
+       "1970-01-01 00:00:00.000002013         25     AUTOMOTIVE    0.0            0   \n",
+       "1970-01-01 00:00:00.000002013         25  Personal Care    0.0            0   \n",
+       "1970-01-01 00:00:00.000002013         25  Personal Care    2.0            0   \n",
+       "1970-01-01 00:00:00.000002013         25      Beverages  810.0            0   \n",
+       "1970-01-01 00:00:00.000002013         25     STATIONERY    0.0            0   \n",
+       "\n",
+       "                               transactions holiday_type  oil_price     city  \\\n",
+       "date                                                                           \n",
+       "1970-01-01 00:00:00.000002013           770      Workday      93.14  Salinas   \n",
+       "1970-01-01 00:00:00.000002013           770      Workday      93.14  Salinas   \n",
+       "1970-01-01 00:00:00.000002013           770      Workday      93.14  Salinas   \n",
+       "1970-01-01 00:00:00.000002013           770      Workday      93.14  Salinas   \n",
+       "1970-01-01 00:00:00.000002013           770      Workday      93.14  Salinas   \n",
+       "\n",
+       "                               cluster  day  year  month  \n",
+       "date                                                      \n",
+       "1970-01-01 00:00:00.000002013        1    1  1970      1  \n",
+       "1970-01-01 00:00:00.000002013        1    1  1970      1  \n",
+       "1970-01-01 00:00:00.000002013        1    1  1970      1  \n",
+       "1970-01-01 00:00:00.000002013        1    1  1970      1  \n",
+       "1970-01-01 00:00:00.000002013        1    1  1970      1  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.read_csv('R2data.csv')\n",
+    "data.drop(columns=['Unnamed: 0'], inplace=True)\n",
+    "\n",
+    "# Convert the date column to a datetime object\n",
+    "data['date'] = pd.to_datetime(data['date'])\n",
+    "\n",
+    "# Set the date column as the index\n",
+    "data = data.set_index('date')\n",
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y = data['sales']                         # Target Variable\n",
+    "X = data.drop('sales', axis = 1)          # Independent Variable"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "numeric_transformer = Pipeline(steps = [('num_imputer',SimpleImputer(strategy = 'mean')),('scaler',StandardScaler())])\n",
+    "categorical_transformer = Pipeline(steps = [('cat_imputer',SimpleImputer(strategy ='most_frequent')),('one-hot',OneHotEncoder(handle_unknown='ignore', sparse=False))])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "categorical_feature =[\"family\", \"city\", \"holiday_type\"]\n",
+    "numeric_feature = ['store_nbr', 'onpromotion', 'transactions', 'oil_price', 'cluster','year', 'month']\n",
+    "preprocessor = ColumnTransformer(transformers=[('numeric_transformer',numeric_transformer,numeric_feature),('categorical_transformer',categorical_transformer,categorical_feature)],remainder='drop')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split the data into training and test sets\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:972: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "rf = GradientBoostingRegressor(n_estimators=100, random_state=42)\n",
+    "\n",
+    "rf = Pipeline(steps=[('preprocessor',preprocessor),('estimator',rf)])\n",
+    "rf.fit(X_train, y_train)\n",
+    "\n",
+    "# Make prediction on X_test\n",
+    "rf_predictions = rf.predict(X_test)\n",
+    "\n",
+    "\n",
+    "# Evaluate our models\n",
+    "rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(rf_predictions))).round(2)\n",
+    "\n",
+    "\n",
+    "results = pd.DataFrame([['Gradient Boosting', rmsle]], columns = ['Model', 'RMSLE'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Gradient Boosting Regression Model\n",
+    "#rf = GradientBoostingRegressor(n_estimators=100, random_state=42)\n",
+    "#rf.fit(X_train, y_train)\n",
+    "\n",
+    "# Make prediction on X_test\n",
+    "#rf_predictions = rf.predict(X_test)\n",
+    "\n",
+    "\n",
+    "# Evaluate our models\n",
+    "#rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(rf_predictions))).round(2)\n",
+    "\n",
+    "\n",
+    "#results = pd.DataFrame([['Gradient Boosting', rmsle]], columns = ['Model', 'RMSLE'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:972: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Model</th>\n",
+       "      <th>RMSLE</th>\n",
+       "      <th>Model</th>\n",
+       "      <th>RMSLE</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Gradient Boosting</td>\n",
+       "      <td>2.48</td>\n",
+       "      <td>Extra Tree</td>\n",
+       "      <td>1.93</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               Model  RMSLE       Model  RMSLE\n",
+       "0  Gradient Boosting   2.48  Extra Tree   1.93"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Extra Trees Regression Model\n",
+    "sg = ExtraTreesRegressor(n_estimators=100, random_state=42)\n",
+    "sg = Pipeline(steps=[('preprocessor',preprocessor),('estimator',sg)])\n",
+    "sg.fit(X_train, y_train)\n",
+    "\n",
+    "# Make prediction on X_test\n",
+    "sg_predictions = sg.predict(X_test)\n",
+    "\n",
+    "\n",
+    "# Evaluate our models\n",
+    "rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(sg_predictions))).round(2)\n",
+    "\n",
+    "\n",
+    "model_results = pd.DataFrame([['Extra Tree', rmsle]], columns = ['Model', 'RMSLE'])\n",
+    "results = pd.concat([results, model_results], axis=1)\n",
+    "results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:972: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Model</th>\n",
+       "      <th>RMSLE</th>\n",
+       "      <th>Model</th>\n",
+       "      <th>RMSLE</th>\n",
+       "      <th>Model</th>\n",
+       "      <th>RMSLE</th>\n",
+       "      <th>Model</th>\n",
+       "      <th>RMSLE</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Gradient Boosting</td>\n",
+       "      <td>2.48</td>\n",
+       "      <td>Extra Tree</td>\n",
+       "      <td>1.93</td>\n",
+       "      <td>Extra Tree</td>\n",
+       "      <td>1.93</td>\n",
+       "      <td>XGBoost</td>\n",
+       "      <td>2.15</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               Model  RMSLE       Model  RMSLE       Model  RMSLE    Model  \\\n",
+       "0  Gradient Boosting   2.48  Extra Tree   1.93  Extra Tree   1.93  XGBoost   \n",
+       "\n",
+       "   RMSLE  \n",
+       "0   2.15  "
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# XGBoost Regression Model\n",
+    "xg = XGBRegressor(n_estimators=100, random_state=42)\n",
+    "xg = Pipeline(steps=[('preprocessor',preprocessor),('estimator',xg)])\n",
+    "xg.fit(X_train, y_train)\n",
+    "\n",
+    "# Make prediction on X_test\n",
+    "xg_predictions = xg.predict(X_test)\n",
+    "\n",
+    "\n",
+    "# Evaluate our models\n",
+    "rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(xg_predictions))).round(2)\n",
+    "\n",
+    "\n",
+    "model_result = pd.DataFrame([['XGBoost', rmsle]], columns = ['Model', 'RMSLE'])\n",
+    "results = pd.concat([results, model_result], axis=1)\n",
+    "results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extra Trees Regression Model\n",
+    "#sg = ExtraTreesRegressor(n_estimators=100, random_state=42)\n",
+    "#sg.fit(X_train, y_train)\n",
+    "\n",
+    "# Make prediction on X_test\n",
+    "#sg_predictions = sg.predict(X_test)\n",
+    "\n",
+    "\n",
+    "# Evaluate our models\n",
+    "#rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(sg_predictions))).round(2)\n",
+    "\n",
+    "\n",
+    "#model_results = pd.DataFrame([['Extra Tree', rmsle]], columns = ['Model', 'RMSLE'])\n",
+    "#results = pd.concat([results, model_results], axis=1)\n",
+    "#results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "best_model = xg\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set the destination path to the \"export\" directory\n",
+    "#destination = \".\"\n",
+    "\n",
+    "# create a dictionary to store the objects and their filenames\n",
+    "#models = {\"numerical_imputer\": numerical_imputer,\n",
+    "#          \"categorical_imputer\": categorical_imputer,\n",
+    "#          \"scaler\": scaler,\n",
+    "#          \"le_family\": le_family,\n",
+    "#          \"le_holiday_type\": le_holiday_type,\n",
+    "#          \"le_city\": le_city,\n",
+    "#          \"Final_model\": best_model}\n",
+    "\n",
+    "# loop through the models and save them using joblib.dump()\n",
+    "#for name, model in models.items():\n",
+    "#    dump(model, os.path.join(destination, f\"{name}.joblib\"), compress=(\"lzma\", 5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set the destination path to the current directory\n",
+    "destination = \".\"\n",
+    "\n",
+    "# create a dictionary to store the objects and their filenames\n",
+    "models = {\"Best_model\": best_model}\n",
+    "\n",
+    "# loop through the models and save them using joblib.dump()\n",
+    "for name, model in models.items():\n",
+    "    dump(model, os.path.join(destination, f\"{name}.joblib\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Identify numeric and non-numeric columns\n",
+    "#num_cols = X.select_dtypes(include=[np.number]).columns.tolist()\n",
+    "#cat_cols = X.select_dtypes(exclude=[np.number]).columns.tolist()\n",
+    "\n",
+    "# Creating imputer variables\n",
+    "#numerical_imputer = SimpleImputer(strategy = \"mean\")\n",
+    "#categorical_imputer = SimpleImputer(strategy = \"most_frequent\")\n",
+    "\n",
+    "#X_cat = X[cat_cols].copy()\n",
+    "#X_num = X[num_cols].copy()\n",
+    "\n",
+    "\n",
+    "# Fitting the Imputer\n",
+    "#X_cat_imputed = categorical_imputer.fit_transform(X_cat)\n",
+    "#X_num_imputed = numerical_imputer.fit_transform(X_num)\n",
+    "\n",
+    "# Convert NumPy arrays to DataFrames\n",
+    "#X_cat_imputed = pd.DataFrame(X_cat_imputed, columns=cat_cols)\n",
+    "#X_num_imputed = pd.DataFrame(X_num_imputed, columns=num_cols)\n",
+    "\n",
+    "\n",
+    "#scaler = StandardScaler()\n",
+    "\n",
+    "#X_num_scaled = scaler.fit_transform(X_num_imputed)\n",
+    "#X_num_sc = pd.DataFrame(X_num_scaled, columns = num_cols)\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Concatenate the imputed dataframes\n",
+    "#X = pd.concat([X_num_sc, X_cat_imputed], axis=1)\n",
+    "\n",
+    "#le_family = LabelEncoder()\n",
+    "#X['family'] = le_family.fit_transform(X['family'])\n",
+    "\n",
+    "#le_holiday_type = LabelEncoder()\n",
+    "#X['holiday_type'] = le_holiday_type.fit_transform(X['holiday_type'])\n",
+    "\n",
+    "#le_city = LabelEncoder()\n",
+    "#X['city'] = le_city.fit_transform(X['city'])\n",
+    "\n",
+    "#X.info()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

requirements .txt ADDED Viewed

	@@ -0,0 +1,12 @@

+google_api_python_client==2.84.0
+google_auth_oauthlib==1.0.0
+gradio==3.35.2
+joblib==1.2.0
+matplotlib==3.7.1
+numpy==1.22.4
+pandas==1.5.3
+Pillow==8.4.0
+Pillow==9.5.0
+protobuf==3.20.3
+scikit_learn==1.2.2
+streamlit==1.24.0