slickdata committed on
Commit
2520f96
·
1 Parent(s): fafefa5

upload other files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ R2data.csv filter=lfs diff=lfs merge=lfs -text
Best_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bb07da03faa9a032414ed0f3fbc7a9842e6267d1e7e32b5ea6564478d12c68d
3
+ size 466109
R2data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cb9d18ea37db2a26cc7689b1c46d40d959555c35c55924b08317fdb0b37591
3
+ size 30441540
explore_page.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+
4
+
5
+ def show_explore_page():
6
+ st.title("Favorita Stores Sales Explore Predict Page")
7
+
8
+
predict_page.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import numpy as np
4
+ import pandas as pd
5
+ from sklearn.impute import SimpleImputer
6
+ from xgboost import XGBRegressor
7
+ from sklearn.preprocessing import LabelEncoder
8
+ from sklearn.preprocessing import StandardScaler
9
+ import joblib
10
+
11
+
12
+
13
+ # Load the numerical imputer
14
+ #num_imputer = joblib.load("numerical_imputer.joblib")
15
+
16
+ # Load the categorical imputer
17
+ #cat_imputer = joblib.load("categorical_imputer.joblib")
18
+
19
+ # Load the scaler
20
+ #scaler = joblib.load("scaler.joblib")
21
+
22
+ # Load the label encoder for 'family' feature
23
+ #le_family = joblib.load("le_family.joblib")
24
+
25
+ # Load the label encoder for 'holiday_type' feature
26
+ #le_holiday_type = joblib.load("le_holiday_type.joblib")
27
+
28
+ # Load the label encoder for 'city' feature
29
+ #le_city = joblib.load("le_city.joblib")
30
+
31
+ # Load the final model
32
+ regressor = joblib.load("Best_model.joblib")
33
+
34
+
35
+
36
+ #@st.cache_resource()
37
+ def show_predict_page():
38
+ # Add a title and subtitle
39
+ st.write("<center><h1>Predicting Sales App</h1></center>", unsafe_allow_html=True)
40
+
41
+
42
+ # Add a subtitle or description
43
+ st.write("This app predict sales by the using machine learning, based on certain input parameters. Simply enter the required information and click 'Predict' to get a sales prediction!")
44
+
45
+ st.subheader("Enter the following details to predict sales")
46
+
47
+ input_data = {
48
+ 'store_nbr': st.slider("store_nbr", step=1, min_value=0, max_value=54),
49
+ 'onpromotion': st.number_input("onpromotion, 0 - 800", min_value=0, max_value=800),
50
+ 'transactions': st.number_input("Number of Transactions, 0 - 10000", min_value=0, max_value=10000),
51
+ 'oil_price': st.number_input("oil_price, 1 - 200", step=1, min_value=0, max_value=200),
52
+ 'cluster': st.slider("cluster", step=1, min_value=0, max_value=17),
53
+ 'day': st.slider("day", 1, 31, 1),
54
+ 'year': st.selectbox("year", [1970]),
55
+ 'month': st.slider("month", 1, 12, 1),
56
+ #'dayofmonth': st.slider("dayofmonth", 1, 31, 1),
57
+ #'dayofweek': st.slider("dayofweek, 0=Sun and 6=Sat", step=1, min_value=1, max_value=6),
58
+ 'family': st.selectbox("products", ['AUTOMOTIVE', 'Personal Care', 'Beverages', 'STATIONERY', 'Food', 'CLEANING', 'HARDWARE', 'Home and Kitchen', 'Clothing', 'PET SUPPLIES', 'ELECTRONICS']),
59
+ 'holiday_type': st.selectbox("holiday_type", ['Workday', 'holiday']),
60
+ 'city': st.selectbox("City", ['Salinas', 'Quito', 'Cayambe', 'Latacunga', 'Riobamba', 'Ibarra', 'Santo Domingo', 'Guaranda', 'Ambato', 'Guayaquil', 'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Cuenca', 'Loja', 'Machala', 'Esmeraldas', 'El Carmen', 'Libertad', 'Manta', 'Puyo'])
61
+ }
62
+
63
+ # Create a button to make a prediction
64
+
65
+ if st.button("Predict", key="predict_button", help="Click to make a prediction."):
66
+ # Convert the input data to a pandas DataFrame
67
+ input_df = pd.DataFrame([input_data])
68
+
69
+
70
+ # Selecting categorical and numerical columns separately
71
+ # cat_columns = [col for col in input_df.columns if input_df[col].dtype == 'object']
72
+ # num_columns = [col for col in input_df.columns if input_df[col].dtype != 'object']
73
+
74
+
75
+ # Apply the imputers
76
+ # input_df_imputed_cat = cat_imputer.transform(input_df[cat_columns])
77
+ # input_df_imputed_num = num_imputer.transform(input_df[num_columns])
78
+
79
+ # Convert the NumPy arrays to DataFrames
80
+ # input_df_imputed_cat = pd.DataFrame(input_df_imputed_cat, columns=cat_columns)
81
+ # input_df_imputed_num = pd.DataFrame(input_df_imputed_num, columns=num_columns)
82
+
83
+
84
+ # Scale the numerical columns
85
+ # input_df_scaled = scaler.transform(input_df_imputed_num)
86
+ # input_scaled_df = pd.DataFrame(input_df_scaled , columns = num_columns)
87
+
88
+ # input_df_imputed = pd.concat([input_df_imputed_cat, input_scaled_df], axis=1)
89
+
90
+ # Encode the categorical columns
91
+ # Encode the categorical columns
92
+ # input_df_imputed['family'] = le_family.transform(input_df_imputed['family'])
93
+ # input_df_imputed['holiday_type'] = le_holiday_type.transform(input_df_imputed['holiday_type'])
94
+ # input_df_imputed['city'] = le_city.transform(input_df_imputed['city'])
95
+
96
+
97
+ #input_encoded_df = pd.DataFrame(encoder.transform(input_df_imputed_cat))
98
+ #input_encoded_df.columns = input_encoded_df.columns.astype(str)
99
+
100
+
101
+ #joining the cat encoded and num scaled
102
+ # final_df = input_df_imputed
103
+
104
+ # Make a prediction
105
+ prediction = round(regressor.predict(input_df)[0], 2)
106
+
107
+
108
+ # Display the prediction
109
+ #st.write(f"The predicted sales are: {prediction}.")
110
+
111
+ # Display the prediction
112
+ st.subheader("Sales Prediction")
113
+ st.write("The predicted sales for the company is:", prediction)
reg_notebook.ipynb ADDED
@@ -0,0 +1,643 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# Data handling\n",
10
+ "import pandas as pd\n",
11
+ "import numpy as np\n",
12
+ "\n",
13
+ "# Visualization (Matplotlib, Plotly, Seaborn, etc. )\n",
14
+ "import matplotlib.pyplot as plt\n",
15
+ "# EDA (pandas-profiling, etc. )\n",
16
+ "...\n",
17
+ "\n",
18
+ "# Feature Processing (Scikit-learn processing, etc. )\n",
19
+ "from sklearn.metrics import mean_squared_error, mean_squared_log_error\n",
20
+ "\n",
21
+ "# Machine Learning (Scikit-learn Estimators, Catboost, LightGBM, etc. )\n",
22
+ "from sklearn.preprocessing import LabelEncoder\n",
23
+ "from sklearn.preprocessing import StandardScaler\n",
24
+ "from sklearn.model_selection import train_test_split\n",
25
+ "from sklearn.impute import SimpleImputer\n",
26
+ "from sklearn.ensemble import ExtraTreesRegressor\n",
27
+ "from xgboost import XGBRegressor\n",
28
+ "from sklearn.ensemble import GradientBoostingRegressor\n",
29
+ "from sklearn.preprocessing import OneHotEncoder\n",
30
+ "from sklearn.pipeline import Pipeline\n",
31
+ "from sklearn.compose import ColumnTransformer\n",
32
+ "\n",
33
+ "\n",
34
+ "\n",
35
+ "# Other packages\n",
36
+ "from joblib import dump\n",
37
+ "import os\n",
38
+ "import pickle\n"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 2,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "data": {
48
+ "text/html": [
49
+ "<div>\n",
50
+ "<style scoped>\n",
51
+ " .dataframe tbody tr th:only-of-type {\n",
52
+ " vertical-align: middle;\n",
53
+ " }\n",
54
+ "\n",
55
+ " .dataframe tbody tr th {\n",
56
+ " vertical-align: top;\n",
57
+ " }\n",
58
+ "\n",
59
+ " .dataframe thead th {\n",
60
+ " text-align: right;\n",
61
+ " }\n",
62
+ "</style>\n",
63
+ "<table border=\"1\" class=\"dataframe\">\n",
64
+ " <thead>\n",
65
+ " <tr style=\"text-align: right;\">\n",
66
+ " <th></th>\n",
67
+ " <th>store_nbr</th>\n",
68
+ " <th>family</th>\n",
69
+ " <th>sales</th>\n",
70
+ " <th>onpromotion</th>\n",
71
+ " <th>transactions</th>\n",
72
+ " <th>holiday_type</th>\n",
73
+ " <th>oil_price</th>\n",
74
+ " <th>city</th>\n",
75
+ " <th>cluster</th>\n",
76
+ " <th>day</th>\n",
77
+ " <th>year</th>\n",
78
+ " <th>month</th>\n",
79
+ " </tr>\n",
80
+ " <tr>\n",
81
+ " <th>date</th>\n",
82
+ " <th></th>\n",
83
+ " <th></th>\n",
84
+ " <th></th>\n",
85
+ " <th></th>\n",
86
+ " <th></th>\n",
87
+ " <th></th>\n",
88
+ " <th></th>\n",
89
+ " <th></th>\n",
90
+ " <th></th>\n",
91
+ " <th></th>\n",
92
+ " <th></th>\n",
93
+ " <th></th>\n",
94
+ " </tr>\n",
95
+ " </thead>\n",
96
+ " <tbody>\n",
97
+ " <tr>\n",
98
+ " <th>1970-01-01 00:00:00.000002013</th>\n",
99
+ " <td>25</td>\n",
100
+ " <td>AUTOMOTIVE</td>\n",
101
+ " <td>0.0</td>\n",
102
+ " <td>0</td>\n",
103
+ " <td>770</td>\n",
104
+ " <td>Workday</td>\n",
105
+ " <td>93.14</td>\n",
106
+ " <td>Salinas</td>\n",
107
+ " <td>1</td>\n",
108
+ " <td>1</td>\n",
109
+ " <td>1970</td>\n",
110
+ " <td>1</td>\n",
111
+ " </tr>\n",
112
+ " <tr>\n",
113
+ " <th>1970-01-01 00:00:00.000002013</th>\n",
114
+ " <td>25</td>\n",
115
+ " <td>Personal Care</td>\n",
116
+ " <td>0.0</td>\n",
117
+ " <td>0</td>\n",
118
+ " <td>770</td>\n",
119
+ " <td>Workday</td>\n",
120
+ " <td>93.14</td>\n",
121
+ " <td>Salinas</td>\n",
122
+ " <td>1</td>\n",
123
+ " <td>1</td>\n",
124
+ " <td>1970</td>\n",
125
+ " <td>1</td>\n",
126
+ " </tr>\n",
127
+ " <tr>\n",
128
+ " <th>1970-01-01 00:00:00.000002013</th>\n",
129
+ " <td>25</td>\n",
130
+ " <td>Personal Care</td>\n",
131
+ " <td>2.0</td>\n",
132
+ " <td>0</td>\n",
133
+ " <td>770</td>\n",
134
+ " <td>Workday</td>\n",
135
+ " <td>93.14</td>\n",
136
+ " <td>Salinas</td>\n",
137
+ " <td>1</td>\n",
138
+ " <td>1</td>\n",
139
+ " <td>1970</td>\n",
140
+ " <td>1</td>\n",
141
+ " </tr>\n",
142
+ " <tr>\n",
143
+ " <th>1970-01-01 00:00:00.000002013</th>\n",
144
+ " <td>25</td>\n",
145
+ " <td>Beverages</td>\n",
146
+ " <td>810.0</td>\n",
147
+ " <td>0</td>\n",
148
+ " <td>770</td>\n",
149
+ " <td>Workday</td>\n",
150
+ " <td>93.14</td>\n",
151
+ " <td>Salinas</td>\n",
152
+ " <td>1</td>\n",
153
+ " <td>1</td>\n",
154
+ " <td>1970</td>\n",
155
+ " <td>1</td>\n",
156
+ " </tr>\n",
157
+ " <tr>\n",
158
+ " <th>1970-01-01 00:00:00.000002013</th>\n",
159
+ " <td>25</td>\n",
160
+ " <td>STATIONERY</td>\n",
161
+ " <td>0.0</td>\n",
162
+ " <td>0</td>\n",
163
+ " <td>770</td>\n",
164
+ " <td>Workday</td>\n",
165
+ " <td>93.14</td>\n",
166
+ " <td>Salinas</td>\n",
167
+ " <td>1</td>\n",
168
+ " <td>1</td>\n",
169
+ " <td>1970</td>\n",
170
+ " <td>1</td>\n",
171
+ " </tr>\n",
172
+ " </tbody>\n",
173
+ "</table>\n",
174
+ "</div>"
175
+ ],
176
+ "text/plain": [
177
+ " store_nbr family sales onpromotion \\\n",
178
+ "date \n",
179
+ "1970-01-01 00:00:00.000002013 25 AUTOMOTIVE 0.0 0 \n",
180
+ "1970-01-01 00:00:00.000002013 25 Personal Care 0.0 0 \n",
181
+ "1970-01-01 00:00:00.000002013 25 Personal Care 2.0 0 \n",
182
+ "1970-01-01 00:00:00.000002013 25 Beverages 810.0 0 \n",
183
+ "1970-01-01 00:00:00.000002013 25 STATIONERY 0.0 0 \n",
184
+ "\n",
185
+ " transactions holiday_type oil_price city \\\n",
186
+ "date \n",
187
+ "1970-01-01 00:00:00.000002013 770 Workday 93.14 Salinas \n",
188
+ "1970-01-01 00:00:00.000002013 770 Workday 93.14 Salinas \n",
189
+ "1970-01-01 00:00:00.000002013 770 Workday 93.14 Salinas \n",
190
+ "1970-01-01 00:00:00.000002013 770 Workday 93.14 Salinas \n",
191
+ "1970-01-01 00:00:00.000002013 770 Workday 93.14 Salinas \n",
192
+ "\n",
193
+ " cluster day year month \n",
194
+ "date \n",
195
+ "1970-01-01 00:00:00.000002013 1 1 1970 1 \n",
196
+ "1970-01-01 00:00:00.000002013 1 1 1970 1 \n",
197
+ "1970-01-01 00:00:00.000002013 1 1 1970 1 \n",
198
+ "1970-01-01 00:00:00.000002013 1 1 1970 1 \n",
199
+ "1970-01-01 00:00:00.000002013 1 1 1970 1 "
200
+ ]
201
+ },
202
+ "execution_count": 2,
203
+ "metadata": {},
204
+ "output_type": "execute_result"
205
+ }
206
+ ],
207
+ "source": [
208
+ "data = pd.read_csv('R2data.csv')\n",
209
+ "data.drop(columns=['Unnamed: 0'], inplace=True)\n",
210
+ "\n",
211
+ "# Convert the date column to a datetime object\n",
212
+ "data['date'] = pd.to_datetime(data['date'])\n",
213
+ "\n",
214
+ "# Set the date column as the index\n",
215
+ "data = data.set_index('date')\n",
216
+ "data.head()"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": 3,
222
+ "metadata": {},
223
+ "outputs": [],
224
+ "source": [
225
+ "y = data['sales'] # Target Variable\n",
226
+ "X = data.drop('sales', axis = 1) # Independent Variable"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": 4,
232
+ "metadata": {},
233
+ "outputs": [],
234
+ "source": [
235
+ "numeric_transformer = Pipeline(steps = [('num_imputer',SimpleImputer(strategy = 'mean')),('scaler',StandardScaler())])\n",
236
+ "categorical_transformer = Pipeline(steps = [('cat_imputer',SimpleImputer(strategy ='most_frequent')),('one-hot',OneHotEncoder(handle_unknown='ignore', sparse=False))])"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": 5,
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": [
245
+ "categorical_feature =[\"family\", \"city\", \"holiday_type\"]\n",
246
+ "numeric_feature = ['store_nbr', 'onpromotion', 'transactions', 'oil_price', 'cluster','year', 'month']\n",
247
+ "preprocessor = ColumnTransformer(transformers=[('numeric_transformer',numeric_transformer,numeric_feature),('categorical_transformer',categorical_transformer,categorical_feature)],remainder='drop')"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": 6,
253
+ "metadata": {},
254
+ "outputs": [],
255
+ "source": [
256
+ "# Split the data into training and test sets\n",
257
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
258
+ ]
259
+ },
260
+ {
261
+ "cell_type": "code",
262
+ "execution_count": 7,
263
+ "metadata": {},
264
+ "outputs": [
265
+ {
266
+ "name": "stderr",
267
+ "output_type": "stream",
268
+ "text": [
269
+ "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:972: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n",
270
+ " warnings.warn(\n"
271
+ ]
272
+ }
273
+ ],
274
+ "source": [
275
+ "rf = GradientBoostingRegressor(n_estimators=100, random_state=42)\n",
276
+ "\n",
277
+ "rf = Pipeline(steps=[('preprocessor',preprocessor),('estimator',rf)])\n",
278
+ "rf.fit(X_train, y_train)\n",
279
+ "\n",
280
+ "# Make prediction on X_test\n",
281
+ "rf_predictions = rf.predict(X_test)\n",
282
+ "\n",
283
+ "\n",
284
+ "# Evaluate our models\n",
285
+ "rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(rf_predictions))).round(2)\n",
286
+ "\n",
287
+ "\n",
288
+ "results = pd.DataFrame([['Gradient Boosting', rmsle]], columns = ['Model', 'RMSLE'])"
289
+ ]
290
+ },
291
+ {
292
+ "cell_type": "code",
293
+ "execution_count": 8,
294
+ "metadata": {},
295
+ "outputs": [],
296
+ "source": [
297
+ "# Gradient Boosting Regression Model\n",
298
+ "#rf = GradientBoostingRegressor(n_estimators=100, random_state=42)\n",
299
+ "#rf.fit(X_train, y_train)\n",
300
+ "\n",
301
+ "# Make prediction on X_test\n",
302
+ "#rf_predictions = rf.predict(X_test)\n",
303
+ "\n",
304
+ "\n",
305
+ "# Evaluate our models\n",
306
+ "#rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(rf_predictions))).round(2)\n",
307
+ "\n",
308
+ "\n",
309
+ "#results = pd.DataFrame([['Gradient Boosting', rmsle]], columns = ['Model', 'RMSLE'])"
310
+ ]
311
+ },
312
+ {
313
+ "cell_type": "code",
314
+ "execution_count": 9,
315
+ "metadata": {},
316
+ "outputs": [
317
+ {
318
+ "name": "stderr",
319
+ "output_type": "stream",
320
+ "text": [
321
+ "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:972: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n",
322
+ " warnings.warn(\n"
323
+ ]
324
+ },
325
+ {
326
+ "data": {
327
+ "text/html": [
328
+ "<div>\n",
329
+ "<style scoped>\n",
330
+ " .dataframe tbody tr th:only-of-type {\n",
331
+ " vertical-align: middle;\n",
332
+ " }\n",
333
+ "\n",
334
+ " .dataframe tbody tr th {\n",
335
+ " vertical-align: top;\n",
336
+ " }\n",
337
+ "\n",
338
+ " .dataframe thead th {\n",
339
+ " text-align: right;\n",
340
+ " }\n",
341
+ "</style>\n",
342
+ "<table border=\"1\" class=\"dataframe\">\n",
343
+ " <thead>\n",
344
+ " <tr style=\"text-align: right;\">\n",
345
+ " <th></th>\n",
346
+ " <th>Model</th>\n",
347
+ " <th>RMSLE</th>\n",
348
+ " <th>Model</th>\n",
349
+ " <th>RMSLE</th>\n",
350
+ " </tr>\n",
351
+ " </thead>\n",
352
+ " <tbody>\n",
353
+ " <tr>\n",
354
+ " <th>0</th>\n",
355
+ " <td>Gradient Boosting</td>\n",
356
+ " <td>2.48</td>\n",
357
+ " <td>Extra Tree</td>\n",
358
+ " <td>1.93</td>\n",
359
+ " </tr>\n",
360
+ " </tbody>\n",
361
+ "</table>\n",
362
+ "</div>"
363
+ ],
364
+ "text/plain": [
365
+ " Model RMSLE Model RMSLE\n",
366
+ "0 Gradient Boosting 2.48 Extra Tree 1.93"
367
+ ]
368
+ },
369
+ "execution_count": 9,
370
+ "metadata": {},
371
+ "output_type": "execute_result"
372
+ }
373
+ ],
374
+ "source": [
375
+ "# Extra Trees Regression Model\n",
376
+ "sg = ExtraTreesRegressor(n_estimators=100, random_state=42)\n",
377
+ "sg = Pipeline(steps=[('preprocessor',preprocessor),('estimator',sg)])\n",
378
+ "sg.fit(X_train, y_train)\n",
379
+ "\n",
380
+ "# Make prediction on X_test\n",
381
+ "sg_predictions = sg.predict(X_test)\n",
382
+ "\n",
383
+ "\n",
384
+ "# Evaluate our models\n",
385
+ "rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(sg_predictions))).round(2)\n",
386
+ "\n",
387
+ "\n",
388
+ "model_results = pd.DataFrame([['Extra Tree', rmsle]], columns = ['Model', 'RMSLE'])\n",
389
+ "results = pd.concat([results, model_results], axis=1)\n",
390
+ "results"
391
+ ]
392
+ },
393
+ {
394
+ "cell_type": "code",
395
+ "execution_count": 12,
396
+ "metadata": {},
397
+ "outputs": [
398
+ {
399
+ "name": "stderr",
400
+ "output_type": "stream",
401
+ "text": [
402
+ "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:972: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n",
403
+ " warnings.warn(\n"
404
+ ]
405
+ },
406
+ {
407
+ "data": {
408
+ "text/html": [
409
+ "<div>\n",
410
+ "<style scoped>\n",
411
+ " .dataframe tbody tr th:only-of-type {\n",
412
+ " vertical-align: middle;\n",
413
+ " }\n",
414
+ "\n",
415
+ " .dataframe tbody tr th {\n",
416
+ " vertical-align: top;\n",
417
+ " }\n",
418
+ "\n",
419
+ " .dataframe thead th {\n",
420
+ " text-align: right;\n",
421
+ " }\n",
422
+ "</style>\n",
423
+ "<table border=\"1\" class=\"dataframe\">\n",
424
+ " <thead>\n",
425
+ " <tr style=\"text-align: right;\">\n",
426
+ " <th></th>\n",
427
+ " <th>Model</th>\n",
428
+ " <th>RMSLE</th>\n",
429
+ " <th>Model</th>\n",
430
+ " <th>RMSLE</th>\n",
431
+ " <th>Model</th>\n",
432
+ " <th>RMSLE</th>\n",
433
+ " <th>Model</th>\n",
434
+ " <th>RMSLE</th>\n",
435
+ " </tr>\n",
436
+ " </thead>\n",
437
+ " <tbody>\n",
438
+ " <tr>\n",
439
+ " <th>0</th>\n",
440
+ " <td>Gradient Boosting</td>\n",
441
+ " <td>2.48</td>\n",
442
+ " <td>Extra Tree</td>\n",
443
+ " <td>1.93</td>\n",
444
+ " <td>Extra Tree</td>\n",
445
+ " <td>1.93</td>\n",
446
+ " <td>XGBoost</td>\n",
447
+ " <td>2.15</td>\n",
448
+ " </tr>\n",
449
+ " </tbody>\n",
450
+ "</table>\n",
451
+ "</div>"
452
+ ],
453
+ "text/plain": [
454
+ " Model RMSLE Model RMSLE Model RMSLE Model \\\n",
455
+ "0 Gradient Boosting 2.48 Extra Tree 1.93 Extra Tree 1.93 XGBoost \n",
456
+ "\n",
457
+ " RMSLE \n",
458
+ "0 2.15 "
459
+ ]
460
+ },
461
+ "execution_count": 12,
462
+ "metadata": {},
463
+ "output_type": "execute_result"
464
+ }
465
+ ],
466
+ "source": [
467
+ "# XGBoost Regression Model\n",
468
+ "xg = XGBRegressor(n_estimators=100, random_state=42)\n",
469
+ "xg = Pipeline(steps=[('preprocessor',preprocessor),('estimator',xg)])\n",
470
+ "xg.fit(X_train, y_train)\n",
471
+ "\n",
472
+ "# Make prediction on X_test\n",
473
+ "xg_predictions = xg.predict(X_test)\n",
474
+ "\n",
475
+ "\n",
476
+ "# Evaluate our models\n",
477
+ "rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(xg_predictions))).round(2)\n",
478
+ "\n",
479
+ "\n",
480
+ "model_result = pd.DataFrame([['XGBoost', rmsle]], columns = ['Model', 'RMSLE'])\n",
481
+ "results = pd.concat([results, model_result], axis=1)\n",
482
+ "results"
483
+ ]
484
+ },
485
+ {
486
+ "cell_type": "code",
487
+ "execution_count": null,
488
+ "metadata": {},
489
+ "outputs": [],
490
+ "source": []
491
+ },
492
+ {
493
+ "cell_type": "code",
494
+ "execution_count": 11,
495
+ "metadata": {},
496
+ "outputs": [],
497
+ "source": [
498
+ "# Extra Trees Regression Model\n",
499
+ "#sg = ExtraTreesRegressor(n_estimators=100, random_state=42)\n",
500
+ "#sg.fit(X_train, y_train)\n",
501
+ "\n",
502
+ "# Make prediction on X_test\n",
503
+ "#sg_predictions = sg.predict(X_test)\n",
504
+ "\n",
505
+ "\n",
506
+ "# Evaluate our models\n",
507
+ "#rmsle = np.sqrt(mean_squared_log_error(abs(y_test), abs(sg_predictions))).round(2)\n",
508
+ "\n",
509
+ "\n",
510
+ "#model_results = pd.DataFrame([['Extra Tree', rmsle]], columns = ['Model', 'RMSLE'])\n",
511
+ "#results = pd.concat([results, model_results], axis=1)\n",
512
+ "#results"
513
+ ]
514
+ },
515
+ {
516
+ "cell_type": "code",
517
+ "execution_count": 13,
518
+ "metadata": {},
519
+ "outputs": [],
520
+ "source": [
521
+ "best_model = xg\n"
522
+ ]
523
+ },
524
+ {
525
+ "cell_type": "code",
526
+ "execution_count": 13,
527
+ "metadata": {},
528
+ "outputs": [],
529
+ "source": [
530
+ "# set the destination path to the \"export\" directory\n",
531
+ "#destination = \".\"\n",
532
+ "\n",
533
+ "# create a dictionary to store the objects and their filenames\n",
534
+ "#models = {\"numerical_imputer\": numerical_imputer,\n",
535
+ "# \"categorical_imputer\": categorical_imputer,\n",
536
+ "# \"scaler\": scaler,\n",
537
+ "# \"le_family\": le_family,\n",
538
+ "# \"le_holiday_type\": le_holiday_type,\n",
539
+ "# \"le_city\": le_city,\n",
540
+ "# \"Final_model\": best_model}\n",
541
+ "\n",
542
+ "# loop through the models and save them using joblib.dump()\n",
543
+ "#for name, model in models.items():\n",
544
+ "# dump(model, os.path.join(destination, f\"{name}.joblib\"), compress=(\"lzma\", 5))"
545
+ ]
546
+ },
547
+ {
548
+ "cell_type": "code",
549
+ "execution_count": 14,
550
+ "metadata": {},
551
+ "outputs": [],
552
+ "source": [
553
+ "# set the destination path to the current working directory\n",
554
+ "destination = \".\"\n",
555
+ "\n",
556
+ "# create a dictionary to store the objects and their filenames\n",
557
+ "models = {\"Best_model\": best_model}\n",
558
+ "\n",
559
+ "# loop through the models and save them using joblib.dump()\n",
560
+ "for name, model in models.items():\n",
561
+ " dump(model, os.path.join(destination, f\"{name}.joblib\"))"
562
+ ]
563
+ },
564
+ {
565
+ "cell_type": "code",
566
+ "execution_count": null,
567
+ "metadata": {},
568
+ "outputs": [],
569
+ "source": []
570
+ },
571
+ {
572
+ "cell_type": "code",
573
+ "execution_count": 15,
574
+ "metadata": {},
575
+ "outputs": [],
576
+ "source": [
577
+ "# Identify numeric and non-numeric columns\n",
578
+ "#num_cols = X.select_dtypes(include=[np.number]).columns.tolist()\n",
579
+ "#cat_cols = X.select_dtypes(exclude=[np.number]).columns.tolist()\n",
580
+ "\n",
581
+ "# Creating imputer variables\n",
582
+ "#numerical_imputer = SimpleImputer(strategy = \"mean\")\n",
583
+ "#categorical_imputer = SimpleImputer(strategy = \"most_frequent\")\n",
584
+ "\n",
585
+ "#X_cat = X[cat_cols].copy()\n",
586
+ "#X_num = X[num_cols].copy()\n",
587
+ "\n",
588
+ "\n",
589
+ "# Fitting the Imputer\n",
590
+ "#X_cat_imputed = categorical_imputer.fit_transform(X_cat)\n",
591
+ "#X_num_imputed = numerical_imputer.fit_transform(X_num)\n",
592
+ "\n",
593
+ "# Convert NumPy arrays to DataFrames\n",
594
+ "#X_cat_imputed = pd.DataFrame(X_cat_imputed, columns=cat_cols)\n",
595
+ "#X_num_imputed = pd.DataFrame(X_num_imputed, columns=num_cols)\n",
596
+ "\n",
597
+ "\n",
598
+ "#scaler = StandardScaler()\n",
599
+ "\n",
600
+ "#X_num_scaled = scaler.fit_transform(X_num_imputed)\n",
601
+ "#X_num_sc = pd.DataFrame(X_num_scaled, columns = num_cols)\n",
602
+ "\n",
603
+ "\n",
604
+ "\n",
605
+ "# Concatenate the imputed dataframes\n",
606
+ "#X = pd.concat([X_num_sc, X_cat_imputed], axis=1)\n",
607
+ "\n",
608
+ "#le_family = LabelEncoder()\n",
609
+ "#X['family'] = le_family.fit_transform(X['family'])\n",
610
+ "\n",
611
+ "#le_holiday_type = LabelEncoder()\n",
612
+ "#X['holiday_type'] = le_holiday_type.fit_transform(X['holiday_type'])\n",
613
+ "\n",
614
+ "#le_city = LabelEncoder()\n",
615
+ "#X['city'] = le_city.fit_transform(X['city'])\n",
616
+ "\n",
617
+ "#X.info()"
618
+ ]
619
+ }
620
+ ],
621
+ "metadata": {
622
+ "kernelspec": {
623
+ "display_name": "Python 3",
624
+ "language": "python",
625
+ "name": "python3"
626
+ },
627
+ "language_info": {
628
+ "codemirror_mode": {
629
+ "name": "ipython",
630
+ "version": 3
631
+ },
632
+ "file_extension": ".py",
633
+ "mimetype": "text/x-python",
634
+ "name": "python",
635
+ "nbconvert_exporter": "python",
636
+ "pygments_lexer": "ipython3",
637
+ "version": "3.11.4"
638
+ },
639
+ "orig_nbformat": 4
640
+ },
641
+ "nbformat": 4,
642
+ "nbformat_minor": 2
643
+ }
requirements .txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ google_api_python_client==2.84.0
2
+ google_auth_oauthlib==1.0.0
3
+ gradio==3.35.2
4
+ joblib==1.2.0
5
+ matplotlib==3.7.1
6
+ numpy==1.22.4
7
+ pandas==1.5.3
8
+ Pillow==8.4.0
9
+ Pillow==9.5.0
10
+ protobuf==3.20.3
11
+ scikit_learn==1.2.2
12
+ streamlit==1.24.0