WebashalarForML committed on
Commit
bc872ec
·
verified ·
1 Parent(s): 44073b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -58
app.py CHANGED
@@ -2,13 +2,14 @@ from flask import Flask, render_template, request, redirect, url_for, flash, sen
2
  import os
3
  import pandas as pd
4
  from werkzeug.utils import secure_filename
5
- from joblib import load
6
  import numpy as np
7
  from sklearn.preprocessing import LabelEncoder
8
  from time import time
9
  from huggingface_hub import hf_hub_download
10
  import pickle
11
- import os
 
12
 
13
  app = Flask(__name__)
14
 
@@ -22,22 +23,25 @@ MODEL_FOLDER = "models/"
22
 
23
  # Define the model directory and label encoder directory
24
  MODEL_DIR = r'./Model'
25
- LABEL_ENOCDER_DIR = r'./Label_encoders'
26
 
27
  # Global file names for outputs; these will be updated per prediction.
28
- PRED_OUTPUT_FILE = "data/pred_output.csv"
29
- CLASS_OUTPUT_FILE = "data/class_output.csv"
 
30
 
31
  ALLOWED_EXTENSIONS = {'csv', 'xlsx'}
32
 
 
33
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
34
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
35
 
36
- app.config['DATA_FOLDER'] = UPLOAD_FOLDER
37
  os.makedirs(app.config['DATA_FOLDER'], exist_ok=True)
 
38
  os.makedirs("data", exist_ok=True)
39
 
40
- app.config['MODEL_FOLDER'] = UPLOAD_FOLDER
41
  os.makedirs(app.config['MODEL_FOLDER'], exist_ok=True)
42
 
43
 
@@ -45,14 +49,12 @@ os.makedirs(app.config['MODEL_FOLDER'], exist_ok=True)
45
  # Load Models and Label Encoders
46
  # ------------------------------
47
 
48
- # prediction analysis
49
- # Download the model file to the specified location
50
  file_path_1 = hf_hub_download(
51
  repo_id="WebashalarForML/Diamond_model_",
52
  filename="models_list/mkble/StackingRegressor_best_pipeline_mkble_0_to_1.01.pkl",
53
  cache_dir=MODEL_FOLDER
54
  )
55
-
56
  with open(file_path_1, "rb") as f:
57
  makable_model = pickle.load(f)
58
 
@@ -61,7 +63,6 @@ file_path_2 = hf_hub_download(
61
  filename="models_list/grd/StackingRegressor_best_pipeline_grd_0_to_1.01.pkl",
62
  cache_dir=MODEL_FOLDER
63
  )
64
-
65
  with open(file_path_2, "rb") as f:
66
  grade_model = pickle.load(f)
67
 
@@ -70,7 +71,6 @@ file_path_3 = hf_hub_download(
70
  filename="models_list/bygrad/StackingRegressor_best_pipeline_bygrad_0_to_1.01.pkl",
71
  cache_dir=MODEL_FOLDER
72
  )
73
-
74
  with open(file_path_3, "rb") as f:
75
  bygrade_model = pickle.load(f)
76
 
@@ -79,16 +79,10 @@ file_path_4 = hf_hub_download(
79
  filename="models_list/gia/StackingRegressor_best_pipeline_gia_0_to_1.01.pkl",
80
  cache_dir=MODEL_FOLDER
81
  )
82
-
83
  with open(file_path_4, "rb") as f:
84
  gia_model = pickle.load(f)
85
 
86
- #gia_model = load(os.path.join(MODEL_DIR, 'linear_regression_model_gia_price.joblib'))
87
- #grade_model = load(os.path.join(MODEL_DIR, 'linear_regression_model_grade_price.joblib'))
88
- #bygrade_model = load(os.path.join(MODEL_DIR, 'linear_regression_model_bygrade_price.joblib'))
89
- #makable_model = load(os.path.join(MODEL_DIR, 'linear_regression_model_makable_price.joblib'))
90
-
91
- # classifcation analysis
92
  col_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_col.joblib'))
93
  cts_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_cts.joblib'))
94
  cut_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_cut.joblib'))
@@ -112,20 +106,25 @@ wht_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegr
112
  open_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_open.joblib'))
113
  pav_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_pav.joblib'))
114
 
115
- encoder_list = ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo',
116
- 'EngNts', 'EngMikly', 'EngLab','EngBlk', 'EngWht', 'EngOpen','EngPav',
117
- 'Change_cts_value', 'Change_shape_value', 'Change_quality_value', 'Change_color_value',
118
- 'Change_cut_value', 'Change_Blk_Eng_to_Mkbl_value', 'Change_Wht_Eng_to_Mkbl_value',
119
- 'Change_Open_Eng_to_Mkbl_value', 'Change_Pav_Eng_to_Mkbl_value', 'Change_Blk_Eng_to_Grd_value',
120
- 'Change_Wht_Eng_to_Grd_value', 'Change_Open_Eng_to_Grd_value', 'Change_Pav_Eng_to_Grd_value',
121
- 'Change_Blk_Eng_to_ByGrd_value', 'Change_Wht_Eng_to_ByGrd_value', 'Change_Open_Eng_to_ByGrd_value',
122
- 'Change_Pav_Eng_to_ByGrd_value', 'Change_Blk_Eng_to_Gia_value', 'Change_Wht_Eng_to_Gia_value',
123
- 'Change_Open_Eng_to_Gia_value', 'Change_Pav_Eng_to_Gia_value']
124
-
 
 
 
 
125
  loaded_label_encoder = {}
 
126
  for val in encoder_list:
127
- encoder_path = os.path.join(LABEL_ENOCDER_DIR, f"label_encoder_{val}.joblib")
128
- loaded_label_encoder[val] = load(encoder_path)
129
 
130
  # ------------------------------
131
  # Utility: Allowed File Check
@@ -144,12 +143,12 @@ def index():
144
  def predict():
145
  if 'file' not in request.files:
146
  flash('No file part', 'error')
147
- return redirect(request.url)
148
 
149
  file = request.files['file']
150
  if file.filename == '':
151
  flash('No selected file', 'error')
152
- return redirect(request.url)
153
 
154
  if file and allowed_file(file.filename):
155
  filename = secure_filename(file.filename)
@@ -157,23 +156,27 @@ def predict():
157
  file.save(filepath)
158
 
159
  # Convert file to DataFrame
160
- if filename.endswith('.csv'):
161
- df = pd.read_csv(filepath)
162
- else:
163
- df = pd.read_excel(filepath)
 
 
 
 
164
 
165
  # Process the DataFrame and generate predictions and classification analysis.
166
  df_pred, dx_class = process_dataframe(df)
167
  if df_pred.empty:
168
- print("Processed prediction DataFrame is empty. Check the input file and processing logic.", "error")
169
- return redirect(request.url)
170
-
171
 
172
- # Save output files with a timestamp (you can also store in session if needed)
173
  current_date = pd.Timestamp.now().strftime("%Y-%m-%d")
 
174
  global PRED_OUTPUT_FILE, CLASS_OUTPUT_FILE
175
- PRED_OUTPUT_FILE = f'data/prediction_output_{current_date}.csv'
176
- CLASS_OUTPUT_FILE = f'data/classification_output_{current_date}.csv'
177
  df_pred.to_csv(PRED_OUTPUT_FILE, index=False)
178
  dx_class.to_csv(CLASS_OUTPUT_FILE, index=False)
179
 
@@ -181,11 +184,11 @@ def predict():
181
  return redirect(url_for('report_view', report_type='pred', page=1))
182
  else:
183
  flash('Invalid file type. Only CSV and Excel files are allowed.', 'error')
184
- return redirect(request.url)
185
 
186
  def process_dataframe(df):
187
  try:
188
- # Define the columns needed for two parts
189
  required_columns = ['Tag', 'EngCts', 'EngShp', 'EngQua', 'EngCol', 'EngCut',
190
  'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly', 'EngAmt']
191
  required_columns_2 = required_columns + ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']
@@ -196,7 +199,11 @@ def process_dataframe(df):
196
 
197
  # Transform categorical columns for prediction DataFrame using the label encoders.
198
  for col in ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly']:
199
- df_pred[col] = loaded_label_encoder[col].transform(df_pred[col])
 
 
 
 
200
 
201
  # Update the classification DataFrame with the transformed prediction columns.
202
  for col in ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly']:
@@ -204,16 +211,19 @@ def process_dataframe(df):
204
 
205
  # Transform the extra columns in the classification DataFrame.
206
  for col in ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']:
207
- df_class[col] = loaded_label_encoder[col].transform(df_class[col])
 
 
 
 
208
 
209
- # Convert both DataFrames to float (or handle as needed).
210
  df_pred = df_pred.astype(float)
211
  df_class = df_class.astype(float)
212
 
213
  # -------------------------
214
  # Prediction Report Section
215
  # -------------------------
216
- # Use the prediction DataFrame for price predictions.
217
  x = df_pred.copy()
218
  df_pred['GIA_Predicted'] = gia_model.predict(x)
219
  df_pred['Grade_Predicted'] = grade_model.predict(x)
@@ -227,7 +237,6 @@ def process_dataframe(df):
227
  # -------------------------
228
  # Classification Report Section
229
  # -------------------------
230
- # For classification, use df_class (which has extra columns).
231
  x2 = df_class.copy()
232
  dx = df_pred.copy() # Start with the prediction data.
233
  dx['col_change'] = col_model.predict(x)
@@ -275,7 +284,8 @@ def process_dataframe(df):
275
  dx['Change_Open_Eng_to_Gia_value'] = loaded_label_encoder['Change_Open_Eng_to_Gia_value'].inverse_transform(dx['Change_Open_Eng_to_Gia_value'])
276
  dx['Change_Pav_Eng_to_Gia_value'] = loaded_label_encoder['Change_Pav_Eng_to_Gia_value'].inverse_transform(dx['Change_Pav_Eng_to_Gia_value'])
277
 
278
- return df_pred, dx.head(len(df_pred)) # Return full DataFrames for pagination later.
 
279
  except Exception as e:
280
  flash(f'Error processing file: {e}', 'error')
281
  return pd.DataFrame(), pd.DataFrame()
@@ -285,30 +295,26 @@ def process_dataframe(df):
285
  # ------------------------------
286
  @app.route('/report')
287
  def report_view():
288
- # Get query parameters: report_type (pred or class) and page number.
289
  report_type = request.args.get('report_type', 'pred')
290
  try:
291
  page = int(request.args.get('page', 1))
292
  except ValueError:
293
  page = 1
294
  per_page = 15 # records per page
295
-
296
  # Read the appropriate CSV file.
297
  if report_type == 'pred':
298
  df = pd.read_csv(PRED_OUTPUT_FILE)
299
  else:
300
  df = pd.read_csv(CLASS_OUTPUT_FILE)
301
-
302
- # Calculate pagination indices.
303
  start_idx = (page - 1) * per_page
304
  end_idx = start_idx + per_page
305
  total_records = len(df)
306
 
307
- # Slice the DataFrame for the current page.
308
  df_page = df.iloc[start_idx:end_idx]
309
  table_html = df_page.to_html(classes="data-table", index=False)
310
 
311
- # Determine if previous/next pages exist.
312
  has_prev = page > 1
313
  has_next = end_idx < total_records
314
 
@@ -320,7 +326,7 @@ def report_view():
320
  has_next=has_next)
321
 
322
  # ------------------------------
323
- # Download Routes (remain unchanged)
324
  # ------------------------------
325
  @app.route('/download_pred', methods=['GET'])
326
  def download_pred():
 
2
  import os
3
  import pandas as pd
4
  from werkzeug.utils import secure_filename
5
+ from joblib import load, dump
6
  import numpy as np
7
  from sklearn.preprocessing import LabelEncoder
8
  from time import time
9
  from huggingface_hub import hf_hub_download
10
  import pickle
11
+ import uuid
12
+ from pathlib import Path
13
 
14
  app = Flask(__name__)
15
 
 
23
 
24
  # Define the model directory and label encoder directory
25
  MODEL_DIR = r'./Model'
26
+ LABEL_ENCODER_DIR = r'./Label_encoders' # Renamed for clarity
27
 
28
  # Global file names for outputs; these will be updated per prediction.
29
+ # Note: we now include a unique id to avoid overwriting.
30
+ PRED_OUTPUT_FILE = None
31
+ CLASS_OUTPUT_FILE = None
32
 
33
  ALLOWED_EXTENSIONS = {'csv', 'xlsx'}
34
 
35
+ # Create directories if they do not exist.
36
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
37
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
38
 
39
+ app.config['DATA_FOLDER'] = DATA_FOLDER
40
  os.makedirs(app.config['DATA_FOLDER'], exist_ok=True)
41
+
42
  os.makedirs("data", exist_ok=True)
43
 
44
+ app.config['MODEL_FOLDER'] = MODEL_FOLDER
45
  os.makedirs(app.config['MODEL_FOLDER'], exist_ok=True)
46
 
47
 
 
49
  # Load Models and Label Encoders
50
  # ------------------------------
51
 
52
+ # Prediction analysis models loaded from Hugging Face.
 
53
  file_path_1 = hf_hub_download(
54
  repo_id="WebashalarForML/Diamond_model_",
55
  filename="models_list/mkble/StackingRegressor_best_pipeline_mkble_0_to_1.01.pkl",
56
  cache_dir=MODEL_FOLDER
57
  )
 
58
  with open(file_path_1, "rb") as f:
59
  makable_model = pickle.load(f)
60
 
 
63
  filename="models_list/grd/StackingRegressor_best_pipeline_grd_0_to_1.01.pkl",
64
  cache_dir=MODEL_FOLDER
65
  )
 
66
  with open(file_path_2, "rb") as f:
67
  grade_model = pickle.load(f)
68
 
 
71
  filename="models_list/bygrad/StackingRegressor_best_pipeline_bygrad_0_to_1.01.pkl",
72
  cache_dir=MODEL_FOLDER
73
  )
 
74
  with open(file_path_3, "rb") as f:
75
  bygrade_model = pickle.load(f)
76
 
 
79
  filename="models_list/gia/StackingRegressor_best_pipeline_gia_0_to_1.01.pkl",
80
  cache_dir=MODEL_FOLDER
81
  )
 
82
  with open(file_path_4, "rb") as f:
83
  gia_model = pickle.load(f)
84
 
85
+ # Classification models loaded using joblib.
 
 
 
 
 
86
  col_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_col.joblib'))
87
  cts_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_cts.joblib'))
88
  cut_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_cut.joblib'))
 
106
  open_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_open.joblib'))
107
  pav_eng_to_gia_model = load(os.path.join(MODEL_DIR, 'classification_LogisticRegression_gia_pav.joblib'))
108
 
109
+ # List of label encoder names.
110
+ encoder_list = [
111
+ 'Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo',
112
+ 'EngNts', 'EngMikly', 'EngLab','EngBlk', 'EngWht', 'EngOpen','EngPav',
113
+ 'Change_cts_value', 'Change_shape_value', 'Change_quality_value', 'Change_color_value',
114
+ 'Change_cut_value', 'Change_Blk_Eng_to_Mkbl_value', 'Change_Wht_Eng_to_Mkbl_value',
115
+ 'Change_Open_Eng_to_Mkbl_value', 'Change_Pav_Eng_to_Mkbl_value', 'Change_Blk_Eng_to_Grd_value',
116
+ 'Change_Wht_Eng_to_Grd_value', 'Change_Open_Eng_to_Grd_value', 'Change_Pav_Eng_to_Grd_value',
117
+ 'Change_Blk_Eng_to_ByGrd_value', 'Change_Wht_Eng_to_ByGrd_value', 'Change_Open_Eng_to_ByGrd_value',
118
+ 'Change_Pav_Eng_to_ByGrd_value', 'Change_Blk_Eng_to_Gia_value', 'Change_Wht_Eng_to_Gia_value',
119
+ 'Change_Open_Eng_to_Gia_value', 'Change_Pav_Eng_to_Gia_value'
120
+ ]
121
+
122
+ # Load label encoders using pathlib for cleaner path management.
123
  loaded_label_encoder = {}
124
+ enc_path = Path(LABEL_ENCODER_DIR)
125
  for val in encoder_list:
126
+ encoder_file = enc_path / f"label_encoder_{val}.joblib"
127
+ loaded_label_encoder[val] = load(encoder_file)
128
 
129
  # ------------------------------
130
  # Utility: Allowed File Check
 
143
  def predict():
144
  if 'file' not in request.files:
145
  flash('No file part', 'error')
146
+ return redirect(url_for('index'))
147
 
148
  file = request.files['file']
149
  if file.filename == '':
150
  flash('No selected file', 'error')
151
+ return redirect(url_for('index'))
152
 
153
  if file and allowed_file(file.filename):
154
  filename = secure_filename(file.filename)
 
156
  file.save(filepath)
157
 
158
  # Convert file to DataFrame
159
+ try:
160
+ if filename.endswith('.csv'):
161
+ df = pd.read_csv(filepath)
162
+ else:
163
+ df = pd.read_excel(filepath)
164
+ except Exception as e:
165
+ flash(f'Error reading file: {e}', 'error')
166
+ return redirect(url_for('index'))
167
 
168
  # Process the DataFrame and generate predictions and classification analysis.
169
  df_pred, dx_class = process_dataframe(df)
170
  if df_pred.empty:
171
+ flash("Processed prediction DataFrame is empty. Check the input file and processing logic.", "error")
172
+ return redirect(url_for('index'))
 
173
 
174
+ # Save output files with a timestamp and unique id.
175
  current_date = pd.Timestamp.now().strftime("%Y-%m-%d")
176
+ unique_id = uuid.uuid4().hex[:8]
177
  global PRED_OUTPUT_FILE, CLASS_OUTPUT_FILE
178
+ PRED_OUTPUT_FILE = f'data/prediction_output_{current_date}_{unique_id}.csv'
179
+ CLASS_OUTPUT_FILE = f'data/classification_output_{current_date}_{unique_id}.csv'
180
  df_pred.to_csv(PRED_OUTPUT_FILE, index=False)
181
  dx_class.to_csv(CLASS_OUTPUT_FILE, index=False)
182
 
 
184
  return redirect(url_for('report_view', report_type='pred', page=1))
185
  else:
186
  flash('Invalid file type. Only CSV and Excel files are allowed.', 'error')
187
+ return redirect(url_for('index'))
188
 
189
  def process_dataframe(df):
190
  try:
191
+ # Define the columns needed for two parts.
192
  required_columns = ['Tag', 'EngCts', 'EngShp', 'EngQua', 'EngCol', 'EngCut',
193
  'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly', 'EngAmt']
194
  required_columns_2 = required_columns + ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']
 
199
 
200
  # Transform categorical columns for prediction DataFrame using the label encoders.
201
  for col in ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly']:
202
+ try:
203
+ df_pred[col] = loaded_label_encoder[col].transform(df_pred[col])
204
+ except ValueError as e:
205
+ flash(f'Invalid value in column {col}: {e}', 'error')
206
+ return pd.DataFrame(), pd.DataFrame()
207
 
208
  # Update the classification DataFrame with the transformed prediction columns.
209
  for col in ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo', 'EngNts', 'EngMikly']:
 
211
 
212
  # Transform the extra columns in the classification DataFrame.
213
  for col in ['EngBlk', 'EngWht', 'EngOpen', 'EngPav']:
214
+ try:
215
+ df_class[col] = loaded_label_encoder[col].transform(df_class[col])
216
+ except ValueError as e:
217
+ flash(f'Invalid value in column {col}: {e}', 'error')
218
+ return pd.DataFrame(), pd.DataFrame()
219
 
220
+ # Convert both DataFrames to float.
221
  df_pred = df_pred.astype(float)
222
  df_class = df_class.astype(float)
223
 
224
  # -------------------------
225
  # Prediction Report Section
226
  # -------------------------
 
227
  x = df_pred.copy()
228
  df_pred['GIA_Predicted'] = gia_model.predict(x)
229
  df_pred['Grade_Predicted'] = grade_model.predict(x)
 
237
  # -------------------------
238
  # Classification Report Section
239
  # -------------------------
 
240
  x2 = df_class.copy()
241
  dx = df_pred.copy() # Start with the prediction data.
242
  dx['col_change'] = col_model.predict(x)
 
284
  dx['Change_Open_Eng_to_Gia_value'] = loaded_label_encoder['Change_Open_Eng_to_Gia_value'].inverse_transform(dx['Change_Open_Eng_to_Gia_value'])
285
  dx['Change_Pav_Eng_to_Gia_value'] = loaded_label_encoder['Change_Pav_Eng_to_Gia_value'].inverse_transform(dx['Change_Pav_Eng_to_Gia_value'])
286
 
287
+ # Final return with full data for pagination.
288
+ return df_pred, dx.head(len(df_pred))
289
  except Exception as e:
290
  flash(f'Error processing file: {e}', 'error')
291
  return pd.DataFrame(), pd.DataFrame()
 
295
  # ------------------------------
296
  @app.route('/report')
297
  def report_view():
 
298
  report_type = request.args.get('report_type', 'pred')
299
  try:
300
  page = int(request.args.get('page', 1))
301
  except ValueError:
302
  page = 1
303
  per_page = 15 # records per page
304
+
305
  # Read the appropriate CSV file.
306
  if report_type == 'pred':
307
  df = pd.read_csv(PRED_OUTPUT_FILE)
308
  else:
309
  df = pd.read_csv(CLASS_OUTPUT_FILE)
310
+
 
311
  start_idx = (page - 1) * per_page
312
  end_idx = start_idx + per_page
313
  total_records = len(df)
314
 
 
315
  df_page = df.iloc[start_idx:end_idx]
316
  table_html = df_page.to_html(classes="data-table", index=False)
317
 
 
318
  has_prev = page > 1
319
  has_next = end_idx < total_records
320
 
 
326
  has_next=has_next)
327
 
328
  # ------------------------------
329
+ # Download Routes
330
  # ------------------------------
331
  @app.route('/download_pred', methods=['GET'])
332
  def download_pred():