from flask import Flask, render_template, request, redirect, url_for, send_file

import os
import shutil
import pickle
import uuid
from pathlib import Path
from time import time

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

from werkzeug.utils import secure_filename
from joblib import load, dump
from huggingface_hub import hf_hub_download

# Modelling libraries (not all are referenced directly in this app).
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, PowerTransformer, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.metrics import mean_squared_error
from sklearn import metrics
from xgboost import XGBRegressor
import lightgbm as lgb
from catboost import CatBoostRegressor

app = Flask(__name__)
app.secret_key = os.urandom(24)

# Folder layout: uploaded spreadsheets, generated report CSVs, and downloaded model files.
UPLOAD_FOLDER = "uploads/"
DATA_FOLDER = "data/"
MODEL_FOLDER = "models/"

MODEL_DIR = r'./Model'
LABEL_ENCODER_DIR = r'./Label_encoders'

# Paths of the most recently generated report files (set on each /predict request).
PRED_OUTPUT_FILE = None
CLASS_OUTPUT_FILE = None

ALLOWED_EXTENSIONS = {'csv', 'xlsx'}

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['DATA_FOLDER'] = DATA_FOLDER
app.config['MODEL_FOLDER'] = MODEL_FOLDER

for folder in (UPLOAD_FOLDER, DATA_FOLDER, MODEL_FOLDER):
    os.makedirs(folder, exist_ok=True)

def fetch_model(hub_filename):
    """Download a pickled pipeline from the Hugging Face Hub and load it with joblib.

    hf_hub_download caches the file under MODEL_FOLDER and returns its local path;
    the copy gives each pipeline a stable, flat filename inside MODEL_FOLDER.
    """
    src_path = hf_hub_download(
        repo_id="WebashalarForML/Diamond_model_",
        filename=hub_filename,
        cache_dir=MODEL_FOLDER
    )
    dst_path = os.path.join(MODEL_FOLDER, os.path.basename(hub_filename))
    shutil.copy(src_path, dst_path)
    return load(dst_path)


# Regression pipelines for three EngCts (carat) ranges of makable stones.
makable_model_0 = fetch_model("models_list/mkble/DecisionTree_best_pipeline_mkble_0_to_0.99_al.pkl")
makable_model_1 = fetch_model("models_list/mkble/DecisionTree_best_pipeline_mkble_1_to_1.49.pkl")
makable_model_2 = fetch_model("models_list/mkble/DecisionTree_best_pipeline_mkble_1.50_to_1.99.pkl")

# Pipeline that predicts the change-in-amount feature consumed by the regression models.
mkble_amt_class_model = fetch_model("models_list/classification/3_pipeline.pkl")

print("================================")
print("mkble_amt_class_model type:", type(mkble_amt_class_model))

# Categorical and engineered change columns that have fitted LabelEncoders on disk.
encoder_list = [
    'Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym', 'EngFlo',
    'EngNts', 'EngMikly', 'EngLab', 'EngBlk', 'EngWht', 'EngOpen', 'EngPav',
    'Change_cts_value', 'Change_shape_value', 'Change_quality_value', 'Change_color_value',
    'Change_cut_value', 'Change_Blk_Eng_to_Mkbl_value', 'Change_Wht_Eng_to_Mkbl_value',
    'Change_Open_Eng_to_Mkbl_value', 'Change_Pav_Eng_to_Mkbl_value', 'Change_Blk_Eng_to_Grd_value',
    'Change_Wht_Eng_to_Grd_value', 'Change_Open_Eng_to_Grd_value', 'Change_Pav_Eng_to_Grd_value',
    'Change_Blk_Eng_to_ByGrd_value', 'Change_Wht_Eng_to_ByGrd_value', 'Change_Open_Eng_to_ByGrd_value',
    'Change_Pav_Eng_to_ByGrd_value', 'Change_Blk_Eng_to_Gia_value', 'Change_Wht_Eng_to_Gia_value',
    'Change_Open_Eng_to_Gia_value', 'Change_Pav_Eng_to_Gia_value'
]

# Load one fitted LabelEncoder per column from LABEL_ENCODER_DIR.
loaded_label_encoder = {}
enc_path = Path(LABEL_ENCODER_DIR)
for val in encoder_list:
    encoder_file = enc_path / f"label_encoder_{val}.joblib"
    loaded_label_encoder[val] = load(encoder_file)

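# Illustrative sanity check (commented out; not part of the request flow):
#   enc = loaded_label_encoder['EngShp']
#   print(enc.classes_)                      # categories the encoder was fitted on
#   print(enc.transform([enc.classes_[0]]))  # encode one known category
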
def allowed_file(filename):
    """Return True if the filename has an allowed extension (csv or xlsx)."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/')
def index():
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    if 'file' not in request.files:
        print('No file part', 'error')
        return redirect(url_for('index'))

    file = request.files['file']
    if file.filename == '':
        print('No selected file', 'error')
        return redirect(url_for('index'))

    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(filepath)

        # Read the upload into a DataFrame (CSV or Excel).
        try:
            if filename.endswith('.csv'):
                df = pd.read_csv(filepath)
            else:
                df = pd.read_excel(filepath)
        except Exception as e:
            print(f'Error reading file: {e}', 'error')
            return redirect(url_for('index'))

        df_pred, dx_class = process_dataframe(df)
        if df_pred.empty:
            print("Processed prediction DataFrame is empty. Check the input file and processing logic.", "error")
            return redirect(url_for('index'))

        # Write both reports to uniquely named CSVs and remember their paths for
        # the /report and /download_* routes.
        current_date = pd.Timestamp.now().strftime("%Y-%m-%d")
        unique_id = uuid.uuid4().hex[:8]
        global PRED_OUTPUT_FILE, CLASS_OUTPUT_FILE
        PRED_OUTPUT_FILE = f'data/prediction_output_{current_date}_{unique_id}.csv'
        CLASS_OUTPUT_FILE = f'data/classification_output_{current_date}_{unique_id}.csv'
        df_pred.to_csv(PRED_OUTPUT_FILE, index=False)
        dx_class.to_csv(CLASS_OUTPUT_FILE, index=False)

        return redirect(url_for('report_view', report_type='pred', page=1))
    else:
        print('Invalid file type. Only CSV and Excel files are allowed.', 'error')
        return redirect(url_for('index'))

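# Expected upload columns (names taken from required_columns below; values not shown):
#   Tag, EngCts, EngShp, EngQua, EngCol, EngCut, EngPol, EngSym, EngFlo, EngNts,
#   EngMikly, EngBlk, EngWht, EngOpen, EngPav, EngAmt
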
def process_dataframe(df):
    """Encode the raw upload, run the per-carat-range pipelines, and build the report frame."""
    try:
        required_columns = ['Tag', 'EngCts', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol',
                            'EngSym', 'EngFlo', 'EngNts', 'EngMikly', 'EngBlk', 'EngWht', 'EngOpen',
                            'EngPav', 'EngAmt']
        required_columns_2 = ['Tag', 'EngCts', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol',
                              'EngSym', 'EngFlo', 'EngNts', 'EngMikly', 'EngAmt']

        categorical_cols = ['Tag', 'EngShp', 'EngQua', 'EngCol', 'EngCut', 'EngPol', 'EngSym',
                            'EngFlo', 'EngNts', 'EngMikly', 'EngBlk', 'EngWht', 'EngOpen', 'EngPav']

        df_pred = df[required_columns].copy()
        df_pred[['EngBlk', 'EngWht', 'EngOpen', 'EngPav']] = df_pred[['EngBlk', 'EngWht', 'EngOpen', 'EngPav']].fillna("NA")
        df_class = df[required_columns_2].fillna("NA").copy()

        # Encode every categorical column with its fitted LabelEncoder.
        for col in categorical_cols:
            try:
                df_pred[col] = loaded_label_encoder[col].transform(df_pred[col])
            except ValueError as e:
                print(f'Invalid value in column {col}: {e}', 'error')
                return pd.DataFrame(), pd.DataFrame()

        # Reuse the encoded values for the columns shared with the classification frame.
        for col in categorical_cols[:10]:
            df_class[col] = df_pred[col]

        df_pred = df_pred.astype(float)
        df_class = df_class.astype(float)

        def predict_bucket(lower, upper, model):
            """Run the change-in-amount and price pipelines on rows with lower < EngCts < upper."""
            bucket = df_pred[(df_pred['EngCts'] > lower) & (df_pred['EngCts'] < upper)].copy()
            # Assign the raw prediction arrays so values line up positionally with the
            # filtered rows (wrapping them in a DataFrame would realign on the index).
            bucket['change_in_amt_mkble'] = mkble_amt_class_model.predict(bucket)
            bucket = bucket[required_columns + ['change_in_amt_mkble']]
            bucket['Makable_Predicted'] = np.expm1(model.predict(bucket))
            return bucket

        try:
            # Carat ranges mirror the model filenames: 0-0.99, 1-1.49, 1.50-1.99.
            df_pred_0 = predict_bucket(0.00, 0.99, makable_model_0)
            df_pred_1 = predict_bucket(0.99, 1.50, makable_model_1)
            df_pred_2 = predict_bucket(1.49, 2.00, makable_model_2)

            df_pred_main = pd.concat([df_pred_0, df_pred_1, df_pred_2])
            df_pred_main['Makable_Diff'] = df_pred_main['EngAmt'] - df_pred_main['Makable_Predicted']

            # Map the encoded categorical columns back to their original labels.
            for col in categorical_cols:
                try:
                    df_pred_main[col] = loaded_label_encoder[col].inverse_transform(df_pred_main[col].astype(int))
                except ValueError as e:
                    print(f'inverse transform failed for column {col}: {e}', 'error')

        except ValueError as e:
            print(f'pred model error----->: {e}', 'error')

        # The same frame currently backs both the prediction and classification reports.
        return df_pred_main, df_pred_main
    except Exception as e:
        print(f'Error processing file: {e}', 'error')
        return pd.DataFrame(), pd.DataFrame()

@app.route('/report')
def report_view():
    report_type = request.args.get('report_type', 'pred')
    try:
        page = int(request.args.get('page', 1))
    except ValueError:
        page = 1
    per_page = 15

    # Nothing to show until a file has been processed by /predict.
    output_file = PRED_OUTPUT_FILE if report_type == 'pred' else CLASS_OUTPUT_FILE
    if output_file is None:
        return redirect(url_for('index'))

    df = pd.read_csv(output_file)

    # Paginate the report, per_page rows at a time.
    start_idx = (page - 1) * per_page
    end_idx = start_idx + per_page
    total_records = len(df)

    df_page = df.iloc[start_idx:end_idx]
    table_html = df_page.to_html(classes="data-table", index=False)

    has_prev = page > 1
    has_next = end_idx < total_records

    return render_template('output.html',
                           table_html=table_html,
                           report_type=report_type,
                           page=page,
                           has_prev=has_prev,
                           has_next=has_next)

@app.route('/download_pred', methods=['GET'])
def download_pred():
    return send_file(PRED_OUTPUT_FILE, as_attachment=True)


@app.route('/download_class', methods=['GET'])
def download_class():
    return send_file(CLASS_OUTPUT_FILE, as_attachment=True)


if __name__ == "__main__":
    app.run(debug=True)
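
# Example workflow (assumed, based on the routes above):
#   $ python app.py
#   Open http://127.0.0.1:5000/, upload a .csv or .xlsx file via the index page form
#   (POST to /predict), browse results at /report?report_type=pred&page=1, and
#   download them from /download_pred or /download_class.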