|
import os |
|
import numpy as np |
|
import pandas as pd |
|
from sklearn.metrics import accuracy_score |
|
from sklearn.preprocessing import LabelEncoder |
|
from sklearn.preprocessing import PowerTransformer |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.ensemble import RandomForestClassifier |
|
from sklearn.metrics import f1_score |
|
import argparse |
|
import joblib |
|
|
|
def train(dataset_pth):
    """Train a random-forest downtime classifier and persist its artifacts.

    Reads the CSV at ``dataset_pth``, keeps six sensor columns plus the
    ``Downtime`` target, drops rows containing missing values, and makes a
    stratified 80/20 train/test split. A ``PowerTransformer`` is fitted on
    the training features, a ``LabelEncoder`` on the training labels, and a
    ``RandomForestClassifier`` on the transformed training data.

    The fitted transformer, encoder and model are written with ``joblib`` to
    ``transform.pkl``, ``encoder.pkl`` and ``model.pkl`` inside an ``app``
    directory under the current working directory; the directory is created
    if it does not exist yet.

    Args:
        dataset_pth: Location of the CSV file, passed to ``pandas.read_csv``.

    Returns:
        dict: ``{"Accuracy": ..., "F1_Score": ...}`` computed on the
        held-out 20 % split.

    Raises:
        KeyError: If any of the required columns is missing from the CSV.

    Note:
        ``f1_score`` is called with scikit-learn's default (binary)
        averaging, so a ``Downtime`` column with more than two classes makes
        the metric computation fail.
    """
    target = "Downtime"
    features = [
        "Torque(Nm)",
        "Hydraulic_Pressure(bar)",
        "Cutting(kN)",
        "Coolant_Pressure(bar)",
        "Spindle_Speed(RPM)",
        "Coolant_Temperature",
        target,
    ]

    df = pd.read_csv(dataset_pth)
    # Non-inplace dropna returns a fresh frame instead of mutating the
    # column subset in place (which triggers chained-assignment warnings).
    df = df[features].dropna(ignore_index=True)

    X = df.drop(target, axis=1)
    y = df[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=42, stratify=y
    )

    # Fit preprocessing on the training split only to avoid leaking
    # test-set statistics into the model.
    transform = PowerTransformer()
    X_train = transform.fit_transform(X_train)
    X_test = transform.transform(X_test)

    encoder = LabelEncoder()
    y_train = encoder.fit_transform(y_train)
    y_test = encoder.transform(y_test)

    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    predict = model.predict(X_test)

    # Make sure the output directory exists; otherwise joblib.dump raises
    # FileNotFoundError only after the whole training run has completed.
    artifact_dir = os.path.join(os.getcwd(), "app")
    os.makedirs(artifact_dir, exist_ok=True)

    joblib.dump(transform, os.path.join(artifact_dir, "transform.pkl"))
    joblib.dump(encoder, os.path.join(artifact_dir, "encoder.pkl"))
    joblib.dump(model, os.path.join(artifact_dir, "model.pkl"))

    return {
        "Accuracy": accuracy_score(y_test, predict),
        "F1_Score": f1_score(y_test, predict),
    }
|
|
|
if __name__ == "__main__":
    # Script entry point: read the dataset location from the command line,
    # run training, then report the evaluation metrics on stdout.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--dataset_pth",
        default="/home/sudhanshu/manufacturing_defect/Manufacturing_Downtime_Dataset.csv",
    )
    options = cli.parse_args()

    metrics = train(options.dataset_pth)

    # The accuracy line carries an extra newline, leaving a blank line
    # between the two reported metrics.
    print(f"Accuracy: {metrics['Accuracy']}\n")
    print(f"F1_Score: {metrics['F1_Score']}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|