import argparse
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import PowerTransformer


def train(dataset: pd.DataFrame) -> dict:
    """Train a random-forest classifier that predicts the ``Downtime`` column.

    The input frame is left untouched. Only the six sensor columns listed
    below plus ``Downtime`` are used; rows with a missing value in any of
    them are dropped. The data is split 80/20 (stratified on the target,
    ``random_state=42``), the features are power-transformed and the target
    is label-encoded, with both preprocessors fitted on the training split
    only.

    Args:
        dataset: Frame containing at least the columns ``Torque(Nm)``,
            ``Hydraulic_Pressure(bar)``, ``Cutting(kN)``,
            ``Coolant_Pressure(bar)``, ``Spindle_Speed(RPM)``,
            ``Coolant_Temperature`` and ``Downtime``.

    Returns:
        A dict with the keys:

        * ``"model"`` - the fitted ``RandomForestClassifier``.
        * ``"encoder"`` - the ``LabelEncoder`` fitted on the training target.
        * ``"transform"`` - the ``PowerTransformer`` fitted on the training
          features.
        * ``"Accuracy"`` - test-split accuracy as a string with four
          decimals.
        * ``"F1_Score"`` - test-split F1 score as a string with four
          decimals.

    Raises:
        KeyError: If any of the required columns is missing from ``dataset``.
    """
    target = "Downtime"
    columns = [
        "Torque(Nm)",
        "Hydraulic_Pressure(bar)",
        "Cutting(kN)",
        "Coolant_Pressure(bar)",
        "Spindle_Speed(RPM)",
        "Coolant_Temperature",
        target,
    ]

    # Selecting with a list and then calling dropna() both return new frames,
    # so the caller's data is never modified and no in-place mutation of a
    # derived frame (SettingWithCopyWarning) is needed.
    df = dataset[columns].dropna().reset_index(drop=True)

    X = df.drop(columns=target)
    y = df[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=42, stratify=y
    )

    # Fit the preprocessors on the training split only so that no statistics
    # from the held-out rows leak into training.
    transformer = PowerTransformer()
    X_train = transformer.fit_transform(X_train)
    X_test = transformer.transform(X_test)

    encoder = LabelEncoder()
    y_train = encoder.fit_transform(y_train)
    y_test = encoder.transform(y_test)

    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # NOTE(review): f1_score is called with its default averaging, which
    # presumably expects a two-class target - confirm that "Downtime" is
    # binary.
    accuracy = accuracy_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)

    return {
        "model": model,
        "encoder": encoder,
        "transform": transformer,
        # ".4f" gives four decimals; the previous "4f" spec only set a
        # minimum field width and printed the default six decimals.
        "Accuracy": f"{accuracy:.4f}",
        "F1_Score": f"{f1:.4f}",
    }