jy395 committed on
Commit
40ca8d4
·
verified ·
1 Parent(s): 5736d58

Create train.py

Browse files
Files changed (1) hide show
  1. train.py +70 -0
train.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train, evaluate and persist a drug-classification pipeline.

Steps, in order:
  1. Read ``Data/drug.csv`` and shuffle the rows.
  2. Split into train/test sets (70/30).
  3. Fit a preprocessing + random-forest pipeline on the training set.
  4. Report accuracy and macro F1 on the test set.
  5. Save a confusion-matrix plot and a metrics file under ``Results/``.
  6. Serialize the fitted pipeline to ``Model/drug_pipeline.skops``.

The ``Data/``, ``Results/`` and ``Model/`` directories are expected to exist
relative to the working directory; this script does not create them.
"""

import matplotlib.pyplot as plt
import pandas as pd
import skops.io as sio
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    confusion_matrix,
    f1_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder, StandardScaler

## Loading the Data
drug_df = pd.read_csv("Data/drug.csv")
# Seed the shuffle as well: the split and the model are already seeded with
# 125, and an unseeded shuffle would still make every run produce different
# train/test sets and therefore different metrics.
drug_df = drug_df.sample(frac=1, random_state=125)

## Train Test Split
# ``.values`` yields plain arrays, so the columns are addressed by position
# (not by name) in the ColumnTransformer below.
X = drug_df.drop("Drug", axis=1).values
y = drug_df.Drug.values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=125
)


## Pipeline
# Positional indices into X (the frame with "Drug" removed).
# NOTE(review): assumes columns 1-3 are categorical and 0/4 numeric --
# confirm against the header of Data/drug.csv.
cat_col = [1, 2, 3]
num_col = [0, 4]

# ColumnTransformer runs each of its entries independently on the ORIGINAL
# input and concatenates the outputs. Listing the imputer and the scaler as
# two separate entries would therefore emit the numeric columns twice
# (imputed-but-unscaled and scaled-but-unimputed). Chaining them in a nested
# Pipeline gives the intended "impute, then scale" behaviour.
num_pipeline = Pipeline(
    steps=[
        ("num_imputer", SimpleImputer(strategy="median")),
        ("num_scaler", StandardScaler()),
    ]
)

transform = ColumnTransformer(
    [
        ("encoder", OrdinalEncoder(), cat_col),
        ("num", num_pipeline, num_col),
    ]
)
pipe = Pipeline(
    steps=[
        ("preprocessing", transform),
        ("model", RandomForestClassifier(n_estimators=10, random_state=125)),
    ]
)

## Training
pipe.fit(X_train, y_train)


## Model Evaluation
predictions = pipe.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
f1 = f1_score(y_test, predictions, average="macro")

# Format the percentage with a fixed precision: ``round(x, 2) * 100`` alone
# can surface float artefacts such as 56.99999999999999.
print("Accuracy:", f"{round(accuracy, 2) * 100:.1f}%", "F1:", round(f1, 2))


## Confusion Matrix Plot
# Reuses ``predictions`` from the evaluation step; predicting again on the
# same fitted pipeline and data would return the same labels.
cm = confusion_matrix(y_test, predictions, labels=pipe.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=pipe.classes_)
disp.plot()
plt.savefig("./Results/model_results.png", dpi=120)

## Write metrics to file
with open("./Results/metrics.txt", "w") as outfile:
    outfile.write(f"\nAccuracy = {round(accuracy, 2)}, F1 Score = {round(f1, 2)}")

## Saving the model file
sio.dump(pipe, "./Model/drug_pipeline.skops")