Lrosado committed
Commit 3cd93ff · verified · 1 Parent(s): 994f4ca

Upload 4 files

Files changed (4)
  1. app.py +78 -0
  2. model.joblib +3 -0
  3. requirements.txt +2 -0
  4. train.py +80 -0
app.py ADDED
@@ -0,0 +1,78 @@
+ import os
+ import uuid
+ import joblib
+ import json
+
+ import gradio as gr
+ import pandas as pd
+
+ from huggingface_hub import CommitScheduler
+ from pathlib import Path
+
+ log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
+ log_folder = log_file.parent
+
+ scheduler = CommitScheduler(
+     repo_id="machine-failure-logs",
+     repo_type="dataset",
+     folder_path=log_folder,
+     path_in_repo="data",
+     every=2
+ )
+
+ machine_failure_predictor = joblib.load('model.joblib')
+
+ air_temperature_input = gr.Number(label='Air temperature [K]')
+ process_temperature_input = gr.Number(label='Process temperature [K]')
+ rotational_speed_input = gr.Number(label='Rotational speed [rpm]')
+ torque_input = gr.Number(label='Torque [Nm]')
+ tool_wear_input = gr.Number(label='Tool wear [min]')
+ type_input = gr.Dropdown(
+     ['L', 'M', 'H'],
+     label='Type'
+ )
+
+ model_output = gr.Label(label="Machine failure")
+
+ def predict_machine_failure(air_temperature, process_temperature, rotational_speed, torque, tool_wear, type):
+     sample = {
+         'Air temperature [K]': air_temperature,
+         'Process temperature [K]': process_temperature,
+         'Rotational speed [rpm]': rotational_speed,
+         'Torque [Nm]': torque,
+         'Tool wear [min]': tool_wear,
+         'Type': type
+     }
+     data_point = pd.DataFrame([sample])
+     prediction = machine_failure_predictor.predict(data_point).tolist()
+
+     with scheduler.lock:
+         with log_file.open("a") as f:
+             f.write(json.dumps(
+                 {
+                     'Air temperature [K]': air_temperature,
+                     'Process temperature [K]': process_temperature,
+                     'Rotational speed [rpm]': rotational_speed,
+                     'Torque [Nm]': torque,
+                     'Tool wear [min]': tool_wear,
+                     'Type': type,
+                     'prediction': prediction[0]
+                 }
+             ))
+             f.write("\n")
+
+     return prediction[0]
+
+ demo = gr.Interface(
+     fn=predict_machine_failure,
+     inputs=[air_temperature_input, process_temperature_input, rotational_speed_input,
+             torque_input, tool_wear_input, type_input],
+     outputs=model_output,
+     title="Machine Failure Predictor",
+     description="This API allows you to predict the machine failure status of a piece of equipment",
+     allow_flagging="auto",
+     concurrency_limit=8
+ )
+
+ demo.queue()
+ demo.launch(share=False)
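
Each prediction the app serves is appended to a JSONL file under logs/, and the CommitScheduler pushes that folder to the dataset repo every two minutes. A minimal sketch of how the accumulated logs could be read back for analysis, assuming the scheduler's bare repo_id above resolves under the owner's namespace (the "<owner>/" prefix is an assumption, not part of app.py):

    import pandas as pd
    from pathlib import Path
    from huggingface_hub import snapshot_download

    # Download the dataset repo that CommitScheduler pushes to.
    local_dir = snapshot_download(repo_id="<owner>/machine-failure-logs", repo_type="dataset")

    # Each app replica writes its own data_<uuid>.json file under data/;
    # concatenate every log file into one DataFrame.
    frames = [pd.read_json(p, lines=True) for p in Path(local_dir).glob("data/*.json")]
    logs = pd.concat(frames, ignore_index=True)
    print(logs.tail())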
model.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c3c382c7233f0463a9c2698c7190fa6a89f2704433ae79735d9a6a1acfb5529
+ size 8439
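
The model file itself is stored through Git LFS, so the commit records only this pointer: the oid is the SHA-256 digest of the real 8,439-byte artifact. A quick integrity check on a downloaded copy, as a sketch:

    import hashlib
    from pathlib import Path

    # The expected digest is the oid from the LFS pointer above.
    expected = "6c3c382c7233f0463a9c2698c7190fa6a89f2704433ae79735d9a6a1acfb5529"
    digest = hashlib.sha256(Path("model.joblib").read_bytes()).hexdigest()
    print("match:", digest == expected)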
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ scikit-learn==1.2.2
+ numpy==1.26.4
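
Pinning these matters because joblib artifacts are tied to the scikit-learn version that serialized them; loading model.joblib under a different release can fail or warn about incompatible pickles. A quick runtime check that the environment matches the pins, as a sketch (gradio, pandas, and huggingface_hub are assumed to come preinstalled with the Space image, since they are not pinned here):

    import sklearn
    import numpy

    # Expect 1.2.2 and 1.26.4 per requirements.txt.
    print(sklearn.__version__, numpy.__version__)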
train.py ADDED
@@ -0,0 +1,80 @@
+
+ import joblib
+ import pandas as pd
+
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
+ from sklearn.compose import make_column_transformer
+ from sklearn.impute import SimpleImputer
+ from sklearn.pipeline import Pipeline
+ from sklearn.pipeline import make_pipeline
+
+ from sklearn.model_selection import train_test_split, RandomizedSearchCV
+
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.metrics import accuracy_score, classification_report
+
+ data_df = pd.read_csv("Bank_Telemarketing.csv")
+
+ target = 'subscribed'
+ numerical_features = ['Age', 'Duration(Sec)', 'CC Contact Freq', 'Days Since PC', 'PC Contact Freq']
+ categorical_features = ['Job', 'Marital Status', 'Education', 'Defaulter', 'Home Loan',
+                         'Personal Loan', 'Communication Type', 'Last Contacted', 'Day of Week',
+                         'PC Outcome']
+
+ print("Creating data subsets")
+
+ X = data_df[numerical_features + categorical_features]
+ y = data_df[target]
+
+ Xtrain, Xtest, ytrain, ytest = train_test_split(
+     X, y,
+     test_size=0.2,
+     random_state=42
+ )
+
+ numerical_pipeline = Pipeline([
+     ('imputer', SimpleImputer(strategy='median')),
+     ('scaler', StandardScaler())
+ ])
+
+ categorical_pipeline = Pipeline([
+     ('imputer', SimpleImputer(strategy='most_frequent')),
+     ('onehot', OneHotEncoder(handle_unknown='ignore'))
+ ])
+
+ preprocessor = make_column_transformer(
+     (numerical_pipeline, numerical_features),
+     (categorical_pipeline, categorical_features)
+ )
+
+ model_logistic_regression = LogisticRegression(n_jobs=-1)
+
+ print("Estimating Best Model Pipeline")
+
+ model_pipeline = make_pipeline(
+     preprocessor,
+     model_logistic_regression
+ )
+
+ param_distribution = {
+     "logisticregression__C": [0.001, 0.01, 0.1, 0.5, 1, 5, 10]
+ }
+
+ rand_search_cv = RandomizedSearchCV(
+     model_pipeline,
+     param_distribution,
+     n_iter=3,
+     cv=3,
+     random_state=42
+ )
+
+ rand_search_cv.fit(Xtrain, ytrain)
+
+ print("Logging Metrics")
+ print(f"Accuracy: {rand_search_cv.best_score_}")
+
+ print("Serializing Model")
+
+ saved_model_path = "model.joblib"
+
+ joblib.dump(rand_search_cv.best_estimator_, saved_model_path)
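
A minimal smoke test for the serialized pipeline, as a sketch: reload model.joblib and predict on a few rows, selecting exactly the feature columns train.py was fitted on (the CSV is assumed to be available locally):

    import joblib
    import pandas as pd

    features = ['Age', 'Duration(Sec)', 'CC Contact Freq', 'Days Since PC', 'PC Contact Freq',
                'Job', 'Marital Status', 'Education', 'Defaulter', 'Home Loan',
                'Personal Loan', 'Communication Type', 'Last Contacted', 'Day of Week',
                'PC Outcome']

    # best_estimator_ was saved, so the loaded object is the full
    # preprocessing + LogisticRegression pipeline.
    model = joblib.load("model.joblib")
    sample = pd.read_csv("Bank_Telemarketing.csv")[features].head(3)
    print(model.predict(sample))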