Eemansleepdeprived committed on
Commit
2eb5587
·
verified ·
1 Parent(s): 760f29b

Upload 6 files

Browse files
Files changed (5) hide show
  1. LICENSE +21 -0
  2. Model.py +697 -0
  3. maintenance_report.csv +0 -0
  4. requirements.txt +10 -0
  5. train.csv +0 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Divija Joshi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Model.py ADDED
@@ -0,0 +1,697 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.preprocessing import StandardScaler
5
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
6
+ from sklearn.metrics import classification_report, mean_squared_error, precision_recall_curve, roc_curve, auc
7
+ from sklearn.impute import SimpleImputer
8
+ from sklearn.pipeline import Pipeline
9
+ from sklearn.feature_selection import SelectFromModel
10
+ import joblib
11
+ import streamlit as st
12
+ import plotly.express as px
13
+ import plotly.graph_objects as go
14
+ from datetime import datetime, timedelta
15
+ import seaborn as sns
16
+ import matplotlib.pyplot as plt
17
+ import os
18
+
19
# Page-level Streamlit configuration. This is the first Streamlit command
# issued by the script, ahead of every other st.* call below.
st.set_page_config(
    page_title="Predictive Maintenance Dashboard",
    page_icon="🔧",  # wrench emoji (restored from a mis-decoded byte sequence)
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for better styling: tighter main padding, roomier alerts and a
# light card background for elements using the "metric-card" class.
st.markdown("""
    <style>
    .main {
        padding: 0rem 1rem;
    }
    .stAlert {
        padding: 1rem;
        margin: 1rem 0;
    }
    .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 0.5rem;
    }
    </style>
    """, unsafe_allow_html=True)
44
+
45
def load_and_prepare_data(csv_path='playground-series-s3e17/train.csv', seed=42):
    """
    ETL pipeline for data preparation.

    Reads the training CSV, fills gaps, derives the binary ``Failure`` target
    and the engineered features used by the models, and attaches a simulated
    maintenance history.

    Args:
        csv_path: Location of the training CSV. Defaults to the path the
            dashboard has always used.
        seed: Seed for the simulated ``Last_Maintenance`` and
            ``Maintenance_Count`` columns. A fixed seed keeps those columns
            (and therefore the maintenance recommendations derived from them)
            stable across Streamlit reruns. Pass ``None`` for fresh random
            values on every call.

    Returns:
        pandas.DataFrame: the cleaned and feature-engineered dataset.
    """
    # Load dataset
    data = pd.read_csv(csv_path)

    # Data cleaning: propagate the previous value forward, then back-fill any
    # gaps left at the very start of a column.
    data = data.ffill().bfill()

    # A row counts as a failure when any individual failure-mode flag is set.
    data['Failure'] = data[['TWF', 'HDF', 'PWF', 'OSF', 'RNF']].sum(axis=1) > 0

    # Advanced feature engineering
    torque = data['Torque [Nm]']
    rpm = data['Rotational speed [rpm]']
    process_temp = data['Process temperature [K]']
    tool_wear = data['Tool wear [min]']

    data['Torque_RollingMean'] = torque.rolling(window=10, min_periods=1).mean()
    # NOTE: the variance of a single observation is NaN, so the first row of
    # this column is NaN; the model pipelines impute it downstream.
    data['RPM_Variance'] = rpm.rolling(window=10, min_periods=1).var()
    data['Temperature_Difference'] = process_temp - data['Air temperature [K]']
    data['Power'] = torque * rpm / 9550  # Mechanical Power in kW
    data['Temperature_Rate'] = process_temp.diff().fillna(0)
    data['Wear_Rate'] = tool_wear.diff().fillna(0)
    # +1 keeps the ratio finite for brand-new tools (zero wear).
    data['Power_to_Wear_Ratio'] = data['Power'] / (tool_wear + 1)

    # Simulate maintenance history with a local, seeded generator so results
    # are reproducible and the global NumPy random state is left untouched.
    rng = np.random.default_rng(seed)
    data['Last_Maintenance'] = rng.integers(0, 1000, size=len(data))
    data['Maintenance_Count'] = rng.integers(0, 5, size=len(data))

    return data
73
+
74
@st.cache_data
def get_failure_patterns(data):
    """Count failure rows whose key metrics exceed the failure-set average.

    Only rows with ``Failure`` set are considered. For each metric, the count
    is the number of those rows strictly above the metric's mean over the
    failure rows.

    Returns:
        dict with keys 'high_temp', 'high_wear' and 'high_power'.
    """
    failures = data[data['Failure'] == 1]

    def count_above_mean(column):
        # Number of failure rows strictly above that column's failure-set mean.
        values = failures[column]
        return failures[values > values.mean()].shape[0]

    return {
        'high_temp': count_above_mean('Temperature_Difference'),
        'high_wear': count_above_mean('Tool wear [min]'),
        'high_power': count_above_mean('Power'),
    }
84
+
85
def create_pipelines(model_params=None):
    """Create the classification and regression pipelines.

    Both pipelines share the same shape: mean imputation, standard scaling,
    model-based feature selection, then the final estimator.

    Args:
        model_params: Optional dict overriding any of 'n_estimators_clf',
            'max_depth_clf', 'n_estimators_reg' and 'max_depth_reg'. Keys that
            are missing fall back to the defaults below.

    Returns:
        tuple: (clf_pipeline, reg_pipeline), both unfitted.
    """
    defaults = {
        'n_estimators_clf': 200,
        'max_depth_clf': 15,
        'n_estimators_reg': 150,
        'max_depth_reg': 7
    }
    # Merge so that a partially specified dict no longer raises KeyError.
    model_params = {**defaults, **(model_params or {})}

    clf_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('feature_selection', SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42))),
        ('classifier', RandomForestClassifier(
            n_estimators=model_params['n_estimators_clf'],
            max_depth=model_params['max_depth_clf'],
            class_weight='balanced',
            random_state=42
        ))
    ])

    reg_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('feature_selection', SelectFromModel(GradientBoostingRegressor(n_estimators=100, random_state=42))),
        ('regressor', GradientBoostingRegressor(
            n_estimators=model_params['n_estimators_reg'],
            max_depth=model_params['max_depth_reg'],
            learning_rate=0.1,
            random_state=42
        ))
    ])

    return clf_pipeline, reg_pipeline
124
+
125
def calculate_maintenance_metrics(failure_prob, tool_wear, last_maintenance, thresholds):
    """
    Calculate maintenance recommendations based on predictions and customizable thresholds.

    Args:
        failure_prob: Predicted failure probabilities (array-like or scalar).
        tool_wear: Predicted tool wear values, same length as ``failure_prob``.
        last_maintenance: Time since last maintenance, same length as
            ``failure_prob``.
        thresholds: Dict with keys 'risk', 'wear' and 'maintenance_age'.

    Returns:
        tuple of NumPy arrays ``(maintenance_due, priority, next_maintenance,
        estimated_days)``:
            * maintenance_due  - bool, True when any threshold is exceeded.
            * priority         - 'High' / 'Medium' / 'Low' from failure_prob.
            * next_maintenance - human-readable urgency bucket.
            * estimated_days   - 0 when due, otherwise the remaining wear
              budget divided by the mean wear (rounded up).
    """
    # Coerce up front so lists, pandas Series and scalars behave the same as
    # ndarrays and the outputs are always plain NumPy arrays.
    failure_prob = np.asarray(failure_prob, dtype=float)
    tool_wear = np.asarray(tool_wear, dtype=float)
    last_maintenance = np.asarray(last_maintenance)

    risk_threshold = thresholds['risk']
    wear_threshold = thresholds['wear']
    maintenance_age_threshold = thresholds['maintenance_age']

    maintenance_due = (
        (failure_prob > risk_threshold) |
        (tool_wear > wear_threshold) |
        (last_maintenance > maintenance_age_threshold)
    )

    priority = np.where(
        failure_prob > 0.7, 'High',
        np.where(failure_prob > 0.4, 'Medium', 'Low')
    )

    # Mean wear is the divisor for the days estimate; it is floored at 0.1 to
    # avoid dividing by zero. An empty input has no mean, so skip the call
    # that would otherwise emit a NaN RuntimeWarning.
    mean_wear = tool_wear.mean() if tool_wear.size else 0.0
    wear_divisor = np.maximum(0.1, mean_wear)

    estimated_days = np.where(
        maintenance_due,
        0,
        np.ceil((wear_threshold - tool_wear) / wear_divisor)
    )

    next_maintenance = np.where(
        maintenance_due,
        'Immediate',
        np.where(
            estimated_days <= 7,
            'Within 1 week',
            np.where(
                estimated_days <= 30,
                'Within 1 month',
                'No immediate action needed'
            )
        )
    )

    return maintenance_due, priority, next_maintenance, estimated_days
165
+
166
def create_failure_analysis_plots(data, X_train, y_train, X_test, y_test, predictions):
    """Create various failure analysis visualizations.

    Args:
        data: Prepared dataset; must contain 'Tool wear [min]',
            'Temperature_Difference' and 'Power'.
        X_train, y_train: Training features/labels for the auxiliary
            classifier fitted inside this function.
        X_test, y_test: Held-out features/labels used for the ROC curve.
        predictions: Failure probabilities to plot as a histogram.

    Returns:
        tuple: (fig1, fig2, fig3) - key-metric traces, probability histogram
        and ROC curve.
    """
    # Fit a standalone random forest for the ROC curve. The seed matches the
    # rest of the file so the curve is reproducible between runs.
    model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
    model.fit(X_train, y_train)

    # Time series of key metrics
    fig1 = go.Figure()
    fig1.add_trace(go.Scatter(
        y=data['Tool wear [min]'],
        name='Tool Wear',
        line=dict(color='blue')
    ))
    fig1.add_trace(go.Scatter(
        y=data['Temperature_Difference'],
        name='Temperature Difference',
        line=dict(color='red')
    ))
    fig1.add_trace(go.Scatter(
        y=data['Power'],
        name='Power',
        line=dict(color='green')
    ))
    fig1.update_layout(title='Key Metrics Over Time', xaxis_title='Observation')

    # Failure probability distribution
    fig2 = px.histogram(
        predictions,
        nbins=50,
        title='Distribution of Failure Probabilities'
    )

    # Probabilities for the positive class (binary classification)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # ROC Curve
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    roc_auc = auc(fpr, tpr)
    fig3 = go.Figure()
    fig3.add_trace(go.Scatter(
        x=fpr, y=tpr,
        mode='lines',
        name=f'ROC Curve (AUC = {roc_auc:.2f})'
    ))
    # The background colour is a layout property; it cannot be set as an
    # attribute directly on the Figure object.
    fig3.update_layout(
        title='Receiver Operating Characteristic (ROC) Curve',
        xaxis_title='False Positive Rate',
        yaxis_title='True Positive Rate',
        xaxis_range=[0, 1],
        yaxis_range=[0, 1],
        plot_bgcolor='white'
    )

    return fig1, fig2, fig3
222
+
223
def plot_maintenance_calendar(schedule_df):
    """Build the interactive timeline of scheduled maintenance windows.

    Each row of ``schedule_df`` becomes one bar running from 'Scheduled_Date'
    to 'Due_Date' on the row's 'Equipment_ID', coloured by 'Priority'.
    """
    timeline_options = dict(
        x_start='Scheduled_Date',
        x_end='Due_Date',
        y='Equipment_ID',
        color='Priority',
        title='Maintenance Schedule Timeline',
    )
    timeline = px.timeline(schedule_df, **timeline_options)

    # Axis cosmetics: label both axes and reverse the y-axis range.
    timeline.update_xaxes(title="Date")
    timeline.update_yaxes(autorange="reversed", title="Equipment ID")
    return timeline
236
+
237
def sidebar_controls():
    """Render the sidebar widgets and collect their current values.

    Returns:
        dict with three sub-dicts: 'model_params', 'thresholds' and
        'viz_params'.
    """
    sidebar = st.sidebar
    sidebar.header('Dashboard Controls')

    # Model Parameters (dict literals evaluate top to bottom, so the widgets
    # are created in the order they are written).
    sidebar.subheader('Model Parameters')
    model_params = {
        'n_estimators_clf': sidebar.slider('Number of Trees (Classification)', 50, 300, 200),
        'max_depth_clf': sidebar.slider('Max Tree Depth (Classification)', 5, 30, 15),
        'n_estimators_reg': sidebar.slider('Number of Trees (Regression)', 50, 300, 150),
        'max_depth_reg': sidebar.slider('Max Tree Depth (Regression)', 5, 30, 7),
    }

    # Threshold Settings
    sidebar.subheader('Maintenance Thresholds')
    thresholds = {
        'risk': sidebar.slider('Risk Threshold', 0.0, 1.0, 0.3),
        'wear': sidebar.slider('Wear Threshold', 100, 300, 200),
        'maintenance_age': sidebar.slider('Maintenance Age Threshold', 500, 1000, 800),
    }

    # Visualization Settings
    sidebar.subheader('Visualization Settings')
    viz_params = {
        'plot_height': sidebar.slider('Plot Height', 400, 800, 600),
        'color_theme': sidebar.selectbox('Color Theme', ['blues', 'reds', 'greens']),
    }

    return {
        'model_params': model_params,
        'thresholds': thresholds,
        'viz_params': viz_params,
    }
276
+
277
def main():
    """Render the predictive-maintenance dashboard.

    Flow: read sidebar settings, load and prepare the data, load saved
    pipelines from ./models (or train and save new ones), score the held-out
    split, derive maintenance recommendations, then draw the overview metrics,
    the four analysis tabs, the footer and the report download.
    """
    st.title("🔧 Advanced Predictive Maintenance Dashboard")

    # Get user input parameters
    params = sidebar_controls()

    # Introduction
    with st.expander("ℹ️ Dashboard Overview", expanded=True):
        st.markdown("""
        This dashboard provides comprehensive predictive maintenance analytics for manufacturing equipment:

        1. *Real-time Monitoring*: Track equipment health metrics and failure predictions
        2. *Maintenance Planning*: Get AI-powered maintenance recommendations
        3. *Performance Analysis*: Analyze historical data and model performance
        4. *Interactive Features*: Customize thresholds and visualization parameters

        Use the sidebar controls to adjust model parameters and thresholds.
        """)

    # Load and prepare data
    with st.spinner("Loading and preparing data..."):
        data = load_and_prepare_data()

    # Define features
    feature_columns = [
        'Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]',
        'Torque [Nm]', 'Tool wear [min]', 'Torque_RollingMean', 'RPM_Variance',
        'Temperature_Difference', 'Power', 'Temperature_Rate', 'Wear_Rate',
        'Power_to_Wear_Ratio'
    ]

    X = data[feature_columns]
    y_classification = data['Failure']
    y_regression = data['Tool wear [min]']

    # One stratified split shared by both models. The regression targets are
    # looked up by row index so they describe exactly the rows in X_train and
    # X_test; a second, non-stratified split would select different rows and
    # pair features with the wrong targets.
    X_train, X_test, y_train_cls, y_test_cls = train_test_split(
        X, y_classification, test_size=0.2, random_state=42, stratify=y_classification
    )
    y_train_reg = y_regression.loc[X_train.index]
    y_test_reg = y_regression.loc[X_test.index]

    # Load or train models with user parameters
    model_dir = './models'
    os.makedirs(model_dir, exist_ok=True)

    clf_pipeline_file = os.path.join(model_dir, 'clf_pipeline.pkl')
    reg_pipeline_file = os.path.join(model_dir, 'reg_pipeline.pkl')

    if os.path.exists(clf_pipeline_file) and os.path.exists(reg_pipeline_file):
        # Load pre-trained models.
        # NOTE(review): saved pipelines are used as-is, so the sidebar model
        # parameters only take effect after ./models is cleared.
        # NOTE(security): joblib.load unpickles the file; only load pipeline
        # files that this application wrote itself.
        clf_pipeline = joblib.load(clf_pipeline_file)
        reg_pipeline = joblib.load(reg_pipeline_file)
    else:
        # Train models with user parameters
        with st.spinner("Training models with selected parameters..."):
            clf_pipeline, reg_pipeline = create_pipelines(params['model_params'])

            # Train models
            clf_pipeline.fit(X_train, y_train_cls)
            reg_pipeline.fit(X_train, y_train_reg)

            # Save models
            joblib.dump(clf_pipeline, clf_pipeline_file)
            joblib.dump(reg_pipeline, reg_pipeline_file)
            st.write("Trained and saved new models to ./models folder.")

    # Make predictions
    y_pred_cls = clf_pipeline.predict(X_test)
    y_pred_proba = clf_pipeline.predict_proba(X_test)[:, 1]
    y_pred_reg = reg_pipeline.predict(X_test)

    # Calculate maintenance recommendations. The maintenance history is taken
    # for the same rows that were scored (not simply the tail of the frame).
    maintenance_due, priority, next_maintenance, estimated_days = calculate_maintenance_metrics(
        y_pred_proba,
        y_pred_reg,
        data.loc[X_test.index, 'Last_Maintenance'].to_numpy(),
        params['thresholds']
    )

    mean_risk = y_pred_proba.mean()
    mean_wear = y_pred_reg.mean()
    due_count = maintenance_due.sum()
    unit_count = len(maintenance_due)

    # Dashboard Layout

    # 1. Equipment Health Overview
    st.header("📊 Equipment Health Overview")

    metric_cols = st.columns(4)
    with metric_cols[0]:
        st.metric(
            "Overall Health Index",
            f"{(1 - mean_risk):.1%}",
            delta=f"{-mean_risk:.1%}",
            delta_color="inverse"
        )

    with metric_cols[1]:
        st.metric(
            "Average Failure Risk",
            f"{mean_risk:.1%}",
            delta=f"{(mean_risk - 0.3):.1%}" if mean_risk > 0.3 else "Normal",
            delta_color="inverse"
        )

    with metric_cols[2]:
        st.metric(
            "Equipment Requiring Maintenance",
            f"{due_count}",
            delta=f"{due_count - 10}" if due_count > 10 else "Within limits"
        )

    with metric_cols[3]:
        st.metric(
            "Average Tool Wear",
            f"{mean_wear:.1f} min",
            delta=f"{mean_wear - params['thresholds']['wear']:.1f}"
        )

    # 2. Interactive Analysis Tabs
    tabs = st.tabs([
        "🔍 Real-time Monitoring",
        "📈 Performance Analysis",
        "🔧 Maintenance Planning",
        "📊 Historical Analysis"
    ])

    # Tab 1: Real-time Monitoring
    with tabs[0]:
        # Equipment Status Summary
        status_df = pd.DataFrame({
            'Status': ['Healthy', 'Warning', 'Critical'],
            'Count': [
                (y_pred_proba < 0.3).sum(),
                ((y_pred_proba >= 0.3) & (y_pred_proba < 0.7)).sum(),
                (y_pred_proba >= 0.7).sum()
            ]
        })
        fig = px.pie(
            status_df,
            values='Count',
            names='Status',
            title='Equipment Status Distribution',
            color='Status',
            color_discrete_map={
                'Healthy': 'green',
                'Warning': 'yellow',
                'Critical': 'red'
            }
        )
        st.plotly_chart(fig, use_container_width=True)

        # Real-time Alerts
        if due_count > 0:
            st.warning(f"⚠️ {due_count} equipment units require immediate attention!")

        # Interactive Equipment Explorer
        st.subheader("Equipment Explorer")
        selected_metric = st.selectbox(
            "Select Metric to Monitor:",
            options=['Temperature_Difference', 'Tool wear [min]', 'Power', 'Torque [Nm]', 'Rotational speed [rpm]']
        )

        time_window = st.slider(
            "Time Window (last N observations)",
            min_value=10,
            max_value=len(data),
            # Keep the default inside the slider range for small datasets.
            value=min(100, len(data))
        )

        # Plot selected metric
        fig = px.line(
            data.tail(time_window),
            y=selected_metric,
            title=f'{selected_metric} - Last {time_window} Observations'
        )
        fig.add_hline(
            y=data[selected_metric].mean(),
            line_dash="dash",
            annotation_text="Average"
        )
        st.plotly_chart(fig, use_container_width=True)

    # Tab 2: Performance Analysis
    with tabs[1]:
        st.subheader("Model Performance Analysis")

        col1, col2 = st.columns(2)

        with col1:
            # Classification Performance
            st.markdown("### Failure Prediction Performance")
            st.text("Classification Report:")
            st.code(classification_report(y_test_cls, y_pred_cls))

            # Precision-Recall curve
            precision, recall, _ = precision_recall_curve(y_test_cls, y_pred_proba)
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=recall, y=precision,
                mode='lines',
                name='Precision-Recall curve',
                fill='tozeroy'
            ))
            fig.update_layout(
                title='Precision-Recall Curve',
                xaxis_title='Recall',
                yaxis_title='Precision'
            )
            st.plotly_chart(fig, use_container_width=True)

        with col2:
            # Regression Performance
            st.markdown("### Tool Wear Prediction Performance")
            mse = mean_squared_error(y_test_reg, y_pred_reg)
            rmse = np.sqrt(mse)
            st.metric("Root Mean Squared Error", f"{rmse:.2f}")

            # Feature Importance. The classifier only sees the columns kept
            # by the feature-selection step, so its importances are labelled
            # with the selector's support mask rather than by position.
            selected_mask = clf_pipeline.named_steps['feature_selection'].get_support()
            feature_names = [
                name for name, kept in zip(feature_columns, selected_mask) if kept
            ]
            feature_importances = clf_pipeline.named_steps['classifier'].feature_importances_

            feature_imp = pd.DataFrame({
                'Feature': feature_names,
                'Importance': feature_importances
            }).sort_values('Importance', ascending=True)

            fig = px.bar(
                feature_imp,
                x='Importance',
                y='Feature',
                orientation='h',
                title='Feature Importance Analysis'
            )
            st.plotly_chart(fig, use_container_width=True)

        # Correlation Analysis
        st.subheader("Feature Correlation Analysis")

        # Calculate the correlation matrix
        correlation_matrix = data[feature_columns].corr()

        # Create a heatmap using plotly
        correlation_fig = px.imshow(
            correlation_matrix,
            text_auto=True,
            color_continuous_scale='Viridis',
            title="Feature Correlation Heatmap"
        )

        # Customize layout for better display
        correlation_fig.update_layout(
            width=800,
            height=600,
            xaxis_title="Features",
            yaxis_title="Features",
            xaxis={'tickangle': 45},
            yaxis={'tickangle': -45}
        )

        # Display the correlation heatmap
        st.plotly_chart(correlation_fig, use_container_width=True)

    # Tab 3: Maintenance Planning
    with tabs[2]:
        st.subheader("Maintenance Schedule and Recommendations")

        # Create maintenance schedule DataFrame
        schedule_df = pd.DataFrame({
            'Equipment_ID': range(1, unit_count + 1),
            'Failure_Probability': y_pred_proba,
            'Tool_Wear': y_pred_reg,
            'Priority': priority,
            'Next_Maintenance': next_maintenance,
            'Estimated_Days': estimated_days
        })

        # Add simulated dates: start after the estimated delay and allow a
        # one-week window for the work.
        today = datetime.now()
        schedule_df['Scheduled_Date'] = [
            today + timedelta(days=int(d)) for d in schedule_df['Estimated_Days']
        ]
        schedule_df['Due_Date'] = [
            d + timedelta(days=7) for d in schedule_df['Scheduled_Date']
        ]

        # Maintenance Calendar
        st.markdown("### 📅 Maintenance Calendar")
        calendar_fig = plot_maintenance_calendar(schedule_df)
        st.plotly_chart(calendar_fig, use_container_width=True)

        # Priority-based maintenance table
        st.markdown("### 🔧 Priority Maintenance Tasks")
        priority_df = schedule_df[schedule_df['Priority'] == 'High'].sort_values(
            'Failure_Probability', ascending=False
        )

        if not priority_df.empty:
            st.dataframe(
                priority_df[['Equipment_ID', 'Failure_Probability', 'Tool_Wear', 'Next_Maintenance']],
                use_container_width=True
            )
        else:
            st.success("No high-priority maintenance tasks at the moment!")

        # Maintenance Cost Analysis
        st.markdown("### 💰 Maintenance Cost Projection")
        est_cost_per_maintenance = st.number_input(
            "Estimated cost per maintenance (USD):",
            value=1000,
            step=100
        )

        projected_cost = due_count * est_cost_per_maintenance
        # Guard the average against an empty evaluation set.
        average_cost = projected_cost / unit_count if unit_count else 0

        cost_col1, cost_col2 = st.columns(2)
        with cost_col1:
            st.metric(
                "Projected Maintenance Cost",
                f"${projected_cost:,.2f}",
                delta=f"${projected_cost - 10000:,.2f}" if projected_cost > 10000 else "Within budget"
            )

        with cost_col2:
            st.metric(
                "Average Cost per Equipment",
                f"${average_cost:,.2f}"
            )

    # Tab 4: Historical Analysis
    with tabs[3]:
        st.subheader("Historical Performance Analysis")

        # Time series analysis
        st.markdown("### 📈 Historical Trends")
        metric_for_history = st.selectbox(
            "Select metric for historical analysis:",
            options=['Tool wear [min]', 'Temperature_Difference', 'Power', 'Failure']
        )

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            y=data[metric_for_history],
            mode='lines',
            name=metric_for_history
        ))

        # Add trend line (degree-1 least-squares fit over observation order)
        observation_index = np.arange(len(data))
        trend = np.poly1d(np.polyfit(observation_index, data[metric_for_history], 1))
        fig.add_trace(go.Scatter(
            y=trend(observation_index),
            mode='lines',
            name='Trend',
            line=dict(dash='dash')
        ))

        st.plotly_chart(fig, use_container_width=True)

        # Failure patterns analysis
        st.markdown("### 🔍 Failure Patterns")
        patterns = get_failure_patterns(data)

        pattern_cols = st.columns(3)
        for column, (pattern, count) in zip(pattern_cols, patterns.items()):
            with column:
                st.metric(
                    f"Failures due to {pattern.replace('_', ' ').title()}",
                    count,
                    delta=f"{count/len(data['Failure'])*100:.1f}% of total"
                )

    # Footer with additional information
    st.markdown("---")
    st.markdown("""
    ### 📝 Notes and Recommendations
    - Adjust thresholds in the sidebar to customize maintenance triggers
    - Regular model retraining is recommended for optimal performance
    - Contact maintenance team for immediate issues
    """)

    # Download section for reports
    if st.button("Generate Maintenance Report"):
        # Create report DataFrame
        report_df = pd.DataFrame({
            'Equipment_ID': range(1, unit_count + 1),
            'Failure_Risk': y_pred_proba,
            'Tool_Wear': y_pred_reg,
            'Maintenance_Priority': priority,
            'Next_Maintenance': next_maintenance,
            'Days_Until_Maintenance': estimated_days
        })

        # Convert to CSV
        csv = report_df.to_csv(index=False)
        st.download_button(
            label="Download Maintenance Report",
            data=csv,
            file_name="maintenance_report.csv",
            mime="text/csv"
        )


if __name__ == "__main__":
    main()
maintenance_report.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ imbalanced_learn==0.12.4
2
+ joblib==1.3.2
3
+ matplotlib==3.7.2
4
+ numpy==1.24.3
5
+ pandas==1.5.3
6
+ plotly==5.24.1
7
+ scikit_learn==1.5.2
8
+ seaborn==0.13.2
9
+ shap==0.46.0
10
+ streamlit==1.37.0
train.csv ADDED
The diff for this file is too large to render. See raw diff