import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.metrics import classification_report, mean_squared_error, precision_recall_curve, roc_curve, auc
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel
import joblib
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import os
# Set page config
st.set_page_config(
page_title="Predictive Maintenance Dashboard",
page_icon="πŸ”§",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for better styling
st.markdown("""
<style>
.main {
padding: 0rem 1rem;
}
.stAlert {
padding: 1rem;
margin: 1rem 0;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
}
</style>
""", unsafe_allow_html=True)
def load_and_prepare_data():
"""
ETL Pipeline for data preparation
Returns cleaned and feature-engineered dataset
"""
# Load dataset
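    # Assumes train.csv follows the AI4I 2020 predictive-maintenance schema
    # (Air/Process temperature [K], Rotational speed [rpm], Torque [Nm],
    #  Tool wear [min], plus the TWF/HDF/PWF/OSF/RNF failure-mode flags)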
data = pd.read_csv('./train.csv')
# Data Cleaning
data = data.ffill().bfill()
# Feature Engineering
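    # A unit counts as failed if any failure-mode flag is set
    # (TWF: tool wear, HDF: heat dissipation, PWF: power, OSF: overstrain, RNF: random failure)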
data['Failure'] = data[['TWF', 'HDF', 'PWF', 'OSF', 'RNF']].sum(axis=1) > 0
# Advanced Feature Engineering
data['Torque_RollingMean'] = data['Torque [Nm]'].rolling(window=10, min_periods=1).mean()
data['RPM_Variance'] = data['Rotational speed [rpm]'].rolling(window=10, min_periods=1).var()
data['Temperature_Difference'] = data['Process temperature [K]'] - data['Air temperature [K]']
data['Power'] = data['Torque [Nm]'] * data['Rotational speed [rpm]'] / 9550 # Mechanical Power in kW
data['Temperature_Rate'] = data['Process temperature [K]'].diff().fillna(0)
data['Wear_Rate'] = data['Tool wear [min]'].diff().fillna(0)
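    # The +1 in the denominator below guards against division by zero for brand-new tools (0 min of wear)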
data['Power_to_Wear_Ratio'] = data['Power'] / (data['Tool wear [min]'] + 1)
    # Simulate maintenance history (random placeholders, seeded so dashboard reruns are reproducible)
    rng = np.random.default_rng(42)
    data['Last_Maintenance'] = rng.integers(0, 1000, size=len(data))
    data['Maintenance_Count'] = rng.integers(0, 5, size=len(data))
return data
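# st.cache_data hashes the DataFrame argument, so the pattern analysis is only recomputed when the data changes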
@st.cache_data
def get_failure_patterns(data):
"""Analyze common patterns leading to failures"""
failure_data = data[data['Failure'] == 1]
patterns = {
'high_temp': failure_data[failure_data['Temperature_Difference'] > failure_data['Temperature_Difference'].mean()].shape[0],
'high_wear': failure_data[failure_data['Tool wear [min]'] > failure_data['Tool wear [min]'].mean()].shape[0],
'high_power': failure_data[failure_data['Power'] > failure_data['Power'].mean()].shape[0]
}
return patterns
def create_pipelines(model_params=None):
"""Create ML pipelines with configurable parameters"""
if model_params is None:
model_params = {
'n_estimators_clf': 200,
'max_depth_clf': 15,
'n_estimators_reg': 150,
'max_depth_reg': 7
}
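    # SelectFromModel keeps only features whose tree-based importance exceeds the mean importance
    # (its default threshold), so the final estimators train on a reduced feature set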
clf_pipeline = Pipeline([
('imputer', SimpleImputer(strategy='mean')),
('scaler', StandardScaler()),
('feature_selection', SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42))),
('classifier', RandomForestClassifier(
n_estimators=model_params['n_estimators_clf'],
max_depth=model_params['max_depth_clf'],
class_weight='balanced',
random_state=42
))
])
reg_pipeline = Pipeline([
('imputer', SimpleImputer(strategy='mean')),
('scaler', StandardScaler()),
('feature_selection', SelectFromModel(GradientBoostingRegressor(n_estimators=100, random_state=42))),
('regressor', GradientBoostingRegressor(
n_estimators=model_params['n_estimators_reg'],
max_depth=model_params['max_depth_reg'],
learning_rate=0.1,
random_state=42
))
])
return clf_pipeline, reg_pipeline
def calculate_maintenance_metrics(failure_prob, tool_wear, last_maintenance, thresholds):
"""
Calculate maintenance recommendations based on predictions and customizable thresholds
"""
risk_threshold = thresholds['risk']
wear_threshold = thresholds['wear']
maintenance_age_threshold = thresholds['maintenance_age']
maintenance_due = (
(failure_prob > risk_threshold) |
(tool_wear > wear_threshold) |
(last_maintenance > maintenance_age_threshold)
)
priority = np.where(
failure_prob > 0.7, 'High',
np.where(failure_prob > 0.4, 'Medium', 'Low')
)
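    # Rough days-until-threshold estimate: remaining wear headroom divided by the average
    # predicted wear, with a 0.1 floor to avoid division by zero; units already due get 0 days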
estimated_days = np.where(
maintenance_due,
0,
np.ceil((wear_threshold - tool_wear) / np.maximum(0.1, tool_wear.mean()))
)
next_maintenance = np.where(
maintenance_due,
'Immediate',
np.where(
estimated_days <= 7,
'Within 1 week',
np.where(
estimated_days <= 30,
'Within 1 month',
'No immediate action needed'
)
)
)
return maintenance_due, priority, next_maintenance, estimated_days
def create_failure_analysis_plots(data, X_train, y_train, X_test, y_test, predictions):
"""Create various failure analysis visualizations"""
    # Fit a standalone classifier on the training split; its probabilities drive the ROC curve below
    model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
    model.fit(X_train, y_train)
# Time series of key metrics
fig1 = go.Figure()
fig1.add_trace(go.Scatter(
y=data['Tool wear [min]'],
name='Tool Wear',
line=dict(color='blue')
))
fig1.add_trace(go.Scatter(
y=data['Temperature_Difference'],
name='Temperature Difference',
line=dict(color='red')
))
fig1.add_trace(go.Scatter(
y=data['Power'],
name='Power',
line=dict(color='green')
))
fig1.update_layout(title='Key Metrics Over Time', xaxis_title='Observation')
# Failure probability distribution
fig2 = px.histogram(
predictions,
nbins=50,
title='Distribution of Failure Probabilities'
)
# Get predicted probabilities for the positive class
y_pred_proba = model.predict_proba(X_test)[:, 1] # Probabilities for the positive class (binary classification)
y_test_cls = y_test # True class labels
# ROC Curve
fpr, tpr, _ = roc_curve(y_test_cls, y_pred_proba)
roc_auc = auc(fpr, tpr)
fig3 = go.Figure()
fig3.add_trace(go.Scatter(
x=fpr, y=tpr,
mode='lines',
name=f'ROC Curve (AUC = {roc_auc:.2f})'
))
    fig3.update_layout(
        title='Receiver Operating Characteristic (ROC) Curve',
        xaxis_title='False Positive Rate',
        yaxis_title='True Positive Rate',
        xaxis_range=[0, 1],
        yaxis_range=[0, 1],
        plot_bgcolor='white'
    )
return fig1, fig2, fig3
def plot_maintenance_calendar(schedule_df):
"""Create an interactive maintenance calendar view"""
fig = px.timeline(
schedule_df,
x_start='Scheduled_Date',
x_end='Due_Date',
y='Equipment_ID',
color='Priority',
title='Maintenance Schedule Timeline'
)
fig.update_yaxes(autorange="reversed", title="Equipment ID")
fig.update_xaxes(title="Date")
return fig
def sidebar_controls():
"""Create sidebar controls for user input"""
st.sidebar.header('Dashboard Controls')
# Model Parameters
st.sidebar.subheader('Model Parameters')
n_estimators_clf = st.sidebar.slider('Number of Trees (Classification)', 50, 300, 200)
max_depth_clf = st.sidebar.slider('Max Tree Depth (Classification)', 5, 30, 15)
n_estimators_reg = st.sidebar.slider('Number of Trees (Regression)', 50, 300, 150)
max_depth_reg = st.sidebar.slider('Max Tree Depth (Regression)', 5, 30, 7)
# Threshold Settings
st.sidebar.subheader('Maintenance Thresholds')
risk_threshold = st.sidebar.slider('Risk Threshold', 0.0, 1.0, 0.3)
wear_threshold = st.sidebar.slider('Wear Threshold', 100, 300, 200)
maintenance_age = st.sidebar.slider('Maintenance Age Threshold', 500, 1000, 800)
# Visualization Settings
st.sidebar.subheader('Visualization Settings')
plot_height = st.sidebar.slider('Plot Height', 400, 800, 600)
color_theme = st.sidebar.selectbox('Color Theme', ['blues', 'reds', 'greens'])
return {
'model_params': {
'n_estimators_clf': n_estimators_clf,
'max_depth_clf': max_depth_clf,
'n_estimators_reg': n_estimators_reg,
'max_depth_reg': max_depth_reg
},
'thresholds': {
'risk': risk_threshold,
'wear': wear_threshold,
'maintenance_age': maintenance_age
},
'viz_params': {
'plot_height': plot_height,
'color_theme': color_theme
}
}
def main():
st.title("πŸ”§ Advanced Predictive Maintenance Dashboard")
# Get user input parameters
params = sidebar_controls()
# Introduction
with st.expander("ℹ️ Dashboard Overview", expanded=True):
st.markdown("""
This dashboard provides comprehensive predictive maintenance analytics for manufacturing equipment:
1. *Real-time Monitoring*: Track equipment health metrics and failure predictions
2. *Maintenance Planning*: Get AI-powered maintenance recommendations
3. *Performance Analysis*: Analyze historical data and model performance
4. *Interactive Features*: Customize thresholds and visualization parameters
Use the sidebar controls to adjust model parameters and thresholds.
""")
# Load and prepare data
with st.spinner("Loading and preparing data..."):
data = load_and_prepare_data()
# Define features
feature_columns = [
'Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]',
'Torque [Nm]', 'Tool wear [min]', 'Torque_RollingMean', 'RPM_Variance',
'Temperature_Difference', 'Power', 'Temperature_Rate', 'Wear_Rate',
'Power_to_Wear_Ratio'
]
X = data[feature_columns]
y_classification = data['Failure']
y_regression = data['Tool wear [min]']
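    # Note: 'Tool wear [min]' is both a feature and the regression target, so the wear model
    # largely learns an identity mapping; drop it from feature_columns for a truly predictive setup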
# Load or train models with user parameters
model_dir = './models'
os.makedirs(model_dir, exist_ok=True)
clf_pipeline_file = os.path.join(model_dir, 'clf_pipeline.pkl')
reg_pipeline_file = os.path.join(model_dir, 'reg_pipeline.pkl')
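    # If saved pipelines exist they are reused as-is, so sidebar model parameters only take
    # effect after deleting the cached .pkl files in ./models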
if os.path.exists(clf_pipeline_file) and os.path.exists(reg_pipeline_file):
# Load pre-trained models
clf_pipeline = joblib.load(clf_pipeline_file)
reg_pipeline = joblib.load(reg_pipeline_file)
# Data split for prediction
X_train, X_test, y_train_cls, y_test_cls = train_test_split(
X, y_classification, test_size=0.2, random_state=42, stratify=y_classification
)
        # Reuse the same stratified split indices so the regression targets line up with X_train / X_test
        y_train_reg = y_regression.loc[X_train.index]
        y_test_reg = y_regression.loc[X_test.index]
else:
# Train models with user parameters
with st.spinner("Training models with selected parameters..."):
clf_pipeline, reg_pipeline = create_pipelines(params['model_params'])
# Split data for training
X_train, X_test, y_train_cls, y_test_cls = train_test_split(
X, y_classification, test_size=0.2, random_state=42, stratify=y_classification
)
            # Reuse the same stratified split indices so the regression targets line up with X_train / X_test
            y_train_reg = y_regression.loc[X_train.index]
            y_test_reg = y_regression.loc[X_test.index]
# Train models
clf_pipeline.fit(X_train, y_train_cls)
reg_pipeline.fit(X_train, y_train_reg)
# Save models
joblib.dump(clf_pipeline, clf_pipeline_file)
joblib.dump(reg_pipeline, reg_pipeline_file)
st.write("Trained and saved new models to ./models folder.")
# Make predictions
y_pred_cls = clf_pipeline.predict(X_test)
y_pred_proba = clf_pipeline.predict_proba(X_test)[:, 1]
y_pred_reg = reg_pipeline.predict(X_test)
# Calculate maintenance recommendations
maintenance_due, priority, next_maintenance, estimated_days = calculate_maintenance_metrics(
y_pred_proba,
y_pred_reg,
        data.loc[X_test.index, 'Last_Maintenance'].to_numpy(),  # align maintenance history with the shuffled test rows
params['thresholds']
)
# Dashboard Layout
# 1. Equipment Health Overview
st.header("πŸ“Š Equipment Health Overview")
metric_cols = st.columns(4)
with metric_cols[0]:
st.metric(
"Overall Health Index",
f"{(1 - y_pred_proba.mean()):.1%}",
delta=f"{-y_pred_proba.mean():.1%}",
delta_color="inverse"
)
with metric_cols[1]:
st.metric(
"Average Failure Risk",
f"{y_pred_proba.mean():.1%}",
delta=f"{(y_pred_proba.mean() - 0.3):.1%}" if y_pred_proba.mean() > 0.3 else "Normal",
delta_color="inverse"
)
with metric_cols[2]:
st.metric(
"Equipment Requiring Maintenance",
f"{maintenance_due.sum()}",
delta=f"{maintenance_due.sum() - 10}" if maintenance_due.sum() > 10 else "Within limits"
)
with metric_cols[3]:
st.metric(
"Average Tool Wear",
f"{y_pred_reg.mean():.1f} min",
delta=f"{y_pred_reg.mean() - params['thresholds']['wear']:.1f}"
)
# 2. Interactive Analysis Tabs
tabs = st.tabs([
"πŸ” Real-time Monitoring",
"πŸ“ˆ Performance Analysis",
"πŸ”§ Maintenance Planning",
"πŸ“Š Historical Analysis"
])
# Tab 1: Real-time Monitoring
with tabs[0]:
# Equipment Status Summary
status_df = pd.DataFrame({
'Status': ['Healthy', 'Warning', 'Critical'],
'Count': [
(y_pred_proba < 0.3).sum(),
((y_pred_proba >= 0.3) & (y_pred_proba < 0.7)).sum(),
(y_pred_proba >= 0.7).sum()
]
})
fig = px.pie(
status_df,
values='Count',
names='Status',
title='Equipment Status Distribution',
color='Status',
color_discrete_map={
'Healthy': 'green',
'Warning': 'yellow',
'Critical': 'red'
}
)
st.plotly_chart(fig, use_container_width=True)
# Real-time Alerts
if maintenance_due.sum() > 0:
st.warning(f"⚠️ {maintenance_due.sum()} equipment units require immediate attention!")
# Interactive Equipment Explorer
st.subheader("Equipment Explorer")
selected_metric = st.selectbox(
"Select Metric to Monitor:",
options=['Temperature_Difference', 'Tool wear [min]', 'Power', 'Torque [Nm]', 'Rotational speed [rpm]']
)
time_window = st.slider(
"Time Window (last N observations)",
min_value=10,
max_value=len(data),
value=100
)
# Plot selected metric
fig = px.line(
data.tail(time_window),
y=selected_metric,
title=f'{selected_metric} - Last {time_window} Observations'
)
fig.add_hline(
y=data[selected_metric].mean(),
line_dash="dash",
annotation_text="Average"
)
st.plotly_chart(fig, use_container_width=True)
# Tab 2: Performance Analysis
with tabs[1]:
st.subheader("Model Performance Analysis")
col1, col2 = st.columns(2)
with col1:
# Classification Performance
st.markdown("### Failure Prediction Performance")
st.text("Classification Report:")
st.code(classification_report(y_test_cls, y_pred_cls))
# Precision-Recall curve
precision, recall, _ = precision_recall_curve(y_test_cls, y_pred_proba)
fig = go.Figure()
fig.add_trace(go.Scatter(
x=recall, y=precision,
mode='lines',
name='Precision-Recall curve',
fill='tozeroy'
))
fig.update_layout(
title='Precision-Recall Curve',
xaxis_title='Recall',
yaxis_title='Precision'
)
st.plotly_chart(fig, use_container_width=True)
with col2:
# Regression Performance
st.markdown("### Tool Wear Prediction Performance")
mse = mean_squared_error(y_test_reg, y_pred_reg)
rmse = np.sqrt(mse)
st.metric("Root Mean Squared Error", f"{rmse:.2f}")
            # Feature Importance
            # SelectFromModel drops some columns, so map the classifier's importances
            # back to the feature names it was actually trained on
            support_mask = clf_pipeline.named_steps['feature_selection'].get_support()
            selected_features = [name for name, keep in zip(feature_columns, support_mask) if keep]
            feature_importances = clf_pipeline.named_steps['classifier'].feature_importances_
            feature_imp = pd.DataFrame({
                'Feature': selected_features,
                'Importance': feature_importances
            }).sort_values('Importance', ascending=True)
fig = px.bar(
feature_imp,
x='Importance',
y='Feature',
orientation='h',
title='Feature Importance Analysis'
)
st.plotly_chart(fig, use_container_width=True)
# Correlation Analysis
st.subheader("Feature Correlation Analysis")
# Calculate the correlation matrix
correlation_matrix = data[feature_columns].corr()
# Create a heatmap using plotly
correlation_fig = px.imshow(correlation_matrix,
text_auto=True,
color_continuous_scale='Viridis',
title="Feature Correlation Heatmap")
# Customize layout for better display
correlation_fig.update_layout(
width=800,
height=600,
xaxis_title="Features",
yaxis_title="Features",
xaxis={'tickangle': 45},
yaxis={'tickangle': -45}
)
# Display the correlation heatmap
st.plotly_chart(correlation_fig, use_container_width=True)
# Tab 3: Maintenance Planning
with tabs[2]:
st.subheader("Maintenance Schedule and Recommendations")
# Create maintenance schedule DataFrame
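        # Each test-split row is treated as one equipment unit; IDs are positional, not real asset IDs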
schedule_df = pd.DataFrame({
'Equipment_ID': range(1, len(maintenance_due) + 1),
'Failure_Probability': y_pred_proba,
'Tool_Wear': y_pred_reg,
'Priority': priority,
'Next_Maintenance': next_maintenance,
'Estimated_Days': estimated_days
})
# Add simulated dates
today = datetime.now()
schedule_df['Scheduled_Date'] = [
today + timedelta(days=int(d)) for d in schedule_df['Estimated_Days']
]
schedule_df['Due_Date'] = [
d + timedelta(days=7) for d in schedule_df['Scheduled_Date']
]
# Maintenance Calendar
st.markdown("### πŸ“… Maintenance Calendar")
calendar_fig = plot_maintenance_calendar(schedule_df)
st.plotly_chart(calendar_fig, use_container_width=True)
# Priority-based maintenance table
st.markdown("### πŸ”§ Priority Maintenance Tasks")
priority_df = schedule_df[schedule_df['Priority'] == 'High'].sort_values(
'Failure_Probability', ascending=False
)
if not priority_df.empty:
st.dataframe(
priority_df[['Equipment_ID', 'Failure_Probability', 'Tool_Wear', 'Next_Maintenance']],
use_container_width=True
)
else:
st.success("No high-priority maintenance tasks at the moment!")
# Maintenance Cost Analysis
st.markdown("### πŸ’° Maintenance Cost Projection")
est_cost_per_maintenance = st.number_input(
"Estimated cost per maintenance (USD):",
value=1000,
step=100
)
total_maintenance = maintenance_due.sum()
projected_cost = total_maintenance * est_cost_per_maintenance
cost_col1, cost_col2 = st.columns(2)
with cost_col1:
st.metric(
"Projected Maintenance Cost",
f"${projected_cost:,.2f}",
delta=f"${projected_cost - 10000:,.2f}" if projected_cost > 10000 else "Within budget"
)
with cost_col2:
st.metric(
"Average Cost per Equipment",
f"${projected_cost/len(maintenance_due):,.2f}"
)
# Tab 4: Historical Analysis
with tabs[3]:
st.subheader("Historical Performance Analysis")
# Time series analysis
st.markdown("### πŸ“ˆ Historical Trends")
metric_for_history = st.selectbox(
"Select metric for historical analysis:",
options=['Tool wear [min]', 'Temperature_Difference', 'Power', 'Failure']
)
fig = go.Figure()
fig.add_trace(go.Scatter(
y=data[metric_for_history],
mode='lines',
name=metric_for_history
))
# Add trend line
z = np.polyfit(range(len(data)), data[metric_for_history], 1)
p = np.poly1d(z)
fig.add_trace(go.Scatter(
y=p(range(len(data))),
mode='lines',
name='Trend',
line=dict(dash='dash')
))
st.plotly_chart(fig, use_container_width=True)
# Failure patterns analysis
st.markdown("### πŸ” Failure Patterns")
patterns = get_failure_patterns(data)
pattern_cols = st.columns(3)
for i, (pattern, count) in enumerate(patterns.items()):
with pattern_cols[i]:
st.metric(
f"Failures due to {pattern.replace('_', ' ').title()}",
count,
delta=f"{count/len(data['Failure'])*100:.1f}% of total"
)
# Footer with additional information
st.markdown("---")
st.markdown("""
### πŸ“ Notes and Recommendations
- Adjust thresholds in the sidebar to customize maintenance triggers
- Regular model retraining is recommended for optimal performance
- Contact maintenance team for immediate issues
""")
# Download section for reports
if st.button("Generate Maintenance Report"):
# Create report DataFrame
report_df = pd.DataFrame({
'Equipment_ID': range(1, len(maintenance_due) + 1),
'Failure_Risk': y_pred_proba,
'Tool_Wear': y_pred_reg,
'Maintenance_Priority': priority,
'Next_Maintenance': next_maintenance,
'Days_Until_Maintenance': estimated_days
})
# Convert to CSV
csv = report_df.to_csv(index=False)
st.download_button(
label="Download Maintenance Report",
data=csv,
file_name="maintenance_report.csv",
mime="text/csv"
)
if __name__ == "__main__":
main()