import time
import math

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr

from datasets import load_dataset
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

LOGS_DATASET_URI = 'pgurazada1/machine-failure-mlops-demo-logs'

# Maximum number of logged rows inspected on each drift check.
LOG_SAMPLE_SIZE = 100

# Reference statistics the logged predictions are compared against.
# NOTE(review): presumably the positive-label rate and row count of the
# training split produced by `load_training_data` -- confirm before changing.
P_POS_LABEL_TRAINING_DATA = 0.03475
TRAINING_DATA_SIZE = 8000


def get_data():
    """
    Connect to the HuggingFace dataset where the logs are stored.
    Pull the data into a dataframe and return a random sample of it.

    Returns:
        A dataframe with at most ``LOG_SAMPLE_SIZE`` randomly chosen rows
        from the ``train`` split of the logs dataset.
    """
    logs = load_dataset(LOGS_DATASET_URI)
    logs_df = logs['train'].to_pandas()

    # `DataFrame.sample` raises ValueError when asked for more rows than
    # exist, so cap the request while the logs are still small.
    n_rows = min(LOG_SAMPLE_SIZE, len(logs_df))
    return logs_df.sample(n_rows)


def load_training_data():
    """
    Fetch the training dataset from OpenML and rebuild the training split.

    Returns:
        A ``(Xtrain, ytrain)`` tuple: the feature dataframe and target
        series of the 80% training partition (fixed ``random_state=42``).
    """
    dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")
    data_df = dataset.data

    target = 'Machine failure'
    numeric_features = [
        'Air temperature [K]',
        'Process temperature [K]',
        'Rotational speed [rpm]',
        'Torque [Nm]',
        'Tool wear [min]',
    ]
    categorical_features = ['Type']

    X = data_df[numeric_features + categorical_features]
    y = data_df[target]

    # Only the training partition is returned; the test partition is unused.
    Xtrain, _, ytrain, _ = train_test_split(
        X, y,
        test_size=0.2,
        random_state=42,
    )

    return Xtrain, ytrain


def check_model_drift():
    """
    Compare the positive-prediction rate in a sample of the logs with the
    reference rate and report whether they differ by more than two
    standard errors of the reference proportion.

    Returns:
        A status string for display in the dashboard.
    """
    sample_df = get_data()

    # A small sample frequently contains a single class only, so look the
    # labels up with a default instead of indexing (which raises KeyError).
    # Assumes predictions are logged as the integer labels 0 and 1.
    label_counts = sample_df.prediction.value_counts()
    n_negative = label_counts.get(0, 0)
    n_positive = label_counts.get(1, 0)
    n_total = n_negative + n_positive

    if n_total == 0:
        # Nothing logged yet: no rate can be computed (avoids dividing by 0).
        return "No prediction logs available yet."

    p_pos_label_sample_logs = n_positive / n_total

    # Binomial variance of the reference proportion.
    variance = (
        P_POS_LABEL_TRAINING_DATA * (1 - P_POS_LABEL_TRAINING_DATA)
    ) / TRAINING_DATA_SIZE
    p_diff = abs(P_POS_LABEL_TRAINING_DATA - p_pos_label_sample_logs)

    if p_diff > 2 * math.sqrt(variance):
        return "Model Drift Detected! Check logs!"
    return "No Model Drift!"


with gr.Blocks() as demo:
    gr.Markdown("# Real-time Monitoring Dashboard")
    gr.Markdown("Model drift detection (every 5 seconds)")

    with gr.Row():
        with gr.Column():
            # Passing the callable with `every=5` makes Gradio re-run the
            # drift check periodically to refresh the textbox value.
            gr.Textbox(check_model_drift, every=5, label="Model Drift Status")

demo.queue().launch()