import time

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr

from datasets import load_dataset
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

LOGS_DATASET_URI = 'pgurazada1/machine-failure-mlops-demo-logs'


def get_data():
    """
    Connect to the HuggingFace dataset where the prediction logs are stored
    and pull a random sample of 100 rows into a dataframe.
    """
    data = load_dataset(LOGS_DATASET_URI)
    sample_df = data['train'].to_pandas().sample(100)

    return sample_df


def load_training_data():
    """
    Load the machine-failure dataset from OpenML and return the training
    split of features and labels.
    """
    dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")
    data_df = dataset.data

    target = 'Machine failure'
    numeric_features = [
        'Air temperature [K]',
        'Process temperature [K]',
        'Rotational speed [rpm]',
        'Torque [Nm]',
        'Tool wear [min]'
    ]
    categorical_features = ['Type']

    X = data_df[numeric_features + categorical_features]
    y = data_df[target]

    Xtrain, Xtest, ytrain, ytest = train_test_split(
        X, y,
        test_size=0.2,
        random_state=42
    )

    return Xtrain, ytrain


def check_model_drift():
    """
    Compare the share of positive (machine-failure) predictions in the live
    logs against the share of positive labels seen in the training data.
    """
    sample_df = get_data()

    # Baseline statistics from the training data.
    p_pos_label_training_data = 0.03475
    training_data_size = 8000

    # Share of positive predictions in the sampled logs; this assumes the
    # positive class is logged as 1 in the `prediction` column.
    p_pos_label_sample_logs = (
        sample_df.prediction
                 .value_counts(normalize=True)
                 .get(1, 0.0)
    )
    sample_logs_size = len(sample_df)

    # Pooled two-proportion z-test (an assumed, illustrative choice of drift
    # check; the 2-sigma threshold is a convention, not a given).
    p_pooled = (
        (p_pos_label_training_data * training_data_size
         + p_pos_label_sample_logs * sample_logs_size)
        / (training_data_size + sample_logs_size)
    )
    std_error = (
        p_pooled * (1 - p_pooled)
        * (1 / training_data_size + 1 / sample_logs_size)
    ) ** 0.5
    z_score = (p_pos_label_sample_logs - p_pos_label_training_data) / std_error

    return abs(z_score) > 2


with gr.Blocks() as demo:
    gr.Markdown("# Real-time Monitoring Dashboard")
    gr.Markdown("Snapshot of live data")

    with gr.Row():
        with gr.Column():
            # Auto-refresh the logged predictions every 5 seconds.
            gr.DataFrame(get_data, every=5)
        with gr.Column():
            # Wrap the timestamp in a lambda so it is re-evaluated on each
            # refresh instead of being fixed at page load.
            gr.Textbox(lambda: f"Data refreshed at {time.time()}", every=5)

demo.queue().launch()
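
# Note (illustrative, not part of the original script): check_model_drift()
# is defined above but never surfaced on the dashboard. One way to show it
# would be another auto-refreshing component inside the `gr.Blocks` context,
# for example:
#
#     with gr.Row():
#         gr.Textbox(lambda: f"Drift detected: {check_model_drift()}",
#                    label="Model drift", every=30)
#
# Component-level `every=` polling requires the queue, which the
# `demo.queue()` call above already enables.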