# Spaces: Runtime error (hosting-page status captured with this file; not part of the program)
import math
import time

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from datasets import load_dataset
from sklearn.datasets import fetch_openml
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
# Identifier of the dataset that get_data() passes to load_dataset() to
# fetch the prediction logs.
LOGS_DATASET_URI = 'pgurazada1/machine-failure-mlops-demo-logs'
def get_data():
    """
    Connect to the HuggingFace dataset where the logs are stored.
    Pull the data into a dataframe and return a random sample of it.

    Returns:
        pandas.DataFrame: up to 100 randomly sampled rows from the 'train'
        split of the logs dataset (fewer when the split holds fewer rows).
    """
    data = load_dataset(LOGS_DATASET_URI)
    logs_df = data['train'].to_pandas()

    # DataFrame.sample raises ValueError when asked for more rows than
    # exist (sampling without replacement), so cap the sample size.
    sample_size = min(100, len(logs_df))
    sample_df = logs_df.sample(sample_size)

    return sample_df
def load_training_data():
    """
    Fetch the OpenML dataset with id 42890 and return its training split.

    The selected feature columns and the 'Machine failure' column are split
    80/20 with a fixed random seed; only the training portion is returned.

    Returns:
        tuple: (Xtrain, ytrain) - the training features and training target.
    """
    dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")
    data_df = dataset.data

    target = 'Machine failure'
    numeric_features = [
        'Air temperature [K]',
        'Process temperature [K]',
        'Rotational speed [rpm]',
        'Torque [Nm]',
        'Tool wear [min]',
    ]
    categorical_features = ['Type']

    X = data_df[numeric_features + categorical_features]
    y = data_df[target]

    # The held-out 20% is not needed here, so those two results are discarded.
    Xtrain, _, ytrain, _ = train_test_split(
        X, y,
        test_size=0.2,
        random_state=42
    )

    return Xtrain, ytrain
def check_model_drift():
    """
    Check whether the rate of positive predictions in the logs has drifted
    away from the positive-label rate of the training data.

    A sample of logged predictions is pulled via get_data(). Drift is
    reported when the absolute difference between the sample's positive
    rate and the training positive rate exceeds two standard deviations of
    the training proportion.

    Returns:
        str: the status message, which is also printed. Returning it lets a
        caller that displays the function's result (such as a UI component)
        show the outcome instead of None.
    """
    sample_df = get_data()

    p_pos_label_training_data = 0.03475
    training_data_size = 8000

    # Count each predicted class once. A class may be missing from a small
    # sample, so fall back to 0 rather than raising KeyError.
    # NOTE(review): assumes predictions are logged as integers 0/1 - confirm.
    prediction_counts = sample_df.prediction.value_counts()
    p_0 = prediction_counts.get(0, 0)
    p_1 = prediction_counts.get(1, 0)
    n_predictions = p_0 + p_1

    if n_predictions == 0:
        # Nothing to compare against; avoid dividing by zero.
        message = "No predictions available to check for drift."
        print(message)
        return message

    p_pos_label_sample_logs = p_1 / n_predictions

    # Variance of a proportion: p * (1 - p) / n, using the training data.
    variance = (
        p_pos_label_training_data * (1 - p_pos_label_training_data)
    ) / training_data_size
    p_diff = abs(p_pos_label_training_data - p_pos_label_sample_logs)

    if p_diff > 2 * math.sqrt(variance):
        message = "Model Drift Detected!"
    else:
        message = "No Model Drift!"

    print(message)
    return message
# Dashboard layout: a title, a caption, and a single textbox holding the
# drift status.
with gr.Blocks() as demo:
    gr.Markdown("# Real-time Monitoring Dashboard")
    gr.Markdown("Snapshot of live data")

    with gr.Row():
        with gr.Column():
            # The function itself (not its result) is passed as the value,
            # together with every=5, so the textbox content is recomputed
            # every 5 seconds rather than once at build time.
            gr.Textbox(check_model_drift, every=5)

# Enable the queue before launching the app.
demo.queue().launch()