# pgurazada1's picture
# Update app.py
# 0c5aeb8 verified
# raw
# history blame
# 2.16 kB
import time
import math
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr
from datasets import load_dataset
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
LOGS_DATASET_URI = 'pgurazada1/machine-failure-mlops-demo-logs'
def get_data():
    """
    Fetch the prediction logs from the HuggingFace dataset and return a
    random sample of 100 rows as a pandas dataframe.
    """
    logs = load_dataset(LOGS_DATASET_URI)
    logs_df = logs['train'].to_pandas()
    return logs_df.sample(100)
def load_training_data():
    """
    Download the machine-failure dataset from OpenML (data id 42890) and
    return the training split of features and target.

    Returns
    -------
    tuple[pd.DataFrame, pd.Series]
        (Xtrain, ytrain) — the 80% training portion of the data, split
        with the same random_state used at model-training time.
    """
    dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")
    data_df = dataset.data

    # Five numeric sensor readings followed by the categorical machine type.
    feature_columns = [
        'Air temperature [K]',
        'Process temperature [K]',
        'Rotational speed [rpm]',
        'Torque [Nm]',
        'Tool wear [min]',
        'Type',
    ]

    X = data_df[feature_columns]
    y = data_df['Machine failure']

    # The held-out test split is intentionally discarded here; only the
    # training portion is needed by callers.
    Xtrain, _Xtest, ytrain, _ytest = train_test_split(
        X, y,
        test_size=0.2,
        random_state=42
    )
    return Xtrain, ytrain
def check_model_drift():
    """
    Compare the positive-prediction rate in a fresh sample of logged
    predictions against the rate observed on the training data.

    Drift is flagged when the absolute difference between the two
    proportions exceeds two standard deviations of the training
    proportion (binomial variance / n approximation).

    Returns
    -------
    str
        Status message displayed on the dashboard.
    """
    sample_df = get_data()

    # Positive-class proportion and size of the original training data.
    p_pos_label_training_data = 0.03475
    training_data_size = 8000

    # Series.get(label, 0) covers a class being entirely absent from the
    # sample. The original code indexed counts[0] unguarded, which raised
    # an uncaught KeyError whenever no 0-predictions were logged, and used
    # a broad `except Exception` for the [1] lookup.
    counts = sample_df.prediction.value_counts()
    n_0 = counts.get(0, 0)
    n_1 = counts.get(1, 0)

    total = n_0 + n_1
    if total == 0:
        # No 0/1 predictions in the sample — nothing to compare against.
        return "No Model Drift!"

    p_pos_label_sample_logs = n_1 / total

    # Variance of the training proportion under a binomial model.
    variance = (p_pos_label_training_data * (1 - p_pos_label_training_data)) / training_data_size

    p_diff = abs(p_pos_label_training_data - p_pos_label_sample_logs)

    # Two-sigma rule: flag drift when the observed difference is unlikely
    # to be sampling noise.
    if p_diff > 2 * math.sqrt(variance):
        return "Model Drift Detected! Check logs!"
    else:
        return "No Model Drift!"
# Dashboard layout: a single textbox that re-runs check_model_drift on a
# 5-second interval and displays the returned status string.
with gr.Blocks() as demo:
    gr.Markdown("# Real-time Monitoring Dashboard")
    gr.Markdown("Model drift detection (every 5 seconds)")
    with gr.Row():
        with gr.Column():
            # Passing a callable as the value makes Gradio poll it on the
            # `every` interval and refresh the displayed text.
            gr.Textbox(check_model_drift, every=5, label="Model Drift Status")
# queue() is required for `every=`-driven updates to stream to the client.
demo.queue().launch()