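"""Real-time model-monitoring dashboard.

Samples recent prediction logs from a Hugging Face dataset and periodically
checks whether the share of positive (machine-failure) predictions has drifted
away from the rate observed in the training data. The status is displayed in a
Gradio dashboard that refreshes every few seconds.
"""
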
import math

import gradio as gr
from datasets import load_dataset
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

LOGS_DATASET_URI = 'pgurazada1/machine-failure-mlops-demo-logs'


def get_data():
    """
    Connect to the Hugging Face dataset where the prediction logs are stored
    and pull a random sample of 100 log records into a dataframe.
    """
    data = load_dataset(LOGS_DATASET_URI)
    sample_df = data['train'].to_pandas().sample(100)

    return sample_df


def load_training_data():
    """
    Fetch the machine-failure training data from OpenML (data_id 42890) and
    return the training split of features and target.
    """
    dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")
    data_df = dataset.data

    target = 'Machine failure'
    numeric_features = [
        'Air temperature [K]',
        'Process temperature [K]',
        'Rotational speed [rpm]',
        'Torque [Nm]',
        'Tool wear [min]'
    ]
    categorical_features = ['Type']

    X = data_df[numeric_features + categorical_features]
    y = data_df[target]

    Xtrain, Xtest, ytrain, ytest = train_test_split(
        X, y,
        test_size=0.2,
        random_state=42
    )

    return Xtrain, ytrain
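

# Drift check: the training data has a positive ('Machine failure') rate of
# roughly 3.475% across 8,000 rows. Treating that rate as a binomial proportion,
# its variance is p * (1 - p) / n. check_model_drift() flags drift whenever the
# positive rate in a fresh sample of prediction logs deviates from the training
# rate by more than two standard deviations (2 * sqrt(variance)).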
def check_model_drift():
    """Return a drift status message based on a fresh sample of prediction logs."""
    sample_df = get_data()

    # Positive-class rate observed in the training data
    p_pos_label_training_data = 0.03475
    training_data_size = 8000

    # Use .get so a sample containing only one predicted class does not raise a KeyError
    prediction_counts = sample_df.prediction.value_counts()
    p_0 = prediction_counts.get(0, 0)
    p_1 = prediction_counts.get(1, 0)
    p_pos_label_sample_logs = p_1 / (p_0 + p_1)

    variance = (p_pos_label_training_data * (1 - p_pos_label_training_data)) / training_data_size
    p_diff = abs(p_pos_label_training_data - p_pos_label_sample_logs)

    # Return (rather than print) the status so the Gradio Textbox can display it
    if p_diff > 2 * math.sqrt(variance):
        return "Model Drift Detected!"
    else:
        return "No Model Drift!"


with gr.Blocks() as demo:
    gr.Markdown("# Real-time Monitoring Dashboard")
    gr.Markdown("Snapshot of live data")

    with gr.Row():
        with gr.Column():
            # A callable value with `every=5` re-runs check_model_drift every 5 seconds
            gr.Textbox(check_model_drift, every=5)

demo.queue().launch()