import math

import gradio as gr

from datasets import load_dataset
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
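
# Gradio dashboard that periodically samples the prediction logs of a deployed
# machine-failure model (stored as a Hugging Face dataset) and flags drift in
# the rate of positive predictions relative to the training data.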

LOGS_DATASET_URI = 'pgurazada1/machine-failure-mlops-demo-logs'
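# The logged records are expected to include a 'prediction' column (0/1),
# which check_model_drift() below reads to compute the live positive rate.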


def get_data():
    """
    Connect to the Hugging Face dataset where the prediction logs are stored
    and pull a random sample of 100 logged records into a dataframe.
    """
    data = load_dataset(LOGS_DATASET_URI)
    sample_df = data['train'].to_pandas().sample(100)

    return sample_df
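
# Note: .sample(100) assumes the log dataset already holds at least 100 rows;
# with fewer rows pandas raises a ValueError unless replace=True is passed.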

def load_training_data():
    """
    Load the reference data (the AI4I 2020 predictive maintenance dataset,
    OpenML data_id 42890) and return the training split that serves as the
    baseline for the drift check.
    """
    dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")
    data_df = dataset.data

    target = 'Machine failure'
    numeric_features = [
        'Air temperature [K]',
        'Process temperature [K]',
        'Rotational speed [rpm]',
        'Torque [Nm]',
        'Tool wear [min]'
    ]

    categorical_features = ['Type']

    X = data_df[numeric_features + categorical_features]
    y = data_df[target]

    # Only the training split is needed here; the test split is discarded.
    Xtrain, _, ytrain, _ = train_test_split(
        X, y,
        test_size=0.2,
        random_state=42
    )

    return Xtrain, ytrain
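
# The baseline constants hard-coded in check_model_drift() below
# (p_pos_label_training_data = 0.03475, training_data_size = 8000) presumably
# correspond to the training split returned above; assuming the
# 'Machine failure' target is encoded as 0/1, they could be recomputed with:
#
#     Xtrain, ytrain = load_training_data()
#     training_data_size = len(ytrain)            # 80% of the 10,000-row dataset
#     p_pos_label_training_data = ytrain.mean()   # observed failure rate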

def check_model_drift():
    """
    Compare the positive-prediction rate in a fresh sample of logs against the
    failure rate observed in the training data, and flag drift when the gap
    exceeds two standard errors.
    """
    sample_df = get_data()

    # Baseline statistics from the training data
    p_pos_label_training_data = 0.03475
    training_data_size = 8000

    # Counts of negative (0) and positive (1) predictions in the sampled logs;
    # either class may be absent from a small sample.
    counts = sample_df.prediction.value_counts()
    n_0 = counts.get(0, 0)
    n_1 = counts.get(1, 0)

    p_pos_label_sample_logs = n_1 / (n_0 + n_1)

    # Two-standard-error threshold around the training failure rate
    variance = (p_pos_label_training_data * (1 - p_pos_label_training_data)) / training_data_size
    p_diff = abs(p_pos_label_training_data - p_pos_label_sample_logs)

    if p_diff > 2 * math.sqrt(variance):
        return "Model Drift Detected! Check logs!"
    else:
        return "No Model Drift!"

    
with gr.Blocks() as demo:
    gr.Markdown("# Real-time Monitoring Dashboard")

    gr.Markdown("Model drift detection (every 5 seconds)")
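
    # The Textbox below takes a callable as its value; with every=5 and the
    # queue enabled (demo.queue() at the bottom), Gradio re-runs
    # check_model_drift every 5 seconds and refreshes the displayed status.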
    
    with gr.Row():
        with gr.Column():
            gr.Textbox(check_model_drift, every=5, label="Model Drift Status")

demo.queue().launch()