import matplotlib

matplotlib.use("agg")  # non-interactive backend: figures are rendered off-screen for Gradio

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression

import gradio as gr

def create_dataset(num_samples):
    # Make a 3-class dataset for classification: three Gaussian blobs around
    # fixed centers, then a linear transformation to make them anisotropic.
    centers = [[-5, 0], [0, 1.5], [5, -1]]
    X, y = make_blobs(n_samples=num_samples, centers=centers, random_state=42)
    transformation = [[0.4, 0.2], [-0.4, 1.2]]
    X = np.dot(X, transformation)

    return X, y
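
# Example (a sketch, not part of the app): create_dataset(500) returns X with
# shape (500, 2) and y with integer labels in {0, 1, 2}.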

def train_plot(multi_class, num_samples):
    X, y = create_dataset(num_samples)

    # Fit a logistic regression model with the requested multi-class strategy
    # ("multinomial" or "ovr").
    clf = LogisticRegression(
        solver="sag", max_iter=100, random_state=42, multi_class=multi_class
    ).fit(X, y)

    # Draw the decision surface: the predicted class at each point of a grid.
    fig, ax = plt.subplots()
    DecisionBoundaryDisplay.from_estimator(
        clf, X, response_method="predict", cmap=plt.cm.Paired, ax=ax
    )
    plt.title(f"Decision surface of LogisticRegression ({multi_class})")
    plt.axis("tight")

    # Scatter the training points, one color per class. (cmap is dropped here:
    # matplotlib ignores it, with a warning, when c is a single named color.)
    colors = "bry"
    for i, color in zip(clf.classes_, colors):
        idx = np.where(y == i)
        plt.scatter(X[idx, 0], X[idx, 1], c=color, edgecolor="black", s=20)

    # Plot the three one-vs-rest hyperplanes as dashed lines. The hyperplane
    # for class c is the set of points where
    #   coef[c, 0] * x0 + coef[c, 1] * x1 + intercept[c] == 0;
    # solving for x1 gives the line() helper below.
    xmin, xmax = plt.xlim()
    coef = clf.coef_
    intercept = clf.intercept_

    def plot_hyperplane(c, color):
        def line(x0):
            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]

        plt.plot([xmin, xmax], [line(xmin), line(xmax)], ls="--", color=color)

    for i, color in zip(clf.classes_, colors):
        plot_hyperplane(i, color)

    plt.xlabel("x")
    plt.ylabel("y")

    # clf.score returns the mean accuracy on the training data.
    return fig, clf.score(X, y)

def plot_both(num_samples):
    # Train and plot both variants on the same data so they can be compared
    # side by side.
    fig1, score1 = train_plot("multinomial", num_samples)
    fig2, score2 = train_plot("ovr", num_samples)

    return fig1, fig2, score1, score2

title = "Plot multinomial and One-vs-Rest Logistic Regression"
description = """
            This demo shows the difference between multinomial and One-vs-Rest (OvR) \
            logistic regression on a two-dimensional synthetic dataset.

            The dataset is generated around three cluster centers to simulate three classes. \
            Two logistic regression models are fit to the data: a multinomial model and a \
            one-vs-rest model. Each figure shows a scatter plot of the data over the model's \
            decision surface, colored by predicted class; the hyperplanes of the three \
            one-vs-rest classifiers are drawn as dashed lines.

            The mean accuracy of each classifier on the training data is shown underneath its plot.
            """
with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    num_samples = gr.Slider(
        minimum=500, maximum=2000, step=500, value=500, label="Number of samples"
    )

    with gr.Row():
        plot = gr.Plot()
        plot2 = gr.Plot()
    with gr.Row():
        score1 = gr.Textbox(label="Multinomial score")
        score2 = gr.Textbox(label="OVR score")

    # Re-train and re-plot whenever the slider changes, and render the initial
    # state on page load so the plots are not empty at startup.
    num_samples.change(fn=plot_both, inputs=[num_samples], outputs=[plot, plot2, score1, score2])
    demo.load(fn=plot_both, inputs=[num_samples], outputs=[plot, plot2, score1, score2])

# enable_queue is a Gradio 3.x launch argument; on Gradio 4+ use
# demo.queue().launch() instead.
demo.launch(enable_queue=True)
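
# To run locally (a sketch; "app.py" below is a placeholder for whatever this
# script is saved as):
#     pip install gradio scikit-learn matplotlib
#     python app.py
# Gradio then prints a local URL to open in the browser.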