import matplotlib

# Select the non-interactive 'agg' backend before pyplot is imported so figures
# are rendered off-screen; the Gradio server process has no display attached.
matplotlib.use('agg')

import matplotlib.pyplot as plt
import numpy as np

import gradio as gr

from sklearn.datasets import make_blobs
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression


def create_dataset(num_samples):
    # Three cluster centers, one per class.
    centers = [[-5, 0], [0, 1.5], [5, -1]]
    X, y = make_blobs(n_samples=num_samples, centers=centers, random_state=42)
    # Shear the blobs with an anisotropic linear map so they are not spherical.
    transformation = [[0.4, 0.2], [-0.4, 1.2]]
    X = np.dot(X, transformation)
    return X, y
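
# Illustrative note: create_dataset(500) yields X of shape (500, 2) and y of
# shape (500,) with labels in {0, 1, 2}.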


def train_plot(multi_class, num_samples):
    X, y = create_dataset(num_samples)
    # Fit a logistic regression; `multi_class` selects either the multinomial
    # (softmax) formulation or one-vs-rest.
    clf = LogisticRegression(
        solver="sag", max_iter=100, random_state=42, multi_class=multi_class
    ).fit(X, y)
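    # Note: `multi_class` is deprecated in recent scikit-learn releases (1.5+),
    # so this demo assumes an older version where "multinomial" and "ovr" are
    # still accepted here.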

    # Draw the decision surface as filled regions, one color per predicted class.
    fig, ax = plt.subplots()
    DecisionBoundaryDisplay.from_estimator(
        clf, X, response_method="predict", cmap=plt.cm.Paired, ax=ax
    )
    plt.title("Decision surface of LogisticRegression (%s)" % multi_class)
    plt.axis("tight")

    # Overlay the training points, colored by their true class.
    colors = "bry"
    for i, color in zip(clf.classes_, colors):
        idx = np.where(y == i)
        plt.scatter(
            X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired, edgecolor="black", s=20
        )

    xmin, xmax = plt.xlim()
    ymin, ymax = plt.ylim()
    coef = clf.coef_
    intercept = clf.intercept_
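
    # Each row of coef_ together with the matching intercept_ entry defines one
    # class's hyperplane:  coef[c, 0] * x0 + coef[c, 1] * x1 + intercept[c] = 0.
    # Solving for x1 gives the dashed line drawn by plot_hyperplane.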
    def plot_hyperplane(c, color):
        def line(x0):
            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]

        plt.plot([xmin, xmax], [line(xmin), line(xmax)], ls="--", color=color)

    for i, color in zip(clf.classes_, colors):
        plot_hyperplane(i, color)

    plt.xlabel("x")
    plt.ylabel("y")

    # Return the figure and the mean accuracy on the training data.
    return fig, clf.score(X, y)


def plot_both(num_samples):
    # Fit and plot both formulations on the same data for a side-by-side view.
    fig1, score1 = train_plot("multinomial", num_samples)
    fig2, score2 = train_plot("ovr", num_samples)
    return fig1, fig2, score1, score2


title = "Plot multinomial and One-vs-Rest Logistic Regression"
description = """
This demo shows the difference between multinomial and One-vs-Rest logistic regression on a \
two-dimensional synthetic dataset.

The dataset is generated around three cluster centers to simulate three classes. Two logistic \
regression models are fit to the synthetic data: a multinomial and a one-vs-rest logistic \
regression. Each figure shows a scatter plot of the data, the decision boundaries of the \
model, and the decision surface colored per predicted class. The hyperplanes corresponding \
to the three One-vs-Rest (OVR) classifiers are drawn as dashed lines.

The mean accuracy of each classifier on the training data is given underneath its plot.
"""

with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    num_samples = gr.Slider(
        minimum=500, maximum=2000, step=500, value=500, label="Number of samples"
    )

    with gr.Row():
        plot = gr.Plot()
        plot2 = gr.Plot()
    with gr.Row():
        score1 = gr.Textbox(label="Multinomial score")
        score2 = gr.Textbox(label="OVR score")

    # Retrain and redraw both models whenever the slider value changes.
    num_samples.change(
        fn=plot_both, inputs=[num_samples], outputs=[plot, plot2, score1, score2]
    )
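
    # Assumption about intended behavior: also run once on page load so the
    # plots are not empty before the slider is first moved.
    demo.load(fn=plot_both, inputs=[num_samples], outputs=[plot, plot2, score1, score2])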

# `enable_queue` is the Gradio 3.x launch flag; Gradio 4+ removed it in favor of
# calling demo.queue() before launch.
demo.launch(enable_queue=True)