# Hugging Face Space file-viewer header (preserved as comments so the file runs):
# author: marik0 — commit 2beae27 "Add details to description and plot"
# raw / history / blame — 3.45 kB
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.inspection import DecisionBoundaryDisplay
import gradio as gr
import matplotlib
matplotlib.use('agg')
def create_dataset(num_samples):
    """Generate a 2-D synthetic dataset with three classes.

    Three Gaussian blobs are drawn around fixed centers and then sheared
    with a constant linear transform so the clusters are anisotropic.

    Parameters
    ----------
    num_samples : int
        Total number of points to generate across the three classes.

    Returns
    -------
    (features, labels) : tuple of ndarray
        ``features`` has shape (num_samples, 2); ``labels`` holds the
        class index (0, 1 or 2) of each point.
    """
    cluster_centers = [[-5, 0], [0, 1.5], [5, -1]]
    features, labels = make_blobs(
        n_samples=num_samples, centers=cluster_centers, random_state=42
    )
    # Fixed shear so the decision boundaries are not axis-aligned.
    shear = [[0.4, 0.2], [-0.4, 1.2]]
    features = features @ np.asarray(shear)
    return features, labels
def train_plot(multi_class, num_samples):
    """Fit a LogisticRegression and plot its decision surface.

    Parameters
    ----------
    multi_class : str
        Either ``"multinomial"`` or ``"ovr"``; forwarded to
        :class:`~sklearn.linear_model.LogisticRegression`.
    num_samples : int
        Number of synthetic samples to generate via ``create_dataset``.

    Returns
    -------
    (fig, score) : tuple
        The matplotlib figure with the decision surface, scatter points and
        per-class separating lines, and the classifier's mean accuracy on
        the training data.
    """
    X, y = create_dataset(num_samples)
    # NOTE(review): the multi_class kwarg is deprecated in scikit-learn >=1.5
    # (removed in 1.7) — confirm the pinned sklearn version before upgrading.
    clf = LogisticRegression(
        solver="sag", max_iter=100, random_state=42, multi_class=multi_class
    ).fit(X, y)

    fig, ax = plt.subplots()
    DecisionBoundaryDisplay.from_estimator(
        clf, X, response_method="predict", cmap=plt.cm.Paired, ax=ax
    )
    ax.set_title("Decision surface of LogisticRegression (%s)" % multi_class)
    ax.axis("tight")

    # Scatter the training points, one color per class.
    colors = "bry"
    for i, color in zip(clf.classes_, colors):
        idx = np.where(y == i)
        ax.scatter(
            X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired, edgecolor="black", s=20
        )

    # Plot the three one-against-all separating lines across the current x-range.
    xmin, xmax = ax.get_xlim()
    coef = clf.coef_
    intercept = clf.intercept_

    def plot_hyperplane(c, color):
        # Solve coef[c, 0] * x + coef[c, 1] * y + intercept[c] = 0 for y.
        def line(x0):
            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]

        ax.plot([xmin, xmax], [line(xmin), line(xmax)], ls="--", color=color)

    for i, color in zip(clf.classes_, colors):
        plot_hyperplane(i, color)

    ax.set_xlabel("x")
    ax.set_ylabel("y")
    # Remove the figure from pyplot's global registry so repeated Gradio
    # callbacks do not accumulate open figures; the fig object itself stays
    # valid and is rendered by gr.Plot.
    plt.close(fig)
    return fig, clf.score(X, y)
def plot_both(num_samples):
    """Train both logistic-regression variants on the same-sized dataset.

    Returns the multinomial figure, the one-vs-rest figure, and their
    respective training accuracies, in that order (matching the Gradio
    ``outputs`` wiring).
    """
    results = [train_plot(kind, num_samples) for kind in ("multinomial", "ovr")]
    (fig_multi, acc_multi), (fig_ovr, acc_ovr) = results
    return fig_multi, fig_ovr, acc_multi, acc_ovr
# Title and Markdown description rendered at the top of the Gradio UI.
title = "Plot multinomial and One-vs-Rest Logistic Regression"
description = """
The demo shows the difference between multinomial and One-vs-Rest Logistic Regression in a \
two-dimensional synthetic dataset.
The dataset is generated around three cluster centers to simulate three different classes. \
Two different types of logistic regression models are fit to the synthetic data: a multinomial \
and a one-vs-rest logistic regression. The figures show scatter plots of the data, the decision \
boundaries of each logistic regression model and the decision surfaces in different colors per respective class. \
The hyperplanes corresponding to the three One-vs-Rest (OVR) classifiers are represented by the \
dashed lines. \
The mean accuracy of the training data and labels for each classifier is given underneath each respective plot.
"""
# Assemble the Gradio UI: a sample-count slider, two side-by-side decision
# surface plots (multinomial vs. OVR), and their training accuracies.
with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)
    num_samples = gr.Slider(
        minimum=500, maximum=2000, step=500, value=500, label="Number of samples"
    )
    with gr.Row():
        plot = gr.Plot()
        plot2 = gr.Plot()
    with gr.Row():
        score1 = gr.Textbox(label="Multinomial score")
        score2 = gr.Textbox(label="OVR score")
    # Re-train and re-draw whenever the slider value changes.
    num_samples.change(
        fn=plot_both, inputs=[num_samples], outputs=[plot, plot2, score1, score2]
    )
    # Populate the plots once on page load; without this the plots and scores
    # stay empty until the user first moves the slider.
    demo.load(
        fn=plot_both, inputs=[num_samples], outputs=[plot, plot2, score1, score2]
    )

# NOTE(review): enable_queue was removed in Gradio 4.x in favor of
# demo.queue().launch() — confirm the pinned Gradio version before upgrading.
demo.launch(enable_queue=True)