import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.inspection import DecisionBoundaryDisplay
import gradio as gr
import matplotlib

# Use a non-interactive backend so figures can be rendered server-side.
matplotlib.use("agg")


def create_dataset(num_samples):
    # Make a 3-class dataset for classification.
    # Gradio sliders may deliver floats, so coerce to int for make_blobs.
    centers = [[-5, 0], [0, 1.5], [5, -1]]
    X, y = make_blobs(n_samples=int(num_samples), centers=centers, random_state=42)
    # Skew the blobs with a linear transformation so they are anisotropic.
    transformation = [[0.4, 0.2], [-0.4, 1.2]]
    X = np.dot(X, transformation)
    return X, y


def train_plot(multi_class, num_samples):
    X, y = create_dataset(num_samples)
    # Note: the `multi_class` parameter is deprecated in recent
    # scikit-learn releases (1.5+); this demo targets earlier versions.
    clf = LogisticRegression(
        solver="sag", max_iter=100, random_state=42, multi_class=multi_class
    ).fit(X, y)

    fig, ax = plt.subplots()
    DecisionBoundaryDisplay.from_estimator(
        clf, X, response_method="predict", cmap=plt.cm.Paired, ax=ax
    )
    plt.title("Decision surface of LogisticRegression (%s)" % multi_class)
    plt.axis("tight")

    # Plot the training points, one color per class.
    colors = "bry"
    for i, color in zip(clf.classes_, colors):
        idx = np.where(y == i)
        plt.scatter(X[idx, 0], X[idx, 1], c=color, edgecolor="black", s=20)

    # Plot the three one-against-all hyperplanes as dashed lines.
    xmin, xmax = plt.xlim()
    coef = clf.coef_
    intercept = clf.intercept_

    def plot_hyperplane(c, color):
        # Solve coef[c, 0] * x0 + coef[c, 1] * x1 + intercept[c] = 0 for x1.
        def line(x0):
            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]

        plt.plot([xmin, xmax], [line(xmin), line(xmax)], ls="--", color=color)

    for i, color in zip(clf.classes_, colors):
        plot_hyperplane(i, color)

    plt.xlabel("x")
    plt.ylabel("y")
    return fig, clf.score(X, y)


def plot_both(num_samples):
    fig1, score1 = train_plot("multinomial", num_samples)
    fig2, score2 = train_plot("ovr", num_samples)
    return fig1, fig2, score1, score2


title = "Plot multinomial and One-vs-Rest Logistic Regression"
description = """
This demo shows the difference between multinomial and One-vs-Rest logistic \
regression on a two-dimensional synthetic dataset.

The dataset is generated around three cluster centers to simulate three \
classes. Two logistic regression models are fit to the data: a multinomial \
and a one-vs-rest model. The figures show scatter plots of the data, the \
decision boundary of each logistic regression model, and the decision \
surfaces colored per class. The hyperplanes of the three One-vs-Rest (OVR) \
classifiers are drawn as dashed lines.

The mean accuracy of each classifier on the training data is shown \
underneath its plot.
"""

with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)
    num_samples = gr.Slider(
        minimum=500, maximum=2000, step=500, value=500, label="Number of samples"
    )
    with gr.Row():
        plot = gr.Plot()
        plot2 = gr.Plot()
    with gr.Row():
        score1 = gr.Textbox(label="Multinomial score")
        score2 = gr.Textbox(label="OVR score")
    num_samples.change(
        fn=plot_both, inputs=[num_samples], outputs=[plot, plot2, score1, score2]
    )
    # Render the initial figures on page load instead of leaving the plots
    # empty until the slider changes.
    demo.load(
        fn=plot_both, inputs=[num_samples], outputs=[plot, plot2, score1, score2]
    )

# `enable_queue` is a Gradio 3.x launch flag; on Gradio 4+ it was removed
# and the equivalent is calling `demo.queue()` before `demo.launch()`.
demo.launch(enable_queue=True)
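

# A minimal sketch, assuming scikit-learn 1.5+ where LogisticRegression's
# `multi_class` parameter is deprecated: the "ovr" variant can instead be
# reproduced with sklearn.multiclass.OneVsRestClassifier. The helper name
# `fit_ovr` is hypothetical and not called by the demo above; the per-class
# hyperplane coefficients then live on the fitted binary sub-estimators.
def fit_ovr(X, y):
    from sklearn.multiclass import OneVsRestClassifier

    # One binary LogisticRegression per class, trained one-against-all.
    ovr = OneVsRestClassifier(
        LogisticRegression(solver="sag", max_iter=100, random_state=42)
    ).fit(X, y)
    # Stack the per-class weights and intercepts into the same shapes that
    # clf.coef_ and clf.intercept_ have in train_plot above.
    coef = np.vstack([est.coef_ for est in ovr.estimators_])
    intercept = np.concatenate([est.intercept_ for est in ovr.estimators_])
    return ovr, coef, intercept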