import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoLarsIC
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_diabetes


def load_dataset():
    X, y = load_diabetes(return_X_y=True, as_frame=True)
    return X, y


def aic_pipeline(X, y):
    lasso_lars_ic = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y)
    return lasso_lars_ic


def zou_et_al_criterion_rescaling(criterion, n_samples, noise_variance):
    """Rescale the information criterion to follow the definition of Zou et al.

    The subtracted terms are constant across all models on the path, so the
    location of the minimum (the selected alpha) is unchanged.
    """
    return criterion - n_samples * np.log(2 * np.pi * noise_variance) - n_samples


def zou_et_al_aic(lasso_lars_ic, n_samples):
    aic_criterion = zou_et_al_criterion_rescaling(
        lasso_lars_ic[-1].criterion_,
        n_samples,
        lasso_lars_ic[-1].noise_variance_,
    )
    index_alpha_path_aic = np.flatnonzero(
        lasso_lars_ic[-1].alphas_ == lasso_lars_ic[-1].alpha_
    )[0]
    return index_alpha_path_aic, aic_criterion


def zou_et_al_bic(lasso_lars_ic, X, y, n_samples):
    # Refit the same pipeline with the BIC criterion before reading the path.
    lasso_lars_ic.set_params(lassolarsic__criterion="bic").fit(X, y)
    bic_criterion = zou_et_al_criterion_rescaling(
        lasso_lars_ic[-1].criterion_,
        n_samples,
        lasso_lars_ic[-1].noise_variance_,
    )
    index_alpha_path_bic = np.flatnonzero(
        lasso_lars_ic[-1].alphas_ == lasso_lars_ic[-1].alpha_
    )[0]
    return index_alpha_path_bic, bic_criterion


def fn_assert_true(index_alpha_path_aic, index_alpha_path_bic):
    # On this dataset, AIC and BIC select the same alpha.
    assert index_alpha_path_bic == index_alpha_path_aic


def visualize_input_data(aic_criterion, bic_criterion, index_alpha_path_bic):
    fig = plt.figure(1, facecolor="w", figsize=(5, 5))
    plt.plot(aic_criterion, color="tab:blue", marker="o", label="AIC criterion")
    plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
    plt.vlines(
        index_alpha_path_bic,
        aic_criterion.min(),
        aic_criterion.max(),
        color="black",
        linestyle="--",
        label="Selected alpha",
    )
    plt.legend()
    plt.ylabel("Information criterion")
    plt.xlabel("Lasso model sequence")
    _ = plt.title("Lasso model selection via AIC and BIC")
    return fig
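# A minimal usage sketch of the helpers above, outside the Gradio UI (kept as
# comments so it does not run on import; it mirrors the steps the app performs):
#
#   X, y = load_dataset()
#   pipe = aic_pipeline(X, y)
#   n_samples = X.shape[0]
#   idx_aic, aic = zou_et_al_aic(pipe, n_samples)
#   idx_bic, bic = zou_et_al_bic(pipe, X, y, n_samples)
#   idx_aic == idx_bic  # both criteria pick the same alpha here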
title = "Lasso model selection via information criteria"

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        """
Probabilistic model selection using information criteria. These methods are
useful in statistics because they do not require a held-out test set, unlike
cross-validation. AIC and BIC are two ways of scoring a model based on its
log-likelihood and its complexity.

Note that the optimization to find alpha with LassoLarsIC relies on AIC or BIC
criteria computed in-sample, i.e. directly on the training set. This approach
differs from the cross-validation procedure.

One drawback of these probabilistic criteria is that the same general
statistic cannot be used across different kinds of models; a suitable metric
must be devised for each model class separately. They also do not take the
uncertainty of the model into account.
"""
    )
    gr.Markdown("See the original example [here](https://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_lars_ic.html#sphx-glr-auto-examples-linear-model-plot-lasso-lars-ic-py).")

    # Fit the pipeline and compute the rescaled AIC/BIC paths.
    X, y = load_dataset()
    lasso_lars_ic = aic_pipeline(X, y)
    n_samples = X.shape[0]

    index_alpha_path_aic, aic_criterion = zou_et_al_aic(lasso_lars_ic, n_samples)
    index_alpha_path_bic, bic_criterion = zou_et_al_bic(lasso_lars_ic, X, y, n_samples)

    fn_assert_true(index_alpha_path_aic, index_alpha_path_bic)

    with gr.Tab("AIC BIC Criteria"):
        btn = gr.Button(value="Plot AIC BIC Criteria w Regularization")
        btn.click(
            lambda: visualize_input_data(aic_criterion, bic_criterion, index_alpha_path_bic),
            outputs=gr.Plot(label="AIC BIC Criteria"),
        )

demo.launch()
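# For contrast with the in-sample AIC/BIC selection above, alpha can also be
# chosen by cross-validation. A sketch using scikit-learn's LassoCV (kept as
# comments so it does not run alongside the app):
#
#   from sklearn.linear_model import LassoCV
#   lasso_cv = make_pipeline(StandardScaler(), LassoCV(cv=20)).fit(X, y)
#   print(lasso_cv[-1].alpha_)  # alpha selected by 20-fold cross-validation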