Spaces: Running on CPU Upgrade
Commit · 6ea325b
1 Parent(s): cbfe453
Update app.py

app.py CHANGED
@@ -58,92 +58,16 @@ def fn_assert_true():
 
 
 
-def visualize_input_data():
+def visualize_input_data(choice):
     fig = plt.figure(1, facecolor="w", figsize=(5, 5))
-
-
-
-
-
-
-
-        linestyle="--",
-        label="Selected alpha",
-    )
-    plt.legend()
-    plt.ylabel("Information criterion")
-    plt.xlabel("Lasso model sequence")
-    _ = plt.title("Lasso model selection via AIC and BIC")
-
-
-    return fig
-
-title = "Lasso model selection via information criteria"
-
-import gradio as gr
-import matplotlib.pyplot as plt
-# from skops import hub_utils
-import time
-import pickle
-import numpy as np
-from sklearn.preprocessing import StandardScaler
-from sklearn.linear_model import LassoLarsIC
-from sklearn.pipeline import make_pipeline
-from sklearn.datasets import load_diabetes
-
-
-
-def load_dataset():
-    X, y = load_diabetes(return_X_y=True, as_frame=True)
-    return X, y
-
-
-def aic_pipeline(X, y):
-    lasso_lars_ic = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y)
-    return lasso_lars_ic
-
-
-def zou_et_al_criterion_rescaling(criterion, n_samples, noise_variance):
-    """Rescale the information criterion to follow the definition of Zou et al."""
-    return criterion - n_samples * np.log(2 * np.pi * noise_variance) - n_samples
+    if choice == "AIC criterion":
+        plt.plot(aic_criterion, color="tab:blue", marker="x", label="AIC criterion")
+    elif choice == "BIC criterion":
+        plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
+    else:
+        plt.plot(aic_criterion, color="tab:blue", marker="*", label="AIC criterion")
+        plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
 
-
-def zou_et_all_aic(lasso_lars_ic):
-    aic_criterion = zou_et_al_criterion_rescaling(
-        lasso_lars_ic[-1].criterion_,
-        n_samples,
-        lasso_lars_ic[-1].noise_variance_,
-    )
-
-    index_alpha_path_aic = np.flatnonzero(
-        lasso_lars_ic[-1].alphas_ == lasso_lars_ic[-1].alpha_
-    )[0]
-
-    return index_alpha_path_aic, aic_criterion
-
-def zou_et_all_bic(lasso_lars_ic):
-    lasso_lars_ic.set_params(lassolarsic__criterion="bic").fit(X, y)
-    bic_criterion = zou_et_al_criterion_rescaling(
-        lasso_lars_ic[-1].criterion_,
-        n_samples,
-        lasso_lars_ic[-1].noise_variance_,
-    )
-
-    index_alpha_path_bic = np.flatnonzero(
-        lasso_lars_ic[-1].alphas_ == lasso_lars_ic[-1].alpha_
-    )[0]
-
-    return index_alpha_path_bic, bic_criterion
-
-def fn_assert_true():
-    assert index_alpha_path_bic == index_alpha_path_aic
-
-
-
-def visualize_input_data():
-    fig = plt.figure(1, facecolor="w", figsize=(5, 5))
-    plt.plot(aic_criterion, color="tab:blue", marker="o", label="AIC criterion")
-    plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
     plt.vlines(
         index_alpha_path_bic,
         aic_criterion.min(),
@@ -162,29 +86,24 @@ def visualize_input_data():
 
 title = " Lasso model selection via information criteria"
 
-with gr.Blocks(title=title) as demo:
+with gr.Blocks(title=title, theme=gr.themes.Default(font=[gr.themes.GoogleFont("Oxygen"), "Arial", "sans-serif"])) as demo:
     gr.Markdown(f"# {title}")
     gr.Markdown(
         """
-    Probabilistic model selection using information criteria.
+    # Probabilistic model selection using information criteria
     This method is useful in statistics because it does not require a held-out test set (a cross-validation set).
-
     AIC and BIC are two ways of scoring a model based on its log-likelihood and its complexity.
-
-
-    that
-
-
-    Also, one of the drawbacks of these kinds of probabilistic models is that the same general statistic cannot be used across models.
-    Instead, a careful metric must be devised for each of the models separately.
-    The uncertainty of the model is not taken into account.
+    Note that the optimization to find alpha with LassoLarsIC relies on the AIC or BIC criteria computed in-sample,
+    thus on the training set directly. This approach differs from the cross-validation procedure.
+    Also, one of the drawbacks of these kinds of probabilistic models is that the same general statistic cannot be used across models. Instead, a careful metric must be devised
+    for each of the models separately. The uncertainty of the model is not taken into account.
         """
 
     )
 
 
 
-    gr.Markdown("
+    gr.Markdown(" **https://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_lars_ic.html#sphx-glr-auto-examples-linear-model-plot-lasso-lars-ic-py**")
 
     ##process
     X,y = load_dataset()
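The new description stresses that LassoLarsIC picks alpha from criteria computed on the training data itself, whereas cross-validation scores candidate alphas on held-out folds. A minimal standalone sketch of that contrast (not part of this commit; it reuses the same scikit-learn calls as app.py, with LassoCV brought in only for comparison):

# Sketch only: contrasts in-sample AIC/BIC selection with cross-validation.
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LassoCV, LassoLarsIC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_diabetes(return_X_y=True)

# AIC: alpha chosen from a criterion evaluated on the training set itself.
pipe = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y)
alpha_aic = pipe[-1].alpha_

# BIC: the same pipeline refit after swapping the criterion, mirroring the
# set_params(lassolarsic__criterion="bic") call in zou_et_all_bic.
pipe.set_params(lassolarsic__criterion="bic").fit(X, y)
alpha_bic = pipe[-1].alpha_

# Cross-validation: alpha chosen by mean squared error on held-out folds.
cv_pipe = make_pipeline(StandardScaler(), LassoCV(cv=5)).fit(X, y)
alpha_cv = cv_pipe[-1].alpha_

print(f"alpha via AIC: {alpha_aic:.4f}")
print(f"alpha via BIC: {alpha_bic:.4f}")
print(f"alpha via 5-fold CV: {alpha_cv:.4f}")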
@@ -197,14 +116,12 @@ with gr.Blocks(title=title) as demo:
     fn_assert_true()
 
     with gr.Tab("AIC BIC Criteria"):
-
-
-
-
-
-
-
-demo.launch()
+        radio = gr.Radio(
+            ["AIC criterion", "BIC criterion", "Both"], label="What model selection criteria would you choose?"
+        )
+        # btn = gr.Button(value="Plot AIC BIC Criteria w Regularization")
+        # btn.click(visualize_input_data, outputs=gr.Plot(label='AIC BIC Criteria'))
+        radio.change(fn=visualize_input_data, inputs=radio, outputs=gr.Plot(label='AIC BIC Criteria'))
 
 
 
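The new tab wires a gr.Radio straight into a gr.Plot via radio.change. A self-contained sketch of that pattern (toy data standing in for the app's AIC/BIC arrays; the names here are illustrative, not from the commit):

# Sketch only: the Radio -> Plot wiring used by the commit, on stand-in data.
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np

series = {"A": np.arange(10), "B": np.arange(10)[::-1]}

def plot_choice(choice):
    fig = plt.figure(facecolor="w", figsize=(5, 5))
    for name, values in series.items():
        if choice in (name, "Both"):
            plt.plot(values, marker="o", label=name)
    plt.legend()
    return fig

with gr.Blocks() as demo:
    radio = gr.Radio(["A", "B", "Both"], label="Which series?")
    # change() re-renders the plot whenever the selection changes, just as
    # radio.change(fn=visualize_input_data, ...) does in app.py.
    radio.change(fn=plot_choice, inputs=radio, outputs=gr.Plot())

demo.launch()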