Jayabalambika committed
Commit 6ea325b · 1 Parent(s): cbfe453

Update app.py

Files changed (1):
  1. app.py +21 -104
app.py CHANGED
@@ -58,92 +58,16 @@ def fn_assert_true():
 
 
 
-def visualize_input_data():
+def visualize_input_data(choice):
     fig = plt.figure(1, facecolor="w", figsize=(5, 5))
-    plt.plot(aic_criterion, color="tab:blue", marker="o", label="AIC criterion")
-    plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
-    plt.vlines(
-        index_alpha_path_bic,
-        aic_criterion.min(),
-        aic_criterion.max(),
-        color="black",
-        linestyle="--",
-        label="Selected alpha",
-    )
-    plt.legend()
-    plt.ylabel("Information criterion")
-    plt.xlabel("Lasso model sequence")
-    _ = plt.title("Lasso model selection via AIC and BIC")
-
-    return fig
-
-
-title = "Lasso model selection via information criteria"
-
-import gradio as gr
-import matplotlib.pyplot as plt
-# from skops import hub_utils
-import time
-import pickle
-import numpy as np
-from sklearn.preprocessing import StandardScaler
-from sklearn.linear_model import LassoLarsIC
-from sklearn.pipeline import make_pipeline
-from sklearn.datasets import load_diabetes
-
-
-def load_dataset():
-    X, y = load_diabetes(return_X_y=True, as_frame=True)
-    return X, y
-
-
-def aic_pipeline(X, y):
-    lasso_lars_ic = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y)
-    return lasso_lars_ic
-
-
-def zou_et_al_criterion_rescaling(criterion, n_samples, noise_variance):
-    """Rescale the information criterion to follow the definition of Zou et al."""
-    return criterion - n_samples * np.log(2 * np.pi * noise_variance) - n_samples
-
-
-def zou_et_all_aic(lasso_lars_ic):
-    aic_criterion = zou_et_al_criterion_rescaling(
-        lasso_lars_ic[-1].criterion_,
-        n_samples,
-        lasso_lars_ic[-1].noise_variance_,
-    )
-    index_alpha_path_aic = np.flatnonzero(
-        lasso_lars_ic[-1].alphas_ == lasso_lars_ic[-1].alpha_
-    )[0]
-    return index_alpha_path_aic, aic_criterion
-
-
-def zou_et_all_bic(lasso_lars_ic):
-    lasso_lars_ic.set_params(lassolarsic__criterion="bic").fit(X, y)
-    bic_criterion = zou_et_al_criterion_rescaling(
-        lasso_lars_ic[-1].criterion_,
-        n_samples,
-        lasso_lars_ic[-1].noise_variance_,
-    )
-    index_alpha_path_bic = np.flatnonzero(
-        lasso_lars_ic[-1].alphas_ == lasso_lars_ic[-1].alpha_
-    )[0]
-    return index_alpha_path_bic, bic_criterion
-
-
-def fn_assert_true():
-    assert index_alpha_path_bic == index_alpha_path_aic
-
-
-def visualize_input_data():
-    fig = plt.figure(1, facecolor="w", figsize=(5, 5))
-    plt.plot(aic_criterion, color="tab:blue", marker="o", label="AIC criterion")
-    plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
+    if choice == "AIC criterion":
+        plt.plot(aic_criterion, color="tab:blue", marker="x", label="AIC criterion")
+    elif choice == "BIC criterion":
+        plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
+    else:
+        plt.plot(aic_criterion, color="tab:blue", marker="*", label="AIC criterion")
+        plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
+
     plt.vlines(
         index_alpha_path_bic,
         aic_criterion.min(),
@@ -162,29 +86,24 @@ def visualize_input_data():
 
 title = "Lasso model selection via information criteria"
 
-with gr.Blocks(title=title) as demo:
+with gr.Blocks(title=title, theme=gr.themes.Default(font=[gr.themes.GoogleFont("Oxygen"), "Arial", "sans-serif"])) as demo:
     gr.Markdown(f"# {title}")
     gr.Markdown(
         """
-        Probabilistic model selection using Information Criterion.
+        # Probabilistic model selection using information criteria
         This method is useful in statistics because it does not require a held-out test set (a cross-validation set).
-
         AIC and BIC are two ways of scoring a model based on its log-likelihood and its complexity.
-
-        It is important to note that the optimization to find alpha with LassoLarsIC relies on the AIC or BIC criteria
-        that are computed in-sample, thus on the training set directly.
-        This approach differs from the cross-validation procedure.
-
-        Also, one of the drawbacks of these kinds of probabilistic models is that the same general statistic cannot be used across models.
-        Instead, a careful metric must be devised for each of the models separately.
-        The uncertainty of the model is not taken into account.
+        It is important to note that the optimization used to find alpha with LassoLarsIC relies on
+        the AIC or BIC criteria computed in-sample, that is, directly on the training set. This
+        approach differs from the cross-validation procedure.
+        One drawback of these probabilistic models is that the same general statistic cannot be used
+        across models; a suitable metric must be devised for each model separately, and the
+        uncertainty of the model is not taken into account.
        """
    )
 
-    gr.Markdown("See the original example [here](https://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_lars_ic.html#sphx-glr-auto-examples-linear-model-plot-lasso-lars-ic-py).")
+    gr.Markdown("See the original example: **https://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_lars_ic.html#sphx-glr-auto-examples-linear-model-plot-lasso-lars-ic-py**")
 
     ## process
     X, y = load_dataset()
@@ -197,14 +116,12 @@ with gr.Blocks(title=title) as demo:
     fn_assert_true()
 
     with gr.Tab("AIC BIC Criteria"):
-        btn = gr.Button(value="Plot AIC BIC Criteria w Regularization")
-        btn.click(visualize_input_data, outputs=gr.Plot(label="AIC BIC Criteria"))
-
-
-    demo.launch()
+        radio = gr.Radio(
+            ["AIC criterion", "BIC criterion", "Both"],
+            label="Which model selection criterion would you choose?",
+        )
+        # btn = gr.Button(value="Plot AIC BIC Criteria w Regularization")
+        # btn.click(visualize_input_data, outputs=gr.Plot(label="AIC BIC Criteria"))
+        radio.change(fn=visualize_input_data, inputs=radio, outputs=gr.Plot(label="AIC BIC Criteria"))
 
 
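The removed block is, in effect, a duplicated copy of definitions that survive earlier in app.py: the context lines kept by the hunks still call load_dataset(), aic_pipeline(), zou_et_all_aic/zou_et_all_bic(), and fn_assert_true(), all of which read X, y, and n_samples from module scope. For reference, a minimal standalone sketch of the selection logic those helpers wrap, following the scikit-learn example the app links to (variable names mirror app.py; the noise_variance_ attribute of LassoLarsIC requires scikit-learn 1.1 or later):

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LassoLarsIC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Diabetes data, as in load_dataset().
X, y = load_diabetes(return_X_y=True, as_frame=True)
n_samples = X.shape[0]

# Fit with the AIC criterion, as in aic_pipeline(); LassoLarsIC scores every
# alpha on the LARS path in-sample, so no held-out set is needed.
pipe = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y)
lasso = pipe[-1]

# Rescale to the definition of Zou et al., as zou_et_al_criterion_rescaling() does.
aic_criterion = (
    lasso.criterion_ - n_samples * np.log(2 * np.pi * lasso.noise_variance_) - n_samples
)
index_alpha_path_aic = np.flatnonzero(lasso.alphas_ == lasso.alpha_)[0]

# Refit with BIC by swapping the criterion on the same pipeline step,
# mirroring zou_et_all_bic().
pipe.set_params(lassolarsic__criterion="bic").fit(X, y)
bic_criterion = (
    lasso.criterion_ - n_samples * np.log(2 * np.pi * lasso.noise_variance_) - n_samples
)
index_alpha_path_bic = np.flatnonzero(lasso.alphas_ == lasso.alpha_)[0]

# On this dataset both criteria select the same alpha, which is exactly
# what fn_assert_true() checks.
assert index_alpha_path_aic == index_alpha_path_bic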
 
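One detail worth keeping in mind when reading the criterion plots: the Zou et al. rescaling only shifts the curves by a constant. In symbols, with n samples and estimated noise variance:

\text{crit}_{\text{Zou}}(\alpha) = \text{crit}_{\text{sklearn}}(\alpha) - n \log\!\left(2\pi\hat{\sigma}^{2}\right) - n

Since the subtracted term does not depend on alpha, the argmin along the path, and hence the selected index that fn_assert_true() compares, is unchanged by the rescaling.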
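On the UI side, the commit replaces the one-shot button with a gr.Radio whose change event redraws the plot. A self-contained sketch of that wiring, using placeholder data and a hypothetical make_plot callback standing in for the app's visualize_input_data(choice):

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np

# Placeholder criterion curves; the app computes these from LassoLarsIC.
aic_criterion = np.linspace(2300, 2200, 20)
bic_criterion = np.linspace(2310, 2210, 20)

def make_plot(choice):
    # Hypothetical stand-in for visualize_input_data(choice).
    fig = plt.figure(facecolor="w", figsize=(5, 5))
    if choice in ("AIC criterion", "Both"):
        plt.plot(aic_criterion, color="tab:blue", marker="x", label="AIC criterion")
    if choice in ("BIC criterion", "Both"):
        plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
    plt.legend()
    plt.xlabel("Lasso model sequence")
    plt.ylabel("Information criterion")
    return fig

with gr.Blocks() as demo:
    radio = gr.Radio(
        ["AIC criterion", "BIC criterion", "Both"], label="Criterion to plot"
    )
    plot = gr.Plot(label="AIC BIC Criteria")
    # change fires on every new selection and pushes the returned figure
    # into the plot component.
    radio.change(fn=make_plot, inputs=radio, outputs=plot)

demo.launch()

Declaring the gr.Plot once and referencing it in outputs pins down where the plot renders in the layout; the committed line instead instantiates the component inline in the radio.change(...) call. Note also that the last hunk removes demo.launch() with no visible replacement in the shown context.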