Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,9 @@ from catboost import CatBoostRegressor
|
|
17 |
import warnings
|
18 |
warnings.filterwarnings('ignore')
|
19 |
|
20 |
-
|
|
|
|
|
21 |
county_embeddings = pd.read_csv(embeddings_file_path).set_index('place')
|
22 |
numeric_cols = county_embeddings.select_dtypes(include=['number']).columns
|
23 |
county_embeddings_numeric = county_embeddings[numeric_cols]
|
@@ -28,7 +30,8 @@ def load_embeddings(embeddings_file_path):
|
|
28 |
county_embeddings_pca = pca.transform(county_embeddings_imputed)
|
29 |
return county_embeddings, county_embeddings_pca, pca, imputer
|
30 |
|
31 |
-
def load_unemployment_data(
|
|
|
32 |
unemployment_data = pd.read_csv(unemployment_file_path).set_index('place')
|
33 |
unemployment_long = unemployment_data.reset_index().melt(id_vars='place', var_name='date', value_name='unemployment_rate')
|
34 |
return unemployment_long
|
@@ -65,15 +68,18 @@ def preprocess_data(county_embeddings, county_embeddings_pca, unemployment_long,
|
|
65 |
|
66 |
return X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train
|
67 |
|
68 |
-
def train_and_evaluate_models(X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train):
|
69 |
# Define models
|
70 |
-
|
71 |
"Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
|
72 |
"XGBoost": XGBRegressor(n_estimators=100, random_state=42, tree_method='gpu_hist'),
|
73 |
"Ridge Regression": Ridge(alpha=1.0),
|
74 |
"CatBoost": CatBoostRegressor(iterations=100, random_seed=42, task_type="GPU")
|
75 |
}
|
76 |
|
|
|
|
|
|
|
77 |
results = {}
|
78 |
feature_importances = {}
|
79 |
|
@@ -123,10 +129,10 @@ def plot_metrics(results):
|
|
123 |
|
124 |
return rmse_plot, r2_plot
|
125 |
|
126 |
-
def main(
|
127 |
# Load data
|
128 |
-
county_embeddings, county_embeddings_pca, pca, imputer = load_embeddings(
|
129 |
-
unemployment_long = load_unemployment_data(
|
130 |
|
131 |
# Preprocess data
|
132 |
X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train = preprocess_data(
|
@@ -135,7 +141,7 @@ def main(embeddings_file_path, unemployment_file_path):
|
|
135 |
|
136 |
# Train and evaluate models
|
137 |
results, feature_importances, feature_names = train_and_evaluate_models(
|
138 |
-
X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train
|
139 |
)
|
140 |
|
141 |
# Plot metrics
|
@@ -151,41 +157,44 @@ def main(embeddings_file_path, unemployment_file_path):
|
|
151 |
|
152 |
def gradio_app():
|
153 |
with gr.Blocks() as demo:
|
154 |
-
gr.Markdown("
|
155 |
-
gr.Markdown("
|
156 |
|
157 |
with gr.Row():
|
158 |
-
|
159 |
-
|
|
|
|
|
160 |
|
161 |
-
|
162 |
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
return gr.update(value="Please upload both embeddings and unemployment data files."), None, None, None
|
171 |
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
|
176 |
# Run main analysis
|
177 |
-
results, rmse_plot, r2_plot, feature_importance_plots = main(
|
178 |
|
179 |
-
#
|
180 |
-
|
181 |
-
|
182 |
-
|
|
|
|
|
183 |
|
184 |
-
return results, rmse_plot, r2_plot,
|
185 |
|
186 |
run_button.click(
|
187 |
run_analysis,
|
188 |
-
inputs=
|
189 |
outputs=[output_results, output_rmse_plot, output_r2_plot, output_feature_importance]
|
190 |
)
|
191 |
|
|
|
17 |
import warnings
|
18 |
warnings.filterwarnings('ignore')
|
19 |
|
20 |
+
# Load datasets (assumes the CSV files are in the current working directory)
|
21 |
+
def load_embeddings():
|
22 |
+
embeddings_file_path = 'county_embeddings.csv' # Adjust the file name if necessary
|
23 |
county_embeddings = pd.read_csv(embeddings_file_path).set_index('place')
|
24 |
numeric_cols = county_embeddings.select_dtypes(include=['number']).columns
|
25 |
county_embeddings_numeric = county_embeddings[numeric_cols]
|
|
|
30 |
county_embeddings_pca = pca.transform(county_embeddings_imputed)
|
31 |
return county_embeddings, county_embeddings_pca, pca, imputer
|
32 |
|
33 |
+
def load_unemployment_data():
|
34 |
+
unemployment_file_path = 'county_unemployment.csv' # Adjust the file name if necessary
|
35 |
unemployment_data = pd.read_csv(unemployment_file_path).set_index('place')
|
36 |
unemployment_long = unemployment_data.reset_index().melt(id_vars='place', var_name='date', value_name='unemployment_rate')
|
37 |
return unemployment_long
|
|
|
68 |
|
69 |
return X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train
|
70 |
|
71 |
+
def train_and_evaluate_models(X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train, selected_models):
|
72 |
# Define models
|
73 |
+
all_models = {
|
74 |
"Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
|
75 |
"XGBoost": XGBRegressor(n_estimators=100, random_state=42, tree_method='gpu_hist'),
|
76 |
"Ridge Regression": Ridge(alpha=1.0),
|
77 |
"CatBoost": CatBoostRegressor(iterations=100, random_seed=42, task_type="GPU")
|
78 |
}
|
79 |
|
80 |
+
# Filter selected models
|
81 |
+
models = {name: model for name, model in all_models.items() if name in selected_models}
|
82 |
+
|
83 |
results = {}
|
84 |
feature_importances = {}
|
85 |
|
|
|
129 |
|
130 |
return rmse_plot, r2_plot
|
131 |
|
132 |
+
def main(selected_models):
|
133 |
# Load data
|
134 |
+
county_embeddings, county_embeddings_pca, pca, imputer = load_embeddings()
|
135 |
+
unemployment_long = load_unemployment_data()
|
136 |
|
137 |
# Preprocess data
|
138 |
X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train = preprocess_data(
|
|
|
141 |
|
142 |
# Train and evaluate models
|
143 |
results, feature_importances, feature_names = train_and_evaluate_models(
|
144 |
+
X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train, selected_models
|
145 |
)
|
146 |
|
147 |
# Plot metrics
|
|
|
157 |
|
158 |
def gradio_app():
|
159 |
with gr.Blocks() as demo:
|
160 |
+
gr.Markdown("<h1 style='text-align: center'>County-Level Unemployment Rate Forecasting</h1>")
|
161 |
+
gr.Markdown("This app forecasts county-level unemployment rates using various machine learning models with GPU acceleration.")
|
162 |
|
163 |
with gr.Row():
|
164 |
+
with gr.Column(scale=1):
|
165 |
+
gr.Markdown("### Select Models to Train")
|
166 |
+
model_choices = ["Random Forest", "XGBoost", "Ridge Regression", "CatBoost"]
|
167 |
+
selected_models = gr.CheckboxGroup(choices=model_choices, value=model_choices, label="Models")
|
168 |
|
169 |
+
run_button = gr.Button("Run Analysis")
|
170 |
|
171 |
+
with gr.Column(scale=2):
|
172 |
+
output_results = gr.JSON(label="Model Performance Metrics")
|
173 |
+
output_rmse_plot = gr.Plot(label="RMSE Comparison")
|
174 |
+
output_r2_plot = gr.Plot(label="R-squared Comparison")
|
175 |
|
176 |
+
gr.Markdown("### Feature Importances")
|
177 |
+
output_feature_importance = gr.TabbedInterface([], [])
|
|
|
178 |
|
179 |
+
def run_analysis(selected_models):
|
180 |
+
if not selected_models:
|
181 |
+
return gr.update(value="Please select at least one model to train."), None, None, gr.update(tabs=[], contents=[])
|
182 |
|
183 |
# Run main analysis
|
184 |
+
results, rmse_plot, r2_plot, feature_importance_plots = main(selected_models)
|
185 |
|
186 |
+
# Prepare feature importance plots
|
187 |
+
fi_tabs = []
|
188 |
+
fi_plots = []
|
189 |
+
for model_name, fig in feature_importance_plots.items():
|
190 |
+
fi_tabs.append(model_name)
|
191 |
+
fi_plots.append(fig)
|
192 |
|
193 |
+
return results, rmse_plot, r2_plot, gr.update(tabs=fi_tabs, contents=fi_plots)
|
194 |
|
195 |
run_button.click(
|
196 |
run_analysis,
|
197 |
+
inputs=selected_models,
|
198 |
outputs=[output_results, output_rmse_plot, output_r2_plot, output_feature_importance]
|
199 |
)
|
200 |
|