LeonceNsh committed on
Commit
0058125
·
verified ·
1 Parent(s): 7053cae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -30
app.py CHANGED
@@ -17,7 +17,9 @@ from catboost import CatBoostRegressor
17
  import warnings
18
  warnings.filterwarnings('ignore')
19
 
20
- def load_embeddings(embeddings_file_path):
 
 
21
  county_embeddings = pd.read_csv(embeddings_file_path).set_index('place')
22
  numeric_cols = county_embeddings.select_dtypes(include=['number']).columns
23
  county_embeddings_numeric = county_embeddings[numeric_cols]
@@ -28,7 +30,8 @@ def load_embeddings(embeddings_file_path):
28
  county_embeddings_pca = pca.transform(county_embeddings_imputed)
29
  return county_embeddings, county_embeddings_pca, pca, imputer
30
 
31
- def load_unemployment_data(unemployment_file_path):
 
32
  unemployment_data = pd.read_csv(unemployment_file_path).set_index('place')
33
  unemployment_long = unemployment_data.reset_index().melt(id_vars='place', var_name='date', value_name='unemployment_rate')
34
  return unemployment_long
@@ -65,15 +68,18 @@ def preprocess_data(county_embeddings, county_embeddings_pca, unemployment_long,
65
 
66
  return X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train
67
 
68
- def train_and_evaluate_models(X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train):
69
  # Define models
70
- models = {
71
  "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
72
  "XGBoost": XGBRegressor(n_estimators=100, random_state=42, tree_method='gpu_hist'),
73
  "Ridge Regression": Ridge(alpha=1.0),
74
  "CatBoost": CatBoostRegressor(iterations=100, random_seed=42, task_type="GPU")
75
  }
76
 
 
 
 
77
  results = {}
78
  feature_importances = {}
79
 
@@ -123,10 +129,10 @@ def plot_metrics(results):
123
 
124
  return rmse_plot, r2_plot
125
 
126
- def main(embeddings_file_path, unemployment_file_path):
127
  # Load data
128
- county_embeddings, county_embeddings_pca, pca, imputer = load_embeddings(embeddings_file_path)
129
- unemployment_long = load_unemployment_data(unemployment_file_path)
130
 
131
  # Preprocess data
132
  X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train = preprocess_data(
@@ -135,7 +141,7 @@ def main(embeddings_file_path, unemployment_file_path):
135
 
136
  # Train and evaluate models
137
  results, feature_importances, feature_names = train_and_evaluate_models(
138
- X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train
139
  )
140
 
141
  # Plot metrics
@@ -151,41 +157,44 @@ def main(embeddings_file_path, unemployment_file_path):
151
 
152
  def gradio_app():
153
  with gr.Blocks() as demo:
154
- gr.Markdown("# County-Level Unemployment Rate Forecasting")
155
- gr.Markdown("Upload county embeddings and unemployment data to train models and visualize results.")
156
 
157
  with gr.Row():
158
- embeddings_file = gr.File(label="Upload County Embeddings CSV")
159
- unemployment_file = gr.File(label="Upload Unemployment Data CSV")
 
 
160
 
161
- run_button = gr.Button("Run Analysis")
162
 
163
- output_results = gr.JSON(label="Model Performance Metrics")
164
- output_rmse_plot = gr.Plot(label="RMSE Comparison")
165
- output_r2_plot = gr.Plot(label="R-squared Comparison")
166
- output_feature_importance = gr.Plot(label="Feature Importances")
167
 
168
- def run_analysis(embeddings_file, unemployment_file):
169
- if embeddings_file is None or unemployment_file is None:
170
- return gr.update(value="Please upload both embeddings and unemployment data files."), None, None, None
171
 
172
- # Read files
173
- embeddings_file_path = embeddings_file.name
174
- unemployment_file_path = unemployment_file.name
175
 
176
  # Run main analysis
177
- results, rmse_plot, r2_plot, feature_importance_plots = main(embeddings_file_path, unemployment_file_path)
178
 
179
- # For simplicity, display feature importance of Random Forest (if available)
180
- fi_plot = None
181
- if 'Random Forest' in feature_importance_plots:
182
- fi_plot = feature_importance_plots['Random Forest']
 
 
183
 
184
- return results, rmse_plot, r2_plot, fi_plot
185
 
186
  run_button.click(
187
  run_analysis,
188
- inputs=[embeddings_file, unemployment_file],
189
  outputs=[output_results, output_rmse_plot, output_r2_plot, output_feature_importance]
190
  )
191
 
 
17
  import warnings
18
  warnings.filterwarnings('ignore')
19
 
20
+ # Load datasets (Assuming the datasets are in the same directory)
21
+ def load_embeddings():
22
+ embeddings_file_path = 'county_embeddings.csv' # Adjust the file name if necessary
23
  county_embeddings = pd.read_csv(embeddings_file_path).set_index('place')
24
  numeric_cols = county_embeddings.select_dtypes(include=['number']).columns
25
  county_embeddings_numeric = county_embeddings[numeric_cols]
 
30
  county_embeddings_pca = pca.transform(county_embeddings_imputed)
31
  return county_embeddings, county_embeddings_pca, pca, imputer
32
 
33
+ def load_unemployment_data():
34
+ unemployment_file_path = 'county_unemployment.csv' # Adjust the file name if necessary
35
  unemployment_data = pd.read_csv(unemployment_file_path).set_index('place')
36
  unemployment_long = unemployment_data.reset_index().melt(id_vars='place', var_name='date', value_name='unemployment_rate')
37
  return unemployment_long
 
68
 
69
  return X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train
70
 
71
+ def train_and_evaluate_models(X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train, selected_models):
72
  # Define models
73
+ all_models = {
74
  "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
75
  "XGBoost": XGBRegressor(n_estimators=100, random_state=42, tree_method='gpu_hist'),
76
  "Ridge Regression": Ridge(alpha=1.0),
77
  "CatBoost": CatBoostRegressor(iterations=100, random_seed=42, task_type="GPU")
78
  }
79
 
80
+ # Filter selected models
81
+ models = {name: model for name, model in all_models.items() if name in selected_models}
82
+
83
  results = {}
84
  feature_importances = {}
85
 
 
129
 
130
  return rmse_plot, r2_plot
131
 
132
+ def main(selected_models):
133
  # Load data
134
+ county_embeddings, county_embeddings_pca, pca, imputer = load_embeddings()
135
+ unemployment_long = load_unemployment_data()
136
 
137
  # Preprocess data
138
  X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train = preprocess_data(
 
141
 
142
  # Train and evaluate models
143
  results, feature_importances, feature_names = train_and_evaluate_models(
144
+ X_train_pca, X_test_pca, y_train, y_test, numeric_cols_train, selected_models
145
  )
146
 
147
  # Plot metrics
 
157
 
158
  def gradio_app():
159
  with gr.Blocks() as demo:
160
+ gr.Markdown("<h1 style='text-align: center'>County-Level Unemployment Rate Forecasting</h1>")
161
+ gr.Markdown("This app forecasts county-level unemployment rates using various machine learning models with GPU acceleration.")
162
 
163
  with gr.Row():
164
+ with gr.Column(scale=1):
165
+ gr.Markdown("### Select Models to Train")
166
+ model_choices = ["Random Forest", "XGBoost", "Ridge Regression", "CatBoost"]
167
+ selected_models = gr.CheckboxGroup(choices=model_choices, value=model_choices, label="Models")
168
 
169
+ run_button = gr.Button("Run Analysis")
170
 
171
+ with gr.Column(scale=2):
172
+ output_results = gr.JSON(label="Model Performance Metrics")
173
+ output_rmse_plot = gr.Plot(label="RMSE Comparison")
174
+ output_r2_plot = gr.Plot(label="R-squared Comparison")
175
 
176
+ gr.Markdown("### Feature Importances")
177
+ output_feature_importance = gr.TabbedInterface([], [])
 
178
 
179
+ def run_analysis(selected_models):
180
+ if not selected_models:
181
+ return gr.update(value="Please select at least one model to train."), None, None, gr.update(tabs=[], contents=[])
182
 
183
  # Run main analysis
184
+ results, rmse_plot, r2_plot, feature_importance_plots = main(selected_models)
185
 
186
+ # Prepare feature importance plots
187
+ fi_tabs = []
188
+ fi_plots = []
189
+ for model_name, fig in feature_importance_plots.items():
190
+ fi_tabs.append(model_name)
191
+ fi_plots.append(fig)
192
 
193
+ return results, rmse_plot, r2_plot, gr.update(tabs=fi_tabs, contents=fi_plots)
194
 
195
  run_button.click(
196
  run_analysis,
197
+ inputs=selected_models,
198
  outputs=[output_results, output_rmse_plot, output_r2_plot, output_feature_importance]
199
  )
200