LeonceNsh committed on
Commit
b4e8a18
·
verified ·
1 Parent(s): bc1835c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -259
app.py CHANGED
@@ -5,61 +5,90 @@ import plotly.express as px
5
  import folium
6
  import numpy as np
7
  import geopandas as gpd
8
- from branca.element import Element
 
 
 
 
9
  import os
10
- import openrouteservice
11
- from folium.plugins import MarkerCluster
12
- from folium.plugins import Search
13
-
14
- import matplotlib.pyplot as plt
15
- import io
16
- import base64
17
- import logfire
18
 
19
  # Logger setup
20
  logging.basicConfig(level=logging.INFO)
21
  logger = logging.getLogger(__name__)
22
 
 
 
 
 
 
23
 
24
- logfire.configure()
25
- # ORS client setup
26
- ORS_API_KEY = os.getenv('ors')
27
- if not ORS_API_KEY:
28
- raise ValueError("OpenRouteService API key not found. Please set the 'ors' environment variable.")
29
-
30
- client = openrouteservice.Client(key=ORS_API_KEY)
31
-
32
- # Function to create isochrones around Autozone locations
33
- def create_isochrone_map():
34
- m = folium.Map(location=[35.8601, -86.6602], zoom_start=7)
35
- autozone_df = df_md_final1[df_md_final1['business_type'] == 'Autozone']
36
-
37
- for idx, row in autozone_df.iterrows():
38
- coords = (row['md_x'], row['md_y'])
39
- try:
40
- isochrone = client.isochrones(locations=[coords], profile='driving-car', range=[1800])
41
- folium.GeoJson(isochrone, name='Isochrones').add_to(m)
42
- except openrouteservice.exceptions.HTTPError as e:
43
- print(f"HTTPError: {e}")
44
- continue
45
- except Exception as e:
46
- print(f"An error occurred: {e}")
47
- continue
48
-
49
- folium.LayerControl().add_to(m)
50
- return m._repr_html_()
51
 
52
- # Data for 2020 Tennessee population by county
53
  population_2020_data = {
54
  'County': ['Shelby', 'Davidson', 'Knox', 'Hamilton', 'Rutherford', 'Williamson', 'Montgomery', 'Sumner', 'Blount', 'Washington',
55
  'Madison', 'Sevier', 'Maury', 'Wilson', 'Bradley'],
56
  'Population_2020': [929744, 715884, 478971, 366207, 341486, 247726, 220069, 196281, 135280, 133001,
57
  98823, 98380, 100974, 147737, 108620]
58
  }
59
-
60
- # Create a DataFrame for the top 15 counties
61
  df_population_2020 = pd.DataFrame(population_2020_data)
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  # Function to create a Folium map with selected geographical boundaries and markers
64
  def create_map(geo_layer="Counties", business_filters=["All"]):
65
  logger.info(f"Creating map with geo_layer: {geo_layer} and business_filters: {business_filters}")
@@ -70,8 +99,6 @@ def create_map(geo_layer="Counties", business_filters=["All"]):
70
  # Select the appropriate GeoDataFrame based on geo_layer
71
  if geo_layer == "Counties":
72
  geo_data = counties_geo
73
- elif geo_layer == "Zip Codes":
74
- geo_data = zcta_geo
75
  elif geo_layer == "HSAs":
76
  geo_data = hsa_geo
77
  elif geo_layer == "HRRs":
@@ -100,7 +127,8 @@ def create_map(geo_layer="Counties", business_filters=["All"]):
100
  for _, row in filtered_df.iterrows():
101
  folium.Marker(
102
  location=[row['md_y'], row['md_x']],
103
- popup=f"<b>{row['name']}</b>"
 
104
  ).add_to(marker_cluster)
105
 
106
  folium.LayerControl().add_to(m)
@@ -108,56 +136,23 @@ def create_map(geo_layer="Counties", business_filters=["All"]):
108
  logger.info("Map creation completed.")
109
  return m._repr_html_()
110
 
111
-
112
  # Function to create the bar plot for 2020 Tennessee population (top 15 counties)
113
  def plot_2020_population_top15():
114
  fig = px.bar(df_population_2020,
115
  x='County',
116
  y='Population_2020',
117
  title='Tennessee Population 2020',
118
- labels={'County': 'County', 'Population_2020': ''},
119
  color='Population_2020',
120
  color_continuous_scale='Blues')
121
 
122
  fig.update_layout(xaxis={'categoryorder':'total descending'}, template='plotly_white')
123
  return fig
124
 
125
- # Function to create the population distribution plot
126
- def plot_population_distribution():
127
- print(cbg_geographic_data.head())
128
- county_data = cbg_geographic_data.groupby('cntyname')['pop10'].sum().reset_index().sort_values(by='pop10', ascending=False)
129
-
130
- fig = px.bar(county_data.head(15),
131
- x="cntyname",
132
- y="pop10",
133
- title="2010 Population by County",
134
- labels={"cntyname": "County", "pop10": "2010 Population"},
135
- color='pop10',
136
- color_continuous_scale='Viridis')
137
- fig.update_layout(xaxis={'categoryorder':'total descending'}, template='plotly_white')
138
- return fig
139
-
140
- # Load datasets
141
- df_md_final1 = pd.read_csv("data/location-of-auto-businesses.csv")
142
- print(df_md_final1.columns)
143
- print(df_md_final1.info)
144
- cbg_geographic_data = pd.read_csv("data/cbg_geographic_data.csv")
145
- print(cbg_geographic_data.columns)
146
- print(cbg_geographic_data.info)
147
-
148
- # Create DataFrames for the 2020 and 2010 populations
149
- df_population_2010 = cbg_geographic_data.groupby('cntyname')['pop10'].sum().reset_index().sort_values(by='pop10', ascending=False)
150
- df_population_2010.rename(columns={'cntyname': 'County', 'pop10': 'Population_2010'}, inplace=True)
151
-
152
-
153
- # Merge the 2010 and 2020 population data for side-by-side comparison
154
- df_population_comparison = pd.merge(df_population_2010, df_population_2020, on='County')
155
-
156
  # Function to create a side-by-side bar chart for the 2010 and 2020 Tennessee population by county
157
  def plot_population_comparison():
158
  df_melted = df_population_comparison.melt(id_vars='County', value_vars=['Population_2010', 'Population_2020'],
159
  var_name='Year', value_name='Population')
160
- print(df_melted)
161
  fig = px.bar(df_melted,
162
  x='County',
163
  y='Population',
@@ -170,155 +165,61 @@ def plot_population_comparison():
170
  fig.update_layout(xaxis={'categoryorder': 'total descending'}, template='plotly_white')
171
  return fig
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
- # Load the County shapefile
175
- county_shapefile_path = "data/county/01_county-shape-file.shp"
176
- if not os.path.exists(county_shapefile_path):
177
- raise FileNotFoundError(f"County shapefile not found at {county_shapefile_path}. Please ensure the file exists.")
178
-
179
- counties_geo = gpd.read_file(county_shapefile_path)
180
-
181
- print("County Shapefile Columns:", counties_geo.info()) # Debugging line
182
- print("County Shapefile Columns:", counties_geo.head()) # Debugging line
183
-
184
- counties_geo = counties_geo[counties_geo['statefp'] == '47'] # Ensure correct column name for state FIPS
185
-
186
-
187
-
188
- # Define FIPS codes and abbreviations for TN, KY, IN, and AR
189
- state_fips = ['47'] # TN, KY, IN, AR FIPS codes
190
- state_abbr = ['TN']
191
-
192
- # Load the HSA shapefile
193
- hsa_shapefile_path = "data/hsa/01_hsa-shape-file.shp"
194
- if not os.path.exists(hsa_shapefile_path):
195
- raise FileNotFoundError(f"HSA shapefile not found at {hsa_shapefile_path}. Please ensure the file exists.")
196
-
197
-
198
- hsa_geo = gpd.read_file(hsa_shapefile_path)
199
- # Update the column name based on your shapefile
200
- if 'hsastate' in hsa_geo.columns:
201
- hsa_geo = hsa_geo[hsa_geo['hsastate'].isin(state_abbr)]
202
- elif 'STATEFP' in hsa_geo.columns:
203
- hsa_geo = hsa_geo[hsa_geo['STATEFP'].isin(state_fips)]
204
- else:
205
- raise KeyError("Column to filter state in HSA shapefile not found.")
206
-
207
- # Load the HRR shapefile
208
- hrr_shapefile_path = "data/hrr/01_hrr-shape-file.shp"
209
- if not os.path.exists(hrr_shapefile_path):
210
- raise FileNotFoundError(f"HRR shapefile not found at {hrr_shapefile_path}. Please ensure the file exists.")
211
-
212
- hrr_geo = gpd.read_file(hrr_shapefile_path)
213
- # Update the column name based on your shapefile
214
- if 'hrrstate' in hrr_geo.columns:
215
- hrr_geo = hrr_geo[hrr_geo['hrrstate'].isin(state_abbr)]
216
- elif 'STATEFP' in hrr_geo.columns:
217
- hrr_geo = hrr_geo[hrr_geo['STATEFP'].isin(state_fips)]
218
- else:
219
- raise KeyError("Column to filter state in HRR shapefile not found.")
220
-
221
 
222
- # Define Business
223
- df_md_final1['business_type'] = np.where(df_md_final1['name'].str.contains("Autozone", case=False, na=False), "Autozone",
224
- np.where(df_md_final1['name'].str.contains("Napa Auto Parts", case=False, na=False), "Napa Auto",
225
- np.where(df_md_final1['name'].str.contains("Firestone Complete Auto Care", case=False, na=False), "Firestone",
226
- np.where(df_md_final1['name'].str.contains("O'Reilly Auto Parts", case=False, na=False), "O'Reilly Auto",
227
- np.where(df_md_final1['name'].str.contains("Advance Auto Parts", case=False, na=False), "Advance Auto",
228
- np.where(df_md_final1['name'].str.contains("Toyota|Honda|Kia|Nissan|Chevy|Ford|Carmax|GMC", case=False, na=False),
229
- "Car Dealership",
230
- "Other Auto Repair Shops")
231
- )
232
- )
233
- )
234
- ))
235
 
236
- # Prepare data for modeling
237
- def prepare_model_data():
238
- # Aggregate number of businesses per county
239
- business_counts = df_md_final1.groupby('county').size().reset_index(name='business_count')
240
-
241
- # Merge with population data
242
- df_model = pd.merge(df_population_2020, business_counts, left_on='County', right_on='county', how='left')
243
- df_model['business_count'] = df_model['business_count'].fillna(0)
244
-
245
- # Drop redundant columns
246
- df_model = df_model.drop(columns=['county']) # Assuming 'county' is the same as 'County'
247
-
248
- # Additional feature engineering can be done here
249
-
250
- return df_model
251
 
252
- # Train Random Forest model
253
- def train_random_forest():
254
- df_model = prepare_model_data()
255
-
256
- # Features and target
257
- X = df_model[['Population_2020']]
258
- y = df_model['business_count']
259
-
260
- # Handle any missing values or additional preprocessing if necessary
261
- # For this example, we assume data is clean
262
-
263
- # Split the data
264
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
265
-
266
- # Initialize and train the model
267
- rf = RandomForestRegressor(n_estimators=100, random_state=42)
268
- rf.fit(X_train, y_train)
269
-
270
- # Predictions
271
- y_pred = rf.predict(X_test)
272
-
273
- # Metrics
274
- mse = mean_squared_error(y_test, y_pred)
275
- r2 = r2_score(y_test, y_pred)
276
-
277
- # SHAP explanation
278
- explainer = shap.Explainer(rf, X_train)
279
- shap_values = explainer(X_test)
280
-
281
- # Feature importance
282
- feature_importances = pd.DataFrame({
283
- 'feature': X.columns,
284
- 'importance': rf.feature_importances_
285
- }).sort_values(by='importance', ascending=False)
286
-
287
- return {
288
- 'model': rf,
289
- 'X_test': X_test,
290
- 'y_test': y_test,
291
- 'y_pred': y_pred,
292
- 'mse': mse,
293
- 'r2': r2,
294
- 'shap_values': shap_values,
295
- 'feature_importances': feature_importances
296
- }
297
 
298
- # Function to generate SHAP summary plot as a base64 string
299
- def get_shap_summary_plot(shap_values, X):
300
- plt.figure()
301
- shap.summary_plot(shap_values, X, show=False)
302
- buf = io.BytesIO()
303
- plt.savefig(buf, format="png", bbox_inches='tight')
304
- plt.close()
305
- buf.seek(0)
306
- img_base64 = base64.b64encode(buf.read()).decode('utf-8')
307
- return f"data:image/png;base64,{img_base64}"
308
 
309
- # Function to generate Feature Importance plot as a base64 string
310
- def get_feature_importance_plot(feature_importances):
311
- fig = px.bar(feature_importances,
312
- x='feature',
313
- y='importance',
314
- title='Feature Importance from Random Forest',
315
- labels={'feature': 'Feature', 'importance': 'Importance'},
316
- color='importance',
317
- color_continuous_scale='Blues')
318
- fig.update_layout(template='plotly_white')
319
- img_bytes = fig.to_image(format="png")
320
- img_base64 = base64.b64encode(img_bytes).decode('utf-8')
321
- return f"data:image/png;base64,{img_base64}"
322
 
323
  # Gradio Interface
324
  with gr.Blocks(theme=gr.themes.Default()) as app:
@@ -332,47 +233,21 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
332
  gr.Markdown("### 2020 Population by County")
333
  pop_dist = gr.Plot(plot_2020_population_top15)
334
 
 
 
 
 
335
  gr.Markdown("### πŸ› οΈ Auto Businesses in Tennessee")
336
  manual_table = gr.Dataframe(
337
- headers=["Location Name", "Street Address", "City", "State", "Postal Code"],
338
  datatype=["str", "str", "str", "str", "str"],
339
- value=[
340
- ["AutoZone", "257 Wears Valley Rd", "Pigeon Forge", "Tennessee", "37863"],
341
- ["Sterling Auto", "2064 Wilma Rudolph Blvd", "Clarksville", "Tennessee", "37040"],
342
- ["AutoZone", "257 Wears Valley Rd", "Pigeon Forge", "Tennessee", "37863"],
343
- ["Sterling Auto", "2064 Wilma Rudolph Blvd", "Clarksville", "Tennessee", "37040"],
344
- ["Advance Auto Parts", "2124 N Highland Ave", "Jackson", "Tennessee", "38305"],
345
- ["FRIENDSHIP HYUNDAI OF BRISTOL", "1841 Volunteer Pkwy", "Bristol", "Tennessee", "37620"],
346
- ["Advance Auto Parts", "45 Main St", "Savannah", "Tennessee", "38372"],
347
- ["O'Reilly Auto Parts", "493 Craighead St", "Nashville", "Tennessee", "37204"],
348
- ["O'Reilly Auto Parts", "864 Highway 51 N", "Covington", "Tennessee", "38019"],
349
- ["NAPA Auto Parts", "711 Murfreesboro Pike", "Nashville", "Tennessee", "37210"],
350
- ["Goodyear Auto Service Centers", "5407 Highway 153", "Hixson", "Tennessee", "37343"],
351
- ["NAPA Auto Parts", "100 Center St", "Johnson City", "Tennessee", "37615"],
352
- ["Cadillac,Buick,Chevrolet,GMC", "960 John R Rice Blvd", "Murfreesboro", "Tennessee", "37129"],
353
- ["AutoZone", "9760 Highway 64", "Lakeland", "Tennessee", "38002"],
354
- ["Honda", "1408 Highway 45 Byp", "Jackson", "Tennessee", "38305"],
355
- ["National Tire & Battery (NTB)", "532 Robert Rose Dr", "Murfreesboro", "Tennessee", "37129"],
356
- ["NAPA Auto Parts", "711 Murfreesboro Pike", "Nashville", "Tennessee", "37210"],
357
- ["Advance Auto Parts", "160 W Broadway", "Gallatin", "Tennessee", "37066"],
358
- ["Southern Tire Mart (STM)", "1551 S Wilcox Dr", "Kingsport", "Tennessee", "37660"],
359
- ["Chevrolet", "310 E 20th St", "Chattanooga", "Tennessee", "37408"],
360
- ["O'Reilly Auto Parts", "7534 Oak Ridge Hwy", "Knoxville", "Tennessee", "37931"],
361
- ["Goodyear Auto Service Centers", "971 Eastgate Loop", "Chattanooga", "Tennessee", "37411"],
362
- ["Firestone Complete Auto Care", "15127 Old Hickory Blvd", "Nashville", "Tennessee", "37211"],
363
- ["Christian Brothers Automotive", "10406 Kingston Pike", "Knoxville", "Tennessee", "37922"],
364
- ["Christian Brothers Automotive", "563 E Main St", "Hendersonville", "Tennessee", "37075"],
365
- ["O'Reilly Auto Parts", "101 Village Square Ln", "Mountain City", "Tennessee", "37683"],
366
- ["O'Reilly Auto Parts", "4219 Fort Henry Dr Ste A", "Kingsport", "Tennessee", "37663"],
367
- ["Precision Tune Auto Care", "4710 N Broadway St", "Knoxville", "Tennessee", "37918"],
368
- ["National Tire & Battery (NTB)", "234 Old Hickory Blvd", "Nashville", "Tennessee", "37221"]
369
- ], # Data values
370
- row_count=27, # Adjusted total number of rows
371
  interactive=False
372
  )
373
 
374
  gr.Markdown("### πŸ“ Interactive Map")
375
- map_output_overview = gr.HTML(lambda: create_map(geo_layer="Counties", business_filters=["All"]))
376
 
377
  with gr.Tab("πŸ“ Shops in TN Counties"):
378
  with gr.Row():
@@ -411,7 +286,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
411
  business_filter_hsa.change(fn=update_hsa_map, inputs=[business_filter_hsa], outputs=[shops_hsa_map])
412
  reset_button_hsa.click(fn=lambda: (["All"], create_map(geo_layer="HSAs", business_filters=["All"])),
413
  inputs=None, outputs=[business_filter_hsa, shops_hsa_map])
414
-
415
  with gr.Tab("πŸ“ Shops in TN HRRs"):
416
  with gr.Row():
417
  with gr.Column(scale=1):
@@ -431,20 +306,70 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
431
  reset_button_hrr.click(fn=lambda: (["All"], create_map(geo_layer="HRRs", business_filters=["All"])),
432
  inputs=None, outputs=[business_filter_hrr, shops_hrr_map])
433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  with gr.Tab("πŸ” Help"):
435
  gr.Markdown("""
436
  ## How to Use This Dashboard
437
 
438
  - **Overview Tab:** Provides population statistics and a summary map of all auto businesses in Tennessee.
439
 
440
- - **Shops in TN Counties/Zip Codes/HSAs/HRRs Tabs:**
441
- - **Filter by Business Type:** Use the checkboxes to select one or multiple Business to display on the map.
442
- - **Filter by Geographical Area:** Depending on the tab, you can filter businesses based on Counties, Zip Codes, HSAs, or HRRs.
443
  - **Reset Filters:** Click the reset button to clear all selected filters and view all businesses.
444
  - **Interactive Map:** Zoom in/out, click on markers to view business details, and use the search bar to find specific businesses.
445
-
 
 
 
 
 
446
  """)
447
 
448
  gr.Markdown("### πŸ“„ Source: Yellow Pages")
449
-
450
  app.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
5
  import folium
6
  import numpy as np
7
  import geopandas as gpd
8
+ from folium.plugins import MarkerCluster, Search
9
+ from sklearn.neighbors import BallTree
10
+ from geopy.geocoders import Nominatim
11
+ from geopy.extra.rate_limiter import RateLimiter
12
+ import math
13
  import os
 
 
 
 
 
 
 
 
14
 
15
  # Logger setup
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
18
 
19
+ # ORS client setup (if needed for additional functionalities)
20
+ # ORS_API_KEY = os.getenv('ors')
21
+ # if not ORS_API_KEY:
22
+ # raise ValueError("OpenRouteService API key not found. Please set the 'ors' environment variable.")
23
+ # client = openrouteservice.Client(key=ORS_API_KEY)
24
 
25
+ # Load datasets
26
+ df_md_final1 = pd.read_csv("data/location-of-auto-businesses.csv")
27
+ cbg_geographic_data = pd.read_csv("data/cbg_geographic_data.csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ # Create DataFrames for the 2020 and 2010 populations
30
  population_2020_data = {
31
  'County': ['Shelby', 'Davidson', 'Knox', 'Hamilton', 'Rutherford', 'Williamson', 'Montgomery', 'Sumner', 'Blount', 'Washington',
32
  'Madison', 'Sevier', 'Maury', 'Wilson', 'Bradley'],
33
  'Population_2020': [929744, 715884, 478971, 366207, 341486, 247726, 220069, 196281, 135280, 133001,
34
  98823, 98380, 100974, 147737, 108620]
35
  }
 
 
36
  df_population_2020 = pd.DataFrame(population_2020_data)
37
 
38
+ df_population_2010 = cbg_geographic_data.groupby('cntyname')['pop10'].sum().reset_index().sort_values(by='pop10', ascending=False)
39
+ df_population_2010.rename(columns={'cntyname': 'County', 'pop10': 'Population_2010'}, inplace=True)
40
+
41
+ # Merge the 2010 and 2020 population data for side-by-side comparison
42
+ df_population_comparison = pd.merge(df_population_2010, df_population_2020, on='County')
43
+
44
+ # Define Business Types
45
+ df_md_final1['business_type'] = np.where(df_md_final1['name'].str.contains("Autozone", case=False, na=False), "Autozone",
46
+ np.where(df_md_final1['name'].str.contains("Napa Auto Parts", case=False, na=False), "Napa Auto",
47
+ np.where(df_md_final1['name'].str.contains("Firestone Complete Auto Care", case=False, na=False), "Firestone",
48
+ np.where(df_md_final1['name'].str.contains("O'Reilly Auto Parts", case=False, na=False), "O'Reilly Auto",
49
+ np.where(df_md_final1['name'].str.contains("Advance Auto Parts", case=False, na=False), "Advance Auto",
50
+ np.where(df_md_final1['name'].str.contains("Toyota|Honda|Kia|Nissan|Chevy|Ford|Carmax|GMC", case=False, na=False),
51
+ "Car Dealership",
52
+ "Other Auto Repair Shops")
53
+ )
54
+ )
55
+ )
56
+ ))
57
+
58
+ # Load the County shapefile
59
+ county_shapefile_path = "data/county/01_county-shape-file.shp"
60
+ if not os.path.exists(county_shapefile_path):
61
+ raise FileNotFoundError(f"County shapefile not found at {county_shapefile_path}. Please ensure the file exists.")
62
+
63
+ counties_geo = gpd.read_file(county_shapefile_path)
64
+ counties_geo = counties_geo[counties_geo['statefp'] == '47'] # Tennessee FIPS code
65
+
66
+ # Load the HSA shapefile
67
+ hsa_shapefile_path = "data/hsa/01_hsa-shape-file.shp"
68
+ if not os.path.exists(hsa_shapefile_path):
69
+ raise FileNotFoundError(f"HSA shapefile not found at {hsa_shapefile_path}. Please ensure the file exists.")
70
+
71
+ hsa_geo = gpd.read_file(hsa_shapefile_path)
72
+ if 'hsastate' in hsa_geo.columns:
73
+ hsa_geo = hsa_geo[hsa_geo['hsastate'].isin(['TN'])]
74
+ elif 'STATEFP' in hsa_geo.columns:
75
+ hsa_geo = hsa_geo[hsa_geo['STATEFP'].isin(['47'])]
76
+ else:
77
+ raise KeyError("Column to filter state in HSA shapefile not found.")
78
+
79
+ # Load the HRR shapefile
80
+ hrr_shapefile_path = "data/hrr/01_hrr-shape-file.shp"
81
+ if not os.path.exists(hrr_shapefile_path):
82
+ raise FileNotFoundError(f"HRR shapefile not found at {hrr_shapefile_path}. Please ensure the file exists.")
83
+
84
+ hrr_geo = gpd.read_file(hrr_shapefile_path)
85
+ if 'hrrstate' in hrr_geo.columns:
86
+ hrr_geo = hrr_geo[hrr_geo['hrrstate'].isin(['TN'])]
87
+ elif 'STATEFP' in hrr_geo.columns:
88
+ hrr_geo = hrr_geo[hrr_geo['STATEFP'].isin(['47'])]
89
+ else:
90
+ raise KeyError("Column to filter state in HRR shapefile not found.")
91
+
92
  # Function to create a Folium map with selected geographical boundaries and markers
93
  def create_map(geo_layer="Counties", business_filters=["All"]):
94
  logger.info(f"Creating map with geo_layer: {geo_layer} and business_filters: {business_filters}")
 
99
  # Select the appropriate GeoDataFrame based on geo_layer
100
  if geo_layer == "Counties":
101
  geo_data = counties_geo
 
 
102
  elif geo_layer == "HSAs":
103
  geo_data = hsa_geo
104
  elif geo_layer == "HRRs":
 
127
  for _, row in filtered_df.iterrows():
128
  folium.Marker(
129
  location=[row['md_y'], row['md_x']],
130
+ popup=f"<b>{row['name']}</b><br>{row['address']}, {row['city']}, TN {row['postal_code']}",
131
+ icon=folium.Icon(color='blue', icon='info-sign')
132
  ).add_to(marker_cluster)
133
 
134
  folium.LayerControl().add_to(m)
 
136
  logger.info("Map creation completed.")
137
  return m._repr_html_()
138
 
 
139
  # Function to create the bar plot for 2020 Tennessee population (top 15 counties)
140
def plot_2020_population_top15():
    """Return a Plotly bar chart of the 2020 population per county.

    Reads the module-level ``df_population_2020`` frame; bars are coloured by
    population on the 'Blues' scale and ordered from largest to smallest.
    """
    bar_options = {
        'x': 'County',
        'y': 'Population_2020',
        'title': 'Tennessee Population 2020',
        'labels': {'County': 'County', 'Population_2020': 'Population'},
        'color': 'Population_2020',
        'color_continuous_scale': 'Blues',
    }
    chart = px.bar(df_population_2020, **bar_options)

    # Sort the categorical x axis by bar height rather than by frame order.
    chart.update_layout(xaxis={'categoryorder': 'total descending'}, template='plotly_white')
    return chart
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  # Function to create a side-by-side bar chart for the 2010 and 2020 Tennessee population by county
153
  def plot_population_comparison():
154
  df_melted = df_population_comparison.melt(id_vars='County', value_vars=['Population_2010', 'Population_2020'],
155
  var_name='Year', value_name='Population')
 
156
  fig = px.bar(df_melted,
157
  x='County',
158
  y='Population',
 
165
  fig.update_layout(xaxis={'categoryorder': 'total descending'}, template='plotly_white')
166
  return fig
167
 
168
+ # Nearest Neighbor Search Setup
169
+ # Prepare the data for nearest neighbor search
170
def prepare_nearest_neighbor():
    """Build the spatial index used for nearest-shop lookups.

    Returns:
        A ``(tree, radians_coords)`` tuple: a ``BallTree`` using the haversine
        metric, and the ``[md_y, md_x]`` coordinate array (converted to
        radians) it was fitted on, in ``df_md_final1`` row order.
    """
    # The haversine metric works on radians, so convert before fitting.
    shop_coords_rad = np.radians(df_md_final1[['md_y', 'md_x']].to_numpy())
    shop_tree = BallTree(shop_coords_rad, metric='haversine')
    return shop_tree, shop_coords_rad
176
+
177
+ tree, radians_coords = prepare_nearest_neighbor()
178
+
179
+ # Geocoder setup
180
+ geolocator = Nominatim(user_agent="tn_auto_shops_app")
181
+ geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
182
+
183
+ # Function to find the nearest shop
184
def find_nearest_shop(address=None, latitude=None, longitude=None):
    """Find the shop closest to an address or to a latitude/longitude pair.

    A non-blank ``address`` takes precedence and is resolved with the
    module-level rate-limited ``geocode`` callable; otherwise ``latitude`` and
    ``longitude`` are used directly. The closest row of ``df_md_final1`` is
    looked up through the module-level haversine ``tree``.

    Args:
        address: Free-text address, or ``None``/blank to use the coordinates.
        latitude: Latitude in degrees, used when no address is given.
        longitude: Longitude in degrees, used when no address is given.

    Returns:
        A ``(message, map_html)`` tuple. On success ``message`` is ``""`` and
        ``map_html`` is the Folium map markup showing both points and a
        connecting line. On failure ``message`` describes the problem and
        ``map_html`` is ``None``.
    """
    import html  # function-scope imports keep this block self-contained
    import math

    missing_input = "Please provide an address or select a location on the map."

    # Whitespace-only text is "no address": fall through to the coordinates
    # instead of sending an empty query to the geocoder.
    if isinstance(address, str):
        address = address.strip()

    if address:
        location = geocode(address)
        if not location:
            return "Address not found. Please try a different address.", None
        latitude, longitude = location.latitude, location.longitude
    elif latitude is None or longitude is None:
        return missing_input, None

    # Reject NaN/inf and non-numeric input before it reaches the tree query,
    # where it would yield a meaningless neighbour and an unrenderable map.
    try:
        latitude, longitude = float(latitude), float(longitude)
    except (TypeError, ValueError):
        return missing_input, None
    if not (math.isfinite(latitude) and math.isfinite(longitude)):
        return missing_input, None

    # The tree was fitted on radians, so the query point must be in radians.
    point = np.radians([latitude, longitude]).reshape(1, -1)
    dist, idx = tree.query(point, k=1)
    nearest_shop = df_md_final1.iloc[idx[0][0]]
    distance_km = dist[0][0] * 6371  # haversine distance is in radians; scale by Earth's radius (km)

    user_location = [latitude, longitude]
    shop_location = [nearest_shop['md_y'], nearest_shop['md_x']]

    def _field(column):
        # Shop data is rendered as popup HTML; escape it so characters such
        # as '&' or '<' in a listing cannot break the markup.
        return html.escape(str(nearest_shop[column]))

    popup_html = (
        f"Nearest Shop: {_field('name')}<br>"
        f"{_field('address')}, {_field('city')}, TN {_field('postal_code')}<br>"
        f"Distance: {distance_km:.2f} km"
    )

    # Map centred on the user with both markers and a straight connecting line.
    m = folium.Map(location=user_location, zoom_start=12)
    folium.Marker(
        location=user_location,
        popup="Your Location",
        icon=folium.Icon(color='red', icon='user')
    ).add_to(m)
    folium.Marker(
        location=shop_location,
        popup=popup_html,
        icon=folium.Icon(color='green', icon='shopping-cart')
    ).add_to(m)
    folium.PolyLine(locations=[user_location, shop_location], color='blue').add_to(m)

    return "", m._repr_html_()
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  # Gradio Interface
225
  with gr.Blocks(theme=gr.themes.Default()) as app:
 
233
  gr.Markdown("### 2020 Population by County")
234
  pop_dist = gr.Plot(plot_2020_population_top15)
235
 
236
+ with gr.Column():
237
+ gr.Markdown("### 2010 vs 2020 Population Comparison")
238
+ pop_comp = gr.Plot(plot_population_comparison)
239
+
240
  gr.Markdown("### πŸ› οΈ Auto Businesses in Tennessee")
241
  manual_table = gr.Dataframe(
242
+ headers=["Name", "Address", "City", "State", "Postal Code"],
243
  datatype=["str", "str", "str", "str", "str"],
244
+ value=df_md_final1[['name', 'address', 'city', 'state', 'postal_code']].head(10).values.tolist(), # Display first 10 for brevity
245
+ row_count=10,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  interactive=False
247
  )
248
 
249
  gr.Markdown("### πŸ“ Interactive Map")
250
+ map_output_overview = gr.HTML(create_map(geo_layer="Counties", business_filters=["All"]))
251
 
252
  with gr.Tab("πŸ“ Shops in TN Counties"):
253
  with gr.Row():
 
286
  business_filter_hsa.change(fn=update_hsa_map, inputs=[business_filter_hsa], outputs=[shops_hsa_map])
287
  reset_button_hsa.click(fn=lambda: (["All"], create_map(geo_layer="HSAs", business_filters=["All"])),
288
  inputs=None, outputs=[business_filter_hsa, shops_hsa_map])
289
+
290
  with gr.Tab("πŸ“ Shops in TN HRRs"):
291
  with gr.Row():
292
  with gr.Column(scale=1):
 
306
  reset_button_hrr.click(fn=lambda: (["All"], create_map(geo_layer="HRRs", business_filters=["All"])),
307
  inputs=None, outputs=[business_filter_hrr, shops_hrr_map])
308
 
309
+ with gr.Tab("πŸ” Nearest Shop Finder"):
310
+ gr.Markdown("## Find the Nearest Auto Shop in Tennessee")
311
+ with gr.Row():
312
+ with gr.Column(scale=1):
313
+ gr.Markdown("### Input Your Location")
314
+ address_input = gr.Textbox(label="Enter Address", placeholder="e.g., 123 Main St, Nashville, TN")
315
+ gr.Markdown("**OR**")
316
+ location_input = gr.Button("Click on Map to Select Location")
317
+ with gr.Column(scale=4):
318
+ nearest_map = gr.HTML()
319
+
320
+ # Hidden inputs to capture latitude and longitude from map clicks
321
+ lat_input = gr.Number(label="Latitude", visible=False)
322
+ lon_input = gr.Number(label="Longitude", visible=False)
323
+
324
+ # Function to handle map clicks and update latitude and longitude
325
def map_click_event(location):
    """Split a selected map location into its latitude and longitude.

    Args:
        location: A two-item ``(latitude, longitude)`` sequence, or ``None``
            when nothing has been selected yet.

    Returns:
        ``(latitude, longitude)``, or ``(None, None)`` when ``location`` is
        ``None`` so the hidden coordinate inputs are simply left empty
        instead of the callback raising on unpacking.
    """
    if location is None:
        return None, None
    latitude, longitude = location
    return latitude, longitude
328
+
329
+ # Initialize the map for nearest shop finder
330
def initialize_nearest_map():
    """Build the placeholder map shown before any nearest-shop search.

    Returns:
        Folium map markup centred on [35.8601, -86.6602] with a single hint
        marker and a lat/lng popup that reports the clicked coordinates.
    """
    m = folium.Map(location=[35.8601, -86.6602], zoom_start=7)
    folium.TileLayer('cartodbpositron').add_to(m)
    folium.Marker(
        location=[35.8601, -86.6602],
        popup="Click on the map to select your location",
        icon=folium.Icon(color='red', icon='info-sign')
    ).add_to(m)
    m.add_child(folium.LatLngPopup())
    return m._repr_html_()

# gr.HTML does not read an `initial_value` attribute, so assigning one never
# reached the page. Fill the component through the Blocks load event instead,
# which runs once when the app is opened in the browser.
app.load(fn=initialize_nearest_map, inputs=None, outputs=[nearest_map])
342
+
343
+ # Function to update the map based on user input
344
def update_nearest_map(address, latitude, longitude):
    """Return the markup to show in the nearest-shop HTML panel.

    Delegates to ``find_nearest_shop``. When the lookup fails it returns no
    map, only a message; that message is rendered in place of the map so the
    user sees why nothing was found instead of an empty panel.

    Args:
        address: Address text from the textbox.
        latitude: Latitude from the hidden number input (may be ``None``).
        longitude: Longitude from the hidden number input (may be ``None``).

    Returns:
        Map HTML on success, otherwise a short HTML-escaped error paragraph.
    """
    import html  # function-scope import keeps this block self-contained

    message, updated_map = find_nearest_shop(address, latitude, longitude)
    if updated_map is None:
        return f"<p><b>{html.escape(message)}</b></p>"
    return updated_map
347
+
348
+ # When the user submits an address
349
+ address_input.submit(fn=update_nearest_map, inputs=[address_input, lat_input, lon_input], outputs=[nearest_map])
350
+
351
+ # Note: Capturing map clicks directly in Gradio is non-trivial. As an alternative, users can enter their address.
352
+ # For more advanced interactivity, a custom frontend might be required.
353
+
354
  with gr.Tab("πŸ” Help"):
355
  gr.Markdown("""
356
  ## How to Use This Dashboard
357
 
358
  - **Overview Tab:** Provides population statistics and a summary map of all auto businesses in Tennessee.
359
 
360
+ - **Shops in TN Counties/HSAs/HRRs Tabs:**
361
+ - **Filter by Business Type:** Use the checkboxes to select one or multiple business types to display on the map.
362
+ - **Filter by Geographical Area:** Depending on the tab, you can filter businesses based on Counties, HSAs, or HRRs.
363
  - **Reset Filters:** Click the reset button to clear all selected filters and view all businesses.
364
  - **Interactive Map:** Zoom in/out, click on markers to view business details, and use the search bar to find specific businesses.
365
+
366
+ - **Nearest Shop Finder Tab:**
367
+ - **Enter Address:** Type your address in the textbox and press Enter to find the nearest auto shop.
368
+ - **Select Location on Map:** (Feature under development) Click on the map to select your location and find the nearest shop.
369
+ - **View Results:** The map will display your location and the nearest auto shop with a line connecting them.
370
+
371
  """)
372
 
373
  gr.Markdown("### πŸ“„ Source: Yellow Pages")
374
+
375
  app.launch(server_name="0.0.0.0", server_port=7860, share=True)