Update app.py
Browse files
app.py
CHANGED
@@ -5,61 +5,90 @@ import plotly.express as px
|
|
5 |
import folium
|
6 |
import numpy as np
|
7 |
import geopandas as gpd
|
8 |
-
from
|
|
|
|
|
|
|
|
|
9 |
import os
|
10 |
-
import openrouteservice
|
11 |
-
from folium.plugins import MarkerCluster
|
12 |
-
from folium.plugins import Search
|
13 |
-
|
14 |
-
import matplotlib.pyplot as plt
|
15 |
-
import io
|
16 |
-
import base64
|
17 |
-
import logfire
|
18 |
|
19 |
# Logger setup
|
20 |
logging.basicConfig(level=logging.INFO)
|
21 |
logger = logging.getLogger(__name__)
|
22 |
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
if not ORS_API_KEY:
|
28 |
-
raise ValueError("OpenRouteService API key not found. Please set the 'ors' environment variable.")
|
29 |
-
|
30 |
-
client = openrouteservice.Client(key=ORS_API_KEY)
|
31 |
-
|
32 |
-
# Function to create isochrones around Autozone locations
|
33 |
-
def create_isochrone_map():
|
34 |
-
m = folium.Map(location=[35.8601, -86.6602], zoom_start=7)
|
35 |
-
autozone_df = df_md_final1[df_md_final1['business_type'] == 'Autozone']
|
36 |
-
|
37 |
-
for idx, row in autozone_df.iterrows():
|
38 |
-
coords = (row['md_x'], row['md_y'])
|
39 |
-
try:
|
40 |
-
isochrone = client.isochrones(locations=[coords], profile='driving-car', range=[1800])
|
41 |
-
folium.GeoJson(isochrone, name='Isochrones').add_to(m)
|
42 |
-
except openrouteservice.exceptions.HTTPError as e:
|
43 |
-
print(f"HTTPError: {e}")
|
44 |
-
continue
|
45 |
-
except Exception as e:
|
46 |
-
print(f"An error occurred: {e}")
|
47 |
-
continue
|
48 |
-
|
49 |
-
folium.LayerControl().add_to(m)
|
50 |
-
return m._repr_html_()
|
51 |
|
52 |
-
#
|
53 |
population_2020_data = {
|
54 |
'County': ['Shelby', 'Davidson', 'Knox', 'Hamilton', 'Rutherford', 'Williamson', 'Montgomery', 'Sumner', 'Blount', 'Washington',
|
55 |
'Madison', 'Sevier', 'Maury', 'Wilson', 'Bradley'],
|
56 |
'Population_2020': [929744, 715884, 478971, 366207, 341486, 247726, 220069, 196281, 135280, 133001,
|
57 |
98823, 98380, 100974, 147737, 108620]
|
58 |
}
|
59 |
-
|
60 |
-
# Create a DataFrame for the top 15 counties
|
61 |
df_population_2020 = pd.DataFrame(population_2020_data)
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
# Function to create a Folium map with selected geographical boundaries and markers
|
64 |
def create_map(geo_layer="Counties", business_filters=["All"]):
|
65 |
logger.info(f"Creating map with geo_layer: {geo_layer} and business_filters: {business_filters}")
|
@@ -70,8 +99,6 @@ def create_map(geo_layer="Counties", business_filters=["All"]):
|
|
70 |
# Select the appropriate GeoDataFrame based on geo_layer
|
71 |
if geo_layer == "Counties":
|
72 |
geo_data = counties_geo
|
73 |
-
elif geo_layer == "Zip Codes":
|
74 |
-
geo_data = zcta_geo
|
75 |
elif geo_layer == "HSAs":
|
76 |
geo_data = hsa_geo
|
77 |
elif geo_layer == "HRRs":
|
@@ -100,7 +127,8 @@ def create_map(geo_layer="Counties", business_filters=["All"]):
|
|
100 |
for _, row in filtered_df.iterrows():
|
101 |
folium.Marker(
|
102 |
location=[row['md_y'], row['md_x']],
|
103 |
-
popup=f"<b>{row['name']}</b>"
|
|
|
104 |
).add_to(marker_cluster)
|
105 |
|
106 |
folium.LayerControl().add_to(m)
|
@@ -108,56 +136,23 @@ def create_map(geo_layer="Counties", business_filters=["All"]):
|
|
108 |
logger.info("Map creation completed.")
|
109 |
return m._repr_html_()
|
110 |
|
111 |
-
|
112 |
# Function to create the bar plot for 2020 Tennessee population (top 15 counties)
|
113 |
def plot_2020_population_top15():
|
114 |
fig = px.bar(df_population_2020,
|
115 |
x='County',
|
116 |
y='Population_2020',
|
117 |
title='Tennessee Population 2020',
|
118 |
-
labels={'County': 'County', 'Population_2020': ''},
|
119 |
color='Population_2020',
|
120 |
color_continuous_scale='Blues')
|
121 |
|
122 |
fig.update_layout(xaxis={'categoryorder':'total descending'}, template='plotly_white')
|
123 |
return fig
|
124 |
|
125 |
-
# Function to create the population distribution plot
|
126 |
-
def plot_population_distribution():
|
127 |
-
print(cbg_geographic_data.head())
|
128 |
-
county_data = cbg_geographic_data.groupby('cntyname')['pop10'].sum().reset_index().sort_values(by='pop10', ascending=False)
|
129 |
-
|
130 |
-
fig = px.bar(county_data.head(15),
|
131 |
-
x="cntyname",
|
132 |
-
y="pop10",
|
133 |
-
title="2010 Population by County",
|
134 |
-
labels={"cntyname": "County", "pop10": "2010 Population"},
|
135 |
-
color='pop10',
|
136 |
-
color_continuous_scale='Viridis')
|
137 |
-
fig.update_layout(xaxis={'categoryorder':'total descending'}, template='plotly_white')
|
138 |
-
return fig
|
139 |
-
|
140 |
-
# Load datasets
|
141 |
-
df_md_final1 = pd.read_csv("data/location-of-auto-businesses.csv")
|
142 |
-
print(df_md_final1.columns)
|
143 |
-
print(df_md_final1.info)
|
144 |
-
cbg_geographic_data = pd.read_csv("data/cbg_geographic_data.csv")
|
145 |
-
print(cbg_geographic_data.columns)
|
146 |
-
print(cbg_geographic_data.info)
|
147 |
-
|
148 |
-
# Create DataFrames for the 2020 and 2010 populations
|
149 |
-
df_population_2010 = cbg_geographic_data.groupby('cntyname')['pop10'].sum().reset_index().sort_values(by='pop10', ascending=False)
|
150 |
-
df_population_2010.rename(columns={'cntyname': 'County', 'pop10': 'Population_2010'}, inplace=True)
|
151 |
-
|
152 |
-
|
153 |
-
# Merge the 2010 and 2020 population data for side-by-side comparison
|
154 |
-
df_population_comparison = pd.merge(df_population_2010, df_population_2020, on='County')
|
155 |
-
|
156 |
# Function to create a side-by-side bar chart for the 2010 and 2020 Tennessee population by county
|
157 |
def plot_population_comparison():
|
158 |
df_melted = df_population_comparison.melt(id_vars='County', value_vars=['Population_2010', 'Population_2020'],
|
159 |
var_name='Year', value_name='Population')
|
160 |
-
print(df_melted)
|
161 |
fig = px.bar(df_melted,
|
162 |
x='County',
|
163 |
y='Population',
|
@@ -170,155 +165,61 @@ def plot_population_comparison():
|
|
170 |
fig.update_layout(xaxis={'categoryorder': 'total descending'}, template='plotly_white')
|
171 |
return fig
|
172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
|
174 |
-
#
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
print("County Shapefile Columns:", counties_geo.info()) # Debugging line
|
182 |
-
print("County Shapefile Columns:", counties_geo.head()) # Debugging line
|
183 |
-
|
184 |
-
counties_geo = counties_geo[counties_geo['statefp'] == '47'] # Ensure correct column name for state FIPS
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
# Define FIPS codes and abbreviations for TN, KY, IN, and AR
|
189 |
-
state_fips = ['47'] # TN, KY, IN, AR FIPS codes
|
190 |
-
state_abbr = ['TN']
|
191 |
-
|
192 |
-
# Load the HSA shapefile
|
193 |
-
hsa_shapefile_path = "data/hsa/01_hsa-shape-file.shp"
|
194 |
-
if not os.path.exists(hsa_shapefile_path):
|
195 |
-
raise FileNotFoundError(f"HSA shapefile not found at {hsa_shapefile_path}. Please ensure the file exists.")
|
196 |
-
|
197 |
-
|
198 |
-
hsa_geo = gpd.read_file(hsa_shapefile_path)
|
199 |
-
# Update the column name based on your shapefile
|
200 |
-
if 'hsastate' in hsa_geo.columns:
|
201 |
-
hsa_geo = hsa_geo[hsa_geo['hsastate'].isin(state_abbr)]
|
202 |
-
elif 'STATEFP' in hsa_geo.columns:
|
203 |
-
hsa_geo = hsa_geo[hsa_geo['STATEFP'].isin(state_fips)]
|
204 |
-
else:
|
205 |
-
raise KeyError("Column to filter state in HSA shapefile not found.")
|
206 |
-
|
207 |
-
# Load the HRR shapefile
|
208 |
-
hrr_shapefile_path = "data/hrr/01_hrr-shape-file.shp"
|
209 |
-
if not os.path.exists(hrr_shapefile_path):
|
210 |
-
raise FileNotFoundError(f"HRR shapefile not found at {hrr_shapefile_path}. Please ensure the file exists.")
|
211 |
-
|
212 |
-
hrr_geo = gpd.read_file(hrr_shapefile_path)
|
213 |
-
# Update the column name based on your shapefile
|
214 |
-
if 'hrrstate' in hrr_geo.columns:
|
215 |
-
hrr_geo = hrr_geo[hrr_geo['hrrstate'].isin(state_abbr)]
|
216 |
-
elif 'STATEFP' in hrr_geo.columns:
|
217 |
-
hrr_geo = hrr_geo[hrr_geo['STATEFP'].isin(state_fips)]
|
218 |
-
else:
|
219 |
-
raise KeyError("Column to filter state in HRR shapefile not found.")
|
220 |
-
|
221 |
|
222 |
-
#
|
223 |
-
|
224 |
-
np.where(df_md_final1['name'].str.contains("Napa Auto Parts", case=False, na=False), "Napa Auto",
|
225 |
-
np.where(df_md_final1['name'].str.contains("Firestone Complete Auto Care", case=False, na=False), "Firestone",
|
226 |
-
np.where(df_md_final1['name'].str.contains("O'Reilly Auto Parts", case=False, na=False), "O'Reilly Auto",
|
227 |
-
np.where(df_md_final1['name'].str.contains("Advance Auto Parts", case=False, na=False), "Advance Auto",
|
228 |
-
np.where(df_md_final1['name'].str.contains("Toyota|Honda|Kia|Nissan|Chevy|Ford|Carmax|GMC", case=False, na=False),
|
229 |
-
"Car Dealership",
|
230 |
-
"Other Auto Repair Shops")
|
231 |
-
)
|
232 |
-
)
|
233 |
-
)
|
234 |
-
))
|
235 |
|
236 |
-
#
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
df_model = pd.merge(df_population_2020, business_counts, left_on='County', right_on='county', how='left')
|
243 |
-
df_model['business_count'] = df_model['business_count'].fillna(0)
|
244 |
-
|
245 |
-
# Drop redundant columns
|
246 |
-
df_model = df_model.drop(columns=['county']) # Assuming 'county' is the same as 'County'
|
247 |
-
|
248 |
-
# Additional feature engineering can be done here
|
249 |
-
|
250 |
-
return df_model
|
251 |
|
252 |
-
#
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
y = df_model['business_count']
|
259 |
-
|
260 |
-
# Handle any missing values or additional preprocessing if necessary
|
261 |
-
# For this example, we assume data is clean
|
262 |
-
|
263 |
-
# Split the data
|
264 |
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
265 |
-
|
266 |
-
# Initialize and train the model
|
267 |
-
rf = RandomForestRegressor(n_estimators=100, random_state=42)
|
268 |
-
rf.fit(X_train, y_train)
|
269 |
-
|
270 |
-
# Predictions
|
271 |
-
y_pred = rf.predict(X_test)
|
272 |
-
|
273 |
-
# Metrics
|
274 |
-
mse = mean_squared_error(y_test, y_pred)
|
275 |
-
r2 = r2_score(y_test, y_pred)
|
276 |
-
|
277 |
-
# SHAP explanation
|
278 |
-
explainer = shap.Explainer(rf, X_train)
|
279 |
-
shap_values = explainer(X_test)
|
280 |
-
|
281 |
-
# Feature importance
|
282 |
-
feature_importances = pd.DataFrame({
|
283 |
-
'feature': X.columns,
|
284 |
-
'importance': rf.feature_importances_
|
285 |
-
}).sort_values(by='importance', ascending=False)
|
286 |
-
|
287 |
-
return {
|
288 |
-
'model': rf,
|
289 |
-
'X_test': X_test,
|
290 |
-
'y_test': y_test,
|
291 |
-
'y_pred': y_pred,
|
292 |
-
'mse': mse,
|
293 |
-
'r2': r2,
|
294 |
-
'shap_values': shap_values,
|
295 |
-
'feature_importances': feature_importances
|
296 |
-
}
|
297 |
|
298 |
-
#
|
299 |
-
|
300 |
-
plt.figure()
|
301 |
-
shap.summary_plot(shap_values, X, show=False)
|
302 |
-
buf = io.BytesIO()
|
303 |
-
plt.savefig(buf, format="png", bbox_inches='tight')
|
304 |
-
plt.close()
|
305 |
-
buf.seek(0)
|
306 |
-
img_base64 = base64.b64encode(buf.read()).decode('utf-8')
|
307 |
-
return f"data:image/png;base64,{img_base64}"
|
308 |
|
309 |
-
|
310 |
-
def get_feature_importance_plot(feature_importances):
|
311 |
-
fig = px.bar(feature_importances,
|
312 |
-
x='feature',
|
313 |
-
y='importance',
|
314 |
-
title='Feature Importance from Random Forest',
|
315 |
-
labels={'feature': 'Feature', 'importance': 'Importance'},
|
316 |
-
color='importance',
|
317 |
-
color_continuous_scale='Blues')
|
318 |
-
fig.update_layout(template='plotly_white')
|
319 |
-
img_bytes = fig.to_image(format="png")
|
320 |
-
img_base64 = base64.b64encode(img_bytes).decode('utf-8')
|
321 |
-
return f"data:image/png;base64,{img_base64}"
|
322 |
|
323 |
# Gradio Interface
|
324 |
with gr.Blocks(theme=gr.themes.Default()) as app:
|
@@ -332,47 +233,21 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
332 |
gr.Markdown("### 2020 Population by County")
|
333 |
pop_dist = gr.Plot(plot_2020_population_top15)
|
334 |
|
|
|
|
|
|
|
|
|
335 |
gr.Markdown("### π οΈ Auto Businesses in Tennessee")
|
336 |
manual_table = gr.Dataframe(
|
337 |
-
headers=["
|
338 |
datatype=["str", "str", "str", "str", "str"],
|
339 |
-
value=[
|
340 |
-
|
341 |
-
["Sterling Auto", "2064 Wilma Rudolph Blvd", "Clarksville", "Tennessee", "37040"],
|
342 |
-
["AutoZone", "257 Wears Valley Rd", "Pigeon Forge", "Tennessee", "37863"],
|
343 |
-
["Sterling Auto", "2064 Wilma Rudolph Blvd", "Clarksville", "Tennessee", "37040"],
|
344 |
-
["Advance Auto Parts", "2124 N Highland Ave", "Jackson", "Tennessee", "38305"],
|
345 |
-
["FRIENDSHIP HYUNDAI OF BRISTOL", "1841 Volunteer Pkwy", "Bristol", "Tennessee", "37620"],
|
346 |
-
["Advance Auto Parts", "45 Main St", "Savannah", "Tennessee", "38372"],
|
347 |
-
["O'Reilly Auto Parts", "493 Craighead St", "Nashville", "Tennessee", "37204"],
|
348 |
-
["O'Reilly Auto Parts", "864 Highway 51 N", "Covington", "Tennessee", "38019"],
|
349 |
-
["NAPA Auto Parts", "711 Murfreesboro Pike", "Nashville", "Tennessee", "37210"],
|
350 |
-
["Goodyear Auto Service Centers", "5407 Highway 153", "Hixson", "Tennessee", "37343"],
|
351 |
-
["NAPA Auto Parts", "100 Center St", "Johnson City", "Tennessee", "37615"],
|
352 |
-
["Cadillac,Buick,Chevrolet,GMC", "960 John R Rice Blvd", "Murfreesboro", "Tennessee", "37129"],
|
353 |
-
["AutoZone", "9760 Highway 64", "Lakeland", "Tennessee", "38002"],
|
354 |
-
["Honda", "1408 Highway 45 Byp", "Jackson", "Tennessee", "38305"],
|
355 |
-
["National Tire & Battery (NTB)", "532 Robert Rose Dr", "Murfreesboro", "Tennessee", "37129"],
|
356 |
-
["NAPA Auto Parts", "711 Murfreesboro Pike", "Nashville", "Tennessee", "37210"],
|
357 |
-
["Advance Auto Parts", "160 W Broadway", "Gallatin", "Tennessee", "37066"],
|
358 |
-
["Southern Tire Mart (STM)", "1551 S Wilcox Dr", "Kingsport", "Tennessee", "37660"],
|
359 |
-
["Chevrolet", "310 E 20th St", "Chattanooga", "Tennessee", "37408"],
|
360 |
-
["O'Reilly Auto Parts", "7534 Oak Ridge Hwy", "Knoxville", "Tennessee", "37931"],
|
361 |
-
["Goodyear Auto Service Centers", "971 Eastgate Loop", "Chattanooga", "Tennessee", "37411"],
|
362 |
-
["Firestone Complete Auto Care", "15127 Old Hickory Blvd", "Nashville", "Tennessee", "37211"],
|
363 |
-
["Christian Brothers Automotive", "10406 Kingston Pike", "Knoxville", "Tennessee", "37922"],
|
364 |
-
["Christian Brothers Automotive", "563 E Main St", "Hendersonville", "Tennessee", "37075"],
|
365 |
-
["O'Reilly Auto Parts", "101 Village Square Ln", "Mountain City", "Tennessee", "37683"],
|
366 |
-
["O'Reilly Auto Parts", "4219 Fort Henry Dr Ste A", "Kingsport", "Tennessee", "37663"],
|
367 |
-
["Precision Tune Auto Care", "4710 N Broadway St", "Knoxville", "Tennessee", "37918"],
|
368 |
-
["National Tire & Battery (NTB)", "234 Old Hickory Blvd", "Nashville", "Tennessee", "37221"]
|
369 |
-
], # Data values
|
370 |
-
row_count=27, # Adjusted total number of rows
|
371 |
interactive=False
|
372 |
)
|
373 |
|
374 |
gr.Markdown("### π Interactive Map")
|
375 |
-
map_output_overview = gr.HTML(
|
376 |
|
377 |
with gr.Tab("π Shops in TN Counties"):
|
378 |
with gr.Row():
|
@@ -411,7 +286,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
411 |
business_filter_hsa.change(fn=update_hsa_map, inputs=[business_filter_hsa], outputs=[shops_hsa_map])
|
412 |
reset_button_hsa.click(fn=lambda: (["All"], create_map(geo_layer="HSAs", business_filters=["All"])),
|
413 |
inputs=None, outputs=[business_filter_hsa, shops_hsa_map])
|
414 |
-
|
415 |
with gr.Tab("π Shops in TN HRRs"):
|
416 |
with gr.Row():
|
417 |
with gr.Column(scale=1):
|
@@ -431,20 +306,70 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
431 |
reset_button_hrr.click(fn=lambda: (["All"], create_map(geo_layer="HRRs", business_filters=["All"])),
|
432 |
inputs=None, outputs=[business_filter_hrr, shops_hrr_map])
|
433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
with gr.Tab("π Help"):
|
435 |
gr.Markdown("""
|
436 |
## How to Use This Dashboard
|
437 |
|
438 |
- **Overview Tab:** Provides population statistics and a summary map of all auto businesses in Tennessee.
|
439 |
|
440 |
-
- **Shops in TN Counties/
|
441 |
-
- **Filter by Business Type:** Use the checkboxes to select one or multiple
|
442 |
-
- **Filter by Geographical Area:** Depending on the tab, you can filter businesses based on Counties,
|
443 |
- **Reset Filters:** Click the reset button to clear all selected filters and view all businesses.
|
444 |
- **Interactive Map:** Zoom in/out, click on markers to view business details, and use the search bar to find specific businesses.
|
445 |
-
|
|
|
|
|
|
|
|
|
|
|
446 |
""")
|
447 |
|
448 |
gr.Markdown("### π Source: Yellow Pages")
|
449 |
-
|
450 |
app.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
|
|
5 |
import folium
|
6 |
import numpy as np
|
7 |
import geopandas as gpd
|
8 |
+
from folium.plugins import MarkerCluster, Search
|
9 |
+
from sklearn.neighbors import BallTree
|
10 |
+
from geopy.geocoders import Nominatim
|
11 |
+
from geopy.extra.rate_limiter import RateLimiter
|
12 |
+
import math
|
13 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# Logger setup
|
16 |
logging.basicConfig(level=logging.INFO)
|
17 |
logger = logging.getLogger(__name__)
|
18 |
|
19 |
+
# ORS client setup (if needed for additional functionalities)
|
20 |
+
# ORS_API_KEY = os.getenv('ors')
|
21 |
+
# if not ORS_API_KEY:
|
22 |
+
# raise ValueError("OpenRouteService API key not found. Please set the 'ors' environment variable.")
|
23 |
+
# client = openrouteservice.Client(key=ORS_API_KEY)
|
24 |
|
25 |
+
# Load datasets
|
26 |
+
df_md_final1 = pd.read_csv("data/location-of-auto-businesses.csv")
|
27 |
+
cbg_geographic_data = pd.read_csv("data/cbg_geographic_data.csv")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
+
# Create DataFrames for the 2020 and 2010 populations
|
30 |
population_2020_data = {
|
31 |
'County': ['Shelby', 'Davidson', 'Knox', 'Hamilton', 'Rutherford', 'Williamson', 'Montgomery', 'Sumner', 'Blount', 'Washington',
|
32 |
'Madison', 'Sevier', 'Maury', 'Wilson', 'Bradley'],
|
33 |
'Population_2020': [929744, 715884, 478971, 366207, 341486, 247726, 220069, 196281, 135280, 133001,
|
34 |
98823, 98380, 100974, 147737, 108620]
|
35 |
}
|
|
|
|
|
36 |
df_population_2020 = pd.DataFrame(population_2020_data)
|
37 |
|
38 |
+
df_population_2010 = cbg_geographic_data.groupby('cntyname')['pop10'].sum().reset_index().sort_values(by='pop10', ascending=False)
|
39 |
+
df_population_2010.rename(columns={'cntyname': 'County', 'pop10': 'Population_2010'}, inplace=True)
|
40 |
+
|
41 |
+
# Merge the 2010 and 2020 population data for side-by-side comparison
|
42 |
+
df_population_comparison = pd.merge(df_population_2010, df_population_2020, on='County')
|
43 |
+
|
44 |
+
# Define Business Types
|
45 |
+
# Classify every business by the brand keyword found in its name.
# Rules are tried top to bottom and the first matching pattern wins; a name
# that matches none of them falls into the generic repair-shop bucket.
_shop_names = df_md_final1['name']
_brand_rules = [
    ("Autozone", "Autozone"),
    ("Napa Auto Parts", "Napa Auto"),
    ("Firestone Complete Auto Care", "Firestone"),
    ("O'Reilly Auto Parts", "O'Reilly Auto"),
    ("Advance Auto Parts", "Advance Auto"),
    ("Toyota|Honda|Kia|Nissan|Chevy|Ford|Carmax|GMC", "Car Dealership"),
]
df_md_final1['business_type'] = np.select(
    # Case-insensitive regex match; missing names count as "no match".
    [_shop_names.str.contains(pattern, case=False, na=False) for pattern, _ in _brand_rules],
    [label for _, label in _brand_rules],
    default="Other Auto Repair Shops",
)
|
57 |
+
|
58 |
+
# Load the County shapefile
|
59 |
+
county_shapefile_path = "data/county/01_county-shape-file.shp"
|
60 |
+
if not os.path.exists(county_shapefile_path):
|
61 |
+
raise FileNotFoundError(f"County shapefile not found at {county_shapefile_path}. Please ensure the file exists.")
|
62 |
+
|
63 |
+
counties_geo = gpd.read_file(county_shapefile_path)
|
64 |
+
counties_geo = counties_geo[counties_geo['statefp'] == '47'] # Tennessee FIPS code
|
65 |
+
|
66 |
+
# Load the HSA shapefile
|
67 |
+
hsa_shapefile_path = "data/hsa/01_hsa-shape-file.shp"
|
68 |
+
if not os.path.exists(hsa_shapefile_path):
|
69 |
+
raise FileNotFoundError(f"HSA shapefile not found at {hsa_shapefile_path}. Please ensure the file exists.")
|
70 |
+
|
71 |
+
hsa_geo = gpd.read_file(hsa_shapefile_path)
|
72 |
+
if 'hsastate' in hsa_geo.columns:
|
73 |
+
hsa_geo = hsa_geo[hsa_geo['hsastate'].isin(['TN'])]
|
74 |
+
elif 'STATEFP' in hsa_geo.columns:
|
75 |
+
hsa_geo = hsa_geo[hsa_geo['STATEFP'].isin(['47'])]
|
76 |
+
else:
|
77 |
+
raise KeyError("Column to filter state in HSA shapefile not found.")
|
78 |
+
|
79 |
+
# Load the HRR shapefile
|
80 |
+
hrr_shapefile_path = "data/hrr/01_hrr-shape-file.shp"
|
81 |
+
if not os.path.exists(hrr_shapefile_path):
|
82 |
+
raise FileNotFoundError(f"HRR shapefile not found at {hrr_shapefile_path}. Please ensure the file exists.")
|
83 |
+
|
84 |
+
hrr_geo = gpd.read_file(hrr_shapefile_path)
|
85 |
+
if 'hrrstate' in hrr_geo.columns:
|
86 |
+
hrr_geo = hrr_geo[hrr_geo['hrrstate'].isin(['TN'])]
|
87 |
+
elif 'STATEFP' in hrr_geo.columns:
|
88 |
+
hrr_geo = hrr_geo[hrr_geo['STATEFP'].isin(['47'])]
|
89 |
+
else:
|
90 |
+
raise KeyError("Column to filter state in HRR shapefile not found.")
|
91 |
+
|
92 |
# Function to create a Folium map with selected geographical boundaries and markers
|
93 |
def create_map(geo_layer="Counties", business_filters=["All"]):
|
94 |
logger.info(f"Creating map with geo_layer: {geo_layer} and business_filters: {business_filters}")
|
|
|
99 |
# Select the appropriate GeoDataFrame based on geo_layer
|
100 |
if geo_layer == "Counties":
|
101 |
geo_data = counties_geo
|
|
|
|
|
102 |
elif geo_layer == "HSAs":
|
103 |
geo_data = hsa_geo
|
104 |
elif geo_layer == "HRRs":
|
|
|
127 |
for _, row in filtered_df.iterrows():
|
128 |
folium.Marker(
|
129 |
location=[row['md_y'], row['md_x']],
|
130 |
+
popup=f"<b>{row['name']}</b><br>{row['address']}, {row['city']}, TN {row['postal_code']}",
|
131 |
+
icon=folium.Icon(color='blue', icon='info-sign')
|
132 |
).add_to(marker_cluster)
|
133 |
|
134 |
folium.LayerControl().add_to(m)
|
|
|
136 |
logger.info("Map creation completed.")
|
137 |
return m._repr_html_()
|
138 |
|
|
|
139 |
# Function to create the bar plot for 2020 Tennessee population (top 15 counties)
|
140 |
def plot_2020_population_top15():
    """Return a Plotly bar chart of 2020 population per county.

    Reads the module-level ``df_population_2020`` frame; bars are coloured by
    population and ordered from the largest county to the smallest.
    """
    bar_options = {
        'x': 'County',
        'y': 'Population_2020',
        'title': 'Tennessee Population 2020',
        'labels': {'County': 'County', 'Population_2020': 'Population'},
        'color': 'Population_2020',
        'color_continuous_scale': 'Blues',
    }
    chart = px.bar(df_population_2020, **bar_options)

    # Sort the categorical x axis by bar height rather than by input order.
    chart.update_layout(template='plotly_white', xaxis={'categoryorder': 'total descending'})
    return chart
|
151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
# Function to create a side-by-side bar chart for the 2010 and 2020 Tennessee population by county
|
153 |
def plot_population_comparison():
|
154 |
df_melted = df_population_comparison.melt(id_vars='County', value_vars=['Population_2010', 'Population_2020'],
|
155 |
var_name='Year', value_name='Population')
|
|
|
156 |
fig = px.bar(df_melted,
|
157 |
x='County',
|
158 |
y='Population',
|
|
|
165 |
fig.update_layout(xaxis={'categoryorder': 'total descending'}, template='plotly_white')
|
166 |
return fig
|
167 |
|
168 |
+
# Nearest Neighbor Search Setup
|
169 |
+
# Prepare the data for nearest neighbor search
|
170 |
+
def prepare_nearest_neighbor():
    """Build the spatial index used for nearest-shop lookups.

    Returns:
        A ``(tree, radians_coords)`` pair: a haversine ``BallTree`` fitted on
        the ``md_y``/``md_x`` columns of ``df_md_final1`` and the radian
        coordinate array it was built from. Rows are in the same order as
        ``df_md_final1``.
    """
    # The haversine metric works on [lat, lon] pairs expressed in radians.
    lat_lon_rad = np.radians(df_md_final1[['md_y', 'md_x']].to_numpy())
    return BallTree(lat_lon_rad, metric='haversine'), lat_lon_rad
|
176 |
+
|
177 |
+
tree, radians_coords = prepare_nearest_neighbor()
|
178 |
+
|
179 |
+
# Geocoder setup
|
180 |
+
geolocator = Nominatim(user_agent="tn_auto_shops_app")
|
181 |
+
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
|
182 |
+
|
183 |
+
# Function to find the nearest shop
|
184 |
+
def find_nearest_shop(address=None, latitude=None, longitude=None):
    """Find the shop closest to an address or coordinate and draw it on a map.

    Args:
        address: Free-text address to geocode. When it is non-blank it takes
            precedence over ``latitude``/``longitude``.
        latitude: Latitude in decimal degrees, used when no address is given.
        longitude: Longitude in decimal degrees, used when no address is given.

    Returns:
        A ``(message, map_html)`` tuple. On success ``message`` is an empty
        string and ``map_html`` is the rendered Folium map. On failure
        ``message`` describes the problem and ``map_html`` is ``None``.
    """
    # A whitespace-only textbox value means "no address"; it must not be sent
    # to the geocoder.
    if isinstance(address, str):
        address = address.strip()

    if address:
        location = geocode(address)
        if not location:
            return "Address not found. Please try a different address.", None
        latitude, longitude = location.latitude, location.longitude
    elif (
        latitude is None
        or longitude is None
        # NaN/inf coordinates would make the tree query raise.
        or not np.isfinite([latitude, longitude]).all()
    ):
        return "Please provide an address or select a location on the map.", None

    # The tree was built on radians, so the query point must be in radians too.
    point = np.radians([latitude, longitude]).reshape(1, -1)
    dist, idx = tree.query(point, k=1)
    nearest_shop = df_md_final1.iloc[idx[0][0]]
    # Haversine distances come back as angles; scale by Earth's radius (km).
    distance_km = dist[0][0] * 6371

    shop_location = [nearest_shop['md_y'], nearest_shop['md_x']]

    # Map centred on the user's position.
    m = folium.Map(location=[latitude, longitude], zoom_start=12)

    # User location marker.
    folium.Marker(
        location=[latitude, longitude],
        popup="Your Location",
        icon=folium.Icon(color='red', icon='user')
    ).add_to(m)

    # Nearest shop marker with its address and the computed distance.
    folium.Marker(
        location=shop_location,
        popup=f"Nearest Shop: {nearest_shop['name']}<br>{nearest_shop['address']}, {nearest_shop['city']}, TN {nearest_shop['postal_code']}<br>Distance: {distance_km:.2f} km",
        icon=folium.Icon(color='green', icon='shopping-cart')
    ).add_to(m)

    # Straight line connecting the user to the shop.
    folium.PolyLine(locations=[[latitude, longitude], shop_location], color='blue').add_to(m)

    return "", m._repr_html_()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
# Gradio Interface
|
225 |
with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
|
233 |
gr.Markdown("### 2020 Population by County")
|
234 |
pop_dist = gr.Plot(plot_2020_population_top15)
|
235 |
|
236 |
+
with gr.Column():
|
237 |
+
gr.Markdown("### 2010 vs 2020 Population Comparison")
|
238 |
+
pop_comp = gr.Plot(plot_population_comparison)
|
239 |
+
|
240 |
gr.Markdown("### π οΈ Auto Businesses in Tennessee")
|
241 |
manual_table = gr.Dataframe(
|
242 |
+
headers=["Name", "Address", "City", "State", "Postal Code"],
|
243 |
datatype=["str", "str", "str", "str", "str"],
|
244 |
+
value=df_md_final1[['name', 'address', 'city', 'state', 'postal_code']].head(10).values.tolist(), # Display first 10 for brevity
|
245 |
+
row_count=10,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
interactive=False
|
247 |
)
|
248 |
|
249 |
gr.Markdown("### π Interactive Map")
|
250 |
+
map_output_overview = gr.HTML(create_map(geo_layer="Counties", business_filters=["All"]))
|
251 |
|
252 |
with gr.Tab("π Shops in TN Counties"):
|
253 |
with gr.Row():
|
|
|
286 |
business_filter_hsa.change(fn=update_hsa_map, inputs=[business_filter_hsa], outputs=[shops_hsa_map])
|
287 |
reset_button_hsa.click(fn=lambda: (["All"], create_map(geo_layer="HSAs", business_filters=["All"])),
|
288 |
inputs=None, outputs=[business_filter_hsa, shops_hsa_map])
|
289 |
+
|
290 |
with gr.Tab("π Shops in TN HRRs"):
|
291 |
with gr.Row():
|
292 |
with gr.Column(scale=1):
|
|
|
306 |
reset_button_hrr.click(fn=lambda: (["All"], create_map(geo_layer="HRRs", business_filters=["All"])),
|
307 |
inputs=None, outputs=[business_filter_hrr, shops_hrr_map])
|
308 |
|
309 |
+
with gr.Tab("π Nearest Shop Finder"):
|
310 |
+
gr.Markdown("## Find the Nearest Auto Shop in Tennessee")
|
311 |
+
with gr.Row():
|
312 |
+
with gr.Column(scale=1):
|
313 |
+
gr.Markdown("### Input Your Location")
|
314 |
+
address_input = gr.Textbox(label="Enter Address", placeholder="e.g., 123 Main St, Nashville, TN")
|
315 |
+
gr.Markdown("**OR**")
|
316 |
+
location_input = gr.Button("Click on Map to Select Location")
|
317 |
+
with gr.Column(scale=4):
|
318 |
+
nearest_map = gr.HTML()
|
319 |
+
|
320 |
+
# Hidden inputs to capture latitude and longitude from map clicks
|
321 |
+
lat_input = gr.Number(label="Latitude", visible=False)
|
322 |
+
lon_input = gr.Number(label="Longitude", visible=False)
|
323 |
+
|
324 |
+
# Function to handle map clicks and update latitude and longitude
|
325 |
+
def map_click_event(location):
    """Split a two-item ``(latitude, longitude)`` value into separate outputs."""
    lat, lon = location
    return lat, lon
|
328 |
+
|
329 |
+
# Initialize the map for nearest shop finder
|
330 |
+
def initialize_nearest_map():
    """Render the placeholder map shown before any nearest-shop search.

    Returns:
        The HTML representation of a Folium map with a single hint marker
        and a click-to-show-coordinates popup.
    """
    center_lat, center_lon = 35.8601, -86.6602

    base_map = folium.Map(location=[center_lat, center_lon], zoom_start=7)
    folium.TileLayer('cartodbpositron').add_to(base_map)

    # The hint marker sits at the same coordinate the map is centred on.
    hint_marker = folium.Marker(
        location=[center_lat, center_lon],
        popup="Click on the map to select your location",
        icon=folium.Icon(color='red', icon='info-sign')
    )
    hint_marker.add_to(base_map)

    # LatLngPopup displays the coordinates of wherever the user clicks.
    base_map.add_child(folium.LatLngPopup())
    return base_map._repr_html_()
|
340 |
+
|
341 |
+
# gr.HTML has no ``initial_value`` attribute, so assigning to it never reached
# the UI. Fill the component with the placeholder map when the page loads.
app.load(fn=initialize_nearest_map, inputs=None, outputs=[nearest_map])
|
342 |
+
|
343 |
+
# Function to update the map based on user input
|
344 |
+
def update_nearest_map(address, latitude, longitude):
    """Return the HTML to show in the nearest-shop panel for the given input.

    Args:
        address: Text from the address box (may be empty).
        latitude: Latitude from the hidden number input (may be ``None``).
        longitude: Longitude from the hidden number input (may be ``None``).

    Returns:
        The rendered map HTML on success; otherwise the failure message
        wrapped in a paragraph so the user sees why nothing was found
        instead of an empty panel.
    """
    message, updated_map = find_nearest_shop(address, latitude, longitude)
    if updated_map is None:
        # find_nearest_shop signals failure with a message and no map.
        return f"<p>{message}</p>"
    return updated_map
|
347 |
+
|
348 |
+
# When the user submits an address
|
349 |
+
address_input.submit(fn=update_nearest_map, inputs=[address_input, lat_input, lon_input], outputs=[nearest_map])
|
350 |
+
|
351 |
+
# Note: Capturing map clicks directly in Gradio is non-trivial. As an alternative, users can enter their address.
|
352 |
+
# For more advanced interactivity, a custom frontend might be required.
|
353 |
+
|
354 |
with gr.Tab("π Help"):
|
355 |
gr.Markdown("""
|
356 |
## How to Use This Dashboard
|
357 |
|
358 |
- **Overview Tab:** Provides population statistics and a summary map of all auto businesses in Tennessee.
|
359 |
|
360 |
+
- **Shops in TN Counties/HSAs/HRRs Tabs:**
|
361 |
+
- **Filter by Business Type:** Use the checkboxes to select one or multiple business types to display on the map.
|
362 |
+
- **Filter by Geographical Area:** Depending on the tab, you can filter businesses based on Counties, HSAs, or HRRs.
|
363 |
- **Reset Filters:** Click the reset button to clear all selected filters and view all businesses.
|
364 |
- **Interactive Map:** Zoom in/out, click on markers to view business details, and use the search bar to find specific businesses.
|
365 |
+
|
366 |
+
- **Nearest Shop Finder Tab:**
|
367 |
+
- **Enter Address:** Type your address in the textbox and press Enter to find the nearest auto shop.
|
368 |
+
- **Select Location on Map:** (Feature under development) Click on the map to select your location and find the nearest shop.
|
369 |
+
- **View Results:** The map will display your location and the nearest auto shop with a line connecting them.
|
370 |
+
|
371 |
""")
|
372 |
|
373 |
gr.Markdown("### π Source: Yellow Pages")
|
374 |
+
|
375 |
app.launch(server_name="0.0.0.0", server_port=7860, share=True)
|