cassiebuhler committed on
Commit
608b34b
·
1 Parent(s): e5c3ed4

Changed projection of parquet to match pmtiles

Browse files

When the parquet and pmtiles projections don't match, m.fit_bounds() doesn't work, and no data layers are shown on the map after it is called.

Files changed (3) hide show
  1. app/app.py +20 -33
  2. app/variables.py +1 -2
  3. preprocess/preprocess.ipynb +8 -5
app/app.py CHANGED
@@ -18,20 +18,10 @@ from functools import reduce
18
  from variables import *
19
  from utils import *
20
 
21
-
22
-
23
- # Create the duckdb connection directly from the sqlalchemy engine instead.
24
- # Not as elegant as `ibis.duckdb.connect()` but shares connection with sqlalchemy.
25
- ## Create the engine
26
- #cwd = pathlib.Path.cwd()
27
- #connect_args = {'preload_extensions':['spatial']}
28
- #eng = sqlalchemy.create_engine(f"duckdb:///{cwd}/duck.db",connect_args = connect_args)
29
- #con = ibis.duckdb.from_connection(eng.raw_connection())
30
-
31
  ## Create the table from remote parquet only if it doesn't already exist on disk
32
-
33
  con = ibis.duckdb.connect("duck.db", extensions=["spatial"])
34
  current_tables = con.list_tables()
 
35
  if "mydata" not in set(current_tables):
36
  tbl = con.read_parquet(ca_parquet)
37
  con.create_table("mydata", tbl)
@@ -45,7 +35,6 @@ for key in [
45
  'svi']:
46
  if key not in st.session_state:
47
  st.session_state[key] = False
48
-
49
 
50
  st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
51
 
@@ -184,12 +173,11 @@ def run_sql(query,color_choice):
184
  output = few_shot_structured_llm.invoke(query)
185
  sql_query = output.sql_query
186
  explanation =output.explanation
187
-
188
  if not sql_query: # if the chatbot can't generate a SQL query.
189
  st.success(explanation)
190
  return pd.DataFrame({'id' : []})
191
 
192
-
193
  result = ca.sql(sql_query).execute()
194
  if result.empty :
195
  explanation = "This query did not return any results. Please try again with a different query."
@@ -204,11 +192,17 @@ def run_sql(query,color_choice):
204
  elif ("id" and "geom" in result.columns):
205
  style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
206
  legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
 
 
 
 
 
207
  m.add_legend(legend_dict=legend_d, position='bottom-left')
208
  m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
209
  m.fit_bounds(result.total_bounds.tolist())
210
  result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
211
  else:
 
212
  st.write(result) # if we aren't mapping, just print out the data
213
 
214
  with st.popover("Explanation"):
@@ -230,10 +224,11 @@ def summary_table_sql(ca, column, colors, ids): # get df for charts + df_tab for
230
  chatbot_toggles = {key: False for key in [
231
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
232
  'fire', 'rxburn', 'disadvantaged_communities',
233
- 'svi'
234
  ]}
235
 
236
 
 
237
  #############
238
 
239
 
@@ -243,7 +238,6 @@ with st.sidebar:
243
 
244
  color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
245
  colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
246
- # alpha = st.slider("transparency", 0.0, 1.0, 0.7)
247
  alpha = 0.8
248
  st.divider()
249
 
@@ -312,7 +306,6 @@ with st.sidebar:
312
 
313
  if show_richness:
314
  m.add_tile_layer(url_sr, name="MOBI Species Richness",opacity=a_bio)
315
-
316
  if show_rsr:
317
  m.add_tile_layer(url_rsr, name="MOBI Range-Size Rarity", opacity=a_bio)
318
 
@@ -329,10 +322,6 @@ with st.sidebar:
329
  m.add_cog_layer(url_man_carbon, palette="purples", name="Manageable Carbon", opacity = a_climate, fit_bounds=False)
330
 
331
 
332
- # # Justice40 Section
333
- # with st.expander("🌱 Climate & Economic Justice"):
334
- # a_justice = st.slider("transparency", 0.0, 1.0, 0.07, key = "social justice")
335
-
336
  # People Section
337
  with st.expander("🏡 People"):
338
  a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
@@ -348,15 +337,15 @@ with st.sidebar:
348
  # Fire Section
349
  with st.expander("🔥 Fire"):
350
  a_fire = st.slider("transparency", 0.0, 1.0, 0.15, key = "calfire")
351
- show_fire_10 = st.toggle("Fires (2013-2023)", key = "fire", value=chatbot_toggles['fire'])
352
 
353
- show_rx_10 = st.toggle("Prescribed Burns (2013-2023)", key = "rxburn", value=chatbot_toggles['rxburn'])
354
 
355
 
356
- if show_fire_10:
357
  m.add_pmtiles(url_calfire, style=fire_style, name="CALFIRE Fire Polygons (2013-2023)", opacity=a_fire, tooltip=False, fit_bounds = False)
358
 
359
- if show_rx_10:
360
  m.add_pmtiles(url_rxburn, style=rx_style, name="CAL FIRE Prescribed Burns (2013-2023)", opacity=a_fire, tooltip=False, fit_bounds = False)
361
 
362
 
@@ -384,7 +373,6 @@ with st.sidebar:
384
  <svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' class='bi bi-github ' style='height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;margin-right:4px;' aria-hidden='true' role='img'><path d='M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z'></path></svg>Source Code: </p> <a href='https://github.com/boettiger-lab/ca-30x30' target='_blank'>https://github.com/boettiger-lab/ca-30x30</a>
385
  """, unsafe_allow_html=True)# adding github logo
386
 
387
-
388
  # Display CA 30x30 Data
389
  if 'out' not in locals():
390
  style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
@@ -395,9 +383,10 @@ if 'out' not in locals():
395
  legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
396
 
397
  m.add_legend(legend_dict = legend_d, position = 'bottom-left')
398
- m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds = True)
399
-
400
 
 
401
  column = select_column[color_choice]
402
 
403
  select_colors = {
@@ -434,7 +423,7 @@ irr_carbon_chart = bar_chart(df, column, 'mean_irrecoverable_carbon', "Irrecover
434
  man_carbon_chart = bar_chart(df, column, 'mean_manageable_carbon', "Manageable Carbon (2018)")
435
  fire_10_chart = bar_chart(df, column, 'mean_fire', "Fires (2013-2023)")
436
  rx_10_chart = bar_chart(df, column, 'mean_rxburn',"Prescribed Burns (2013-2023)")
437
- justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2020)")
438
  svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
439
 
440
 
@@ -474,10 +463,10 @@ with main:
474
  if show_sv:
475
  st.altair_chart(svi_chart, use_container_width=True)
476
 
477
- if show_fire_10:
478
  st.altair_chart(fire_10_chart, use_container_width=True)
479
 
480
- if show_rx_10:
481
  st.altair_chart(rx_10_chart, use_container_width=True)
482
 
483
 
@@ -487,8 +476,6 @@ st.caption("***The label 'established' is inferred from the California Protected
487
 
488
  st.caption("***Under California’s 30x30 framework, only GAP codes 1 and 2 are counted toward the conservation goal.")
489
 
490
-
491
-
492
  st.divider()
493
 
494
  with open('app/footer.md', 'r') as file:
 
18
  from variables import *
19
  from utils import *
20
 
 
 
 
 
 
 
 
 
 
 
21
  ## Create the table from remote parquet only if it doesn't already exist on disk
 
22
  con = ibis.duckdb.connect("duck.db", extensions=["spatial"])
23
  current_tables = con.list_tables()
24
+
25
  if "mydata" not in set(current_tables):
26
  tbl = con.read_parquet(ca_parquet)
27
  con.create_table("mydata", tbl)
 
35
  'svi']:
36
  if key not in st.session_state:
37
  st.session_state[key] = False
 
38
 
39
  st.set_page_config(layout="wide", page_title="CA Protected Areas Explorer", page_icon=":globe:")
40
 
 
173
  output = few_shot_structured_llm.invoke(query)
174
  sql_query = output.sql_query
175
  explanation =output.explanation
176
+
177
  if not sql_query: # if the chatbot can't generate a SQL query.
178
  st.success(explanation)
179
  return pd.DataFrame({'id' : []})
180
 
 
181
  result = ca.sql(sql_query).execute()
182
  if result.empty :
183
  explanation = "This query did not return any results. Please try again with a different query."
 
192
  elif ("id" and "geom" in result.columns):
193
  style = get_pmtiles_style_llm(style_options[color_choice], result["id"].tolist())
194
  legend_d = {cat: color for cat, color in style_options[color_choice]['stops']}
195
+
196
+ # shorten legend for ecoregions
197
+ if color_choice == "Ecoregion":
198
+ legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
199
+
200
  m.add_legend(legend_dict=legend_d, position='bottom-left')
201
  m.add_pmtiles(ca_pmtiles, style=style, opacity=alpha, tooltip=True, fit_bounds=True)
202
  m.fit_bounds(result.total_bounds.tolist())
203
  result = result.drop('geom',axis = 1) #printing to streamlit so I need to drop geom
204
  else:
205
+
206
  st.write(result) # if we aren't mapping, just print out the data
207
 
208
  with st.popover("Explanation"):
 
224
  chatbot_toggles = {key: False for key in [
225
  'richness', 'rsr', 'irrecoverable_carbon', 'manageable_carbon',
226
  'fire', 'rxburn', 'disadvantaged_communities',
227
+ 'svi',
228
  ]}
229
 
230
 
231
+
232
  #############
233
 
234
 
 
238
 
239
  color_choice = st.radio("Group by:", style_options, key = "color", help = "Select a category to change map colors and chart groupings.")
240
  colorby_vals = getColorVals(style_options, color_choice) #get options for selected color_by column
 
241
  alpha = 0.8
242
  st.divider()
243
 
 
306
 
307
  if show_richness:
308
  m.add_tile_layer(url_sr, name="MOBI Species Richness",opacity=a_bio)
 
309
  if show_rsr:
310
  m.add_tile_layer(url_rsr, name="MOBI Range-Size Rarity", opacity=a_bio)
311
 
 
322
  m.add_cog_layer(url_man_carbon, palette="purples", name="Manageable Carbon", opacity = a_climate, fit_bounds=False)
323
 
324
 
 
 
 
 
325
  # People Section
326
  with st.expander("🏡 People"):
327
  a_people = st.slider("transparency", 0.0, 1.0, 0.1, key = "SVI")
 
337
  # Fire Section
338
  with st.expander("🔥 Fire"):
339
  a_fire = st.slider("transparency", 0.0, 1.0, 0.15, key = "calfire")
340
+ show_fire = st.toggle("Fires (2013-2023)", key = "fire", value=chatbot_toggles['fire'])
341
 
342
+ show_rxburn = st.toggle("Prescribed Burns (2013-2023)", key = "rxburn", value=chatbot_toggles['rxburn'])
343
 
344
 
345
+ if show_fire:
346
  m.add_pmtiles(url_calfire, style=fire_style, name="CALFIRE Fire Polygons (2013-2023)", opacity=a_fire, tooltip=False, fit_bounds = False)
347
 
348
+ if show_rxburn:
349
  m.add_pmtiles(url_rxburn, style=rx_style, name="CAL FIRE Prescribed Burns (2013-2023)", opacity=a_fire, tooltip=False, fit_bounds = False)
350
 
351
 
 
373
  <svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' class='bi bi-github ' style='height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;margin-right:4px;' aria-hidden='true' role='img'><path d='M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z'></path></svg>Source Code: </p> <a href='https://github.com/boettiger-lab/ca-30x30' target='_blank'>https://github.com/boettiger-lab/ca-30x30</a>
374
  """, unsafe_allow_html=True)# adding github logo
375
 
 
376
  # Display CA 30x30 Data
377
  if 'out' not in locals():
378
  style = get_pmtiles_style(style_options[color_choice], alpha, filter_cols, filter_vals)
 
383
  legend_d = {key.replace("California", "CA"): value for key, value in legend_d.items()}
384
 
385
  m.add_legend(legend_dict = legend_d, position = 'bottom-left')
386
+ m.add_pmtiles(ca_pmtiles, style=style, name="CA", opacity=alpha, tooltip=True, fit_bounds=True)
387
+
388
 
389
+
390
  column = select_column[color_choice]
391
 
392
  select_colors = {
 
423
  man_carbon_chart = bar_chart(df, column, 'mean_manageable_carbon', "Manageable Carbon (2018)")
424
  fire_10_chart = bar_chart(df, column, 'mean_fire', "Fires (2013-2023)")
425
  rx_10_chart = bar_chart(df, column, 'mean_rxburn',"Prescribed Burns (2013-2023)")
426
+ justice40_chart = bar_chart(df, column, 'mean_disadvantaged', "Disadvantaged Communities (2021)")
427
  svi_chart = bar_chart(df, column, 'mean_svi', "Social Vulnerability Index (2022)")
428
 
429
 
 
463
  if show_sv:
464
  st.altair_chart(svi_chart, use_container_width=True)
465
 
466
+ if show_fire:
467
  st.altair_chart(fire_10_chart, use_container_width=True)
468
 
469
+ if show_rxburn:
470
  st.altair_chart(rx_10_chart, use_container_width=True)
471
 
472
 
 
476
 
477
  st.caption("***Under California’s 30x30 framework, only GAP codes 1 and 2 are counted toward the conservation goal.")
478
 
 
 
479
  st.divider()
480
 
481
  with open('app/footer.md', 'r') as file:
app/variables.py CHANGED
@@ -1,8 +1,7 @@
1
  # urls for main layer
2
- ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/1bc81f4f0678143421f73645f0ba830aa1cb8617/ca-30x30.parquet"
3
  ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/c58913a279d13c414722c4299b0e0867e923946a/ca-30x30.pmtiles"
4
 
5
-
6
  ca_area_acres = 1.014e8 #acres
7
  style_choice = "GAP Status Code"
8
 
 
1
  # urls for main layer
2
+ ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/8d5d938c04d3206e6bfb04055b5e779c4c28222f/ca-30x30.parquet"
3
  ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/c58913a279d13c414722c4299b0e0867e923946a/ca-30x30.pmtiles"
4
 
 
5
  ca_area_acres = 1.014e8 #acres
6
  style_choice = "GAP Status Code"
7
 
preprocess/preprocess.ipynb CHANGED
@@ -10,7 +10,7 @@
10
  },
11
  {
12
  "cell_type": "code",
13
- "execution_count": null,
14
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
15
  "metadata": {
16
  "editable": true,
@@ -452,6 +452,10 @@
452
  " gdf_stats = gdf_stats.reset_index() \n",
453
  "\n",
454
  "gdf_stats = gdf_stats.rename(columns ={'ca_id':'id'}) #reverting back to \"id\" col name, since we are finished with exact_extract() \n",
 
 
 
 
455
  "gdf_stats.to_parquet(ca_parquet) # save results "
456
  ]
457
  },
@@ -476,15 +480,14 @@
476
  "hf_upload('ca-30x30.parquet', ca_parquet)\n",
477
  "s3_cp(ca_parquet, \"s3://public-ca30x30/ca-30x30.parquet\", \"minio\")\n",
478
  "\n",
479
- "#to use PMTiles, need to convert to 4326 and geojson\n",
480
- "ca_4326 = (con\n",
481
  " .read_parquet(ca_parquet)\n",
482
- " .mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")) \n",
483
  " .filter(_.status != 'non-conserved') #omitting the non-conserved to only for pmtiles \n",
484
  " )\n",
485
  "\n",
486
  "#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n",
487
- "ca_4326.execute().set_crs(\"epsg:4326\").to_file(path + 'ca-30x30.geojson') \n",
488
  "pmtiles = to_pmtiles(path+ 'ca-30x30.geojson', ca_pmtiles, options = ['--extend-zooms-if-still-dropping'])\n",
489
  "\n",
490
  "# upload pmtiles to minio and HF\n",
 
10
  },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": 1,
14
  "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
15
  "metadata": {
16
  "editable": true,
 
452
  " gdf_stats = gdf_stats.reset_index() \n",
453
  "\n",
454
  "gdf_stats = gdf_stats.rename(columns ={'ca_id':'id'}) #reverting back to \"id\" col name, since we are finished with exact_extract() \n",
455
+ "\n",
456
+ "\n",
457
+ "# reproject to epsg:4326 since that's what pmtiles requires and we want to match that \n",
458
+ "gdf_stats = gdf_stats.to_crs(\"epsg:4326\")\n",
459
  "gdf_stats.to_parquet(ca_parquet) # save results "
460
  ]
461
  },
 
480
  "hf_upload('ca-30x30.parquet', ca_parquet)\n",
481
  "s3_cp(ca_parquet, \"s3://public-ca30x30/ca-30x30.parquet\", \"minio\")\n",
482
  "\n",
483
+ "#to use PMTiles, need to convert to geojson\n",
484
+ "ca_geojson = (con\n",
485
  " .read_parquet(ca_parquet)\n",
 
486
  " .filter(_.status != 'non-conserved') #omitting the non-conserved to only for pmtiles \n",
487
  " )\n",
488
  "\n",
489
  "#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n",
490
+ "ca_geojson.execute().to_file(path + 'ca-30x30.geojson') \n",
491
  "pmtiles = to_pmtiles(path+ 'ca-30x30.geojson', ca_pmtiles, options = ['--extend-zooms-if-still-dropping'])\n",
492
  "\n",
493
  "# upload pmtiles to minio and HF\n",