cassiebuhler committed on
Commit
10e8f52
·
1 Parent(s): 5ee179c

adding justice40

Browse files
Files changed (1) hide show
  1. get_zonal_stats.ipynb +290 -149
get_zonal_stats.ipynb CHANGED
@@ -36,7 +36,7 @@
36
  },
37
  {
38
  "cell_type": "code",
39
- "execution_count": 19,
40
  "id": "8b5656db-2d1d-4ca8-826d-7588126e52e8",
41
  "metadata": {},
42
  "outputs": [],
@@ -65,7 +65,7 @@
65
  },
66
  {
67
  "cell_type": "code",
68
- "execution_count": 20,
69
  "id": "9a0e3446-16ac-40b0-9e34-db0157038c5a",
70
  "metadata": {},
71
  "outputs": [],
@@ -112,6 +112,7 @@
112
  "metadata": {},
113
  "outputs": [],
114
  "source": [
 
115
  "# aws s3 cp s3://vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif . --endpoint-url=https://data.source.coop\n",
116
  "# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_bii_100m_cog.tif . --endpoint-url=https://data.source.coop\n",
117
  "# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_fii_100m_cog.tif . --endpoint-url=https://data.source.coop\n",
@@ -125,6 +126,14 @@
125
  "# ! aws s3 cp s3://cboettig/social-vulnerability/svi2020_us_tract.parquet . --endpoint-url=https://data.source.coop\n"
126
  ]
127
  },
 
 
 
 
 
 
 
 
128
  {
129
  "cell_type": "code",
130
  "execution_count": 21,
@@ -170,6 +179,67 @@
170
  " col_name = \"rsr\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")"
171
  ]
172
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  {
174
  "cell_type": "code",
175
  "execution_count": 23,
@@ -201,6 +271,14 @@
201
  " col_name = \"deforest_carbon\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
202
  ]
203
  },
 
 
 
 
 
 
 
 
204
  {
205
  "cell_type": "code",
206
  "execution_count": 24,
@@ -292,72 +370,6 @@
292
  "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"crop_reduction\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
293
  ]
294
  },
295
- {
296
- "cell_type": "code",
297
- "execution_count": null,
298
- "id": "19c3e402-8712-450f-b3dd-af9d0c01689c",
299
- "metadata": {},
300
- "outputs": [],
301
- "source": [
302
- "%%time\n",
303
- "tif_file = 'irrecoverable_c_total_2018.tif'\n",
304
- "vec_file = './cpad-stats-temp.parquet'\n",
305
- "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"irrecoverable_carbon\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n",
306
- "\n"
307
- ]
308
- },
309
- {
310
- "cell_type": "code",
311
- "execution_count": null,
312
- "id": "c55c777a-48ce-4403-a171-cfc0d2351df6",
313
- "metadata": {},
314
- "outputs": [],
315
- "source": [
316
- "%%time\n",
317
- "tif_file = 'manageable_c_total_2018.tif'\n",
318
- "vec_file = './cpad-stats-temp.parquet'\n",
319
- "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"manageable_carbon\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
320
- ]
321
- },
322
- {
323
- "cell_type": "code",
324
- "execution_count": null,
325
- "id": "2ce56a66-34e3-4f61-95ae-65d1f06bc468",
326
- "metadata": {},
327
- "outputs": [],
328
- "source": [
329
- "%%time\n",
330
- "tif_file = 'combined_rwr_2022.tif'\n",
331
- "vec_file = './cpad-stats-temp.parquet'\n",
332
- "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"all_species_rwr\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
333
- ]
334
- },
335
- {
336
- "cell_type": "code",
337
- "execution_count": null,
338
- "id": "a3260b3a-d2eb-4cda-afac-679b362d8b71",
339
- "metadata": {},
340
- "outputs": [],
341
- "source": [
342
- "%%time\n",
343
- "tif_file = 'combined_sr_2022.tif'\n",
344
- "vec_file = './cpad-stats-temp.parquet'\n",
345
- "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"all_species_richness\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
346
- ]
347
- },
348
- {
349
- "cell_type": "code",
350
- "execution_count": null,
351
- "id": "d5133f36-404e-4f6a-a90b-eb5f098e6f06",
352
- "metadata": {},
353
- "outputs": [],
354
- "source": [
355
- "%%time\n",
356
- "tif_file = 'combined_sr_2022.tif'\n",
357
- "vec_file = './cpad-stats-temp.parquet'\n",
358
- "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"all_species_richness\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
359
- ]
360
- },
361
  {
362
  "cell_type": "code",
363
  "execution_count": 20,
@@ -385,12 +397,12 @@
385
  "id": "f8e037d4-7a34-42bc-941f-0c09ee80ef3b",
386
  "metadata": {},
387
  "source": [
388
- "# Convert vector to tif "
389
  ]
390
  },
391
  {
392
  "cell_type": "code",
393
- "execution_count": 24,
394
  "id": "c4a19013-65f1-4eef-be2d-0cf1be3d0f7f",
395
  "metadata": {},
396
  "outputs": [],
@@ -443,6 +455,14 @@
443
  " dest.update_tags(1, TIFFTAG_RESOLUTION_UNIT=\"Meter\")\n"
444
  ]
445
  },
 
 
 
 
 
 
 
 
446
  {
447
  "cell_type": "code",
448
  "execution_count": 25,
@@ -614,9 +634,130 @@
614
  "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"housing_transit\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
615
  ]
616
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617
  {
618
  "cell_type": "code",
619
- "execution_count": 3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
620
  "id": "2e4de199-82d4-4e2b-8572-6fe19b57d1ee",
621
  "metadata": {},
622
  "outputs": [
@@ -654,9 +795,9 @@
654
  " <th>...</th>\n",
655
  " <th>all_species_rwr</th>\n",
656
  " <th>all_species_richness</th>\n",
657
- " <th>crop_expansion</th>\n",
658
  " <th>human_impact</th>\n",
659
  " <th>svi</th>\n",
 
660
  " <th>svi_socioeconomic_status</th>\n",
661
  " <th>svi_household_char</th>\n",
662
  " <th>svi_racial_ethnic_minority</th>\n",
@@ -680,14 +821,14 @@
680
  " <td>...</td>\n",
681
  " <td>0.355</td>\n",
682
  " <td>346.0</td>\n",
683
- " <td>0</td>\n",
684
  " <td>2339.00</td>\n",
685
  " <td>0.521</td>\n",
 
686
  " <td>0.29</td>\n",
687
  " <td>0.522</td>\n",
688
  " <td>0.428</td>\n",
689
  " <td>0.816</td>\n",
690
- " <td>MULTIPOLYGON Z (((-123.94358 41.95869 0, -123....</td>\n",
691
  " </tr>\n",
692
  " <tr>\n",
693
  " <th>1</th>\n",
@@ -704,14 +845,14 @@
704
  " <td>...</td>\n",
705
  " <td>0.355</td>\n",
706
  " <td>346.0</td>\n",
707
- " <td>0</td>\n",
708
  " <td>870.50</td>\n",
709
  " <td>0.521</td>\n",
 
710
  " <td>0.29</td>\n",
711
  " <td>0.522</td>\n",
712
  " <td>0.428</td>\n",
713
  " <td>0.816</td>\n",
714
- " <td>MULTIPOLYGON Z (((-123.98793 41.94847 0, -123....</td>\n",
715
  " </tr>\n",
716
  " <tr>\n",
717
  " <th>2</th>\n",
@@ -728,14 +869,14 @@
728
  " <td>...</td>\n",
729
  " <td>0.355</td>\n",
730
  " <td>346.0</td>\n",
731
- " <td>0</td>\n",
732
  " <td>429.00</td>\n",
733
  " <td>0.521</td>\n",
 
734
  " <td>0.29</td>\n",
735
  " <td>0.522</td>\n",
736
  " <td>0.429</td>\n",
737
  " <td>0.816</td>\n",
738
- " <td>MULTIPOLYGON Z (((-123.87957 41.97172 0, -123....</td>\n",
739
  " </tr>\n",
740
  " <tr>\n",
741
  " <th>3</th>\n",
@@ -752,14 +893,14 @@
752
  " <td>...</td>\n",
753
  " <td>0.355</td>\n",
754
  " <td>346.0</td>\n",
755
- " <td>0</td>\n",
756
  " <td>3907.00</td>\n",
757
  " <td>0.521</td>\n",
 
758
  " <td>0.29</td>\n",
759
  " <td>0.522</td>\n",
760
  " <td>0.428</td>\n",
761
  " <td>0.816</td>\n",
762
- " <td>MULTIPOLYGON Z (((-123.84466 41.99139 0, -123....</td>\n",
763
  " </tr>\n",
764
  " <tr>\n",
765
  " <th>4</th>\n",
@@ -776,18 +917,18 @@
776
  " <td>...</td>\n",
777
  " <td>0.355</td>\n",
778
  " <td>346.0</td>\n",
779
- " <td>0</td>\n",
780
  " <td>698.25</td>\n",
781
  " <td>0.521</td>\n",
 
782
  " <td>0.29</td>\n",
783
  " <td>0.522</td>\n",
784
  " <td>0.428</td>\n",
785
  " <td>0.816</td>\n",
786
- " <td>MULTIPOLYGON Z (((-123.86194 41.98176 0, -123....</td>\n",
787
  " </tr>\n",
788
  " </tbody>\n",
789
  "</table>\n",
790
- "<p>5 rows × 28 columns</p>\n",
791
  "</div>"
792
  ],
793
  "text/plain": [
@@ -805,19 +946,19 @@
805
  "3 United States Forest Service Federal 0 0.293964 100004 \n",
806
  "4 United States Forest Service Federal 0 0.912564 100005 \n",
807
  "\n",
808
- " type ... all_species_rwr all_species_richness crop_expansion \\\n",
809
- "0 Land ... 0.355 346.0 0 \n",
810
- "1 Land ... 0.355 346.0 0 \n",
811
- "2 Land ... 0.355 346.0 0 \n",
812
- "3 Land ... 0.355 346.0 0 \n",
813
- "4 Land ... 0.355 346.0 0 \n",
814
  "\n",
815
- " human_impact svi svi_socioeconomic_status svi_household_char \\\n",
816
- "0 2339.00 0.521 0.29 0.522 \n",
817
- "1 870.50 0.521 0.29 0.522 \n",
818
- "2 429.00 0.521 0.29 0.522 \n",
819
- "3 3907.00 0.521 0.29 0.522 \n",
820
- "4 698.25 0.521 0.29 0.522 \n",
821
  "\n",
822
  " svi_racial_ethnic_minority svi_housing_transit \\\n",
823
  "0 0.428 0.816 \n",
@@ -827,56 +968,57 @@
827
  "4 0.428 0.816 \n",
828
  "\n",
829
  " geom \n",
830
- "0 MULTIPOLYGON Z (((-123.94358 41.95869 0, -123.... \n",
831
- "1 MULTIPOLYGON Z (((-123.98793 41.94847 0, -123.... \n",
832
- "2 MULTIPOLYGON Z (((-123.87957 41.97172 0, -123.... \n",
833
- "3 MULTIPOLYGON Z (((-123.84466 41.99139 0, -123.... \n",
834
- "4 MULTIPOLYGON Z (((-123.86194 41.98176 0, -123.... \n",
835
  "\n",
836
- "[5 rows x 28 columns]"
837
  ]
838
  },
839
- "execution_count": 3,
840
  "metadata": {},
841
  "output_type": "execute_result"
842
  }
843
  ],
844
  "source": [
845
  "## clean up\n",
846
- "\n",
847
  "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
848
  "ca_geom = con.read_parquet(\"ca2024-30m.parquet\").cast({\"geom\":\"geometry\"}).select(\"id\",\"geom\")\n",
849
  "\n",
850
- "\n",
851
- "\n",
852
  "ca = (con\n",
853
- " .read_parquet(\"cpad-stats-temp.parquet\")\n",
854
- " .mutate(richness = _.richness.round(3))\n",
855
- " .mutate(rsr = _.rsr.round(3))\n",
856
- " .mutate(deforest_carbon = _.deforest_carbon.round(3))\n",
857
- " .mutate(biodiversity_intactness_loss = _.biodiversity_intactness_loss.round(3))\n",
858
- " .mutate(forest_integrity_loss = _.forest_integrity_loss.round(3))\n",
859
- " .cast({\"crop_expansion\": \"int64\"})\n",
860
- " .cast({\"crop_reduction\": \"int64\"})\n",
861
- " .cast({\"manageable_carbon\": \"int64\"})\n",
862
- " .cast({\"irrecoverable_carbon\": \"int64\"})\n",
863
- " .mutate(all_species_rwr = _.all_species_rwr.round(3))\n",
864
- " .mutate(all_species_richness = _.all_species_richness.round(3))\n",
865
- " .mutate(human_impact = _.human_impact.round(3))\n",
866
- " .mutate(svi = _.SVI.round(3))\n",
867
- " .mutate(svi_socioeconomic_status = _.socioeconomic_status.round(3))\n",
868
- " .mutate(svi_household_char = _.household_char.round(3))\n",
869
- " .mutate(svi_racial_ethnic_minority = _.racial_ethnic_minority.round(3))\n",
870
- " .mutate(svi_housing_transit = _.housing_transit.round(3))\n",
871
- " .drop(\"geometry\",\"__index_level_0__\")\n",
872
- " # .rename(geom = \"geometry\")\n",
873
- " # .cast({\"geom\":\"geometry\"})\n",
874
- " # .mutate(geom=_.geom.convert('EPSG:3857', 'EPSG:4326'))\n",
875
- " .join(ca_geom, \"id\", how = \"inner\")\n",
876
- " .drop(\"SVI\", \"socioeconomic_status\",\"household_char\",\"racial_ethnic_minority\",\"housing_transit\" )\n",
 
 
 
 
 
 
877
  "\n",
878
- " )\n",
879
- "# \n",
880
  "ca.head(5).execute()\n"
881
  ]
882
  },
@@ -890,7 +1032,7 @@
890
  },
891
  {
892
  "cell_type": "code",
893
- "execution_count": 4,
894
  "id": "05c791c9-888a-483a-9dbb-a2ba7eb1bce2",
895
  "metadata": {},
896
  "outputs": [
@@ -945,7 +1087,7 @@
945
  },
946
  {
947
  "cell_type": "code",
948
- "execution_count": 5,
949
  "id": "1f2d179d-6d47-4e84-83c6-7cb3d969fc00",
950
  "metadata": {},
951
  "outputs": [
@@ -954,20 +1096,19 @@
954
  "output_type": "stream",
955
  "text": [
956
  "cpad-stats.geojson:6: ignoring dimensions beyond two: in JSON object [-123.94358428532209,41.95869046159588,0]\n",
957
- "cpad-stats.geojson:6: ignoring dimensions beyond two: in JSON object {\"type\":\"Feature\",\"properties\":{\"established\":2024,\"reGAP\":2,\"name\":\"Six Rivers National Forest\",\"access_type\":\"Open Access\",\"manager\":\"United States Forest Service\",\"manager_type\":\"Federal\",\"Easement\":0,\"Acres\":0.19176257,\"id\":100001,\"type\":\"Land\",\"richness\":4,\"rsr\":0.007,\"deforest_carbon\":0,\"biodiversity_intactness_loss\":0,\"forest_integrity_loss\":0,\"crop_reduction\":0,\"irrecoverable_carbon\":4,\"manageable_carbon\":85,\"all_species_rwr\":0.355,\"all_species_richness\":346,\"crop_expansion\":0,\"human_...\n",
958
- "81196 features, 78827308 bytes of geometry and attributes, 2702235 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes\n",
959
- "Choosing a maxzoom of -z10 for features typically 1205 feet (368 meters) apart, and at least 78 feet (24 meters) apart\n",
960
  "Choosing a maxzoom of -z13 for resolution of about 39 feet (11 meters) within features\n",
961
- "tile 4/2/6 size is 801307 with detail 12, >500000 \n",
962
- "tile 5/5/12 size is 1313015 with detail 12, >500000 \n",
963
- "tile 5/5/12 size is 780845 with detail 11, >500000 \n",
964
- "tile 6/11/25 size is 603643 with detail 12, >500000 \n",
965
- "tile 6/10/24 size is 1087317 with detail 12, >500000 \n",
966
- "tile 6/10/24 size is 742154 with detail 11, >500000 \n",
967
- "tile 7/20/49 size is 704536 with detail 12, >500000 \n",
968
- "tile 7/20/49 size is 504066 with detail 11, >500000 \n",
969
- "tile 7/22/51 size is 655744 with detail 12, >500000 \n",
970
- " 99.9% 13/1332/3068 \n",
971
  " 100.0% 13/1278/3041 \r"
972
  ]
973
  },
@@ -981,12 +1122,12 @@
981
  {
982
  "data": {
983
  "application/vnd.jupyter.widget-view+json": {
984
- "model_id": "82d13b05559b4f32a947f75fb2391e6a",
985
  "version_major": 2,
986
  "version_minor": 0
987
  },
988
  "text/plain": [
989
- "cpad-stats.pmtiles: 0%| | 0.00/95.3M [00:00<?, ?B/s]"
990
  ]
991
  },
992
  "metadata": {},
@@ -995,7 +1136,7 @@
995
  {
996
  "data": {
997
  "application/vnd.jupyter.widget-view+json": {
998
- "model_id": "61b3432b09064da4a46762eb514face4",
999
  "version_major": 2,
1000
  "version_minor": 0
1001
  },
 
36
  },
37
  {
38
  "cell_type": "code",
39
+ "execution_count": 61,
40
  "id": "8b5656db-2d1d-4ca8-826d-7588126e52e8",
41
  "metadata": {},
42
  "outputs": [],
 
65
  },
66
  {
67
  "cell_type": "code",
68
+ "execution_count": 62,
69
  "id": "9a0e3446-16ac-40b0-9e34-db0157038c5a",
70
  "metadata": {},
71
  "outputs": [],
 
112
  "metadata": {},
113
  "outputs": [],
114
  "source": [
115
+ "# getting local copies of data \n",
116
  "# aws s3 cp s3://vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif . --endpoint-url=https://data.source.coop\n",
117
  "# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_bii_100m_cog.tif . --endpoint-url=https://data.source.coop\n",
118
  "# aws s3 cp s3://vizzuality/lg-land-carbon-data/natcrop_fii_100m_cog.tif . --endpoint-url=https://data.source.coop\n",
 
126
  "# ! aws s3 cp s3://cboettig/social-vulnerability/svi2020_us_tract.parquet . --endpoint-url=https://data.source.coop\n"
127
  ]
128
  },
129
+ {
130
+ "cell_type": "markdown",
131
+ "id": "531e7f88-1ce1-4027-b0ab-aab597e9a2b2",
132
+ "metadata": {},
133
+ "source": [
134
+ "# Biodiversity Data"
135
+ ]
136
+ },
137
  {
138
  "cell_type": "code",
139
  "execution_count": 21,
 
179
  " col_name = \"rsr\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")"
180
  ]
181
  },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": null,
185
+ "id": "d5133f36-404e-4f6a-a90b-eb5f098e6f06",
186
+ "metadata": {},
187
+ "outputs": [],
188
+ "source": [
189
+ "%%time\n",
190
+ "tif_file = 'combined_sr_2022.tif'\n",
191
+ "vec_file = './cpad-stats-temp.parquet'\n",
192
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"all_species_richness\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "execution_count": null,
198
+ "id": "2ce56a66-34e3-4f61-95ae-65d1f06bc468",
199
+ "metadata": {},
200
+ "outputs": [],
201
+ "source": [
202
+ "%%time\n",
203
+ "tif_file = 'combined_rwr_2022.tif'\n",
204
+ "vec_file = './cpad-stats-temp.parquet'\n",
205
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"all_species_rwr\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "markdown",
210
+ "id": "6c129894-3775-4842-8767-f81a8f626d2c",
211
+ "metadata": {},
212
+ "source": [
213
+ "# Carbon Data"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "code",
218
+ "execution_count": null,
219
+ "id": "19c3e402-8712-450f-b3dd-af9d0c01689c",
220
+ "metadata": {},
221
+ "outputs": [],
222
+ "source": [
223
+ "%%time\n",
224
+ "tif_file = 'irrecoverable_c_total_2018.tif'\n",
225
+ "vec_file = './cpad-stats-temp.parquet'\n",
226
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"irrecoverable_carbon\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n",
227
+ "\n"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": null,
233
+ "id": "c55c777a-48ce-4403-a171-cfc0d2351df6",
234
+ "metadata": {},
235
+ "outputs": [],
236
+ "source": [
237
+ "%%time\n",
238
+ "tif_file = 'manageable_c_total_2018.tif'\n",
239
+ "vec_file = './cpad-stats-temp.parquet'\n",
240
+ "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"manageable_carbon\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
241
+ ]
242
+ },
243
  {
244
  "cell_type": "code",
245
  "execution_count": 23,
 
271
  " col_name = \"deforest_carbon\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
272
  ]
273
  },
274
+ {
275
+ "cell_type": "markdown",
276
+ "id": "096c00a8-57af-41d7-93cc-85d85414aa4f",
277
+ "metadata": {},
278
+ "source": [
279
+ "# Human Impact Data"
280
+ ]
281
+ },
282
  {
283
  "cell_type": "code",
284
  "execution_count": 24,
 
370
  "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"crop_reduction\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
371
  ]
372
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  {
374
  "cell_type": "code",
375
  "execution_count": 20,
 
397
  "id": "f8e037d4-7a34-42bc-941f-0c09ee80ef3b",
398
  "metadata": {},
399
  "source": [
400
+ "# Need to convert SVI & Justice40 files to tif"
401
  ]
402
  },
403
  {
404
  "cell_type": "code",
405
+ "execution_count": 60,
406
  "id": "c4a19013-65f1-4eef-be2d-0cf1be3d0f7f",
407
  "metadata": {},
408
  "outputs": [],
 
455
  " dest.update_tags(1, TIFFTAG_RESOLUTION_UNIT=\"Meter\")\n"
456
  ]
457
  },
458
+ {
459
+ "cell_type": "markdown",
460
+ "id": "f4925a74-5ed2-49a4-845b-6a0f0398a43e",
461
+ "metadata": {},
462
+ "source": [
463
+ "# SVI"
464
+ ]
465
+ },
466
  {
467
  "cell_type": "code",
468
  "execution_count": 25,
 
634
  "df = big_zonal_stats(vec_file, tif_file, stats = ['mean'], col_name = \"housing_transit\", n_jobs=threads, verbose=0).to_parquet(\"cpad-stats-temp.parquet\")\n"
635
  ]
636
  },
637
+ {
638
+ "cell_type": "markdown",
639
+ "id": "ff4b6604-9828-4882-90bd-554c21f5c6e6",
640
+ "metadata": {},
641
+ "source": [
642
+ "# Justice40 "
643
+ ]
644
+ },
645
+ {
646
+ "cell_type": "code",
647
+ "execution_count": 56,
648
+ "id": "3678a91f-72f7-4339-a409-a97776cba043",
649
+ "metadata": {},
650
+ "outputs": [
651
+ {
652
+ "name": "stdout",
653
+ "output_type": "stream",
654
+ "text": [
655
+ "EPSG:4326\n",
656
+ "Unique values in raster: [0. 1.]\n"
657
+ ]
658
+ }
659
+ ],
660
+ "source": [
661
+ "#clean up\n",
662
+ "justice40 = (con\n",
663
+ " .read_parquet(\"disadvantaged-communities.parquet\")\n",
664
+ " .rename(geometry = \"SHAPE\",justice40=\"Disadvan\")\n",
665
+ " .filter(_.StateName == \"California\")\n",
666
+ " .mutate(geometry = _.geometry.convert(\"ESRI:102039\",\"EPSG:4326\"))\n",
667
+ " .select(\"justice40\",\"geometry\")\n",
668
+ " )\n",
669
+ "gdf = justice40.execute()\n",
670
+ "get_geotiff(gdf,\"ca_justice40.tif\",\"justice40\")\n",
671
+ "\n"
672
+ ]
673
+ },
674
+ {
675
+ "cell_type": "code",
676
+ "execution_count": 73,
677
+ "id": "8faa425f-6f9c-4189-a53a-24dd0250c539",
678
+ "metadata": {},
679
+ "outputs": [],
680
+ "source": [
681
+ "# justice40 is binary (0/1) data, so we compute the percentage of each polygon's valid raster cells where justice40 = 1.\n",
682
+ "\n",
683
+ "def big_zonal_stats_binary(vec_file, tif_file, col_name, n_jobs, verbose=10, timeout=10000):\n",
684
+ " gdf = gpd.read_parquet(vec_file)\n",
685
+ " if gdf.crs is None:\n",
686
+ " gdf = gdf.set_crs(\"EPSG:4326\")\n",
687
+ " gdf = gdf.rename(columns={\"geom\": \"geometry\"})\n",
688
+ " gdf = gdf.set_geometry(\"geometry\")\n",
689
+ " gdf = gdf[gdf[\"geometry\"].notna()].copy()\n",
690
+ "\n",
691
+ " with rasterio.open(tif_file) as src:\n",
692
+ " raster_crs = src.crs\n",
693
+ " gdf = gdf.to_crs(raster_crs) \n",
694
+ " \n",
695
+ " california_polygon = box(*gdf.total_bounds)\n",
696
+ " \n",
697
+ " out_image, out_transform = mask(src, [california_polygon], crop=True, nodata=src.nodata)\n",
698
+ "\n",
699
+ " if out_image.ndim == 3:\n",
700
+ " out_image = out_image[0]\n",
701
+ "\n",
702
+ " def get_stats(geom_slice):\n",
703
+ " geom = [geom_slice.geometry]\n",
704
+ " masked_image, _ = mask(src, geom, crop=True, all_touched=True, nodata=src.nodata)\n",
705
+ " \n",
706
+ " # If the masked area is empty, return None\n",
707
+ " if masked_image.size == 0:\n",
708
+ " return {'percentage_1': None}\n",
709
+ " \n",
710
+ " # Count 1s and calculate percentage\n",
711
+ " count_1 = (masked_image == 1).sum()\n",
712
+ " total_count = (masked_image != src.nodata).sum()\n",
713
+ " \n",
714
+ " # Calculate percentage of justice40 = 1 within the polygon\n",
715
+ " percentage_1 = (count_1 / total_count) * 100 if total_count > 0 else None\n",
716
+ " \n",
717
+ " return {'percentage_1': percentage_1}\n",
718
+ "\n",
719
+ " output = [get_stats(row) for row in gdf.itertuples()]\n",
720
+ " \n",
721
+ " gdf[col_name] = [res['percentage_1'] for res in output]\n",
722
+ " return gdf\n",
723
+ "\n",
724
+ "# Run the function\n"
725
+ ]
726
+ },
727
  {
728
  "cell_type": "code",
729
+ "execution_count": 74,
730
+ "id": "f5f7297a-e31b-4d3c-ae1c-7e3d78bc141a",
731
+ "metadata": {},
732
+ "outputs": [
733
+ {
734
+ "name": "stdout",
735
+ "output_type": "stream",
736
+ "text": [
737
+ "CPU times: user 2min 2s, sys: 29.1 s, total: 2min 31s\n",
738
+ "Wall time: 2min 32s\n"
739
+ ]
740
+ }
741
+ ],
742
+ "source": [
743
+ "%%time\n",
744
+ "tif_file = 'ca_justice40.tif'\n",
745
+ "vec_file = './cpad-stats-temp.parquet'\n",
746
+ "df = big_zonal_stats_binary(vec_file, tif_file, col_name=\"justice40\", n_jobs=threads, verbose=0)\n",
747
+ "df.to_parquet(\"cpad-stats-temp.parquet\")\n"
748
+ ]
749
+ },
750
+ {
751
+ "cell_type": "markdown",
752
+ "id": "e3083b85-1322-4188-ac08-e73c2570978c",
753
+ "metadata": {},
754
+ "source": [
755
+ "# Cleaning up"
756
+ ]
757
+ },
758
+ {
759
+ "cell_type": "code",
760
+ "execution_count": 130,
761
  "id": "2e4de199-82d4-4e2b-8572-6fe19b57d1ee",
762
  "metadata": {},
763
  "outputs": [
 
795
  " <th>...</th>\n",
796
  " <th>all_species_rwr</th>\n",
797
  " <th>all_species_richness</th>\n",
 
798
  " <th>human_impact</th>\n",
799
  " <th>svi</th>\n",
800
+ " <th>justice40</th>\n",
801
  " <th>svi_socioeconomic_status</th>\n",
802
  " <th>svi_household_char</th>\n",
803
  " <th>svi_racial_ethnic_minority</th>\n",
 
821
  " <td>...</td>\n",
822
  " <td>0.355</td>\n",
823
  " <td>346.0</td>\n",
 
824
  " <td>2339.00</td>\n",
825
  " <td>0.521</td>\n",
826
+ " <td>0.0</td>\n",
827
  " <td>0.29</td>\n",
828
  " <td>0.522</td>\n",
829
  " <td>0.428</td>\n",
830
  " <td>0.816</td>\n",
831
+ " <td>MULTIPOLYGON Z (((-123.94358 41.95869 0.00000,...</td>\n",
832
  " </tr>\n",
833
  " <tr>\n",
834
  " <th>1</th>\n",
 
845
  " <td>...</td>\n",
846
  " <td>0.355</td>\n",
847
  " <td>346.0</td>\n",
 
848
  " <td>870.50</td>\n",
849
  " <td>0.521</td>\n",
850
+ " <td>0.0</td>\n",
851
  " <td>0.29</td>\n",
852
  " <td>0.522</td>\n",
853
  " <td>0.428</td>\n",
854
  " <td>0.816</td>\n",
855
+ " <td>MULTIPOLYGON Z (((-123.98793 41.94847 0.00000,...</td>\n",
856
  " </tr>\n",
857
  " <tr>\n",
858
  " <th>2</th>\n",
 
869
  " <td>...</td>\n",
870
  " <td>0.355</td>\n",
871
  " <td>346.0</td>\n",
 
872
  " <td>429.00</td>\n",
873
  " <td>0.521</td>\n",
874
+ " <td>0.0</td>\n",
875
  " <td>0.29</td>\n",
876
  " <td>0.522</td>\n",
877
  " <td>0.429</td>\n",
878
  " <td>0.816</td>\n",
879
+ " <td>MULTIPOLYGON Z (((-123.87957 41.97172 0.00000,...</td>\n",
880
  " </tr>\n",
881
  " <tr>\n",
882
  " <th>3</th>\n",
 
893
  " <td>...</td>\n",
894
  " <td>0.355</td>\n",
895
  " <td>346.0</td>\n",
 
896
  " <td>3907.00</td>\n",
897
  " <td>0.521</td>\n",
898
+ " <td>0.0</td>\n",
899
  " <td>0.29</td>\n",
900
  " <td>0.522</td>\n",
901
  " <td>0.428</td>\n",
902
  " <td>0.816</td>\n",
903
+ " <td>MULTIPOLYGON Z (((-123.84466 41.99139 0.00000,...</td>\n",
904
  " </tr>\n",
905
  " <tr>\n",
906
  " <th>4</th>\n",
 
917
  " <td>...</td>\n",
918
  " <td>0.355</td>\n",
919
  " <td>346.0</td>\n",
 
920
  " <td>698.25</td>\n",
921
  " <td>0.521</td>\n",
922
+ " <td>0.0</td>\n",
923
  " <td>0.29</td>\n",
924
  " <td>0.522</td>\n",
925
  " <td>0.428</td>\n",
926
  " <td>0.816</td>\n",
927
+ " <td>MULTIPOLYGON Z (((-123.86194 41.98176 0.00000,...</td>\n",
928
  " </tr>\n",
929
  " </tbody>\n",
930
  "</table>\n",
931
+ "<p>5 rows × 25 columns</p>\n",
932
  "</div>"
933
  ],
934
  "text/plain": [
 
946
  "3 United States Forest Service Federal 0 0.293964 100004 \n",
947
  "4 United States Forest Service Federal 0 0.912564 100005 \n",
948
  "\n",
949
+ " type ... all_species_rwr all_species_richness human_impact svi \\\n",
950
+ "0 Land ... 0.355 346.0 2339.00 0.521 \n",
951
+ "1 Land ... 0.355 346.0 870.50 0.521 \n",
952
+ "2 Land ... 0.355 346.0 429.00 0.521 \n",
953
+ "3 Land ... 0.355 346.0 3907.00 0.521 \n",
954
+ "4 Land ... 0.355 346.0 698.25 0.521 \n",
955
  "\n",
956
+ " justice40 svi_socioeconomic_status svi_household_char \\\n",
957
+ "0 0.0 0.29 0.522 \n",
958
+ "1 0.0 0.29 0.522 \n",
959
+ "2 0.0 0.29 0.522 \n",
960
+ "3 0.0 0.29 0.522 \n",
961
+ "4 0.0 0.29 0.522 \n",
962
  "\n",
963
  " svi_racial_ethnic_minority svi_housing_transit \\\n",
964
  "0 0.428 0.816 \n",
 
968
  "4 0.428 0.816 \n",
969
  "\n",
970
  " geom \n",
971
+ "0 MULTIPOLYGON Z (((-123.94358 41.95869 0.00000,... \n",
972
+ "1 MULTIPOLYGON Z (((-123.98793 41.94847 0.00000,... \n",
973
+ "2 MULTIPOLYGON Z (((-123.87957 41.97172 0.00000,... \n",
974
+ "3 MULTIPOLYGON Z (((-123.84466 41.99139 0.00000,... \n",
975
+ "4 MULTIPOLYGON Z (((-123.86194 41.98176 0.00000,... \n",
976
  "\n",
977
+ "[5 rows x 25 columns]"
978
  ]
979
  },
980
+ "execution_count": 130,
981
  "metadata": {},
982
  "output_type": "execute_result"
983
  }
984
  ],
985
  "source": [
986
  "## clean up\n",
 
987
  "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
988
  "ca_geom = con.read_parquet(\"ca2024-30m.parquet\").cast({\"geom\":\"geometry\"}).select(\"id\",\"geom\")\n",
989
  "\n",
 
 
990
  "ca = (con\n",
991
+ " .read_parquet(\"cpad-stats-temp.parquet\")\n",
992
+ " .cast({\n",
993
+ " \"crop_expansion\": \"int64\",\n",
994
+ " \"crop_reduction\": \"int64\",\n",
995
+ " \"manageable_carbon\": \"int64\",\n",
996
+ " \"irrecoverable_carbon\": \"int64\"\n",
997
+ " })\n",
998
+ " .rename(svi = \"SVI\")\n",
999
+ " .mutate(\n",
1000
+ " richness=_.richness.round(3),\n",
1001
+ " rsr=_.rsr.round(3),\n",
1002
+ " all_species_rwr=_.all_species_rwr.round(3),\n",
1003
+ " all_species_richness=_.all_species_richness.round(3),\n",
1004
+ " svi=_.svi.round(3),\n",
1005
+ " justice40=(_.justice40/100).round(3),\n",
1006
+ " svi_socioeconomic_status=_.socioeconomic_status.round(3),\n",
1007
+ " svi_household_char=_.household_char.round(3),\n",
1008
+ " svi_racial_ethnic_minority=_.racial_ethnic_minority.round(3),\n",
1009
+ " svi_housing_transit=_.housing_transit.round(3),\n",
1010
+ " human_impact=_.human_impact.round(3),\n",
1011
+ " deforest_carbon=_.deforest_carbon.round(3),\n",
1012
+ " biodiversity_intactness_loss=_.biodiversity_intactness_loss.round(3),\n",
1013
+ " forest_integrity_loss=_.forest_integrity_loss.round(3),\n",
1014
+ " )\n",
1015
+ " .drop(\"geometry\", \"__index_level_0__\",\"socioeconomic_status\", \"household_char\", \"racial_ethnic_minority\", \"housing_transit\", \n",
1016
+ " \"biodiversity_intactness_loss\",\"forest_integrity_loss\",\"crop_reduction\",\"crop_expansion\"\n",
1017
+ " ) \n",
1018
+ " .join(ca_geom, \"id\", how=\"inner\")\n",
1019
+ " )\n",
1020
+ "\n",
1021
  "\n",
 
 
1022
  "ca.head(5).execute()\n"
1023
  ]
1024
  },
 
1032
  },
1033
  {
1034
  "cell_type": "code",
1035
+ "execution_count": 131,
1036
  "id": "05c791c9-888a-483a-9dbb-a2ba7eb1bce2",
1037
  "metadata": {},
1038
  "outputs": [
 
1087
  },
1088
  {
1089
  "cell_type": "code",
1090
+ "execution_count": 132,
1091
  "id": "1f2d179d-6d47-4e84-83c6-7cb3d969fc00",
1092
  "metadata": {},
1093
  "outputs": [
 
1096
  "output_type": "stream",
1097
  "text": [
1098
  "cpad-stats.geojson:6: ignoring dimensions beyond two: in JSON object [-123.94358428532209,41.95869046159588,0]\n",
1099
+ "cpad-stats.geojson:6: ignoring dimensions beyond two: in JSON object {\"type\":\"Feature\",\"properties\":{\"established\":2024,\"reGAP\":2,\"name\":\"Six Rivers National Forest\",\"access_type\":\"Open Access\",\"manager\":\"United States Forest Service\",\"manager_type\":\"Federal\",\"Easement\":0,\"Acres\":0.19176256656646729,\"id\":100001,\"type\":\"Land\",\"richness\":4,\"rsr\":0.007,\"deforest_carbon\":0,\"irrecoverable_carbon\":4,\"manageable_carbon\":85,\"all_species_rwr\":0.355,\"all_species_richness\":346,\"human_impact\":2339,\"svi\":0.521,\"justice40\":0,\"svi_socioeconomic_status\":0.29,\"svi_household_ch...\n",
1100
+ "81196 features, 77613888 bytes of geometry, 3021623 bytes of string pool\n",
1101
+ "Choosing a maxzoom of -z10 for features typically 1202 feet (367 meters) apart, and at least 77 feet (24 meters) apart\n",
1102
  "Choosing a maxzoom of -z13 for resolution of about 39 feet (11 meters) within features\n",
1103
+ "tile 4/2/6 size is 739078 with detail 12, >500000 \n",
1104
+ "tile 5/5/12 size is 1225784 with detail 12, >500000 \n",
1105
+ "tile 5/5/12 size is 720473 with detail 11, >500000 \n",
1106
+ "tile 6/11/25 size is 559822 with detail 12, >500000 \n",
1107
+ "tile 6/10/24 size is 1012378 with detail 12, >500000 \n",
1108
+ "tile 6/10/24 size is 686077 with detail 11, >500000 \n",
1109
+ "tile 7/20/49 size is 642314 with detail 12, >500000 \n",
1110
+ "tile 7/22/51 size is 600090 with detail 12, >500000 \n",
1111
+ " 99.9% 13/1335/3225 \n",
 
1112
  " 100.0% 13/1278/3041 \r"
1113
  ]
1114
  },
 
1122
  {
1123
  "data": {
1124
  "application/vnd.jupyter.widget-view+json": {
1125
+ "model_id": "0cb245402c8d42e4ab020ace580d13eb",
1126
  "version_major": 2,
1127
  "version_minor": 0
1128
  },
1129
  "text/plain": [
1130
+ "cpad-stats.pmtiles: 0%| | 0.00/90.4M [00:00<?, ?B/s]"
1131
  ]
1132
  },
1133
  "metadata": {},
 
1136
  {
1137
  "data": {
1138
  "application/vnd.jupyter.widget-view+json": {
1139
+ "model_id": "78f91869800d474ebd77500c15db063f",
1140
  "version_major": 2,
1141
  "version_minor": 0
1142
  },