cassiebuhler committed on
Commit
ae1519e
·
1 Parent(s): 430f057

fixed justice40 stats

Browse files
Files changed (1) hide show
  1. get_zonal_stats.ipynb +67 -65
get_zonal_stats.ipynb CHANGED
@@ -516,9 +516,7 @@
516
  " .mutate(geometry = _.geometry.convert(\"ESRI:102039\",\"EPSG:4326\"))\n",
517
  " .select(\"justice40\",\"geometry\")\n",
518
  " )\n",
519
- "gdf = justice40.execute()\n",
520
- "get_geotiff(gdf,\"ca_justice40.tif\",\"justice40\")\n",
521
- "\n"
522
  ]
523
  },
524
  {
@@ -528,63 +526,67 @@
528
  "metadata": {},
529
  "outputs": [],
530
  "source": [
531
- "#justice40 is binary data so we want to get the percentage of polygon where justice40 = 1. \n",
532
  "\n",
533
- "def big_zonal_stats_binary(vec_file, tif_file, col_name, n_jobs, verbose=10, timeout=10000):\n",
 
534
  " gdf = gpd.read_parquet(vec_file)\n",
 
 
 
535
  " if gdf.crs is None:\n",
536
  " gdf = gdf.set_crs(\"EPSG:4326\")\n",
537
- " gdf = gdf.rename(columns={\"geom\": \"geometry\"})\n",
538
- " gdf = gdf.set_geometry(\"geometry\")\n",
539
- " gdf = gdf[gdf[\"geometry\"].notna()].copy()\n",
540
- "\n",
541
- " with rasterio.open(tif_file) as src:\n",
542
- " raster_crs = src.crs\n",
543
- " gdf = gdf.to_crs(raster_crs) \n",
544
- " \n",
545
- " california_polygon = box(*gdf.total_bounds)\n",
 
 
 
 
 
 
 
 
 
 
546
  " \n",
547
- " out_image, out_transform = mask(src, [california_polygon], crop=True, nodata=src.nodata)\n",
548
- "\n",
549
- " if out_image.ndim == 3:\n",
550
- " out_image = out_image[0]\n",
551
- "\n",
552
- " def get_stats(geom_slice):\n",
553
- " geom = [geom_slice.geometry]\n",
554
- " masked_image, _ = mask(src, geom, crop=True, all_touched=True, nodata=src.nodata)\n",
555
- " \n",
556
- " # If the masked area is empty, return None\n",
557
- " if masked_image.size == 0:\n",
558
- " return {'percentage_1': None}\n",
559
- " \n",
560
- " # Count 1s and calculate percentage\n",
561
- " count_1 = (masked_image == 1).sum()\n",
562
- " total_count = (masked_image != src.nodata).sum()\n",
563
- " \n",
564
- " # Calculate percentage of justice40 = 1 within the polygon\n",
565
- " percentage_1 = (count_1 / total_count) * 100 if total_count > 0 else None\n",
566
  " \n",
567
- " return {'percentage_1': percentage_1}\n",
568
- "\n",
569
- " output = [get_stats(row) for row in gdf.itertuples()]\n",
 
 
 
 
570
  " \n",
571
- " gdf[col_name] = [res['percentage_1'] for res in output]\n",
 
572
  " return gdf\n",
573
  "\n",
574
- "# Run the function\n"
575
  ]
576
  },
577
  {
578
  "cell_type": "code",
579
  "execution_count": null,
580
- "id": "f5f7297a-e31b-4d3c-ae1c-7e3d78bc141a",
581
  "metadata": {},
582
  "outputs": [],
583
  "source": [
584
  "%%time\n",
585
- "tif_file = 'ca_justice40.tif'\n",
586
  "vec_file = './cpad-stats-temp.parquet'\n",
587
- "df = big_zonal_stats_binary(vec_file, tif_file, col_name=\"justice40\", n_jobs=threads, verbose=0)\n",
 
588
  "df.to_parquet(\"cpad-stats-temp.parquet\")\n"
589
  ]
590
  },
@@ -610,34 +612,34 @@
610
  "ca = (con\n",
611
  " .read_parquet(\"cpad-stats-temp.parquet\")\n",
612
  " .cast({\n",
613
- " \"crop_expansion\": \"int64\",\n",
614
- " \"crop_reduction\": \"int64\",\n",
615
- " \"manageable_carbon\": \"int64\",\n",
616
- " \"irrecoverable_carbon\": \"int64\"\n",
617
- " })\n",
618
  " .rename(svi = \"SVI\")\n",
619
  " .mutate(\n",
620
  " richness=_.richness.round(3),\n",
621
- " rsr=_.rsr.round(3),\n",
622
- " all_species_rwr=_.all_species_rwr.round(3),\n",
623
- " all_species_richness=_.all_species_richness.round(3),\n",
624
- " svi=_.svi.round(3),\n",
625
- " justice40=(_.justice40/100).round(3),\n",
626
- " svi_socioeconomic_status=_.socioeconomic_status.round(3),\n",
627
- " svi_household_char=_.household_char.round(3),\n",
628
- " svi_racial_ethnic_minority=_.racial_ethnic_minority.round(3),\n",
629
- " svi_housing_transit=_.housing_transit.round(3),\n",
630
- " human_impact=_.human_impact.round(3),\n",
631
- " deforest_carbon=_.deforest_carbon.round(3),\n",
632
- " biodiversity_intactness_loss=_.biodiversity_intactness_loss.round(3),\n",
633
- " forest_integrity_loss=_.forest_integrity_loss.round(3),\n",
 
 
 
 
 
 
634
  " )\n",
635
- " .drop(\"geometry\", \"__index_level_0__\",\"socioeconomic_status\", \"household_char\", \"racial_ethnic_minority\", \"housing_transit\", \n",
636
- " \"biodiversity_intactness_loss\",\"forest_integrity_loss\",\"crop_reduction\",\"crop_expansion\"\n",
637
- " ) \n",
638
- " .join(ca_geom, \"id\", how=\"inner\")\n",
639
- " )\n",
640
- "\n",
641
  "\n",
642
  "ca.head(5).execute()\n"
643
  ]
 
516
  " .mutate(geometry = _.geometry.convert(\"ESRI:102039\",\"EPSG:4326\"))\n",
517
  " .select(\"justice40\",\"geometry\")\n",
518
  " )\n",
519
+ "justice40.execute().to_parquet(\"ca_justice40.parquet\")"
 
 
520
  ]
521
  },
522
  {
 
526
  "metadata": {},
527
  "outputs": [],
528
  "source": [
529
+ "# justice40 is either 0 or 1, so we want the fraction of each polygon's area where justice40 = 1.\n",
530
  "\n",
531
+ "def big_zonal_stats_binary(vec_file, justice40_file, col_name,projected_crs=\"EPSG:3310\"):\n",
532
+ " # Read both vector files as GeoDataFrames\n",
533
  " gdf = gpd.read_parquet(vec_file)\n",
534
+ " justice40_gdf = gpd.read_parquet(justice40_file)\n",
535
+ " \n",
536
+ " # Set CRS if not already set (assuming both should be in EPSG:4326, modify if needed)\n",
537
  " if gdf.crs is None:\n",
538
  " gdf = gdf.set_crs(\"EPSG:4326\")\n",
539
+ " if justice40_gdf.crs is None:\n",
540
+ " justice40_gdf = justice40_gdf.set_crs(\"EPSG:4326\")\n",
541
+ " # Ensure both GeoDataFrames are in the same CRS and reproject to a projected CRS for area calculations\n",
542
+ " gdf = gdf.to_crs(projected_crs)\n",
543
+ " justice40_gdf = justice40_gdf.to_crs(projected_crs)\n",
544
+ " \n",
545
+ " # Both GeoDataFrames were already reprojected to projected_crs above; this re-alignment is a no-op safeguard\n",
546
+ " gdf = gdf.to_crs(justice40_gdf.crs)\n",
547
+ " \n",
548
+ " # Filter justice40 polygons where justice40 == 1\n",
549
+ " justice40_gdf = justice40_gdf[justice40_gdf['justice40'] == 1].copy()\n",
550
+ " \n",
551
+ " # Prepare a list to hold the fraction of each polygon's area where justice40 == 1\n",
552
+ " percentages = []\n",
553
+ " \n",
554
+ " # Iterate over each polygon in the main GeoDataFrame\n",
555
+ " for geom in gdf.geometry:\n",
556
+ " # Find intersecting justice40 polygons\n",
557
+ " justice40_intersections = justice40_gdf[justice40_gdf.intersects(geom)].copy()\n",
558
  " \n",
559
+ " # Calculate the intersection area\n",
560
+ " if not justice40_intersections.empty:\n",
561
+ " justice40_intersections['intersection'] = justice40_intersections.intersection(geom)\n",
562
+ " total_intersection_area = justice40_intersections['intersection'].area.sum()\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
  " \n",
564
+ " # Calculate the covered fraction (a ratio, not multiplied by 100) relative to the original polygon's area\n",
565
+ " percentage_1 = (total_intersection_area / geom.area) \n",
566
+ " else:\n",
567
+ " percentage_1 = 0.0 # No intersection with justice40 == 1 polygons\n",
568
+ " \n",
569
+ " # Append result\n",
570
+ " percentages.append(percentage_1)\n",
571
  " \n",
572
+ " # Add results to the original GeoDataFrame\n",
573
+ " gdf[col_name] = percentages\n",
574
  " return gdf\n",
575
  "\n",
576
+ "\n"
577
  ]
578
  },
579
  {
580
  "cell_type": "code",
581
  "execution_count": null,
582
+ "id": "fe80fc28-73ce-4a26-9925-851c2798e467",
583
  "metadata": {},
584
  "outputs": [],
585
  "source": [
586
  "%%time\n",
 
587
  "vec_file = './cpad-stats-temp.parquet'\n",
588
+ "\n",
589
+ "df = big_zonal_stats_binary(vec_file, \"ca_justice40.parquet\", col_name=\"percent_disadvantaged\")\n",
590
  "df.to_parquet(\"cpad-stats-temp.parquet\")\n"
591
  ]
592
  },
 
612
  "ca = (con\n",
613
  " .read_parquet(\"cpad-stats-temp.parquet\")\n",
614
  " .cast({\n",
615
+ " \"crop_expansion\": \"int64\",\n",
616
+ " \"crop_reduction\": \"int64\",\n",
617
+ " \"manageable_carbon\": \"int64\",\n",
618
+ " \"irrecoverable_carbon\": \"int64\"\n",
619
+ " })\n",
620
  " .rename(svi = \"SVI\")\n",
621
  " .mutate(\n",
622
  " richness=_.richness.round(3),\n",
623
+ " rsr=_.rsr.round(3),\n",
624
+ " all_species_rwr=_.all_species_rwr.round(3),\n",
625
+ " all_species_richness=_.all_species_richness.round(3),\n",
626
+ " percent_disadvantaged=(_.percent_disadvantaged).round(3),\n",
627
+ " svi=_.svi.round(3),\n",
628
+ " svi_socioeconomic_status=_.socioeconomic_status.round(3),\n",
629
+ " svi_household_char=_.household_char.round(3),\n",
630
+ " svi_racial_ethnic_minority=_.racial_ethnic_minority.round(3),\n",
631
+ " svi_housing_transit=_.housing_transit.round(3),\n",
632
+ " human_impact=_.human_impact.round(3),\n",
633
+ " deforest_carbon=_.deforest_carbon.round(3),\n",
634
+ " biodiversity_intactness_loss=_.biodiversity_intactness_loss.round(3),\n",
635
+ " forest_integrity_loss=_.forest_integrity_loss.round(3),\n",
636
+ " )\n",
637
+ " .select('established', 'reGAP', 'name', 'access_type', 'manager', 'manager_type', 'Easement', 'Acres', 'id', 'type','richness', \n",
638
+ " 'rsr', 'all_species_rwr', 'all_species_richness','deforest_carbon', 'irrecoverable_carbon', 'manageable_carbon', 'human_impact',\n",
639
+ " 'percent_disadvantaged','svi', 'svi_socioeconomic_status', 'svi_household_char', \n",
640
+ " 'svi_racial_ethnic_minority','svi_housing_transit')\n",
641
+ " .join(ca_geom, \"id\", how=\"inner\")\n",
642
  " )\n",
 
 
 
 
 
 
643
  "\n",
644
  "ca.head(5).execute()\n"
645
  ]