Spaces:
Running
Running
cassiebuhler
committed on
Commit
·
ae1519e
1
Parent(s):
430f057
fixed justice40 stats
Browse files- get_zonal_stats.ipynb +67 -65
get_zonal_stats.ipynb
CHANGED
@@ -516,9 +516,7 @@
|
|
516 |
" .mutate(geometry = _.geometry.convert(\"ESRI:102039\",\"EPSG:4326\"))\n",
|
517 |
" .select(\"justice40\",\"geometry\")\n",
|
518 |
" )\n",
|
519 |
-
"
|
520 |
-
"get_geotiff(gdf,\"ca_justice40.tif\",\"justice40\")\n",
|
521 |
-
"\n"
|
522 |
]
|
523 |
},
|
524 |
{
|
@@ -528,63 +526,67 @@
|
|
528 |
"metadata": {},
|
529 |
"outputs": [],
|
530 |
"source": [
|
531 |
-
"#justice40 is
|
532 |
"\n",
|
533 |
-
"def big_zonal_stats_binary(vec_file,
|
|
|
534 |
" gdf = gpd.read_parquet(vec_file)\n",
|
|
|
|
|
|
|
535 |
" if gdf.crs is None:\n",
|
536 |
" gdf = gdf.set_crs(\"EPSG:4326\")\n",
|
537 |
-
"
|
538 |
-
"
|
539 |
-
"
|
540 |
-
"\n",
|
541 |
-
"
|
542 |
-
"
|
543 |
-
"
|
544 |
-
"
|
545 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
546 |
" \n",
|
547 |
-
"
|
548 |
-
"
|
549 |
-
"
|
550 |
-
"
|
551 |
-
"\n",
|
552 |
-
" def get_stats(geom_slice):\n",
|
553 |
-
" geom = [geom_slice.geometry]\n",
|
554 |
-
" masked_image, _ = mask(src, geom, crop=True, all_touched=True, nodata=src.nodata)\n",
|
555 |
-
" \n",
|
556 |
-
" # If the masked area is empty, return None\n",
|
557 |
-
" if masked_image.size == 0:\n",
|
558 |
-
" return {'percentage_1': None}\n",
|
559 |
-
" \n",
|
560 |
-
" # Count 1s and calculate percentage\n",
|
561 |
-
" count_1 = (masked_image == 1).sum()\n",
|
562 |
-
" total_count = (masked_image != src.nodata).sum()\n",
|
563 |
-
" \n",
|
564 |
-
" # Calculate percentage of justice40 = 1 within the polygon\n",
|
565 |
-
" percentage_1 = (count_1 / total_count) * 100 if total_count > 0 else None\n",
|
566 |
" \n",
|
567 |
-
"
|
568 |
-
"\n",
|
569 |
-
"
|
|
|
|
|
|
|
|
|
570 |
" \n",
|
571 |
-
"
|
|
|
572 |
" return gdf\n",
|
573 |
"\n",
|
574 |
-
"
|
575 |
]
|
576 |
},
|
577 |
{
|
578 |
"cell_type": "code",
|
579 |
"execution_count": null,
|
580 |
-
"id": "
|
581 |
"metadata": {},
|
582 |
"outputs": [],
|
583 |
"source": [
|
584 |
"%%time\n",
|
585 |
-
"tif_file = 'ca_justice40.tif'\n",
|
586 |
"vec_file = './cpad-stats-temp.parquet'\n",
|
587 |
-
"
|
|
|
588 |
"df.to_parquet(\"cpad-stats-temp.parquet\")\n"
|
589 |
]
|
590 |
},
|
@@ -610,34 +612,34 @@
|
|
610 |
"ca = (con\n",
|
611 |
" .read_parquet(\"cpad-stats-temp.parquet\")\n",
|
612 |
" .cast({\n",
|
613 |
-
"
|
614 |
-
"
|
615 |
-
"
|
616 |
-
"
|
617 |
-
"
|
618 |
" .rename(svi = \"SVI\")\n",
|
619 |
" .mutate(\n",
|
620 |
" richness=_.richness.round(3),\n",
|
621 |
-
"
|
622 |
-
"
|
623 |
-
"
|
624 |
-
"
|
625 |
-
"
|
626 |
-
"
|
627 |
-
"
|
628 |
-
"
|
629 |
-
"
|
630 |
-
"
|
631 |
-
"
|
632 |
-
"
|
633 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
634 |
" )\n",
|
635 |
-
" .drop(\"geometry\", \"__index_level_0__\",\"socioeconomic_status\", \"household_char\", \"racial_ethnic_minority\", \"housing_transit\", \n",
|
636 |
-
" \"biodiversity_intactness_loss\",\"forest_integrity_loss\",\"crop_reduction\",\"crop_expansion\"\n",
|
637 |
-
" ) \n",
|
638 |
-
" .join(ca_geom, \"id\", how=\"inner\")\n",
|
639 |
-
" )\n",
|
640 |
-
"\n",
|
641 |
"\n",
|
642 |
"ca.head(5).execute()\n"
|
643 |
]
|
|
|
516 |
" .mutate(geometry = _.geometry.convert(\"ESRI:102039\",\"EPSG:4326\"))\n",
|
517 |
" .select(\"justice40\",\"geometry\")\n",
|
518 |
" )\n",
|
519 |
+
"justice40.execute().to_parquet(\"ca_justice40.parquet\")"
|
|
|
|
|
520 |
]
|
521 |
},
|
522 |
{
|
|
|
526 |
"metadata": {},
|
527 |
"outputs": [],
|
528 |
"source": [
|
529 |
+
"# justice40 is either 0 or 1, so we want the fraction (0 to 1) of each polygon's area where justice40 = 1.\n",
|
530 |
"\n",
|
531 |
+
"def big_zonal_stats_binary(vec_file, justice40_file, col_name,projected_crs=\"EPSG:3310\"):\n",
|
532 |
+
" # Read both vector files as GeoDataFrames\n",
|
533 |
" gdf = gpd.read_parquet(vec_file)\n",
|
534 |
+
" justice40_gdf = gpd.read_parquet(justice40_file)\n",
|
535 |
+
" \n",
|
536 |
+
" # Set CRS if not already set (assuming both should be in EPSG:4326, modify if needed)\n",
|
537 |
" if gdf.crs is None:\n",
|
538 |
" gdf = gdf.set_crs(\"EPSG:4326\")\n",
|
539 |
+
" if justice40_gdf.crs is None:\n",
|
540 |
+
" justice40_gdf = justice40_gdf.set_crs(\"EPSG:4326\")\n",
|
541 |
+
" # Ensure both GeoDataFrames are in the same CRS and reproject to a projected CRS for area calculations\n",
|
542 |
+
" gdf = gdf.to_crs(projected_crs)\n",
|
543 |
+
" justice40_gdf = justice40_gdf.to_crs(projected_crs)\n",
|
544 |
+
" \n",
|
545 |
+
"    # Ensure both GeoDataFrames are in the same CRS (already true after the reprojection above; kept as a safeguard)\n",
|
546 |
+
" gdf = gdf.to_crs(justice40_gdf.crs)\n",
|
547 |
+
" \n",
|
548 |
+
" # Filter justice40 polygons where justice40 == 1\n",
|
549 |
+
" justice40_gdf = justice40_gdf[justice40_gdf['justice40'] == 1].copy()\n",
|
550 |
+
" \n",
|
551 |
+
" # Prepare a list to hold percentage of justice40 == 1 for each polygon\n",
|
552 |
+
" percentages = []\n",
|
553 |
+
" \n",
|
554 |
+
" # Iterate over each polygon in the main GeoDataFrame\n",
|
555 |
+
" for geom in gdf.geometry:\n",
|
556 |
+
" # Find intersecting justice40 polygons\n",
|
557 |
+
" justice40_intersections = justice40_gdf[justice40_gdf.intersects(geom)].copy()\n",
|
558 |
" \n",
|
559 |
+
" # Calculate the intersection area\n",
|
560 |
+
" if not justice40_intersections.empty:\n",
|
561 |
+
" justice40_intersections['intersection'] = justice40_intersections.intersection(geom)\n",
|
562 |
+
" total_intersection_area = justice40_intersections['intersection'].area.sum()\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
563 |
" \n",
|
564 |
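+
"            # Fraction (0-1, not multiplied by 100) of the original polygon's area covered by justice40 == 1 polygons; assumes those polygons do not overlap each other\n",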
|
565 |
+
" percentage_1 = (total_intersection_area / geom.area) \n",
|
566 |
+
" else:\n",
|
567 |
+
" percentage_1 = 0.0 # No intersection with justice40 == 1 polygons\n",
|
568 |
+
" \n",
|
569 |
+
" # Append result\n",
|
570 |
+
" percentages.append(percentage_1)\n",
|
571 |
" \n",
|
572 |
+
" # Add results to the original GeoDataFrame\n",
|
573 |
+
" gdf[col_name] = percentages\n",
|
574 |
" return gdf\n",
|
575 |
"\n",
|
576 |
+
"\n"
|
577 |
]
|
578 |
},
|
579 |
{
|
580 |
"cell_type": "code",
|
581 |
"execution_count": null,
|
582 |
+
"id": "fe80fc28-73ce-4a26-9925-851c2798e467",
|
583 |
"metadata": {},
|
584 |
"outputs": [],
|
585 |
"source": [
|
586 |
"%%time\n",
|
|
|
587 |
"vec_file = './cpad-stats-temp.parquet'\n",
|
588 |
+
"\n",
|
589 |
+
"df = big_zonal_stats_binary(vec_file, \"ca_justice40.parquet\", col_name=\"percent_disadvantaged\")\n",
|
590 |
"df.to_parquet(\"cpad-stats-temp.parquet\")\n"
|
591 |
]
|
592 |
},
|
|
|
612 |
"ca = (con\n",
|
613 |
" .read_parquet(\"cpad-stats-temp.parquet\")\n",
|
614 |
" .cast({\n",
|
615 |
+
" \"crop_expansion\": \"int64\",\n",
|
616 |
+
" \"crop_reduction\": \"int64\",\n",
|
617 |
+
" \"manageable_carbon\": \"int64\",\n",
|
618 |
+
" \"irrecoverable_carbon\": \"int64\"\n",
|
619 |
+
" })\n",
|
620 |
" .rename(svi = \"SVI\")\n",
|
621 |
" .mutate(\n",
|
622 |
" richness=_.richness.round(3),\n",
|
623 |
+
" rsr=_.rsr.round(3),\n",
|
624 |
+
" all_species_rwr=_.all_species_rwr.round(3),\n",
|
625 |
+
" all_species_richness=_.all_species_richness.round(3),\n",
|
626 |
+
" percent_disadvantaged=(_.percent_disadvantaged).round(3),\n",
|
627 |
+
" svi=_.svi.round(3),\n",
|
628 |
+
" svi_socioeconomic_status=_.socioeconomic_status.round(3),\n",
|
629 |
+
" svi_household_char=_.household_char.round(3),\n",
|
630 |
+
" svi_racial_ethnic_minority=_.racial_ethnic_minority.round(3),\n",
|
631 |
+
" svi_housing_transit=_.housing_transit.round(3),\n",
|
632 |
+
" human_impact=_.human_impact.round(3),\n",
|
633 |
+
" deforest_carbon=_.deforest_carbon.round(3),\n",
|
634 |
+
" biodiversity_intactness_loss=_.biodiversity_intactness_loss.round(3),\n",
|
635 |
+
" forest_integrity_loss=_.forest_integrity_loss.round(3),\n",
|
636 |
+
" )\n",
|
637 |
+
" .select('established', 'reGAP', 'name', 'access_type', 'manager', 'manager_type', 'Easement', 'Acres', 'id', 'type','richness', \n",
|
638 |
+
" 'rsr', 'all_species_rwr', 'all_species_richness','deforest_carbon', 'irrecoverable_carbon', 'manageable_carbon', 'human_impact',\n",
|
639 |
+
" 'percent_disadvantaged','svi', 'svi_socioeconomic_status', 'svi_household_char', \n",
|
640 |
+
" 'svi_racial_ethnic_minority','svi_housing_transit')\n",
|
641 |
+
" .join(ca_geom, \"id\", how=\"inner\")\n",
|
642 |
" )\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
643 |
"\n",
|
644 |
"ca.head(5).execute()\n"
|
645 |
]
|