Spaces:

boettiger-lab
/

ca-30x30

Running

App Files Files Community

cassiebuhler committed on Nov 2, 2024

Commit

ae1519e

1 Parent(s): 430f057

fixed justice40 stats

Browse files

Files changed (1) hide show

get_zonal_stats.ipynb +67 -65

get_zonal_stats.ipynb CHANGED Viewed

@@ -516,9 +516,7 @@
     "        .mutate(geometry = _.geometry.convert(\"ESRI:102039\",\"EPSG:4326\"))\n",
     "        .select(\"justice40\",\"geometry\")\n",
     "        )\n",
-    "gdf = justice40.execute()\n",
-    "get_geotiff(gdf,\"ca_justice40.tif\",\"justice40\")\n",
-    "\n"
    ]
   },
   {
@@ -528,63 +526,67 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#justice40 is binary data so we want to get the percentage of polygon where justice40 = 1.  \n",
     "\n",
-    "def big_zonal_stats_binary(vec_file, tif_file, col_name, n_jobs, verbose=10, timeout=10000):\n",
     "    gdf = gpd.read_parquet(vec_file)\n",
     "    if gdf.crs is None:\n",
     "        gdf = gdf.set_crs(\"EPSG:4326\")\n",
-    "    gdf = gdf.rename(columns={\"geom\": \"geometry\"})\n",
-    "    gdf = gdf.set_geometry(\"geometry\")\n",
-    "    gdf = gdf[gdf[\"geometry\"].notna()].copy()\n",
-    "\n",
-    "    with rasterio.open(tif_file) as src:\n",
-    "        raster_crs = src.crs\n",
-    "        gdf = gdf.to_crs(raster_crs) \n",
-    "        \n",
-    "        california_polygon = box(*gdf.total_bounds)\n",
     "        \n",
-    "        out_image, out_transform = mask(src, [california_polygon], crop=True, nodata=src.nodata)\n",
-    "\n",
-    "        if out_image.ndim == 3:\n",
-    "            out_image = out_image[0]\n",
-    "\n",
-    "        def get_stats(geom_slice):\n",
-    "            geom = [geom_slice.geometry]\n",
-    "            masked_image, _ = mask(src, geom, crop=True, all_touched=True, nodata=src.nodata)\n",
-    "            \n",
-    "            # If the masked area is empty, return None\n",
-    "            if masked_image.size == 0:\n",
-    "                return {'percentage_1': None}\n",
-    "            \n",
-    "            # Count 1s and calculate percentage\n",
-    "            count_1 = (masked_image == 1).sum()\n",
-    "            total_count = (masked_image != src.nodata).sum()\n",
-    "            \n",
-    "            # Calculate percentage of justice40 = 1 within the polygon\n",
-    "            percentage_1 = (count_1 / total_count) * 100 if total_count > 0 else None\n",
     "            \n",
-    "            return {'percentage_1': percentage_1}\n",
-    "\n",
-    "        output = [get_stats(row) for row in gdf.itertuples()]\n",
     "    \n",
-    "    gdf[col_name] = [res['percentage_1'] for res in output]\n",
     "    return gdf\n",
     "\n",
-    "# Run the function\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f5f7297a-e31b-4d3c-ae1c-7e3d78bc141a",
    "metadata": {},
    "outputs": [],
    "source": [
     "%%time\n",
-    "tif_file = 'ca_justice40.tif'\n",
     "vec_file = './cpad-stats-temp.parquet'\n",
-    "df = big_zonal_stats_binary(vec_file, tif_file, col_name=\"justice40\", n_jobs=threads, verbose=0)\n",
     "df.to_parquet(\"cpad-stats-temp.parquet\")\n"
    ]
   },
@@ -610,34 +612,34 @@
     "ca = (con\n",
     "        .read_parquet(\"cpad-stats-temp.parquet\")\n",
     "        .cast({\n",
-    "            \"crop_expansion\": \"int64\",\n",
-    "            \"crop_reduction\": \"int64\",\n",
-    "            \"manageable_carbon\": \"int64\",\n",
-    "            \"irrecoverable_carbon\": \"int64\"\n",
-    "        })\n",
     "        .rename(svi = \"SVI\")\n",
     "        .mutate(\n",
     "            richness=_.richness.round(3),\n",
-    "            rsr=_.rsr.round(3),\n",
-    "            all_species_rwr=_.all_species_rwr.round(3),\n",
-    "            all_species_richness=_.all_species_richness.round(3),\n",
-    "            svi=_.svi.round(3),\n",
-    "            justice40=(_.justice40/100).round(3),\n",
-    "            svi_socioeconomic_status=_.socioeconomic_status.round(3),\n",
-    "            svi_household_char=_.household_char.round(3),\n",
-    "            svi_racial_ethnic_minority=_.racial_ethnic_minority.round(3),\n",
-    "            svi_housing_transit=_.housing_transit.round(3),\n",
-    "            human_impact=_.human_impact.round(3),\n",
-    "            deforest_carbon=_.deforest_carbon.round(3),\n",
-    "            biodiversity_intactness_loss=_.biodiversity_intactness_loss.round(3),\n",
-    "            forest_integrity_loss=_.forest_integrity_loss.round(3),\n",
     "        )\n",
-    "        .drop(\"geometry\", \"__index_level_0__\",\"socioeconomic_status\", \"household_char\", \"racial_ethnic_minority\", \"housing_transit\", \n",
-    "              \"biodiversity_intactness_loss\",\"forest_integrity_loss\",\"crop_reduction\",\"crop_expansion\"\n",
-    "             )    \n",
-    "        .join(ca_geom, \"id\", how=\"inner\")\n",
-    "    )\n",
-    "\n",
     "\n",
     "ca.head(5).execute()\n"
    ]

     "        .mutate(geometry = _.geometry.convert(\"ESRI:102039\",\"EPSG:4326\"))\n",
     "        .select(\"justice40\",\"geometry\")\n",
     "        )\n",
+    "justice40.execute().to_parquet(\"ca_justice40.parquet\")"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "# #justice40 is either 0 or 1, so we want to get the percentage of polygon where justice40 = 1.  \n",
     "\n",
+    "def big_zonal_stats_binary(vec_file, justice40_file, col_name,projected_crs=\"EPSG:3310\"):\n",
+    "    # Read both vector files as GeoDataFrames\n",
     "    gdf = gpd.read_parquet(vec_file)\n",
+    "    justice40_gdf = gpd.read_parquet(justice40_file)\n",
+    "    \n",
+    "    # Set CRS if not already set (assuming both should be in EPSG:4326, modify if needed)\n",
     "    if gdf.crs is None:\n",
     "        gdf = gdf.set_crs(\"EPSG:4326\")\n",
+    "    if justice40_gdf.crs is None:\n",
+    "        justice40_gdf = justice40_gdf.set_crs(\"EPSG:4326\")\n",
+    "     # Ensure both GeoDataFrames are in the same CRS and reproject to a projected CRS for area calculations\n",
+    "    gdf = gdf.to_crs(projected_crs)\n",
+    "    justice40_gdf = justice40_gdf.to_crs(projected_crs)\n",
+    "    \n",
+    "    # Ensure both GeoDataFrames are in the same CRS\n",
+    "    gdf = gdf.to_crs(justice40_gdf.crs)\n",
+    "    \n",
+    "    # Filter justice40 polygons where justice40 == 1\n",
+    "    justice40_gdf = justice40_gdf[justice40_gdf['justice40'] == 1].copy()\n",
+    "    \n",
+    "    # Prepare a list to hold percentage of justice40 == 1 for each polygon\n",
+    "    percentages = []\n",
+    "    \n",
+    "    # Iterate over each polygon in the main GeoDataFrame\n",
+    "    for geom in gdf.geometry:\n",
+    "        # Find intersecting justice40 polygons\n",
+    "        justice40_intersections = justice40_gdf[justice40_gdf.intersects(geom)].copy()\n",
     "        \n",
+    "        # Calculate the intersection area\n",
+    "        if not justice40_intersections.empty:\n",
+    "            justice40_intersections['intersection'] = justice40_intersections.intersection(geom)\n",
+    "            total_intersection_area = justice40_intersections['intersection'].area.sum()\n",
     "            \n",
+    "            # Calculate percentage based on original polygon's area\n",
+    "            percentage_1 = (total_intersection_area / geom.area) \n",
+    "        else:\n",
+    "            percentage_1 = 0.0  # No intersection with justice40 == 1 polygons\n",
+    "        \n",
+    "        # Append result\n",
+    "        percentages.append(percentage_1)\n",
     "    \n",
+    "    # Add results to the original GeoDataFrame\n",
+    "    gdf[col_name] = percentages\n",
     "    return gdf\n",
     "\n",
+    "\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "fe80fc28-73ce-4a26-9925-851c2798e467",
    "metadata": {},
    "outputs": [],
    "source": [
     "%%time\n",
     "vec_file = './cpad-stats-temp.parquet'\n",
+    "\n",
+    "df = big_zonal_stats_binary(vec_file, \"ca_justice40.parquet\", col_name=\"percent_disadvantaged\")\n",
     "df.to_parquet(\"cpad-stats-temp.parquet\")\n"
    ]
   },
     "ca = (con\n",
     "        .read_parquet(\"cpad-stats-temp.parquet\")\n",
     "        .cast({\n",
+    "                \"crop_expansion\": \"int64\",\n",
+    "                \"crop_reduction\": \"int64\",\n",
+    "                \"manageable_carbon\": \"int64\",\n",
+    "                \"irrecoverable_carbon\": \"int64\"\n",
+    "            })\n",
     "        .rename(svi = \"SVI\")\n",
     "        .mutate(\n",
     "            richness=_.richness.round(3),\n",
+    "                rsr=_.rsr.round(3),\n",
+    "                all_species_rwr=_.all_species_rwr.round(3),\n",
+    "                all_species_richness=_.all_species_richness.round(3),\n",
+    "                percent_disadvantaged=(_.percent_disadvantaged).round(3),\n",
+    "                svi=_.svi.round(3),\n",
+    "                svi_socioeconomic_status=_.socioeconomic_status.round(3),\n",
+    "                svi_household_char=_.household_char.round(3),\n",
+    "                svi_racial_ethnic_minority=_.racial_ethnic_minority.round(3),\n",
+    "                svi_housing_transit=_.housing_transit.round(3),\n",
+    "                human_impact=_.human_impact.round(3),\n",
+    "                deforest_carbon=_.deforest_carbon.round(3),\n",
+    "                biodiversity_intactness_loss=_.biodiversity_intactness_loss.round(3),\n",
+    "                forest_integrity_loss=_.forest_integrity_loss.round(3),\n",
+    "            )\n",
+    "          .select('established', 'reGAP', 'name', 'access_type', 'manager', 'manager_type', 'Easement', 'Acres', 'id', 'type','richness', \n",
+    "                 'rsr', 'all_species_rwr', 'all_species_richness','deforest_carbon', 'irrecoverable_carbon', 'manageable_carbon', 'human_impact',\n",
+    "                 'percent_disadvantaged','svi', 'svi_socioeconomic_status', 'svi_household_char', \n",
+    "                 'svi_racial_ethnic_minority','svi_housing_transit')\n",
+    "          .join(ca_geom, \"id\", how=\"inner\")\n",
     "        )\n",
     "\n",
     "ca.head(5).execute()\n"
    ]