cassiebuhler committed on
Commit
d11530a
·
1 Parent(s): dd0c7df

adding in justice40

Browse files
Files changed (1) hide show
  1. preprocess/preprocess.ipynb +7 -3
preprocess/preprocess.ipynb CHANGED
@@ -41,8 +41,10 @@
41
  "ca_all_parquet = path + \"ca-all.parquet\"\n",
42
  "ca_all_stats = path + \"ca-all-stats.parquet\"\n",
43
  "ca_final_parquet = \"ca_30x30_stats.parquet\"\n",
 
44
  "#vector data \n",
45
  "svi = path + 'SVI2022_US_tract' #4326\n",
 
46
  "fire = path + 'calfire-2023'#4326\n",
47
  "rxburn = path + 'calfire-rxburn-2023'#4326\n",
48
  "\n",
@@ -322,8 +324,8 @@
322
  "outputs": [],
323
  "source": [
324
  "%%time\n",
325
- "vectors = [svi,fire,rxburn]\n",
326
- "names = ['svi','fire','rxburn']\n",
327
  "\n",
328
  "gdf = gpd.read_parquet(ca_all_parquet) # CA Nature data \n",
329
  "gdf = gdf.set_index('id') # set the index to the col we are joining on for gpd.join()\n",
@@ -339,7 +341,9 @@
339
  " # filter: only want CA data, not nationwide. \n",
340
  " if name == 'svi': \n",
341
  " gdf_temp = gdf_temp[gdf_temp['STATE']==\"California\"]\n",
342
- " \n",
 
 
343
  " reproject_vectors(file, gdf_temp) # change projection to match CA Nature data \n",
344
  " gdf_zonal = vector_vector_stats(ca_all_parquet, file + '-epsg3310.parquet') #compute zonal stats \n",
345
  " gdf_zonal = gdf_zonal.rename(columns ={'overlap_fraction':name}) \n",
 
41
  "ca_all_parquet = path + \"ca-all.parquet\"\n",
42
  "ca_all_stats = path + \"ca-all-stats.parquet\"\n",
43
  "ca_final_parquet = \"ca_30x30_stats.parquet\"\n",
44
+ "\n",
45
  "#vector data \n",
46
  "svi = path + 'SVI2022_US_tract' #4326\n",
47
+ "justice40 = path + 'disadvantaged-communities'\n",
48
  "fire = path + 'calfire-2023'#4326\n",
49
  "rxburn = path + 'calfire-rxburn-2023'#4326\n",
50
  "\n",
 
324
  "outputs": [],
325
  "source": [
326
  "%%time\n",
327
+ "vectors = [svi,justice40 ,fire,rxburn]\n",
328
+ "names = ['svi','disadvantaged_communities','fire','rxburn']\n",
329
  "\n",
330
  "gdf = gpd.read_parquet(ca_all_parquet) # CA Nature data \n",
331
  "gdf = gdf.set_index('id') # set the index to the col we are joining on for gpd.join()\n",
 
341
  " # filter: only want CA data, not nationwide. \n",
342
  " if name == 'svi': \n",
343
  " gdf_temp = gdf_temp[gdf_temp['STATE']==\"California\"]\n",
344
+ " if name == 'disadvantaged_communities':\n",
345
+ " gdf_temp = gdf_temp[gdf_temp['StateName']==\"California\"]\n",
346
+ "\n",
347
  " reproject_vectors(file, gdf_temp) # change projection to match CA Nature data \n",
348
  " gdf_zonal = vector_vector_stats(ca_all_parquet, file + '-epsg3310.parquet') #compute zonal stats \n",
349
  " gdf_zonal = gdf_zonal.rename(columns ={'overlap_fraction':name}) \n",