Spaces:
Sleeping
Sleeping
grouping
Browse files- preprocess.py +31 -16
preprocess.py
CHANGED
@@ -5,21 +5,28 @@ import fiona
|
|
5 |
import geopandas as gpd
|
6 |
import rioxarray
|
7 |
from shapely.geometry import box
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# +
|
10 |
|
11 |
fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
|
12 |
parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
|
13 |
# gdb = "https://data.source.coop/cboettig/pad-us-3/PADUS3/PAD_US3_0.gdb" # original, all tables
|
14 |
-
|
15 |
-
con = ibis.duckdb.connect()
|
16 |
-
con.load_extension("spatial")
|
17 |
-
threads = -1
|
18 |
-
|
19 |
# Alternatively, read the fgb version (much slower):
|
20 |
# pad = con.read_geo(fgb)
|
21 |
# pad = con.read_parquet(parquet)
|
22 |
# Currently ibis doesn't detect that this is GeoParquet, so we need a SQL escape hatch to cast the geometry.
|
|
|
|
|
23 |
con.raw_sql(f"CREATE OR REPLACE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
|
24 |
pad = con.table("pad")
|
25 |
# -
|
@@ -30,12 +37,9 @@ pad = con.table("pad")
|
|
30 |
meta = fiona.open(fgb)
|
31 |
crs = meta.crs
|
32 |
|
33 |
-
# +
|
34 |
## Optional: get the bounds
|
35 |
-
cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
|
36 |
-
|
37 |
# Extract the bounds. (In this case they are already in the same projection, so r.rio.bounds() would also work.)
|
38 |
-
r = rioxarray.open_rasterio(
|
39 |
bounds = box(*r.rio.transform_bounds(crs))
|
40 |
|
41 |
# +
|
@@ -89,18 +93,29 @@ pad_grouping = (
|
|
89 |
)
|
90 |
.mutate(bucket = case)
|
91 |
.select(categorical_columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
)
|
93 |
|
94 |
pad_grouping.to_parquet("pad-groupings.parquet")
|
95 |
# -
|
96 |
|
97 |
-
agency_name = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-agency-name.parquet").select(manager_name_id = "Code", manager_name = "Dom")
|
98 |
-
agency_type = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-agency-type.parquet").select(manager_type_id = "Code", manager_type = "Dom")
|
99 |
-
desig_type = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-desgination-type.parquet").select(designation_type_id = "Code", designation_type = "Dom")
|
100 |
-
public_access = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-public-access.parquet").select(public_access_id = "Code", public_access = "Dom")
|
101 |
-
state_name = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-state-name.parquet").select(state = "Code", state_name = "Dom")
|
102 |
-
iucn = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-iucn.parquet").select(iucn_code = "CODE", iucn_category = "DOM")
|
103 |
-
|
104 |
(pad_parquet
|
105 |
.rename(manager_name_id = "Mang_Name",
|
106 |
manager_type_id = "Mang_Type",
|
|
|
5 |
import geopandas as gpd
|
6 |
import rioxarray
|
7 |
from shapely.geometry import box
|
8 |
+
con = ibis.duckdb.connect()
|
9 |
+
con.load_extension("spatial")
|
10 |
+
threads = -1
|
11 |
+
|
12 |
+
agency_name = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-agency-name.parquet").select(manager_name_id = "Code", manager_name = "Dom")
|
13 |
+
agency_type = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-agency-type.parquet").select(manager_type_id = "Code", manager_type = "Dom")
|
14 |
+
desig_type = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-desgination-type.parquet").select(designation_type_id = "Code", designation_type = "Dom")
|
15 |
+
public_access = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-public-access.parquet").select(public_access_id = "Code", public_access = "Dom")
|
16 |
+
state_name = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-state-name.parquet").select(state = "Code", state_name = "Dom")
|
17 |
+
iucn = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-iucn.parquet").select(iucn_code = "CODE", iucn_category = "DOM")
|
18 |
|
19 |
# +
|
20 |
|
21 |
fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
|
22 |
parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
|
23 |
# gdb = "https://data.source.coop/cboettig/pad-us-3/PADUS3/PAD_US3_0.gdb" # original, all tables
|
|
|
|
|
|
|
|
|
|
|
24 |
# Alternatively, read the fgb version (much slower):
|
25 |
# pad = con.read_geo(fgb)
|
26 |
# pad = con.read_parquet(parquet)
|
27 |
# Currently ibis doesn't detect that this is GeoParquet, so we need a SQL escape hatch to cast the geometry.
|
28 |
+
|
29 |
+
|
30 |
con.raw_sql(f"CREATE OR REPLACE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
|
31 |
pad = con.table("pad")
|
32 |
# -
|
|
|
37 |
meta = fiona.open(fgb)
|
38 |
crs = meta.crs
|
39 |
|
|
|
40 |
## Optional: get the bounds
|
|
|
|
|
41 |
# Extract the bounds. (In this case they are already in the same projection, so r.rio.bounds() would also work.)
|
42 |
+
r = rioxarray.open_rasterio("https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif")
|
43 |
bounds = box(*r.rio.transform_bounds(crs))
|
44 |
|
45 |
# +
|
|
|
93 |
)
|
94 |
.mutate(bucket = case)
|
95 |
.select(categorical_columns)
|
96 |
+
.rename(manager_name_id = "Mang_Name",
|
97 |
+
manager_type_id = "Mang_Type",
|
98 |
+
designation_type_id = "Des_Tp",
|
99 |
+
public_access_id = "Pub_Access",
|
100 |
+
category = "FeatClass",
|
101 |
+
iucn_code = "IUCN_Cat",
|
102 |
+
gap_code = "GAP_Sts",
|
103 |
+
state = "State_Nm",
|
104 |
+
easement_holder = "EsmtHldr",
|
105 |
+
date_established = "Date_Est",
|
106 |
+
area_name = "Unit_Nm")
|
107 |
+
.left_join(agency_name, "manager_name_id")
|
108 |
+
.left_join(agency_type, "manager_type_id")
|
109 |
+
.left_join(desig_type, "designation_type_id")
|
110 |
+
.left_join(public_access, "public_access_id")
|
111 |
+
.left_join(state_name, "state")
|
112 |
+
.left_join(iucn, "iucn_code")
|
113 |
+
.select(~s.contains("_right"))
|
114 |
)
|
115 |
|
116 |
pad_grouping.to_parquet("pad-groupings.parquet")
|
117 |
# -
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
(pad_parquet
|
120 |
.rename(manager_name_id = "Mang_Name",
|
121 |
manager_type_id = "Mang_Type",
|