preprocess.py (ADDED, +122 -0)
import ibis
from ibis import _
import rioxarray
from shapely.geometry import box
import fiona

# +
# read crs, ideally we could do this with st_read_meta() in ibis+duckdb
fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
v = fiona.open(fgb)
crs = v.crs
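
# As the comment above notes, newer DuckDB spatial builds expose ST_Read_Meta, which
# could replace the fiona round-trip. A rough, untested sketch (assumes the function is
# available in your duckdb/spatial versions; left commented out on purpose):
# import duckdb
# meta_con = duckdb.connect()
# meta_con.install_extension("spatial")
# meta_con.load_extension("spatial")
# meta_con.sql(f"SELECT * FROM st_read_meta('{fgb}')").show()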

# extract bounds. (in this case these are already in the same projection, so r.rio.bounds() would work)
cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
r = rioxarray.open_rasterio(cog)
bounds = box(*r.rio.transform_bounds(crs))


# +
# import leafmap
# leafmap.cog_validate(cog)

# +
con = ibis.duckdb.connect()

# We could just read the flatgeobuf with read_geo,
# but it is not as fast as working with the (Geo)Parquet:
# pad = con.read_geo(fgb)
# -

# Unfortunately, ibis doesn't detect that this is GeoParquet. We need a SQL escape-hatch
# to cast the WKB geometry column to a geometry type.
parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
con.load_extension("spatial")
con.raw_sql(f"CREATE VIEW pad AS SELECT *, st_geomfromwkb(geometry) AS geom FROM read_parquet('{parquet}')")
pad = con.table("pad")
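
# Depending on the ibis version, the same thing may work without creating a named view,
# via the backend's .sql() escape hatch (untested sketch):
# pad = con.sql(
#     f"SELECT *, st_geomfromwkb(geometry) AS geom FROM read_parquet('{parquet}')"
# )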

pad.columns

# +
# Now we can do all the usual SQL-style queries to subset the data.
# Note the `geom.within()` spatial filter!

focal_columns = ["bucket", "FeatClass", "Mang_Name", "Mang_Type", "Des_Tp",
                 "Pub_Access", "GAP_Sts", "IUCN_Cat", "Unit_Nm", "geom"]
public = ["DIST", "LOC", "FED", "STAT", "JNT"]

# Label each polygon with a coarse ownership/protection "bucket"
# (GAP status 1-2 indicates stricter biodiversity protection than 3-4).
case = (
    ibis.case()
    .when(_.Mang_Type.isin(public) & (_.GAP_Sts <= 2), "public")
    .when(_.Mang_Type.isin(public) & (_.GAP_Sts > 2), "mixed")
    .when(_.Mang_Type.isin(["PVT", "NGO"]), "private")
    .when(_.Mang_Type == "TRIB", "tribal")
    .end()
)

pad_labeled = (
    pad
    .filter((_.FeatClass.isin(["Easement", "Fee"])) |
            ((_.FeatClass == "Proclamation") & (_.Mang_Name == "TRIB")))
    .filter(_.Mang_Type.notin(["UNK", "TERR"]))
    .filter(_.geom.within(bounds))
    .mutate(GAP_Sts=_.GAP_Sts.cast("int"))
    .mutate(bucket=case)
    .select(focal_columns)
    .mutate(row_n=ibis.row_number())
)
# -

pad_labeled.filter(_.row_n < 10).to_pandas()
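
# A quick sanity check one might add here, mirroring the State_Nm test below (sketch):
# how many rows fall into each bucket.
# pad_labeled.group_by(_.bucket).aggregate(n=_.count()).to_pandas()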

# +
# # %%time
# testing -- only the lower 48 states!
# (pad.filter(_.geom.within(bounds)).group_by([_.State_Nm]).aggregate(n = _.count()).to_pandas())
# -

# Pull a first chunk of rows into pandas/geopandas.
start = 0
end = 10000
df = pad_labeled.filter([_.row_n > start, _.row_n <= end]).to_pandas()
# from_wkb(df.geometry)

import geopandas
geo = geopandas.GeoDataFrame(
    df, geometry=df.geometry, crs=crs
)
geo.shape
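
# Note (assumption, depends on the ibis/duckdb versions in play): the geometry column
# selected above is named `geom`, and to_pandas() may return it either as shapely
# geometries or as raw WKB bytes. If it arrives as WKB, it would need an explicit
# decode before building the GeoDataFrame, roughly:
# geo = geopandas.GeoDataFrame(df, geometry=geopandas.GeoSeries.from_wkb(df.geom), crs=crs)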

# +
# geo.geometry.values
# -

# Clip the richness COG to the selected polygons, streaming via GDAL's /vsicurl.
raster = (
    rioxarray
    .open_rasterio("/vsicurl/" + cog, masked=True)
    .rio.clip(geo.geometry.values, crs, from_disk=True)
    .sel(band=1)
    .drop("band")
)
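
# Optional quick visual check of the clipped raster:
# raster.plot()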

# +
# Zonal statistics, following:
# https://corteva.github.io/geocube/html/examples/zonal_statistics.html
from geocube.api.core import make_geocube
import xarray

# Rasterize the polygon ids (row_n) onto the same grid as the richness raster.
out_grid = make_geocube(
    vector_data=geo,
    measurements=["row_n"],
    like=raster,  # ensure the data are on the same grid
)

# merge the two together
out_grid["richness"] = (raster.dims, raster.values, raster.attrs, raster.encoding)
# -

# Mean richness within each polygon, grouped by the rasterized row_n id.
grouped_raster = out_grid.drop("spatial_ref").groupby(out_grid.row_n)
grid_mean = grouped_raster.mean().rename({"richness": "richness_mean"})
zonal_stats = xarray.merge([grid_mean]).to_dataframe()
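
# The geocube example linked above computes additional summaries the same way;
# they could be added here if needed (sketch, not run):
# grid_min = grouped_raster.min().rename({"richness": "richness_min"})
# grid_max = grouped_raster.max().rename({"richness": "richness_max"})
# zonal_stats = xarray.merge([grid_mean, grid_min, grid_max]).to_dataframe()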

# Join the zonal means back onto the polygons and plot.
geo = geo.merge(zonal_stats, how="left", on="row_n")

geo.plot(column="richness_mean", legend=True)