Create earthview.py
Copy of https://huggingface.co/spaces/satellogic/EarthView-Viewer/blob/main/earthview.py
earthview.py ADDED (+173 -0)
@@ -0,0 +1,173 @@
from datasets import load_dataset as _load_dataset
from os import environ
from PIL import Image
import numpy as np
import json

from pyarrow.parquet import ParquetFile
from pyarrow import Table as pa_Table
from datasets import Dataset

DATASET = "satellogic/EarthView"

# Per-subset shard counts, plus optional overrides for the dataset config name
# and the path prefix of the parquet files inside the repository.
sets = {
    "satellogic": {
        "shards": 7863,
    },
    "sentinel_1": {
        "shards": 1763,
    },
    "neon": {
        "config": "default",
        "shards": 607,
        "path": "data",
    },
    "sentinel_2": {
        "shards": 19997,
    },
}

def get_subsets():
    return sets.keys()

def get_nshards(subset):
    return sets[subset]["shards"]

def get_path(subset):
    return sets[subset].get("path", subset)

def get_config(subset):
    return sets[subset].get("config", subset)

def load_dataset(subset, dataset="satellogic/EarthView", split="train", shards=None, streaming=True, **kwargs):
    config = get_config(subset)
    nshards = get_nshards(subset)
    path = get_path(subset)
    if shards is None:
        data_files = None
    else:
        # sentinel_2 is split into groups of 10 parquet files; the other subsets use a flat layout
        if subset == "sentinel_2":
            data_files = [f"{path}/sentinel_2-{shard//10}/{split}-{shard % 10:05d}-of-00010.parquet" for shard in shards]
        else:
            data_files = [f"{path}/{split}-{shard:05d}-of-{nshards:05d}.parquet" for shard in shards]
        data_files = {split: data_files}

    ds = _load_dataset(
        path=dataset,
        name=config,
        save_infos=True,
        split=split,
        data_files=data_files,
        streaming=streaming,
        token=environ.get("HF_TOKEN", None),
        **kwargs)

    return ds

def load_parquet(subset_or_filename, batch_size=100):
    if subset_or_filename in get_subsets():
        filename = f"dataset/{subset_or_filename}/sample.parquet"
    else:
        filename = subset_or_filename

    pqfile = ParquetFile(filename)
    batch = pqfile.iter_batches(batch_size=batch_size)
    return Dataset(pa_Table.from_batches(batch))

def item_to_images(subset, item):
    """
    Converts the images within an item (arrays), as retrieved from the dataset, to proper PIL.Image
    subset: The name of the subset, one of "satellogic", "sentinel_1", "sentinel_2", "neon"
    item:   The item as retrieved from the subset
    returns the item, with arrays converted to PIL.Image
    """
    metadata = item["metadata"]
    if type(metadata) == str:
        metadata = json.loads(metadata)

    item = {
        k: np.asarray(v).astype("uint8")
        for k, v in item.items()
        if k != "metadata"
    }
    item["metadata"] = metadata

    if subset == "satellogic":
        # item["rgb"] = [
        #     Image.fromarray(np.average(image.transpose(1,2,0), 2).astype("uint8"))
        #     for image in item["rgb"]
        # ]
        rgbs = []
        for rgb in item["rgb"]:
            rgbs.append(Image.fromarray(rgb.transpose(1, 2, 0)))
            # rgbs.append(Image.fromarray(rgb[0,:,:]))  # Red
            # rgbs.append(Image.fromarray(rgb[1,:,:]))  # Green
            # rgbs.append(Image.fromarray(rgb[2,:,:]))  # Blue
        item["rgb"] = rgbs
        item["1m"] = [
            Image.fromarray(image[0, :, :])
            for image in item["1m"]
        ]
        count = len(item["1m"])
    elif subset == "sentinel_1":
        # Mapping of the V and H polarizations to RGB. May not be correct:
        # https://gis.stackexchange.com/questions/400726/creating-composite-rgb-images-from-sentinel-1-channels
        i10m = item["10m"]
        i10m = np.concatenate(
            (i10m,
             np.expand_dims(
                 i10m[:, 0, :, :] / (i10m[:, 1, :, :] + 0.01) * 256,
                 1
             ).astype("uint8")
             ),
            1
        )
        item["10m"] = [
            Image.fromarray(image.transpose(1, 2, 0))
            for image in i10m
        ]
        count = len(item["10m"])
    elif subset == "sentinel_2":
        for channel in ['10m', '20m', 'rgb', 'scl']:  # , '40m']:
            data = item[channel]
            count = len(data)
            data = np.asarray(data).astype("uint8").transpose(0, 2, 3, 1)
            if channel == "20m":
                data = data[:, :, :, [0, 2, 4]]
            mode = "L" if channel in ["10m", "scl"] else "RGB"
            images = [Image.fromarray(data[i].squeeze(), mode=mode) for i in range(count)]
            item[channel] = images
        for field in ["solarAngles", "tileGeometry", "viewIncidenceAngles"]:
            item["metadata"][field] = [json.loads(s) for s in item["metadata"][field]]
    elif subset == "neon":
        item["rgb"] = [
            Image.fromarray(image.transpose(1, 2, 0))
            for image in item["rgb"]
        ]
        item["chm"] = [
            Image.fromarray(image[0])
            for image in item["chm"]
        ]

        # The next is a very arbitrary conversion of the 369-band hyperspectral data to RGB.
        # It just averages each third of the bands and assigns it to a channel.
        item["1m"] = [
            Image.fromarray(
                np.concatenate((
                    np.expand_dims(np.average(image[:124], 0), 2),
                    np.expand_dims(np.average(image[124:247], 0), 2),
                    np.expand_dims(np.average(image[247:], 0), 2)),
                    2).astype("uint8"))
            for image in item["1m"]
        ]
        count = len(item["rgb"])
        bounds = item["metadata"]["bounds"]

        # swap pairs
        item["metadata"]["bounds"] = [bounds[i + 1 - l] for i in range(0, len(bounds), 2) for l in range(2)]

        # fix CRS
        item["metadata"]["epsg"] = "EPSG:4326"

    item["metadata"]["count"] = count
    return item
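
For orientation, a minimal usage sketch (not part of the file above) could look like the following. It assumes the module is importable as earthview, that an HF_TOKEN environment variable grants access to the satellogic/EarthView dataset, and that the shard numbers are arbitrary examples.

import earthview as ev

# Stream two example shards of the "satellogic" subset instead of downloading everything.
ds = ev.load_dataset("satellogic", shards=[10, 11], streaming=True)

# Take one record and convert its raw arrays into PIL images.
item = next(iter(ds))
item = ev.item_to_images("satellogic", item)

print(item["metadata"]["count"])   # number of captures in this item
item["rgb"][0].save("rgb_0.png")   # save the first RGB capture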