dropbop committed on
Commit
d82e8e5
·
verified ·
1 Parent(s): 465c443

Create earthview.py

Browse files

Copy of https://huggingface.co/spaces/satellogic/EarthView-Viewer/blob/main/earthview.py

Files changed (1) hide show
  1. earthview.py +173 -0
earthview.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset as _load_dataset
2
+ from os import environ
3
+ from PIL import Image
4
+ import numpy as np
5
+ import json
6
+
7
+ from pyarrow.parquet import ParquetFile
8
+ from pyarrow import Table as pa_Table
9
+ from datasets import Dataset
10
+
11
# Hub repository that hosts every EarthView subset.
DATASET = "satellogic/EarthView"

# Per-subset settings, keyed by subset name.
#   "shards": shard count returned by get_nshards() and used as the
#             "-of-NNNNN" total when shard file names are built.
#   "config": optional; get_config() falls back to the subset name.
#   "path":   optional; get_path() falls back to the subset name.
sets = {
    "satellogic": {"shards": 7863},
    "sentinel_1": {"shards": 1763},
    "neon": {"config": "default", "shards": 607, "path": "data"},
    "sentinel_2": {"shards": 19997},
}
29
+
30
def get_subsets():
    """Return a view of the subset names registered in ``sets``."""
    registry = sets
    return registry.keys()
32
+
33
def get_nshards(subset):
    """Return the ``"shards"`` value recorded for *subset* in ``sets``.

    Raises ``KeyError`` when *subset* is not a key of ``sets``.
    """
    entry = sets[subset]
    return entry["shards"]
35
+
36
def get_path(subset):
    """Return the ``"path"`` override for *subset*, or *subset* itself if none is set.

    Raises ``KeyError`` when *subset* is not a key of ``sets``.
    """
    entry = sets[subset]
    if "path" in entry:
        return entry["path"]
    return subset
38
+
39
def get_config(subset):
    """Return the ``"config"`` override for *subset*, or *subset* itself if none is set.

    Raises ``KeyError`` when *subset* is not a key of ``sets``.
    """
    entry = sets[subset]
    if "config" in entry:
        return entry["config"]
    return subset
41
+
42
def load_dataset(subset, dataset="satellogic/EarthView", split="train", shards = None, streaming=True, **kwargs):
    """
    Load one EarthView subset through ``datasets.load_dataset``.

    subset: key of ``sets`` selecting the subset
    dataset: repository name forwarded as ``path``
    split: split name; also used as the prefix of the shard file names
    shards: optional iterable of shard indices; when given, only the matching
            parquet files are requested, otherwise ``data_files`` is None
    streaming: forwarded unchanged
    kwargs: any extra keyword arguments, forwarded unchanged
    returns whatever ``datasets.load_dataset`` returns
    """
    config = get_config(subset)
    nshards = get_nshards(subset)
    path = get_path(subset)

    if shards is None:
        data_files = None
    elif subset == "sentinel_2":
        # sentinel_2 file names are built as ten shards per "sentinel_2-<n>" folder.
        data_files = {
            split: [
                f"{path}/sentinel_2-{shard//10}/{split}-{shard % 10:05d}-of-00010.parquet"
                for shard in shards
            ]
        }
    else:
        data_files = {
            split: [
                f"{path}/{split}-{shard:05d}-of-{nshards:05d}.parquet"
                for shard in shards
            ]
        }

    # The access token is taken from the HF_TOKEN environment variable, if set.
    return _load_dataset(
        path=dataset,
        name=config,
        save_infos=True,
        split=split,
        data_files=data_files,
        streaming=streaming,
        token=environ.get("HF_TOKEN", None),
        **kwargs)
66
+
67
def load_parquet(subset_or_filename, batch_size=100):
    """
    Read a parquet file into a ``datasets.Dataset``.

    subset_or_filename: either a subset name (a key of ``sets``), in which case
                        "dataset/<subset>/sample.parquet" is read, or anything
                        else, which is handed to ``ParquetFile`` as-is
    batch_size: number of rows per record batch while reading
    returns a Dataset built from all the batches of the file
    """
    if subset_or_filename in get_subsets():
        filename = f"dataset/{subset_or_filename}/sample.parquet"
    else:
        filename = subset_or_filename

    # Read every batch while the file is open, then release the handle
    # deterministically instead of leaving it to garbage collection.
    with ParquetFile(filename) as pqfile:
        batches = pqfile.iter_batches(batch_size=batch_size)
        table = pa_Table.from_batches(batches)
    return Dataset(table)
76
+
77
def item_to_images(subset, item):
    """
    Converts the images within an item (arrays), as retrieved from the dataset, to proper PIL.Image
    subset: The name of the Subset, one of "satellogic", "sentinel_1", "sentinel_2", "neon"
    item: The item as retrieved from the subset. Its "metadata" entry may be a dict or a JSON string
    returns a new item, with arrays converted to lists of PIL.Image and with a "metadata" dict
            whose "bounds" pairs are swapped, "epsg" is set to "EPSG:4326" and "count" holds
            the number of images. The metadata passed in by the caller is left untouched.
    raises ValueError if subset is not one of the names listed above
    """
    metadata = item["metadata"]
    if isinstance(metadata, str):
        metadata = json.loads(metadata)
    else:
        # Work on a copy: the top-level keys rewritten below must not leak
        # back into the caller's metadata (a second call would otherwise
        # swap the bounds back and re-parse already parsed fields).
        metadata = dict(metadata)

    # Every non-metadata entry is treated as image data and cast to uint8.
    item = {
        key: np.asarray(value).astype("uint8")
        for key, value in item.items()
        if key != "metadata"
    }
    item["metadata"] = metadata

    if subset == "satellogic":
        # Arrays are indexed as (image, channel, row, column); PIL wants channels last.
        item["rgb"] = [
            Image.fromarray(rgb.transpose(1, 2, 0))
            for rgb in item["rgb"]
        ]
        item["1m"] = [
            Image.fromarray(image[0, :, :])
            for image in item["1m"]
        ]
        count = len(item["1m"])
    elif subset == "sentinel_1":
        # Mapping of V and H to RGB. May not be correct
        # https://gis.stackexchange.com/questions/400726/creating-composite-rgb-images-from-sentinel-1-channels
        i10m = item["10m"]
        # Third channel: ratio of the first to the second channel, scaled by 256.
        ratio = np.expand_dims(
            i10m[:, 0, :, :] / (i10m[:, 1, :, :] + 0.01) * 256,
            1
        ).astype("uint8")
        i10m = np.concatenate((i10m, ratio), 1)
        item["10m"] = [
            Image.fromarray(image.transpose(1, 2, 0))
            for image in i10m
        ]
        count = len(item["10m"])
    elif subset == "sentinel_2":
        for channel in ["10m", "20m", "rgb", "scl"]:  # , "40m"]:
            data = item[channel]
            count = len(data)
            # Already uint8 from the conversion above; only move channels last.
            data = data.transpose(0, 2, 3, 1)
            if channel == "20m":
                # Keep three of the bands so the result can be shown as RGB.
                data = data[:, :, :, [0, 2, 4]]
            mode = "L" if channel in ["10m", "scl"] else "RGB"
            item[channel] = [
                Image.fromarray(frame.squeeze(), mode=mode)
                for frame in data
            ]
        # These metadata fields hold one JSON string per image.
        for field in ["solarAngles", "tileGeometry", "viewIncidenceAngles"]:
            item["metadata"][field] = [json.loads(s) for s in item["metadata"][field]]
    elif subset == "neon":
        item["rgb"] = [
            Image.fromarray(image.transpose(1, 2, 0))
            for image in item["rgb"]
        ]
        item["chm"] = [
            Image.fromarray(image[0])
            for image in item["chm"]
        ]

        # The next is a very arbitrary conversion from the 369 hyperspectral data to RGB
        # It just averages each 1/3 of the bands and assigns it to a channel
        item["1m"] = [
            Image.fromarray(
                np.concatenate((
                    np.expand_dims(np.average(image[:124], 0), 2),
                    np.expand_dims(np.average(image[124:247], 0), 2),
                    np.expand_dims(np.average(image[247:], 0), 2),
                ), 2).astype("uint8"))
            for image in item["1m"]
        ]
        count = len(item["rgb"])
    else:
        # Without this, the function would fail further down with an
        # UnboundLocalError on `count`, which hides the real cause.
        raise ValueError(f"Unknown subset: {subset!r}")

    bounds = item["metadata"]["bounds"]

    # swap pairs: [a, b, c, d] -> [b, a, d, c]
    item["metadata"]["bounds"] = [
        bounds[start + 1 - offset]
        for start in range(0, len(bounds), 2)
        for offset in range(2)
    ]

    # fix CRS
    item["metadata"]["epsg"] = "EPSG:4326"

    item["metadata"]["count"] = count
    return item