MarcSkovMadsen committed on
Commit
963ecfd
·
verified ·
1 Parent(s): 67b299f

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +46 -0
utils.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pathlib import Path
3
+ from fsspec.parquet import open_parquet_file
4
+ import pyarrow.parquet as pq
5
+ import pandas as pd
6
+ from io import BytesIO
7
+ from PIL import Image
8
+ import holoviews as hv
9
+
10
# Hugging Face organisation that hosts the datasets.
REPOSITORY = "Major-TOM"
# Dataset names available under REPOSITORY.
DATASETS = ['Core-S2L2A', 'Core-S2L1C']

# Local directory, next to this module, used to cache downloaded metadata.
DATA_PATH = Path(__file__).parent / "data"
14
+
15
+
16
def _meta_data_url(dataset='Core-S2L2A', repository=REPOSITORY):
    """Return the Hugging Face URL of the ``metadata.parquet`` file of a dataset.

    Args:
        dataset: Name of the dataset inside the repository.
        repository: Name of the Hugging Face organisation/user hosting the dataset.

    Returns:
        The ``resolve/main`` download URL as a string.
    """
    dataset_root = f'https://huggingface.co/datasets/{repository}/{dataset}'
    return f'{dataset_root}/resolve/main/metadata.parquet'
18
+
19
def _meta_data_path(dataset='Core-S2L2A', repository=REPOSITORY):
    """Return the local cache path of the metadata file of a dataset.

    The cache directory ``DATA_PATH`` is created if it does not exist yet.

    Args:
        dataset: Name of the dataset; it is used as prefix of the file name.
        repository: Accepted for signature symmetry; it is not part of the
            returned path.

    Returns:
        ``DATA_PATH / "<dataset>_metadata.parquet"``.
    """
    cache_dir = DATA_PATH
    cache_dir.mkdir(parents=True, exist_ok=True)
    file_name = f"{dataset}_metadata.parquet"
    return cache_dir / file_name
22
+
23
def get_meta_data(dataset='Core-S2L2A', repository=REPOSITORY):
    """Load the metadata table of a dataset, downloading it on first use.

    If the local cache file does not exist, the metadata is read from the
    Hugging Face URL and written to the cache. The table is then always read
    back from the cache file, so the first and later calls return data that
    went through the same parquet round trip.

    Args:
        dataset: Name of the dataset inside the repository.
        repository: Name of the Hugging Face organisation/user hosting the
            dataset. It is forwarded to the URL and path helpers (it used to
            be ignored, so the default repository was always queried).

    Returns:
        A ``pandas.DataFrame`` with the metadata plus two added columns,
        ``centre_easting`` and ``centre_northing``, computed from
        ``centre_lon`` and ``centre_lat``.
    """
    path = _meta_data_path(dataset=dataset, repository=repository)
    if not path.exists():
        # First use: fetch the remote file and persist it as the local cache.
        url = _meta_data_url(dataset=dataset, repository=repository)
        pd.read_parquet(url).to_parquet(path)
    data = pd.read_parquet(path)

    # Add projected coordinates derived from the lon/lat centre columns.
    easting, northing = hv.util.transform.lon_lat_to_easting_northing(
        data["centre_lon"], data["centre_lat"]
    )
    data["centre_easting"], data["centre_northing"] = easting, northing

    return data
33
+
34
def get_image(row):
    """Fetch the thumbnail referenced by a metadata row and open it as an image.

    Args:
        row: Mapping-like object providing ``"parquet_url"`` (location of the
            parquet file) and ``"parquet_row"`` (passed to ``read_row_group``
            as the row group index).

    Returns:
        The image returned by ``PIL.Image.open`` for the first ``thumbnail``
        value of the selected row group.
    """
    url = row["parquet_url"]
    group_index = row["parquet_row"]
    print(url)
    print(group_index)

    # Only the "thumbnail" column is requested from the remote file.
    with open_parquet_file(url, columns=["thumbnail"]) as remote_file:
        with pq.ParquetFile(remote_file) as parquet_file:
            row_group = parquet_file.read_row_group(group_index, columns=['thumbnail'])

    thumbnail_bytes = row_group['thumbnail'][0].as_py()
    return Image.open(BytesIO(thumbnail_bytes))
46
+