feat: extract dataset manipulation from map features
- src/dataset/download.py +77 -0
- src/dataset/requests.py +0 -0
- src/maps/obs_map.py +3 -67
- src/pages/2_π_map.py +1 -10
- src/pages/5_π€_requests.py +10 -1
src/dataset/download.py
ADDED
@@ -0,0 +1,77 @@
+import streamlit as st
+import time
+import logging
+import pandas as pd
+from datasets import load_dataset
+from datasets import DatasetDict, Dataset
+
+############################################################
+# the dataset of observations (hf dataset in our space)
+dataset_id = "Saving-Willy/temp_dataset"
+data_files = "data/train-00000-of-00001.parquet"
+############################################################
+
+m_logger = logging.getLogger(__name__)
+# we can set the log level locally for funcs in this module
+#g_m_logger.setLevel(logging.DEBUG)
+m_logger.setLevel(logging.INFO)
+
+presentation_data_schema = {
+    'lat': 'float',
+    'lon': 'float',
+    'species': 'str',
+}
+
+def try_download_dataset(dataset_id:str, data_files:str) -> dict:
+    """
+    Attempts to download a dataset from Hugging Face, catching any errors that occur.
+
+    Args:
+        dataset_id (str): The ID of the dataset to download.
+        data_files (str): The data files associated with the dataset.
+    Returns:
+        dict: A dictionary containing the dataset metadata if the download is successful,
+        or an empty dictionary if an error occurs.
+
+    """
+
+    m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
+    t1 = time.time()
+    try:
+        metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
+        t2 = time.time(); elap = t2 - t1
+    except ValueError as e:
+        t2 = time.time(); elap = t2 - t1
+        msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)."
+        st.error(msg)
+        m_logger.error(msg)
+        metadata = {}
+    except Exception as e:
+        # catch all (other) exceptions and log them, handle them once isolated
+        t2 = time.time(); elap = t2 - t1
+        msg = f"!!Unknown Error!! downloading dataset: {e}. (after {elap:.2f}s)."
+        st.error(msg)
+        m_logger.error(msg)
+        metadata = {}
+
+
+    msg = f"Downloaded dataset: (after {elap:.2f}s). "
+    m_logger.info(msg)
+    st.write(msg)
+    return metadata
+
+def get_dataset():
+    # load/download data from huggingface dataset
+    metadata = try_download_dataset(dataset_id, data_files)
+
+    if not metadata:
+        # create an empty, but compliant dataframe
+        df = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
+    else:
+        # make a pandas df that is compliant with folium/streamlit maps
+        df = pd.DataFrame({
+            'lat': metadata["train"]["latitude"],
+            'lon': metadata["train"]["longitude"],
+            'species': metadata["train"]["selected_class"],}
+        )
+    return df
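A minimal usage sketch for the new module (not part of this commit): assuming the app runs with src/ on the import path, as the obs_map.py change below does, a page can fetch plot-ready observations in one call; the st.map call here is illustrative only.

import streamlit as st
from dataset.download import get_dataset

# get_dataset() returns a dataframe with 'lat', 'lon' and 'species' columns;
# on a failed download it falls back to an empty but schema-compliant
# dataframe, so callers can render without special-casing errors
df = get_dataset()
st.map(df)  # st.map picks up the 'lat'/'lon' columns by default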
src/dataset/requests.py
ADDED
File without changes
src/maps/obs_map.py
CHANGED
@@ -1,18 +1,13 @@
 from typing import Tuple
 import logging
 
-import pandas as pd
-from datasets import load_dataset
-from datasets import DatasetDict, Dataset
-
-import time
-
 import streamlit as st
 import folium
 from streamlit_folium import st_folium
 
 import whale_viewer as viewer
 from utils.fix_tabrender import js_show_zeroheight_iframe
+from dataset.download import get_dataset
 
 m_logger = logging.getLogger(__name__)
 # we can set the log level locally for funcs in this module
@@ -66,13 +61,6 @@ _colors = [
 
 whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
 
-presentation_data_schema = {
-    'lat': 'float',
-    'lon': 'float',
-    'species': 'str',
-}
-
-
 def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     """
     Create a folium map with the specified tile layer
@@ -124,48 +112,8 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     #folium.LayerControl().add_to(m)
     return m
 
-def try_download_dataset(dataset_id:str, data_files:str) -> dict:
-    """
-    Attempts to download a dataset from Hugging Face, catching any errors that occur.
-
-    Args:
-        dataset_id (str): The ID of the dataset to download.
-        data_files (str): The data files associated with the dataset.
-    Returns:
-        dict: A dictionary containing the dataset metadata if the download is successful,
-        or an empty dictionary if an error occurs.
-
-    """
-
-    m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
-    t1 = time.time()
-    try:
-        metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
-        t2 = time.time(); elap = t2 - t1
-    except ValueError as e:
-        t2 = time.time(); elap = t2 - t1
-        msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)."
-        st.error(msg)
-        m_logger.error(msg)
-        metadata = {}
-    except Exception as e:
-        # catch all (other) exceptions and log them, handle them once isolated
-        t2 = time.time(); elap = t2 - t1
-        msg = f"!!Unknown Error!! downloading dataset: {e}. (after {elap:.2f}s)."
-        st.error(msg)
-        m_logger.error(msg)
-        metadata = {}
-
-
-    msg = f"Downloaded dataset: (after {elap:.2f}s). "
-    m_logger.info(msg)
-    st.write(msg)
-    return metadata
-
-
-def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
-                    data_files:str = "data/train-00000-of-00001.parquet",
-                    dbg_show_extra:bool = False) -> dict:
+
+def present_obs_map(dbg_show_extra:bool = False) -> dict:
     """
     Render map plus tile selector, with markers for whale observations
 
@@ -186,19 +134,7 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
 
     """
 
-
-    metadata = try_download_dataset(dataset_id, data_files)
-
-    if not metadata:
-        # create an empty, but compliant dataframe
-        _df = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
-    else:
-        # make a pandas df that is compliant with folium/streamlit maps
-        _df = pd.DataFrame({
-            'lat': metadata["train"]["latitude"],
-            'lon': metadata["train"]["longitude"],
-            'species': metadata["train"]["selected_class"],}
-        )
+    _df = get_dataset()
 
     if dbg_show_extra:
         # add a few samples to visualise colours
src/pages/2_π_map.py
CHANGED
@@ -6,8 +6,6 @@ st.set_page_config(
     layout="wide",
 )
 
-from utils.st_logs import parse_log_buffer, init_logging_session_states
-
 from maps.obs_map import add_obs_map_header
 from maps.alps_map import present_alps_map
 from maps.obs_map import present_obs_map
@@ -16,10 +14,6 @@ from datasets import disable_caching
 disable_caching()
 
 ############################################################
-# TO- DO: MAKE ENV FILE
-# the dataset of observations (hf dataset in our space)
-dataset_id = "Saving-Willy/temp_dataset"
-data_files = "data/train-00000-of-00001.parquet"
 USE_BASIC_MAP = False
 DEV_SIDEBAR_LIB = True
 ############################################################
@@ -35,10 +29,7 @@ with tab_map_ui_cols[1]:
 
 if show_db_points:
     # show a nicer map, observations marked, tileset selectable.
-    st_observation = present_obs_map(
-        dataset_id=dataset_id, data_files=data_files,
-        dbg_show_extra=dbg_show_extra)
-
+    st_observation = present_obs_map(dbg_show_extra=dbg_show_extra)
 else:
     # development map.
     st_observation = present_alps_map()
src/pages/5_π€_requests.py
CHANGED
@@ -5,4 +5,13 @@ st.set_page_config(
     page_icon="π€",
 )
 
-from utils.st_logs import parse_log_buffer, init_logging_session_states
+from utils.st_logs import parse_log_buffer, init_logging_session_states
+
+from datasets import disable_caching
+disable_caching()
+
+############################################################
+# the dataset of observations (hf dataset in our space)
+dataset_id = "Saving-Willy/temp_dataset"
+data_files = "data/train-00000-of-00001.parquet"
+############################################################