Spaces:
Sleeping
Sleeping
rmm
committed on
Commit
·
aba889f
1
Parent(s):
821ac40
fix: catch failing external resource: huggingface dataset
Browse files- this version includes a data mock, since downstream processing
assumes everything worked. one step at a time
- src/maps/obs_map.py +32 -1
src/maps/obs_map.py
CHANGED
@@ -3,6 +3,10 @@ import logging
|
|
3 |
|
4 |
import pandas as pd
|
5 |
from datasets import load_dataset
|
|
|
|
|
|
|
|
|
6 |
import streamlit as st
|
7 |
import folium
|
8 |
from streamlit_folium import st_folium
|
@@ -113,6 +117,33 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
|
|
113 |
#folium.LayerControl().add_to(m)
|
114 |
return m
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
|
118 |
def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
|
@@ -139,7 +170,7 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
|
|
139 |
"""
|
140 |
|
141 |
# load/download data from huggingface dataset
|
142 |
-
metadata =
|
143 |
|
144 |
# make a pandas df that is compliant with folium/streamlit maps
|
145 |
_df = pd.DataFrame({
|
|
|
3 |
|
4 |
import pandas as pd
|
5 |
from datasets import load_dataset
|
6 |
+
from datasets import DatasetDict, Dataset
|
7 |
+
|
8 |
+
import time
|
9 |
+
|
10 |
import streamlit as st
|
11 |
import folium
|
12 |
from streamlit_folium import st_folium
|
|
|
117 |
#folium.LayerControl().add_to(m)
|
118 |
return m
|
119 |
|
120 |
+
def try_download_dataset(dataset_id:str, data_files:str, mockdata_on_failure:bool=False) -> dict:
    """Download a Hugging Face dataset, tolerating a failing external resource.

    The download is attempted via `load_dataset`. Any exception is reported
    to the Streamlit page and to the module logger instead of propagating,
    so the caller can keep rendering.

    Args:
        dataset_id: dataset identifier handed to `load_dataset`.
        data_files: forwarded as the `data_files` argument of `load_dataset`.
        mockdata_on_failure: when True, a failed download yields a minimal
            one-row stand-in (keys 'latitude', 'longitude' and
            'predicted_class' under 'train'); when False, a failed download
            yields an empty dict.

    Returns:
        The object returned by `load_dataset` on success; otherwise the
        mock dict or an empty dict, as selected by `mockdata_on_failure`.
    """
    t1 = time.time()
    try:
        m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
        metadata = load_dataset(dataset_id, data_files=data_files)
    except Exception as e:
        # Deliberately broad: this is the boundary to an external service and
        # the page should survive whatever goes wrong there.
        elap = time.time() - t1
        msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)"
        if mockdata_on_failure:
            # only announce the fallback when it is actually used
            msg += " Using mock data to continue"
            metadata = {'train':
                        {'latitude': [0],
                         'longitude': [0],
                         'predicted_class': ['rough_toothed_dolphin']}
                        }
        else:
            metadata = {}
        st.error(msg)
        m_logger.error(msg)
        # return here so the success message below is never emitted on failure
        return metadata

    elap = time.time() - t1
    msg = f"Downloaded dataset: (after {elap:.2f}s). "
    m_logger.info(msg)
    st.write(msg)
    return metadata
|
147 |
|
148 |
|
149 |
def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
|
|
|
170 |
"""
|
171 |
|
172 |
# load/download data from huggingface dataset
|
173 |
+
metadata = try_download_dataset(dataset_id, data_files, mockdata_on_failure=True)
|
174 |
|
175 |
# make a pandas df that is compliant with folium/streamlit maps
|
176 |
_df = pd.DataFrame({
|