rmm committed on
Commit
aba889f
·
1 Parent(s): 821ac40

fix: catch failing external resource: Hugging Face dataset

Browse files

- This version includes a data mock, since downstream processing
assumes the download succeeded. One step at a time.

Files changed (1) hide show
  1. src/maps/obs_map.py +32 -1
src/maps/obs_map.py CHANGED
@@ -3,6 +3,10 @@ import logging
3
 
4
  import pandas as pd
5
  from datasets import load_dataset
 
 
 
 
6
  import streamlit as st
7
  import folium
8
  from streamlit_folium import st_folium
@@ -113,6 +117,33 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
113
  #folium.LayerControl().add_to(m)
114
  return m
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
 
118
  def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
@@ -139,7 +170,7 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
139
  """
140
 
141
  # load/download data from huggingface dataset
142
- metadata = load_dataset(dataset_id, data_files=data_files)
143
 
144
  # make a pandas df that is compliant with folium/streamlit maps
145
  _df = pd.DataFrame({
 
3
 
4
  import pandas as pd
5
  from datasets import load_dataset
6
+ from datasets import DatasetDict, Dataset
7
+
8
+ import time
9
+
10
  import streamlit as st
11
  import folium
12
  from streamlit_folium import st_folium
 
117
  #folium.LayerControl().add_to(m)
118
  return m
119
 
120
def try_download_dataset(dataset_id:str, data_files:str, mockdata_on_failure:bool=False) -> dict:
    """
    Download a dataset from Hugging Face, reporting a failure instead of raising.

    Any exception raised by `load_dataset` is caught, shown in the UI with
    `st.error` and logged, so a failing external resource does not interrupt
    the caller.

    Args:
        dataset_id (str): dataset identifier passed to `load_dataset`.
        data_files (str): forwarded as the `data_files` argument of `load_dataset`.
        mockdata_on_failure (bool): if True, a failed download returns a minimal
            one-row stand-in with a 'train' entry holding 'latitude',
            'longitude' and 'predicted_class'; if False, a failed download
            returns an empty dict. Defaults to False.

    Returns:
        dict: the object returned by `load_dataset` on success; otherwise the
            mock data or an empty dict, as selected by `mockdata_on_failure`.
    """
    t1 = time.time()
    m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
    try:
        metadata = load_dataset(dataset_id, data_files=data_files)
    except Exception as e:
        elap = time.time() - t1
        msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)"
        # only announce the fallback when it is actually applied
        if mockdata_on_failure:
            msg += " Using mock data to continue"
        st.error(msg)
        m_logger.error(msg)

        # return here so the success message below is never emitted on failure
        if not mockdata_on_failure:
            return {}
        return {
            'train': {
                'latitude': [0],
                'longitude': [0],
                'predicted_class': ['rough_toothed_dolphin'],
            }
        }

    elap = time.time() - t1
    msg = f"Downloaded dataset: (after {elap:.2f}s). "
    m_logger.info(msg)
    st.write(msg)
    return metadata
147
 
148
 
149
  def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
 
170
  """
171
 
172
  # load/download data from huggingface dataset
173
+ metadata = try_download_dataset(dataset_id, data_files, mockdata_on_failure=True)
174
 
175
  # make a pandas df that is compliant with folium/streamlit maps
176
  _df = pd.DataFrame({