rmm committed
Commit abf9c5d · 1 Parent(s): 3920cb1

fix: added broad exception handling branch too, cleanup


- catching the known ValueError, but also arbitrary errors from the download
  (the internal function is quite complex, so many unknown errors could potentially be raised)
- added refined test cases for the ValueError and for an arbitrary error
- cleaned up the obs_map code a little

Files changed (2)
  1. src/maps/obs_map.py +29 -18
  2. tests/test_obs_map.py +20 -1
src/maps/obs_map.py CHANGED
@@ -66,6 +66,13 @@ _colors = [
 
 whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
 
+presentation_data_schema = {
+    'lat': 'float',
+    'lon': 'float',
+    'species': 'str',
+}
+
+
 def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     """
     Create a folium map with the specified tile layer
@@ -118,19 +125,37 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     return m
 
 def try_download_dataset(dataset_id:str, data_files:str) -> dict:
-    # the `mockdata_on_failure` generates a minimal compliant dataset if the download fails
-    # (one step at a time)
+    """
+    Attempts to download a dataset from Hugging Face, catching any errors that occur.
+
+    Args:
+        dataset_id (str): The ID of the dataset to download.
+        data_files (str): The data files associated with the dataset.
+    Returns:
+        dict: A dictionary containing the dataset metadata if the download is successful,
+        or an empty dictionary if an error occurs.
+
+    """
+
     m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
     t1 = time.time()
     try:
         metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
         t2 = time.time(); elap = t2 - t1
-    except Exception as e:
+    except ValueError as e:
         t2 = time.time(); elap = t2 - t1
         msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)."
         st.error(msg)
         m_logger.error(msg)
         metadata = {}
+    except Exception as e:
+        # catch all (other) exceptions and log them, handle them once isolated
+        t2 = time.time(); elap = t2 - t1
+        msg = f"!!Unknown Error!! downloading dataset: {e}. (after {elap:.2f}s)."
+        st.error(msg)
+        m_logger.error(msg)
+        metadata = {}
+
 
     msg = f"Downloaded dataset: (after {elap:.2f}s). "
     m_logger.info(msg)
@@ -161,26 +186,12 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
 
     """
 
-    metadata_schema = {
-        'train': {
-            'latitude': 'list',
-            'longitude': 'list',
-            'predicted_class': 'list',
-        }
-    }
-    presentation_data_schema = {
-        'lat': 'float',
-        'lon': 'float',
-        'species': 'str',
-    }
-
     # load/download data from huggingface dataset
     metadata = try_download_dataset(dataset_id, data_files)
 
     if not metadata:
         # create an empty, but compliant dataframe
-        df0 = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
-        _df = df0
+        _df = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
     else:
         # make a pandas df that is compliant with folium/streamlit maps
         _df = pd.DataFrame({
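As a minimal usage sketch of the behaviour this commit sets up (assuming the module-level presentation_data_schema added above; the data_files value and the exact 'train'-split column mapping are illustrative assumptions based on the metadata_schema the commit removes): try_download_dataset never raises, so a caller such as present_obs_map only needs to check for the empty dict and fall back to an empty but schema-compliant dataframe.

    import pandas as pd
    from maps.obs_map import try_download_dataset, presentation_data_schema

    # hypothetical data_files value, for illustration only
    metadata = try_download_dataset("Saving-Willy/Happywhale-kaggle", data_files="data.csv")

    if not metadata:
        # download failed (ValueError or any other exception): empty, schema-compliant frame
        _df = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
    else:
        # assumed mapping from the dataset's 'train' split to the presentation columns
        _df = pd.DataFrame({
            'lat': metadata['train']['latitude'],
            'lon': metadata['train']['longitude'],
            'species': metadata['train']['predicted_class'],
        })

Returning {} instead of re-raising keeps the Streamlit page rendering an (empty) map while the error is still logged and shown via st.error.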
tests/test_obs_map.py CHANGED
@@ -31,7 +31,8 @@ def test_try_download_dataset_success(mock_logger, mock_st, mock_load_dataset):
 @patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
 @patch('maps.obs_map.st')
 @patch('maps.obs_map.m_logger')
-def test_try_download_dataset_failure(mock_logger, mock_st, mock_load_dataset):
+def test_try_download_dataset_failure_known(mock_logger, mock_st, mock_load_dataset):
+    # testing the case where we've found (can reproduce by removing network connection)
     dataset_id = "test_dataset"
     data_files = "test_file"
     result = try_download_dataset(dataset_id, data_files)
@@ -44,3 +45,21 @@ def test_try_download_dataset_failure(mock_logger, mock_st, mock_load_dataset):
     assert result == {}
     mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
     mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
+
+@patch('maps.obs_map.load_dataset', side_effect=Exception("Download engine corrupt"))
+@patch('maps.obs_map.st')
+@patch('maps.obs_map.m_logger')
+def test_try_download_dataset_failure_unknown(mock_logger, mock_st, mock_load_dataset):
+    # the cases we haven't found, but should still be handled (maybe network error, etc)
+    dataset_id = "test_dataset"
+    data_files = "test_file"
+    result = try_download_dataset(dataset_id, data_files)
+
+    # Assertions
+    mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
+    mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
+    mock_logger.error.assert_called_with("!!Unknown Error!! downloading dataset: Download engine corrupt. (after 0.00s).")
+    mock_st.error.assert_called_with("!!Unknown Error!! downloading dataset: Download engine corrupt. (after 0.00s).")
+    assert result == {}
+    mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
+    mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
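Assuming the repository's tests are run with pytest (the test functions use unittest.mock patch decorators and plain asserts, which pytest collects directly), the two failure-path cases can be exercised in isolation with, for example:

    pytest tests/test_obs_map.py -k "try_download_dataset_failure" -v

Both tests check the same contract: the helper logs the error, surfaces it through st.error, and still returns an empty dict instead of raising.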