rmm committed on
Commit
d219dea
·
1 Parent(s): ba0dee1

fix: simplified try_download_dataset

Browse files

- removed assumptions about dataset format
- removed one argument (`mockdata_on_failure`); there is now only one main (success) pathway and one failing pathway

Files changed (2) hide show
  1. src/maps/obs_map.py +30 -19
  2. tests/test_obs_map.py +5 -22
src/maps/obs_map.py CHANGED
@@ -117,28 +117,20 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
117
  #folium.LayerControl().add_to(m)
118
  return m
119
 
120
- def try_download_dataset(dataset_id:str, data_files:str, mockdata_on_failure:bool=False) -> dict:
121
  # the `mockdata_on_failure` generates a minimal compliant dataset if the download fails
122
  # (one step at a time)
 
123
  t1 = time.time()
124
  try:
125
- m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
126
  metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
127
  t2 = time.time(); elap = t2 - t1
128
  except Exception as e:
129
  t2 = time.time(); elap = t2 - t1
130
- msg = f"Error downloading dataset: {e}. (after {elap:.2f}s) Using mock data to continue"
131
  st.error(msg)
132
  m_logger.error(msg)
133
- if mockdata_on_failure:
134
- metadata = {'train':
135
- {'latitude': [0],
136
- 'longitude': [0],
137
- 'predicted_class': ['rough_toothed_dolphin']}
138
- }
139
-
140
- else:
141
- metadata = {}
142
 
143
  msg = f"Downloaded dataset: (after {elap:.2f}s). "
144
  m_logger.info(msg)
@@ -169,15 +161,34 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
169
 
170
  """
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  # load/download data from huggingface dataset
173
- metadata = try_download_dataset(dataset_id, data_files, mockdata_on_failure=True)
174
 
175
- # make a pandas df that is compliant with folium/streamlit maps
176
- _df = pd.DataFrame({
177
- 'lat': metadata["train"]["latitude"],
178
- 'lon': metadata["train"]["longitude"],
179
- 'species': metadata["train"]["predicted_class"],}
180
- )
 
 
 
 
 
 
181
  if dbg_show_extra:
182
  # add a few samples to visualise colours
183
  _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}
 
117
  #folium.LayerControl().add_to(m)
118
  return m
119
 
120
+ def try_download_dataset(dataset_id:str, data_files:str) -> dict:
121
  # the `mockdata_on_failure` generates a minimal compliant dataset if the download fails
122
  # (one step at a time)
123
+ m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
124
  t1 = time.time()
125
  try:
 
126
  metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
127
  t2 = time.time(); elap = t2 - t1
128
  except Exception as e:
129
  t2 = time.time(); elap = t2 - t1
130
+ msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)."
131
  st.error(msg)
132
  m_logger.error(msg)
133
+ metadata = {}
 
 
 
 
 
 
 
 
134
 
135
  msg = f"Downloaded dataset: (after {elap:.2f}s). "
136
  m_logger.info(msg)
 
161
 
162
  """
163
 
164
+ metadata_schema = {
165
+ 'train': {
166
+ 'latitude': 'list',
167
+ 'longitude': 'list',
168
+ 'predicted_class': 'list',
169
+ }
170
+ }
171
+ presentation_data_schema = {
172
+ 'lat': 'float',
173
+ 'lon': 'float',
174
+ 'species': 'str',
175
+ }
176
+
177
  # load/download data from huggingface dataset
178
+ metadata = try_download_dataset(dataset_id, data_files)
179
 
180
+ if not metadata:
181
+ # create an empty, but compliant dataframe
182
+ df0 = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
183
+ _df = df0
184
+ else:
185
+ # make a pandas df that is compliant with folium/streamlit maps
186
+ _df = pd.DataFrame({
187
+ 'lat': metadata["train"]["latitude"],
188
+ 'lon': metadata["train"]["longitude"],
189
+ 'species': metadata["train"]["predicted_class"],}
190
+ )
191
+
192
  if dbg_show_extra:
193
  # add a few samples to visualise colours
194
  _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}
tests/test_obs_map.py CHANGED
@@ -31,33 +31,16 @@ def test_try_download_dataset_success(mock_logger, mock_st, mock_load_dataset):
31
  @patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
32
  @patch('maps.obs_map.st')
33
  @patch('maps.obs_map.m_logger')
34
- def test_try_download_dataset_failure_with_mockdata(mock_logger, mock_st, mock_load_dataset):
35
  dataset_id = "test_dataset"
36
  data_files = "test_file"
37
- result = try_download_dataset(dataset_id, data_files, mockdata_on_failure=True)
38
-
39
- # Assertions
40
- mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
41
- mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
42
- mock_logger.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s) Using mock data to continue")
43
- mock_st.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s) Using mock data to continue")
44
- assert result == {'train': {'latitude': [0], 'longitude': [0], 'predicted_class': ['rough_toothed_dolphin']}}
45
- mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
46
- mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
47
-
48
- @patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
49
- @patch('maps.obs_map.st')
50
- @patch('maps.obs_map.m_logger')
51
- def test_try_download_dataset_failure_without_mockdata(mock_logger, mock_st, mock_load_dataset):
52
- dataset_id = "test_dataset"
53
- data_files = "test_file"
54
- result = try_download_dataset(dataset_id, data_files, mockdata_on_failure=False)
55
 
56
  # Assertions
57
  mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
58
  mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
59
- mock_logger.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s) Using mock data to continue")
60
- mock_st.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s) Using mock data to continue")
61
  assert result == {}
62
  mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
63
- mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
 
31
  @patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
32
  @patch('maps.obs_map.st')
33
  @patch('maps.obs_map.m_logger')
34
+ def test_try_download_dataset_failure(mock_logger, mock_st, mock_load_dataset):
35
  dataset_id = "test_dataset"
36
  data_files = "test_file"
37
+ result = try_download_dataset(dataset_id, data_files)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  # Assertions
40
  mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
41
  mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
42
+ mock_logger.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s).")
43
+ mock_st.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s).")
44
  assert result == {}
45
  mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
46
+ mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")