vancauwe committed on
Commit
d28c512
·
unverified ·
2 Parent(s): 8209004 abf9c5d

Merge pull request #39 from sdsc-ordes/fix/nofail-on-missing-ext-resource

Browse files
Files changed (2) hide show
  1. src/maps/obs_map.py +60 -7
  2. tests/test_obs_map.py +65 -0
src/maps/obs_map.py CHANGED
@@ -3,6 +3,10 @@ import logging
3
 
4
  import pandas as pd
5
  from datasets import load_dataset
 
 
 
 
6
  import streamlit as st
7
  import folium
8
  from streamlit_folium import st_folium
@@ -62,6 +66,13 @@ _colors = [
62
 
63
  whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
64
 
 
 
 
 
 
 
 
65
  def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
66
  """
67
  Create a folium map with the specified tile layer
@@ -113,6 +124,43 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
113
  #folium.LayerControl().add_to(m)
114
  return m
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
 
118
  def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
@@ -139,14 +187,19 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
139
  """
140
 
141
  # load/download data from huggingface dataset
142
- metadata = load_dataset(dataset_id, data_files=data_files)
143
 
144
- # make a pandas df that is compliant with folium/streamlit maps
145
- _df = pd.DataFrame({
146
- 'lat': metadata["train"]["latitude"],
147
- 'lon': metadata["train"]["longitude"],
148
- 'species': metadata["train"]["predicted_class"],}
149
- )
 
 
 
 
 
150
  if dbg_show_extra:
151
  # add a few samples to visualise colours
152
  _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}
 
3
 
4
  import pandas as pd
5
  from datasets import load_dataset
6
+ from datasets import DatasetDict, Dataset
7
+
8
+ import time
9
+
10
  import streamlit as st
11
  import folium
12
  from streamlit_folium import st_folium
 
66
 
67
  whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
68
 
69
# Column names and pandas dtypes for the dataframe fed to the folium/streamlit
# map; also used to build an empty-but-schema-compliant dataframe when the
# dataset download fails (see present_obs_map).
presentation_data_schema = {
    'lat': 'float',
    'lon': 'float',
    'species': 'str',
}
74
+
75
+
76
  def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
77
  """
78
  Create a folium map with the specified tile layer
 
124
  #folium.LayerControl().add_to(m)
125
  return m
126
 
127
def try_download_dataset(dataset_id: str, data_files: str) -> dict:
    """
    Attempt to download a dataset from Hugging Face, catching any errors that occur.

    Progress and errors are reported both to the module logger (`m_logger`) and
    to the Streamlit UI (`st.error` / `st.write`), and the download is timed.

    Args:
        dataset_id (str): The ID of the dataset to download.
        data_files (str): The data files associated with the dataset.

    Returns:
        dict: A dictionary (DatasetDict) containing the dataset metadata if the
        download is successful, or an empty dictionary if an error occurs.
    """

    m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
    t1 = time.time()
    try:
        metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
        # record elapsed time on the success path
        t2 = time.time(); elap = t2 - t1
    except ValueError as e:
        # known failure mode: e.g. missing/unreachable remote resource
        t2 = time.time(); elap = t2 - t1
        msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)."
        st.error(msg)
        m_logger.error(msg)
        # empty dict signals "no data" to the caller (see present_obs_map)
        metadata = {}
    except Exception as e:
        # catch all (other) exceptions and log them, handle them once isolated
        t2 = time.time(); elap = t2 - t1
        msg = f"!!Unknown Error!! downloading dataset: {e}. (after {elap:.2f}s)."
        st.error(msg)
        m_logger.error(msg)
        metadata = {}


    # NOTE(review): this completion message is emitted even when the download
    # failed above (the unit tests assert this behaviour) — consider gating it
    # on success if the UI text is found to be misleading.
    msg = f"Downloaded dataset: (after {elap:.2f}s). "
    m_logger.info(msg)
    st.write(msg)
    return metadata
164
 
165
 
166
  def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
 
187
  """
188
 
189
  # load/download data from huggingface dataset
190
+ metadata = try_download_dataset(dataset_id, data_files)
191
 
192
+ if not metadata:
193
+ # create an empty, but compliant dataframe
194
+ _df = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
195
+ else:
196
+ # make a pandas df that is compliant with folium/streamlit maps
197
+ _df = pd.DataFrame({
198
+ 'lat': metadata["train"]["latitude"],
199
+ 'lon': metadata["train"]["longitude"],
200
+ 'species': metadata["train"]["predicted_class"],}
201
+ )
202
+
203
  if dbg_show_extra:
204
  # add a few samples to visualise colours
205
  _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}
tests/test_obs_map.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from unittest.mock import patch, MagicMock
3
+ from maps.obs_map import try_download_dataset
4
+
5
+ # tests for try_download_dataset
6
+ # - the main aim here is to mock the function load_dataset which makes external HTTP requests,
7
+ # and follow the successful and failing pathways.
8
+ # - tests templates generated with copilot, they test the text/messages too; the core
9
+ # is the return value, which should have similar form but change according to if an exception was raised or not
10
+ # since this function uses st and m_logger to keep track of the download status, we need to mock them too
11
+
12
@patch('maps.obs_map.load_dataset')
@patch('maps.obs_map.st')
@patch('maps.obs_map.m_logger')
def test_try_download_dataset_success(logger_mock, st_mock, load_mock):
    # Happy path: load_dataset returns a payload, which must come back unchanged.
    payload = {'train': {'latitude': [1], 'longitude': [2], 'predicted_class': ['whale']}}
    load_mock.return_value = payload
    dataset_id, data_files = "test_dataset", "test_file"

    result = try_download_dataset(dataset_id, data_files)

    # Return value and delegation to load_dataset
    assert result == payload
    load_mock.assert_called_once_with(dataset_id, data_files=data_files)

    # Status reporting: start message, then completion message to logger and UI
    logger_mock.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
    done_msg = "Downloaded dataset: (after 0.00s). "
    logger_mock.info.assert_called_with(done_msg)
    st_mock.write.assert_called_with(done_msg)
29
+
30
+
31
@patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
@patch('maps.obs_map.st')
@patch('maps.obs_map.m_logger')
def test_try_download_dataset_failure_known(logger_mock, st_mock, load_mock):
    # Known failure mode (reproducible by removing the network connection):
    # load_dataset raises ValueError; the function must return {} and surface the error.
    dataset_id, data_files = "test_dataset", "test_file"

    result = try_download_dataset(dataset_id, data_files)

    assert result == {}
    load_mock.assert_called_once_with(dataset_id, data_files=data_files)
    logger_mock.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")

    # Error is reported to both the logger and the Streamlit UI
    err_msg = "Error downloading dataset: Download failed. (after 0.00s)."
    logger_mock.error.assert_called_with(err_msg)
    st_mock.error.assert_called_with(err_msg)

    # The completion message is still emitted after a failure (current behaviour)
    done_msg = "Downloaded dataset: (after 0.00s). "
    logger_mock.info.assert_called_with(done_msg)
    st_mock.write.assert_called_with(done_msg)
48
+
49
@patch('maps.obs_map.load_dataset', side_effect=Exception("Download engine corrupt"))
@patch('maps.obs_map.st')
@patch('maps.obs_map.m_logger')
def test_try_download_dataset_failure_unknown(logger_mock, st_mock, load_mock):
    # Unanticipated failure modes (network errors, etc.) must also be handled:
    # any non-ValueError exception follows the catch-all branch and returns {}.
    dataset_id, data_files = "test_dataset", "test_file"

    result = try_download_dataset(dataset_id, data_files)

    assert result == {}
    load_mock.assert_called_once_with(dataset_id, data_files=data_files)
    logger_mock.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")

    # The catch-all branch uses a distinct "!!Unknown Error!!" message
    err_msg = "!!Unknown Error!! downloading dataset: Download engine corrupt. (after 0.00s)."
    logger_mock.error.assert_called_with(err_msg)
    st_mock.error.assert_called_with(err_msg)

    # The completion message is still emitted after a failure (current behaviour)
    done_msg = "Downloaded dataset: (after 0.00s). "
    logger_mock.info.assert_called_with(done_msg)
    st_mock.write.assert_called_with(done_msg)