Spaces:

Saving-Willy
/

saving-willy-dev

Sleeping

rmm committed about 1 month ago

Commit

abf9c5d

1 Parent(s): 3920cb1

fix: added broad exception handling branch too, cleanup

- catching the known ValueError, but also arbitrary errors with download
(the internal function is quite complex, potentially many errors that
could be raised, unknown)
- added refined test cases for ValueError and for arbitrary error.
- cleaned up obs_map code a little

Files changed (2) hide show

src/maps/obs_map.py +29 -18
tests/test_obs_map.py +20 -1

src/maps/obs_map.py CHANGED Viewed

@@ -66,6 +66,13 @@ _colors = [
 whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
 def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     """
     Create a folium map with the specified tile layer
@@ -118,19 +125,37 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
     return m
 def try_download_dataset(dataset_id:str, data_files:str) -> dict:
-    # the `mockdata_on_failure` generates a minimal compliant dataset if the download fails
-    # (one step at a time)
     m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
     t1 = time.time()
     try:
         metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
         t2 = time.time(); elap = t2 - t1
-    except Exception as e:
         t2 = time.time(); elap = t2 - t1
         msg = f"Error downloading dataset: {e}.  (after {elap:.2f}s)."
         st.error(msg)
         m_logger.error(msg)
         metadata = {}
     msg = f"Downloaded dataset: (after {elap:.2f}s). "
     m_logger.info(msg)
@@ -161,26 +186,12 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
     """
-    metadata_schema = {
-        'train': {
-            'latitude': 'list',
-            'longitude': 'list',
-            'predicted_class': 'list',
-        }
-    }
-    presentation_data_schema = {
-        'lat': 'float',
-        'lon': 'float',
-        'species': 'str',
-    }
     # load/download data from huggingface dataset
     metadata = try_download_dataset(dataset_id, data_files)
     if not metadata:
         # create an empty, but compliant dataframe
-        df0 = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
-        _df = df0
     else:
         # make a pandas df that is compliant with folium/streamlit maps
         _df = pd.DataFrame({

 whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
+presentation_data_schema = {
+    'lat': 'float',
+    'lon': 'float',
+    'species': 'str',
+}
 def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     """
     Create a folium map with the specified tile layer
     return m
 def try_download_dataset(dataset_id:str, data_files:str) -> dict:
+    """
+    Attempts to download a dataset from Hugging Face, catching any errors that occur.
+    Args:
+        dataset_id (str): The ID of the dataset to download.
+        data_files (str): The data files associated with the dataset.
+    Returns:
+        dict: A dictionary containing the dataset metadata if the download is successful,
+              or an empty dictionary if an error occurs.
+    """
     m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
     t1 = time.time()
     try:
         metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
         t2 = time.time(); elap = t2 - t1
+    except ValueError as e:
         t2 = time.time(); elap = t2 - t1
         msg = f"Error downloading dataset: {e}.  (after {elap:.2f}s)."
         st.error(msg)
         m_logger.error(msg)
         metadata = {}
+    except Exception as e:
+        # catch all (other) exceptions and log them, handle them once isolated
+        t2 = time.time(); elap = t2 - t1
+        msg = f"!!Unknown Error!! downloading dataset: {e}.  (after {elap:.2f}s)."
+        st.error(msg)
+        m_logger.error(msg)
+        metadata = {}
     msg = f"Downloaded dataset: (after {elap:.2f}s). "
     m_logger.info(msg)
     """
     # load/download data from huggingface dataset
     metadata = try_download_dataset(dataset_id, data_files)
     if not metadata:
         # create an empty, but compliant dataframe
+        _df = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
     else:
         # make a pandas df that is compliant with folium/streamlit maps
         _df = pd.DataFrame({

tests/test_obs_map.py CHANGED Viewed

@@ -31,7 +31,8 @@ def test_try_download_dataset_success(mock_logger, mock_st, mock_load_dataset):
 @patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
 @patch('maps.obs_map.st')
 @patch('maps.obs_map.m_logger')
-def test_try_download_dataset_failure(mock_logger, mock_st, mock_load_dataset):
     dataset_id = "test_dataset"
     data_files = "test_file"
     result = try_download_dataset(dataset_id, data_files)
@@ -44,3 +45,21 @@ def test_try_download_dataset_failure(mock_logger, mock_st, mock_load_dataset):
     assert result == {}
     mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
     mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")

 @patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
 @patch('maps.obs_map.st')
 @patch('maps.obs_map.m_logger')
+def test_try_download_dataset_failure_known(mock_logger, mock_st, mock_load_dataset):
+    # testing the case where we've found (can reproduce by removing network connection)
     dataset_id = "test_dataset"
     data_files = "test_file"
     result = try_download_dataset(dataset_id, data_files)
     assert result == {}
     mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
     mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
+@patch('maps.obs_map.load_dataset', side_effect=Exception("Download engine corrupt"))
+@patch('maps.obs_map.st')
+@patch('maps.obs_map.m_logger')
+def test_try_download_dataset_failure_unknown(mock_logger, mock_st, mock_load_dataset):
+    # the cases we haven't found, but should still be handled (maybe network error, etc)
+    dataset_id = "test_dataset"
+    data_files = "test_file"
+    result = try_download_dataset(dataset_id, data_files)
+    # Assertions
+    mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
+    mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
+    mock_logger.error.assert_called_with("!!Unknown Error!! downloading dataset: Download engine corrupt.  (after 0.00s).")
+    mock_st.error.assert_called_with("!!Unknown Error!! downloading dataset: Download engine corrupt.  (after 0.00s).")
+    assert result == {}
+    mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
+    mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")