Spaces:

Saving-Willy
/

saving-willy-dev

Sleeping

App Files Files Community

rmm committed on Mar 8

Commit

d219dea

1 Parent(s): ba0dee1

fix: simplified try_download_dataset

Browse files

- removed assumptions about the dataset format from `try_download_dataset`
- removed the `mockdata_on_failure` argument, leaving only one main pathway and one failure pathway

Files changed (2) hide show

src/maps/obs_map.py +30 -19
tests/test_obs_map.py +5 -22

src/maps/obs_map.py CHANGED Viewed

@@ -117,28 +117,20 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
     #folium.LayerControl().add_to(m)
     return m
-def try_download_dataset(dataset_id:str, data_files:str, mockdata_on_failure:bool=False) -> dict:
     # the `mockdata_on_failure` generates a minimal compliant dataset if the download fails
     # (one step at a time)
     t1 = time.time()
     try:
-        m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
         metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
         t2 = time.time(); elap = t2 - t1
     except Exception as e:
         t2 = time.time(); elap = t2 - t1
-        msg = f"Error downloading dataset: {e}.  (after {elap:.2f}s) Using mock data to continue"
         st.error(msg)
         m_logger.error(msg)
-        if mockdata_on_failure:
-            metadata = {'train':
-                         {'latitude': [0],
-                          'longitude': [0],
-                          'predicted_class': ['rough_toothed_dolphin']}
-                       }
-        else:
-            metadata = {}
     msg = f"Downloaded dataset: (after {elap:.2f}s). "
     m_logger.info(msg)
@@ -169,15 +161,34 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
     """
     # load/download data from huggingface dataset
-    metadata = try_download_dataset(dataset_id, data_files, mockdata_on_failure=True)
-    # make a pandas df that is compliant with folium/streamlit maps
-    _df = pd.DataFrame({
-        'lat': metadata["train"]["latitude"],
-        'lon': metadata["train"]["longitude"],
-        'species': metadata["train"]["predicted_class"],}
-    )
     if dbg_show_extra:
         # add a few samples to visualise colours
         _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}

     #folium.LayerControl().add_to(m)
     return m
+def try_download_dataset(dataset_id:str, data_files:str) -> dict:
     # the `mockdata_on_failure` generates a minimal compliant dataset if the download fails
     # (one step at a time)
+    m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
     t1 = time.time()
     try:
         metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
         t2 = time.time(); elap = t2 - t1
     except Exception as e:
         t2 = time.time(); elap = t2 - t1
+        msg = f"Error downloading dataset: {e}.  (after {elap:.2f}s)."
         st.error(msg)
         m_logger.error(msg)
+        metadata = {}
     msg = f"Downloaded dataset: (after {elap:.2f}s). "
     m_logger.info(msg)
     """
+    metadata_schema = {
+        'train': {
+            'latitude': 'list',
+            'longitude': 'list',
+            'predicted_class': 'list',
+        }
+    }
+    presentation_data_schema = {
+        'lat': 'float',
+        'lon': 'float',
+        'species': 'str',
+    }
     # load/download data from huggingface dataset
+    metadata = try_download_dataset(dataset_id, data_files)
+    if not metadata:
+        # create an empty, but compliant dataframe
+        df0 = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
+        _df = df0
+    else:
+        # make a pandas df that is compliant with folium/streamlit maps
+        _df = pd.DataFrame({
+            'lat': metadata["train"]["latitude"],
+            'lon': metadata["train"]["longitude"],
+            'species': metadata["train"]["predicted_class"],}
+        )
     if dbg_show_extra:
         # add a few samples to visualise colours
         _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}

tests/test_obs_map.py CHANGED Viewed

@@ -31,33 +31,16 @@ def test_try_download_dataset_success(mock_logger, mock_st, mock_load_dataset):
 @patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
 @patch('maps.obs_map.st')
 @patch('maps.obs_map.m_logger')
-def test_try_download_dataset_failure_with_mockdata(mock_logger, mock_st, mock_load_dataset):
     dataset_id = "test_dataset"
     data_files = "test_file"
-    result = try_download_dataset(dataset_id, data_files, mockdata_on_failure=True)
-    # Assertions
-    mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
-    mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
-    mock_logger.error.assert_called_with("Error downloading dataset: Download failed.  (after 0.00s) Using mock data to continue")
-    mock_st.error.assert_called_with("Error downloading dataset: Download failed.  (after 0.00s) Using mock data to continue")
-    assert result == {'train': {'latitude': [0], 'longitude': [0], 'predicted_class': ['rough_toothed_dolphin']}}
-    mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
-    mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
-@patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
-@patch('maps.obs_map.st')
-@patch('maps.obs_map.m_logger')
-def test_try_download_dataset_failure_without_mockdata(mock_logger, mock_st, mock_load_dataset):
-    dataset_id = "test_dataset"
-    data_files = "test_file"
-    result = try_download_dataset(dataset_id, data_files, mockdata_on_failure=False)
     # Assertions
     mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
     mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
-    mock_logger.error.assert_called_with("Error downloading dataset: Download failed.  (after 0.00s) Using mock data to continue")
-    mock_st.error.assert_called_with("Error downloading dataset: Download failed.  (after 0.00s) Using mock data to continue")
     assert result == {}
     mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
-    mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")

 @patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
 @patch('maps.obs_map.st')
 @patch('maps.obs_map.m_logger')
+def test_try_download_dataset_failure(mock_logger, mock_st, mock_load_dataset):
     dataset_id = "test_dataset"
     data_files = "test_file"
+    result = try_download_dataset(dataset_id, data_files)
     # Assertions
     mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
     mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
+    mock_logger.error.assert_called_with("Error downloading dataset: Download failed.  (after 0.00s).")
+    mock_st.error.assert_called_with("Error downloading dataset: Download failed.  (after 0.00s).")
     assert result == {}
     mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
+    mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")