Spaces:
Running
Running
rmm
committed on
Commit
·
d219dea
1
Parent(s):
ba0dee1
fix: simplified try_download_dataset
Browse files
- removed assumptions about dataset format
- removed one argument, only one main and one failing pathway
- src/maps/obs_map.py +30 -19
- tests/test_obs_map.py +5 -22
src/maps/obs_map.py
CHANGED
@@ -117,28 +117,20 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
|
|
117 |
#folium.LayerControl().add_to(m)
|
118 |
return m
|
119 |
|
120 |
-
def try_download_dataset(dataset_id:str, data_files:str
|
121 |
# the `mockdata_on_failure` generates a minimal compliant dataset if the download fails
|
122 |
# (one step at a time)
|
|
|
123 |
t1 = time.time()
|
124 |
try:
|
125 |
-
m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
|
126 |
metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
|
127 |
t2 = time.time(); elap = t2 - t1
|
128 |
except Exception as e:
|
129 |
t2 = time.time(); elap = t2 - t1
|
130 |
-
msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)
|
131 |
st.error(msg)
|
132 |
m_logger.error(msg)
|
133 |
-
|
134 |
-
metadata = {'train':
|
135 |
-
{'latitude': [0],
|
136 |
-
'longitude': [0],
|
137 |
-
'predicted_class': ['rough_toothed_dolphin']}
|
138 |
-
}
|
139 |
-
|
140 |
-
else:
|
141 |
-
metadata = {}
|
142 |
|
143 |
msg = f"Downloaded dataset: (after {elap:.2f}s). "
|
144 |
m_logger.info(msg)
|
@@ -169,15 +161,34 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
|
|
169 |
|
170 |
"""
|
171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
# load/download data from huggingface dataset
|
173 |
-
metadata = try_download_dataset(dataset_id, data_files
|
174 |
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
if dbg_show_extra:
|
182 |
# add a few samples to visualise colours
|
183 |
_df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}
|
|
|
117 |
#folium.LayerControl().add_to(m)
|
118 |
return m
|
119 |
|
120 |
+
def try_download_dataset(dataset_id:str, data_files:str) -> dict:
|
121 |
# the `mockdata_on_failure` generates a minimal compliant dataset if the download fails
|
122 |
# (one step at a time)
|
123 |
+
m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
|
124 |
t1 = time.time()
|
125 |
try:
|
|
|
126 |
metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
|
127 |
t2 = time.time(); elap = t2 - t1
|
128 |
except Exception as e:
|
129 |
t2 = time.time(); elap = t2 - t1
|
130 |
+
msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)."
|
131 |
st.error(msg)
|
132 |
m_logger.error(msg)
|
133 |
+
metadata = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
msg = f"Downloaded dataset: (after {elap:.2f}s). "
|
136 |
m_logger.info(msg)
|
|
|
161 |
|
162 |
"""
|
163 |
|
164 |
+
metadata_schema = {
|
165 |
+
'train': {
|
166 |
+
'latitude': 'list',
|
167 |
+
'longitude': 'list',
|
168 |
+
'predicted_class': 'list',
|
169 |
+
}
|
170 |
+
}
|
171 |
+
presentation_data_schema = {
|
172 |
+
'lat': 'float',
|
173 |
+
'lon': 'float',
|
174 |
+
'species': 'str',
|
175 |
+
}
|
176 |
+
|
177 |
# load/download data from huggingface dataset
|
178 |
+
metadata = try_download_dataset(dataset_id, data_files)
|
179 |
|
180 |
+
if not metadata:
|
181 |
+
# create an empty, but compliant dataframe
|
182 |
+
df0 = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
|
183 |
+
_df = df0
|
184 |
+
else:
|
185 |
+
# make a pandas df that is compliant with folium/streamlit maps
|
186 |
+
_df = pd.DataFrame({
|
187 |
+
'lat': metadata["train"]["latitude"],
|
188 |
+
'lon': metadata["train"]["longitude"],
|
189 |
+
'species': metadata["train"]["predicted_class"],}
|
190 |
+
)
|
191 |
+
|
192 |
if dbg_show_extra:
|
193 |
# add a few samples to visualise colours
|
194 |
_df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}
|
tests/test_obs_map.py
CHANGED
@@ -31,33 +31,16 @@ def test_try_download_dataset_success(mock_logger, mock_st, mock_load_dataset):
|
|
31 |
@patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
|
32 |
@patch('maps.obs_map.st')
|
33 |
@patch('maps.obs_map.m_logger')
|
34 |
-
def
|
35 |
dataset_id = "test_dataset"
|
36 |
data_files = "test_file"
|
37 |
-
result = try_download_dataset(dataset_id, data_files
|
38 |
-
|
39 |
-
# Assertions
|
40 |
-
mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
|
41 |
-
mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
|
42 |
-
mock_logger.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s) Using mock data to continue")
|
43 |
-
mock_st.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s) Using mock data to continue")
|
44 |
-
assert result == {'train': {'latitude': [0], 'longitude': [0], 'predicted_class': ['rough_toothed_dolphin']}}
|
45 |
-
mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
|
46 |
-
mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
|
47 |
-
|
48 |
-
@patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
|
49 |
-
@patch('maps.obs_map.st')
|
50 |
-
@patch('maps.obs_map.m_logger')
|
51 |
-
def test_try_download_dataset_failure_without_mockdata(mock_logger, mock_st, mock_load_dataset):
|
52 |
-
dataset_id = "test_dataset"
|
53 |
-
data_files = "test_file"
|
54 |
-
result = try_download_dataset(dataset_id, data_files, mockdata_on_failure=False)
|
55 |
|
56 |
# Assertions
|
57 |
mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
|
58 |
mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
|
59 |
-
mock_logger.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s)
|
60 |
-
mock_st.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s)
|
61 |
assert result == {}
|
62 |
mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
|
63 |
-
mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
|
|
|
31 |
@patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
|
32 |
@patch('maps.obs_map.st')
|
33 |
@patch('maps.obs_map.m_logger')
|
34 |
+
def test_try_download_dataset_failure(mock_logger, mock_st, mock_load_dataset):
|
35 |
dataset_id = "test_dataset"
|
36 |
data_files = "test_file"
|
37 |
+
result = try_download_dataset(dataset_id, data_files)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
# Assertions
|
40 |
mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
|
41 |
mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
|
42 |
+
mock_logger.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s).")
|
43 |
+
mock_st.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s).")
|
44 |
assert result == {}
|
45 |
mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
|
46 |
+
mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
|