vancauwe committed on
Commit
2ae74ed
·
unverified ·
2 Parent(s): fe38b4f 68f1407

Merge pull request #41 from sdsc-ordes/feat/multipage

Browse files
Files changed (47) hide show
  1. .streamlit/config.toml +6 -0
  2. README.md +2 -2
  3. docs/{hotdog.md → classifier_hotdog.md} +0 -0
  4. docs/dataset_cleaner.md +3 -0
  5. docs/dataset_download.md +3 -0
  6. docs/dataset_fake_data.md +3 -0
  7. docs/{hf_push_observations.md → dataset_hf_push_observations.md} +1 -1
  8. docs/dataset_requests.md +3 -0
  9. docs/{main.md → home.md} +1 -1
  10. docs/pages.md +12 -0
  11. docs/release_protocol.md +32 -0
  12. docs/{fix_tabrender.md → utils_fix_tabrender.md} +0 -0
  13. docs/{grid_maker.md → utils_grid_maker.md} +0 -0
  14. docs/{metadata_handler.md → utils_metadata_handler.md} +0 -0
  15. mkdocs.yaml +20 -18
  16. requirements.txt +8 -0
  17. src/apptest/demo_input_sidebar.py +2 -0
  18. src/classifier/classifier_image.py +2 -95
  19. docs/index.md → src/dataset/__init__.py +0 -0
  20. src/dataset/cleaner.py +30 -0
  21. src/dataset/data_requests.py +72 -0
  22. src/dataset/download.py +87 -0
  23. src/dataset/fake_data.py +49 -0
  24. src/{hf_push_observations.py → dataset/hf_push_observations.py} +3 -48
  25. src/home.py +84 -0
  26. src/images/design/challenge1.png +3 -0
  27. src/images/design/challenge2.png +3 -0
  28. src/images/design/leaderboard.png +3 -0
  29. src/images/logo/sdsc-horizontal.png +3 -0
  30. src/input/input_handling.py +94 -41
  31. src/main.py +0 -319
  32. src/maps/obs_map.py +4 -68
  33. src/old_main.py +313 -0
  34. src/pages/1_🐋_about.py +46 -0
  35. src/pages/2_🌍_map.py +36 -0
  36. src/pages/3_🤝_data requests.py +73 -0
  37. src/pages/4_🔥_classifiers.py +198 -0
  38. src/pages/5_πŸ“_benchmarking.py +15 -0
  39. src/pages/6_🏆_challenges.py +24 -0
  40. src/pages/7_🌊_gallery.py +17 -0
  41. src/pages/8_🚧_coordinates.py +28 -0
  42. src/pages/📊_logs.py +17 -0
  43. src/utils/metadata_handler.py +2 -1
  44. src/utils/workflow_ui.py +5 -0
  45. src/whale_viewer.py +3 -1
  46. tests/{test_obs_map.py → test_dataset_download.py} +12 -18
  47. tests/test_demo_input_sidebar.py +4 -4
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [theme]
2
+ primaryColor="#2CA3DF"
3
+ backgroundColor="#0F418C"
4
+ secondaryBackgroundColor="#0A326D"
5
+ textColor="#F5F7FA"
6
+ font="sans serif"
README.md CHANGED
@@ -6,7 +6,7 @@ colorTo: blue
6
  sdk: streamlit
7
  sdk_version: 1.39.0
8
  python_version: "3.10"
9
- app_file: src/main.py
10
  pinned: false
11
  license: apache-2.0
12
  short_description: 'SDSC Hackathon - Project 10. '
@@ -28,7 +28,7 @@ pip install -r requirements.txt
28
  ```
29
 
30
  ```
31
- streamlit run src/main.py
32
  ```
33
 
34
 
 
6
  sdk: streamlit
7
  sdk_version: 1.39.0
8
  python_version: "3.10"
9
+ app_file: src/home.py
10
  pinned: false
11
  license: apache-2.0
12
  short_description: 'SDSC Hackathon - Project 10. '
 
28
  ```
29
 
30
  ```
31
+ streamlit run src/home.py
32
  ```
33
 
34
 
docs/{hotdog.md → classifier_hotdog.md} RENAMED
File without changes
docs/dataset_cleaner.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ This module provides basic cleaning checks for the downloaded dataset: any row whose values do not have the expected types is discarded.
2
+
3
+ ::: src.dataset.cleaner
docs/dataset_download.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ This module provides a download function for accessing the Hugging Face dataset.
2
+
3
+ ::: src.dataset.download
docs/dataset_fake_data.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ This module takes care of generating some fake data.
2
+
3
+ ::: src.dataset.fake_data
docs/{hf_push_observations.md → dataset_hf_push_observations.md} RENAMED
@@ -1,3 +1,3 @@
1
  This module writes an observation into a temporary JSON file, in order to add this JSON file to the Saving-Willy Dataset in the Saving-Willy Hugging Face Community.
2
 
3
- ::: src.hf_push_observations
 
1
  This module writes an observation into a temporary JSON file, in order to add this JSON file to the Saving-Willy Dataset in the Saving-Willy Hugging Face Community.
2
 
3
+ ::: src.dataset.hf_push_observations
docs/dataset_requests.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ This module provides functions for filtering the data by location and time, and for rendering the search options as well as the search results.
2
+
3
+ ::: src.dataset.data_requests
docs/{main.md → home.md} RENAMED
@@ -7,4 +7,4 @@ The session state is used to retain values from one interaction to the next, sin
7
  See streamlit [docs](https://docs.streamlit.io/develop/api-reference/caching-and-state/st.session_state).
8
 
9
 
10
- ::: src.main
 
7
  See streamlit [docs](https://docs.streamlit.io/develop/api-reference/caching-and-state/st.session_state).
8
 
9
 
10
+ ::: src.home
docs/pages.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The UI is organized into a multipage streamlit app.
2
+
3
+ The pages cover the main functionalities of the code.
4
+
5
+ Some pages do not yet have code implemented for them: they represent a concept more than a functionality. Such pages are `About`, `Benchmarking` and `Challenges`, which currently contain only text, markdown and images and do not require further documentation.
6
+
7
+ Pages that have fully implemented code and functionality are the following:
8
+
9
+ - Maps
10
+ - Classifiers
11
+ - Gallery
12
+ - Logs
docs/release_protocol.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Release Protocol
2
+
3
+ We use two Spaces on Hugging Face: one for the development of the interface, and the main Space for showcasing the most recent stable release. The main branch is protected and deploys to the main Space when a PR is accepted.
4
+
5
+ We wish to enforce strict commits from the dev branch to the main branch when a PR is made to create a new release.
6
+
7
+ Dev to Main PR Checklist:
8
+
9
+ 1. Open a PR from dev branch to main branch
10
+ 2. Commit: in `dataset/download` change the `dataset_id` to point to the main dataset : `Saving-Willy/main_dataset`
11
+ 3. Commit: in the README, to avoid merge conflicts, change the header to this:
12
+
13
+ ```
14
+ ---
15
+ title: Saving Willy
16
+ emoji: 🐋
17
+ colorFrom: indigo
18
+ colorTo: blue
19
+ sdk: streamlit
20
+ sdk_version: 1.39.0
21
+ python_version: "3.10"
22
+ app_file: src/home.py
23
+ pinned: false
24
+ license: apache-2.0
25
+ short_description: 'SDSC Hackathon - Project 10. '
26
+ ---
27
+ ```
28
+
29
+ 4. Ask for Review
30
+ 5. Merge to main upon approval
31
+ 6. Make a new tag for a major version change (semantic versioning) i.e. `vX.0.0`
32
+ 7. Make a new release of the code, associated to this tag
docs/{fix_tabrender.md → utils_fix_tabrender.md} RENAMED
File without changes
docs/{grid_maker.md → utils_grid_maker.md} RENAMED
File without changes
docs/{metadata_handler.md → utils_metadata_handler.md} RENAMED
File without changes
mkdocs.yaml CHANGED
@@ -22,32 +22,34 @@ plugins:
22
 
23
  nav:
24
  - README: index.md
25
- #- Quickstart:
26
- #- Installation: installation.md
27
- #- Usage: usage.md
28
- - API:
29
- - Main app: main.md
 
30
  - Modules:
31
- - Data entry handling:
32
- - Data input: input_handling.md
33
- - Data extraction and validation: input_validator.md
34
  - Data Object Class: input_observation.md
35
- - Classifiers:
 
 
 
 
 
 
36
  - Cetacean Fluke & Fin Recognition: classifier_image.md
37
- - (temporary) Hotdog Classifier: hotdog.md
38
- - Hugging Face Integration:
39
- - Push Observations to Dataset: hf_push_observations.md
40
  - Map of observations: obs_map.md
41
  - Whale gallery: whale_gallery.md
42
  - Whale viewer: whale_viewer.md
43
  - Logging: st_logs.md
44
  - Utils:
45
- - Tab-rendering fix (js): fix_tabrender.md
46
- - Metadata handling: metadata_handler.md
47
- - Grid maker: grid_maker.md
48
 
49
  - Development clutter:
50
  - Demo app: app.md
51
-
52
- - How to contribute:
53
- - Dev Notes: dev_notes.md
 
22
 
23
  nav:
24
  - README: index.md
25
+ - Release Protocol: release_protocol.md
26
+ - How to contribute:
27
+ - Dev Notes: dev_notes.md
28
+ - App:
29
+ - Main App & Home Page: home.md
30
+ - Multipages Notes: pages.md
31
  - Modules:
32
+ - Data Entry Handling:
33
+ - Data Input: input_handling.md
34
+ - Data Extraction & Validation: input_validator.md
35
  - Data Object Class: input_observation.md
36
+ - Hugging Face Dataset:
37
+ - Download: dataset_download.md
38
+ - Cleaning: dataset_cleaner.md
39
+ - Push Observations to Dataset: dataset_hf_push_observations.md
40
+ - Data Requests: dataset_requests.md
41
+ - Fake data: dataset_fake_data.md
42
+ - Hugging Face Classifiers:
43
  - Cetacean Fluke & Fin Recognition: classifier_image.md
44
+ - (temporary) Hotdog Classifier: classifier_hotdog.md
 
 
45
  - Map of observations: obs_map.md
46
  - Whale gallery: whale_gallery.md
47
  - Whale viewer: whale_viewer.md
48
  - Logging: st_logs.md
49
  - Utils:
50
+ - Tab-rendering fix (js): utils_fix_tabrender.md
51
+ - Metadata handling: utils_metadata_handler.md
52
+ - Grid maker: utils_grid_maker.md
53
 
54
  - Development clutter:
55
  - Demo app: app.md
 
 
 
requirements.txt CHANGED
@@ -13,6 +13,9 @@ datasets==3.0.2
13
  ## FSM
14
  transitions==0.9.2
15
 
 
 
 
16
  # running ML models
17
 
18
  ## to use ML models hosted on HF
@@ -28,8 +31,13 @@ pillow==10.4.0
28
  opencv-python-headless==4.5.5.64
29
  albumentations==1.1.0
30
 
 
 
 
31
  # documentation: mkdocs
32
  mkdocs~=1.6.0
33
  mkdocstrings[python]>=0.25.1
34
  mkdocs-material~=9.5.27
35
  mkdocs-homepage-copier~=1.0.0
 
 
 
13
  ## FSM
14
  transitions==0.9.2
15
 
16
+ # data manipulation
17
+ pandas==2.2.3
18
+
19
  # running ML models
20
 
21
  ## to use ML models hosted on HF
 
31
  opencv-python-headless==4.5.5.64
32
  albumentations==1.1.0
33
 
34
+ # for env variables
35
+ python-dotenv==1.1.0
36
+
37
  # documentation: mkdocs
38
  mkdocs~=1.6.0
39
  mkdocstrings[python]>=0.25.1
40
  mkdocs-material~=9.5.27
41
  mkdocs-homepage-copier~=1.0.0
42
+
43
+
src/apptest/demo_input_sidebar.py CHANGED
@@ -25,6 +25,8 @@ from apptest.demo_elements import show_uploaded_file_info
25
 
26
  if __name__ == "__main__":
27
 
 
 
28
  init_input_data_session_states()
29
  init_input_container_states()
30
  init_workflow_session_states()
 
25
 
26
  if __name__ == "__main__":
27
 
28
+ if "input_author_email" not in st.session_state:
29
+ st.session_state.input_author_email = ""
30
  init_input_data_session_states()
31
  init_input_container_states()
32
  init_workflow_session_states()
src/classifier/classifier_image.py CHANGED
@@ -7,7 +7,6 @@ g_logger = logging.getLogger(__name__)
7
  g_logger.setLevel(LOG_LEVEL)
8
 
9
  import whale_viewer as viewer
10
- from hf_push_observations import push_observations
11
  from utils.grid_maker import gridder
12
  from utils.metadata_handler import metadata2md
13
  from input.input_observation import InputObservation
@@ -107,20 +106,15 @@ def cetacean_show_results_and_review() -> None:
107
  print(f"[D] {o:3} pred1: {pred1:30} | {hash}")
108
  ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
109
  selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
110
-
111
  _observation.set_selected_class(selected_class)
112
- #observation['predicted_class'] = selected_class
113
- # this logic is now in the InputObservation class automatially
114
- #if selected_class != st.session_state.whale_prediction1[hash]:
115
- # observation['class_overriden'] = selected_class # TODO: this should be boolean!
116
 
117
  # store the elements of the observation that will be transmitted (not image)
118
  observation = _observation.to_dict()
119
  st.session_state.public_observations[hash] = observation
120
 
121
- #st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
122
  # TODO: the metadata only fills properly if `validate` was clicked.
123
- st.markdown(metadata2md(hash, debug=True))
 
124
 
125
  msg = f"[D] full observation after inference: {observation}"
126
  g_logger.debug(msg)
@@ -163,27 +157,6 @@ def cetacean_show_results():
163
 
164
  with grid[col]:
165
  st.image(image, use_column_width=True)
166
-
167
- # # dropdown for selecting/overriding the species prediction
168
- # if not st.session_state.classify_whale_done[hash]:
169
- # selected_class = st.sidebar.selectbox("Species", viewer.WHALE_CLASSES,
170
- # index=None, placeholder="Species not yet identified...",
171
- # disabled=True)
172
- # else:
173
- # pred1 = st.session_state.whale_prediction1[hash]
174
- # # get index of pred1 from WHALE_CLASSES, none if not present
175
- # print(f"[D] pred1: {pred1}")
176
- # ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
177
- # selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
178
-
179
- # observation['predicted_class'] = selected_class
180
- # if selected_class != st.session_state.whale_prediction1[hash]:
181
- # observation['class_overriden'] = selected_class # TODO: this should be boolean!
182
-
183
- # st.session_state.public_observation = observation
184
-
185
- #st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
186
- #
187
  st.markdown(metadata2md(hash, debug=True))
188
 
189
  msg = f"[D] full observation after inference: {observation}"
@@ -199,69 +172,3 @@ def cetacean_show_results():
199
  viewer.display_whale(whale_classes, i)
200
  o += 1
201
  col = (col + 1) % row_size
202
-
203
-
204
-
205
-
206
- # func to do all in one
207
- def cetacean_classify_show_and_review(cetacean_classifier):
208
- """Cetacean classifier using the saving-willy model from Saving Willy Hugging Face space.
209
- For each image in the session state, classify the image and display the top 3 predictions.
210
- Args:
211
- cetacean_classifier ([type]): saving-willy model from Saving Willy Hugging Face space
212
- """
213
- raise DeprecationWarning("This function is deprecated. Use individual steps instead")
214
- images = st.session_state.images
215
- observations = st.session_state.observations
216
- hashes = st.session_state.image_hashes
217
- batch_size, row_size, page = gridder(hashes)
218
-
219
- grid = st.columns(row_size)
220
- col = 0
221
- o=1
222
- for hash in hashes:
223
- image = images[hash]
224
-
225
- with grid[col]:
226
- st.image(image, use_column_width=True)
227
- observation = observations[hash].to_dict()
228
- # run classifier model on `image`, and persistently store the output
229
- out = cetacean_classifier(image) # get top 3 matches
230
- st.session_state.whale_prediction1[hash] = out['predictions'][0]
231
- st.session_state.classify_whale_done[hash] = True
232
- msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
233
- g_logger.info(msg)
234
-
235
- # dropdown for selecting/overriding the species prediction
236
- if not st.session_state.classify_whale_done[hash]:
237
- selected_class = st.sidebar.selectbox("Species", viewer.WHALE_CLASSES,
238
- index=None, placeholder="Species not yet identified...",
239
- disabled=True)
240
- else:
241
- pred1 = st.session_state.whale_prediction1[hash]
242
- # get index of pred1 from WHALE_CLASSES, none if not present
243
- print(f"[D] pred1: {pred1}")
244
- ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
245
- selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
246
-
247
- observation['predicted_class'] = selected_class
248
- if selected_class != st.session_state.whale_prediction1[hash]:
249
- observation['class_overriden'] = selected_class
250
-
251
- st.session_state.public_observation = observation
252
- st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
253
- # TODO: the metadata only fills properly if `validate` was clicked.
254
- st.markdown(metadata2md())
255
-
256
- msg = f"[D] full observation after inference: {observation}"
257
- g_logger.debug(msg)
258
- print(msg)
259
- # TODO: add a link to more info on the model, next to the button.
260
-
261
- whale_classes = out['predictions'][:]
262
- # render images for the top 3 (that is what the model api returns)
263
- st.markdown(f"Top 3 Predictions for observation {str(o)}")
264
- for i in range(len(whale_classes)):
265
- viewer.display_whale(whale_classes, i)
266
- o += 1
267
- col = (col + 1) % row_size
 
7
  g_logger.setLevel(LOG_LEVEL)
8
 
9
  import whale_viewer as viewer
 
10
  from utils.grid_maker import gridder
11
  from utils.metadata_handler import metadata2md
12
  from input.input_observation import InputObservation
 
106
  print(f"[D] {o:3} pred1: {pred1:30} | {hash}")
107
  ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
108
  selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
 
109
  _observation.set_selected_class(selected_class)
 
 
 
 
110
 
111
  # store the elements of the observation that will be transmitted (not image)
112
  observation = _observation.to_dict()
113
  st.session_state.public_observations[hash] = observation
114
 
 
115
  # TODO: the metadata only fills properly if `validate` was clicked.
116
+ # TODO put condition on the debug
117
+ st.markdown(metadata2md(hash, debug=False))
118
 
119
  msg = f"[D] full observation after inference: {observation}"
120
  g_logger.debug(msg)
 
157
 
158
  with grid[col]:
159
  st.image(image, use_column_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  st.markdown(metadata2md(hash, debug=True))
161
 
162
  msg = f"[D] full observation after inference: {observation}"
 
172
  viewer.display_whale(whale_classes, i)
173
  o += 1
174
  col = (col + 1) % row_size
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
docs/index.md → src/dataset/__init__.py RENAMED
File without changes
src/dataset/cleaner.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def clean_lat_long(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean latitude and longitude columns in the DataFrame.
    Values in `lat` and `lon` are converted to numeric; anything that cannot
    be parsed is coerced to NaN and the corresponding row is dropped.
    The input DataFrame is not modified: a cleaned copy is returned.
    Args:
        df (pd.DataFrame): DataFrame containing `lat` and `lon` columns.
    Returns:
        pd.DataFrame: New DataFrame with numeric `lat`/`lon` columns, without
            the rows whose coordinates could not be parsed, re-indexed from 0.
    """
    # assign() builds a new frame, so the caller's DataFrame is left untouched
    cleaned = df.assign(
        lat=pd.to_numeric(df['lat'], errors='coerce'),
        lon=pd.to_numeric(df['lon'], errors='coerce'),
    )

    # Drop rows with NaN in lat or lon
    return cleaned.dropna(subset=['lat', 'lon']).reset_index(drop=True)
18
+
19
def clean_date(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean date column in the DataFrame.
    Values in `date` are converted to datetimes; anything that cannot be
    parsed is coerced to NaT and the corresponding row is dropped.
    The input DataFrame is not modified: a cleaned copy is returned.
    Args:
        df (pd.DataFrame): DataFrame containing a `date` column.
    Returns:
        pd.DataFrame: New DataFrame with a datetime `date` column, without the
            rows whose date could not be parsed, re-indexed from 0.
    """
    # assign() builds a new frame, so the caller's DataFrame is left untouched
    cleaned = df.assign(date=pd.to_datetime(df['date'], errors='coerce'))
    # Drop rows whose date could not be parsed (NaT)
    return cleaned.dropna(subset=['date']).reset_index(drop=True)
src/dataset/data_requests.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from dataset.cleaner import clean_lat_long, clean_date
4
+ from dataset.download import get_dataset
5
+ from dataset.fake_data import generate_fake_data
6
+
7
def data_prep() -> pd.DataFrame:
    """
    Build the DataFrame used by the application.
    Fetches the dataset via `get_dataset`, then passes it through the
    lat/lon cleaner and the date cleaner, in that order.
    Returns:
        pd.DataFrame: The dataset after both cleaning steps.
    """
    raw = get_dataset()
    # uncomment to generate some fake data
    # raw = generate_fake_data(raw, 100)
    with_coords = clean_lat_long(raw)
    return clean_date(with_coords)
20
+
21
def filter_data(df:pd.DataFrame) -> pd.DataFrame:
    """
    Keep only the rows that fall inside the ranges stored in the session state.
    The bounds are read from `st.session_state.date_range`, `lon_range` and
    `lat_range`; element 0 is used as the lower bound and element 1 as the
    upper bound, both inclusive.
    Args:
        df (pd.DataFrame): DataFrame with `date`, `lon` and `lat` columns.
    Returns:
        pd.DataFrame: The rows of `df` matching all three ranges.
    """
    in_date_range = (
        (df['date'] >= pd.to_datetime(st.session_state.date_range[0]))
        & (df['date'] <= pd.to_datetime(st.session_state.date_range[1]))
    )
    in_lon_range = (
        (df['lon'] >= st.session_state.lon_range[0])
        & (df['lon'] <= st.session_state.lon_range[1])
    )
    in_lat_range = (
        (df['lat'] >= st.session_state.lat_range[0])
        & (df['lat'] <= st.session_state.lat_range[1])
    )
    return df[in_date_range & in_lon_range & in_lat_range]
38
+
39
def show_specie_author(df:pd.DataFrame) -> pd.DataFrame:
    """
    Display a list of species and their corresponding authors with checkboxes.
    The observations are counted per (species, author_email) pair. For each
    species a subheader is rendered, followed by one checkbox per author
    labelled with the author's e-mail and observation count. The state of
    every checkbox is stored in `st.session_state.checkbox_states` under the
    key `"<species>_<author_email>"`.
    Args:
        df (pd.DataFrame): DataFrame containing `species` and `author_email` columns.
    Returns:
        pd.DataFrame: The grouped table with columns `species`, `author_email`
            and `counts` that was rendered.
    """
    counts = df.groupby(['species', 'author_email']).size().reset_index(name='counts')

    # make sure the container for the checkbox values exists before writing to it
    if "checkbox_states" not in st.session_state:
        st.session_state.checkbox_states = {}

    for specie in counts["species"].unique():
        st.subheader(f"Species: {specie}")
        specie_data = counts[counts['species'] == specie]
        for _, row in specie_data.iterrows():
            key = f"{specie}_{row['author_email']}"
            label = f"{row['author_email']} ({row['counts']})"
            st.session_state.checkbox_states[key] = st.checkbox(label, key=key)

    # returned so that callers (e.g. show_new_data_view) receive the grouped data
    return counts
53
+
54
def show_new_data_view(df:pd.DataFrame) -> pd.DataFrame:
    """
    Render the filtered data view on the UI.
    The DataFrame is first narrowed down with `filter_data` (location sliders
    and selected timeframe), then handed to `show_specie_author`, which lists
    the results grouped by species and author with one checkbox per author so
    the user can mark whose data they wish to request.
    Args:
        df (pd.DataFrame): DataFrame to filter and display.
    Returns:
        pd.DataFrame: Whatever `show_specie_author` returns for the filtered
            data. NOTE(review): declared as a DataFrame -- confirm that
            `show_specie_author` actually returns one.
    """
    return show_specie_author(filter_data(df))
68
+
69
+
70
+
71
+
72
+
src/dataset/download.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import time
3
+ import logging
4
+ import pandas as pd
5
+ from datasets import load_dataset
6
+ from datasets import DatasetDict
7
+
8
############################################################
# the dataset of observations (hf dataset in our space)
dataset_id = "Saving-Willy/temp_dataset"
data_files = "data/train-00000-of-00001.parquet"
############################################################

m_logger = logging.getLogger(__name__)
# we can set the log level locally for funcs in this module
#m_logger.setLevel(logging.DEBUG)
m_logger.setLevel(logging.INFO)

# Column name -> pandas dtype of the DataFrame handed to the UI.
# It is used both as the column list and as the argument of `.astype()`,
# so every value must be a dtype string that pandas accepts.
presentation_data_schema = {
    'lat': 'float',
    'lon': 'float',
    'species': 'str',
    'author_email': 'str',
    # 'timestamp' is not a valid pandas dtype string (astype raises TypeError)
    'date' : 'datetime64[ns]',
}
26
+
27
def try_download_dataset(dataset_id:str, data_files:str) -> dict:
    """
    Attempts to download a dataset from Hugging Face, catching any errors that occur.

    On failure the error is shown in the UI (`st.error`) and written to the
    module logger, and an empty dictionary is returned instead of raising.
    The "Downloaded dataset" message is only logged when the download succeeded.

    Args:
        dataset_id (str): The ID of the dataset to download.
        data_files (str): The data files associated with the dataset.
    Returns:
        dict: The object returned by `load_dataset` if the download is successful,
            or an empty dictionary if an error occurs.

    """

    m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
    t1 = time.time()
    try:
        metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
    except Exception as e:
        elap = time.time() - t1
        # ValueError keeps its own plain message; every other exception is
        # flagged as unknown so it can be isolated and handled later
        prefix = "Error" if isinstance(e, ValueError) else "!!Unknown Error!!"
        msg = f"{prefix} downloading dataset: {e}. (after {elap:.2f}s)."
        st.error(msg)
        m_logger.error(msg)
        return {}

    elap = time.time() - t1
    msg = f"Downloaded dataset: (after {elap:.2f}s). "
    m_logger.info(msg)
    #st.write(msg)
    return metadata
64
+
65
def get_dataset() -> pd.DataFrame:
    """
    Fetch the observations dataset and expose it as a DataFrame.
    The download is delegated to `try_download_dataset`. When it yields
    nothing, an empty DataFrame built from `presentation_data_schema` is
    returned instead; otherwise the columns of the `train` split are renamed
    to the presentation names (`lat`, `lon`, `species`, `author_email`, `date`).
    Returns:
        pd.DataFrame: The observations, or an empty schema-shaped DataFrame
            when no data could be downloaded.
    """
    # load/download data from huggingface dataset
    metadata = try_download_dataset(dataset_id, data_files)

    if metadata:
        train = metadata["train"]
        # make a pandas df that is compliant with folium/streamlit maps
        return pd.DataFrame({
            'lat': train["latitude"],
            'lon': train["longitude"],
            'species': train["selected_class"],
            'author_email': train["author_email"],
            'date': train["date"],
        })

    # create an empty, but compliant dataframe
    return pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
src/dataset/fake_data.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+ import pandas as pd
3
+ import random
4
+ from datetime import datetime, timedelta
5
+
6
+ from dataset.download import presentation_data_schema
7
+ from whale_viewer import WHALE_CLASSES
8
+
9
+ def generate_fake_data(df:pd.DataFrame, num_fake:int) -> pd.DataFrame:
10
+ """
11
+ Generate fake data for the dataset.
12
+
13
+ Args:
14
+ df (pd.DataFrame): Original DataFrame to append fake data to.
15
+ num_fake (int): Number of fake observations to generate.
16
+ Returns:
17
+ pd.DataFrame: DataFrame with the original and fake data.
18
+ """
19
+
20
+ # Options for random generation
21
+ species_options = WHALE_CLASSES
22
+ email_options = [
23
24
25
+ ]
26
+
27
+ def random_ocean_coord() -> Tuple[float, float]:
28
+ """Generate random ocean-friendly coordinates."""
29
+ lat = random.uniform(-60, 60) # avoid poles
30
+ lon = random.uniform(-180, 180)
31
+ return lat, lon
32
+
33
+ def random_date(start_year:int=2018, end_year:int=2025) -> datetime:
34
+ """Generate a random date."""
35
+ start = datetime(start_year, 1, 1)
36
+ end = datetime(end_year, 1, 1)
37
+ return start + timedelta(days=random.randint(0, (end - start).days))
38
+
39
+ new_data = []
40
+ for _ in range(num_fake):
41
+ lat, lon = random_ocean_coord()
42
+ species = random.choice(species_options)
43
+ email = random.choice(email_options)
44
+ date = random_date()
45
+ new_data.append([lat, lon, species, email, date])
46
+
47
+ new_df = pd.DataFrame(new_data, columns=presentation_data_schema).astype(presentation_data_schema)
48
+ df = pd.concat([df, new_df], ignore_index=True)
49
+ return df
src/{hf_push_observations.py → dataset/hf_push_observations.py} RENAMED
@@ -7,6 +7,7 @@ from streamlit.delta_generator import DeltaGenerator
7
  import streamlit as st
8
  from huggingface_hub import HfApi, CommitInfo
9
 
 
10
 
11
  # get a global var for logger accessor in this module
12
  LOG_LEVEL = logging.DEBUG
@@ -48,7 +49,7 @@ def push_observation(image_hash:str, api:HfApi, enable_push:False) -> CommitInfo
48
  rv = api.upload_file(
49
  path_or_fileobj=f.name,
50
  path_in_repo=path_in_repo,
51
- repo_id="Saving-Willy/temp_dataset",
52
  repo_type="dataset",
53
  )
54
  print(rv)
@@ -73,50 +74,4 @@ def push_all_observations(enable_push:bool=False):
73
 
74
  # iterate over the list of observations
75
  for hash in st.session_state.public_observations.keys():
76
- rv = push_observation(hash, api, enable_push=enable_push)
77
-
78
-
79
-
80
- def push_observations(tab_log:DeltaGenerator=None):
81
- """
82
- Push the observations to the Hugging Face dataset
83
-
84
- Args:
85
- tab_log (streamlit.container): The container to log messages to. If not provided,
86
- log messages are in any case written to the global logger (TODO: test - didn't
87
- push any observation since generating the logger)
88
-
89
- """
90
- raise DeprecationWarning("This function is deprecated. Use push_all_observations instead.")
91
-
92
- # we get the observation from session state: 1 is the dict 2 is the image.
93
- # first, lets do an info display (popup)
94
- metadata_str = json.dumps(st.session_state.public_observation)
95
-
96
- st.toast(f"Uploading observations: {metadata_str}", icon="🦭")
97
- g_logger.info(f"Uploading observations: {metadata_str}")
98
-
99
- # get huggingface api
100
- token = os.environ.get("HF_TOKEN", None)
101
- api = HfApi(token=token)
102
-
103
- f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
104
- f.write(metadata_str)
105
- f.close()
106
- st.info(f"temp file: {f.name} with metadata written...")
107
-
108
- path_in_repo= f"metadata/{st.session_state.public_observation['author_email']}/{st.session_state.public_observation['image_md5']}.json"
109
- msg = f"fname: {f.name} | path: {path_in_repo}"
110
- print(msg)
111
- st.warning(msg)
112
- # rv = api.upload_file(
113
- # path_or_fileobj=f.name,
114
- # path_in_repo=path_in_repo,
115
- # repo_id="Saving-Willy/temp_dataset",
116
- # repo_type="dataset",
117
- # )
118
- # print(rv)
119
- # msg = f"observation attempted tx to repo happy walrus: {rv}"
120
- g_logger.info(msg)
121
- st.info(msg)
122
-
 
7
  import streamlit as st
8
  from huggingface_hub import HfApi, CommitInfo
9
 
10
+ from dataset.download import dataset_id
11
 
12
  # get a global var for logger accessor in this module
13
  LOG_LEVEL = logging.DEBUG
 
49
  rv = api.upload_file(
50
  path_or_fileobj=f.name,
51
  path_in_repo=path_in_repo,
52
+ repo_id=dataset_id,
53
  repo_type="dataset",
54
  )
55
  print(rv)
 
74
 
75
  # iterate over the list of observations
76
  for hash in st.session_state.public_observations.keys():
77
+ rv = push_observation(hash, api, enable_push=enable_push)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/home.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+
4
+ import logging
5
+
6
+ st.set_page_config(
7
+ page_title="Home",
8
+ page_icon="🐳",
9
+ )
10
+
11
+ # get a global var for logger accessor in this module
12
+ LOG_LEVEL = logging.DEBUG
13
+ g_logger = logging.getLogger(__name__)
14
+ g_logger.setLevel(LOG_LEVEL)
15
+
16
+ # one toggle for all the extra debug text
17
+ if "MODE_DEV_STATEFUL" not in st.session_state:
18
+ st.session_state.MODE_DEV_STATEFUL = False
19
+
20
+ from utils.st_logs import init_logging_session_states
21
+ init_logging_session_states() # logging init should be early
22
+
23
+ # set email state var to exist, to permit persistence across page switches
24
+ if "input_author_email" not in st.session_state:
25
+ st.session_state.input_author_email = ""
26
+
27
+ st.write("""
28
+ # Welcome ! 🐬˚✧˚.β‹†πŸ‹
29
+
30
+ # Cetacean Conservation Community
31
+ """)
32
+
33
+ st.sidebar.success("Explore the pages: there are machine learning models, data requests, maps and more !")
34
+ st.sidebar.image(
35
+ "src/images/logo/sdsc-horizontal.png",
36
+ width=200
37
+ )
38
+
39
+ st.markdown(
40
+ """
41
+ ## πŸ’™ Research Data Infrastructure
42
+
43
+ Λ–Β°π“‡ΌπŸŒŠβ‹†πŸšπŸ«§ This interface is a Proof of Concept of a Community-driven Research Data Infrastructure (RDI) for the Cetacean Conservation Community.
44
+ This PoC will happily be made into a production-ready RDI if the community is interested.
45
+
46
+ πŸ‘€ The intended users of this interface are the researchers and conservationists working on cetacean conservation.
47
+ In its current state, the interface is designed to be user-friendly, allowing users to upload images of cetaceans and receive species classification results.
48
+
49
+ 🀝 We value community-contributions and encourage anyone interested to reach out on [the main repository's Github issues](https://github.com/sdsc-ordes/saving-willy/issues).
50
+
51
+ 🌍 The goal of this RDI is to explore community methods for sharing code and data.
52
+
53
+
54
+ ## πŸ’» Sharing Code
55
+
56
+ Through the platform of Hugging Face πŸ€—, machine learning models are published so they can be used for inference on this UI or by other users.
57
+ Currently, a demonstration model is available for cetacean species classification.
58
+ The model is based on the [HappyWhale](https://www.kaggle.com/competitions/happy-whale-and-dolphin) competition with the most recent weights.
59
+ Since part of the model was not made public, the classifier should not be used for inference and is purely demonstrative.
60
+
61
+ πŸ† Ideally, through new Kaggle challenges or ongoing development in research groups, new models can be brought to Hugging Face and onto the UI.
62
+
63
+
64
+ ## πŸ’Ž Sharing Data
65
+
66
+ The dataset is hosted on Hugging Face πŸ€— as well, in order to share the metadata of the images which have been classified by the model.
67
+ Making the metadata public is under the choice of the researcher, who can choose to use the model for inference without making the image metadata public afterwards.
68
+ Of course, we encourage open data. Please note that the original images are never made public in the current-state RDI.
69
+
70
+ πŸ’ͺ The RDI also explores how to share data after inference, with a simple data request page where researchers can filter the existing metadata from the Hugging Face dataset, and then easily select those of interest for them.
71
+ Ideally, the Request button would either start a Discord channel discussion between concerned parties of the data request, or generate an e-mail with interested parties. This design is still under conception.
72
+
73
+ """
74
+ )
75
+
76
+
77
+
78
+
79
+ g_logger.info("App started.")
80
+ g_logger.warning(f"[D] Streamlit version: {st.__version__}. Python version: {os.sys.version}")
81
+
82
+ #g_logger.debug("debug message")
83
+ #g_logger.info("info message")
84
+ #g_logger.warning("warning message")
src/images/design/challenge1.png ADDED

Git LFS Details

  • SHA256: 1dd2aa78e98b48b2a4e9eba9a8ebc6a2245848c2499949c2b0670ff65d1dff89
  • Pointer size: 131 Bytes
  • Size of remote file: 324 kB
src/images/design/challenge2.png ADDED

Git LFS Details

  • SHA256: 0e85a6600b8ed5037feb0ff811086e03dac8dc5e9b9fd7e3caf9c9b9ac02ccc4
  • Pointer size: 131 Bytes
  • Size of remote file: 230 kB
src/images/design/leaderboard.png ADDED

Git LFS Details

  • SHA256: 1205d84eeb588f3285890f26e65fc44677db75d58481b91da8e6f69806c89bc4
  • Pointer size: 131 Bytes
  • Size of remote file: 233 kB
src/images/logo/sdsc-horizontal.png ADDED

Git LFS Details

  • SHA256: a4a40e28f815045ff6251fbc937edf4423da7e36ad9b0418458f5e1eb767f6e2
  • Pointer size: 130 Bytes
  • Size of remote file: 37.4 kB
src/input/input_handling.py CHANGED
@@ -5,7 +5,7 @@ import hashlib
5
  import os
6
 
7
  import streamlit as st
8
- from streamlit.delta_generator import DeltaGenerator
9
  from streamlit.runtime.uploaded_file_manager import UploadedFile
10
 
11
  import cv2
@@ -202,7 +202,13 @@ def metadata_inputs_one_file(file:UploadedFile, image_hash:str, dbg_ix:int=0) ->
202
  m_logger.warning("[W] `container_metadata_inputs` is None, using sidebar")
203
 
204
 
205
-
 
 
 
 
 
 
206
  author_email = st.session_state["input_author_email"]
207
  filename = file.name
208
  image_datetime_raw = get_image_datetime(file)
@@ -211,6 +217,23 @@ def metadata_inputs_one_file(file:UploadedFile, image_hash:str, dbg_ix:int=0) ->
211
  msg = f"[D] {filename}: lat, lon from image metadata: {latitude0}, {longitude0}"
212
  m_logger.debug(msg)
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  if spoof_metadata:
215
  if latitude0 is None: # get some default values if not found in exifdata
216
  latitude0:float = spoof_metadata.get('latitude', 0) + dbg_ix
@@ -219,20 +242,16 @@ def metadata_inputs_one_file(file:UploadedFile, image_hash:str, dbg_ix:int=0) ->
219
 
220
  image = st.session_state.images.get(image_hash, None)
221
  # add the UI elements
222
- #viewcontainer.title(f"Metadata for {filename}")
223
  viewcontainer = _viewcontainer.expander(f"Metadata for {file.name}", expanded=True)
224
 
225
- # TODO: use session state so any changes are persisted within session -- currently I think
226
- # we are going to take the defaults over and over again -- if the user adjusts coords, or date, it will get lost
227
- # - it is a bit complicated, if no values change, they persist (the widget definition: params, name, key, etc)
228
- # even if the code is re-run. but if the value changes, it is lost.
229
-
230
 
231
  # 3. Latitude Entry Box
232
  latitude = viewcontainer.text_input(
233
  "Latitude for " + filename,
234
  latitude0,
235
- key=f"input_latitude_{image_hash}")
 
 
236
  if latitude and not is_valid_number(latitude):
237
  viewcontainer.error("Please enter a valid latitude (numerical only).")
238
  m_logger.error(f"Invalid latitude entered: {latitude}.")
@@ -240,40 +259,71 @@ def metadata_inputs_one_file(file:UploadedFile, image_hash:str, dbg_ix:int=0) ->
240
  longitude = viewcontainer.text_input(
241
  "Longitude for " + filename,
242
  longitude0,
243
- key=f"input_longitude_{image_hash}")
 
 
244
  if longitude and not is_valid_number(longitude):
245
  viewcontainer.error("Please enter a valid longitude (numerical only).")
246
  m_logger.error(f"Invalid latitude entered: {latitude}.")
 
 
 
 
 
247
 
248
  # 5. Date/time
249
- ## first from image metadata
250
- if image_datetime_raw is not None:
251
- # if we have a timezone let's use it (but only if we also have datetime)
252
- time_fmt = '%Y:%m:%d %H:%M:%S'
253
- if image_timezone_raw is not None:
254
- image_datetime_raw += f" {image_timezone_raw}"
255
- time_fmt += ' %z'
256
- #
257
- dt = datetime.datetime.strptime(image_datetime_raw, time_fmt)
 
 
 
 
 
 
258
  date_value = dt.date()
259
  time_value = dt.time()
260
-
261
- #time_value = datetime.datetime.strptime(image_datetime_raw, '%Y:%m:%d %H:%M:%S').time()
262
- #date_value = datetime.datetime.strptime(image_datetime_raw, '%Y:%m:%d %H:%M:%S').date()
263
  else:
264
- # get current time, with user timezone (or is it server timezone?! TODO: test with different zones)
265
- dt = datetime.datetime.now().astimezone().replace(microsecond=0)
266
- time_value = dt.time()
267
- date_value = dt.date()
268
-
269
- #time_value = datetime.datetime.now().time() # Default to current time
270
- #date_value = datetime.datetime.now().date()
271
-
 
 
 
 
 
 
 
 
 
 
 
272
 
273
  ## either way, give user the option to enter manually (or correct, e.g. if camera has no rtc clock)
274
- date = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{image_hash}")
275
- time = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{image_hash}")
276
-
 
 
 
 
 
 
 
 
 
 
277
  tz_str = dt.strftime('%z') # this is numeric, otherwise the info isn't consistent.
278
 
279
  observation = InputObservation(image=image, latitude=latitude, longitude=longitude,
@@ -339,8 +389,15 @@ def _setup_oneoff_inputs() -> None:
339
 
340
  with container_file_uploader:
341
  # 1. Input the author email
342
- author_email = st.text_input("Author Email", spoof_metadata.get('author_email', ""),
343
- key="input_author_email")
 
 
 
 
 
 
 
344
  if author_email and not is_valid_email(author_email):
345
  st.error("Please enter a valid email address.")
346
 
@@ -348,14 +405,10 @@ def _setup_oneoff_inputs() -> None:
348
  st.file_uploader(
349
  "Upload one or more images", type=["png", 'jpg', 'jpeg', 'webp'],
350
  accept_multiple_files=True,
 
351
  key="file_uploader_data", on_change=buffer_uploaded_files)
352
 
353
 
354
-
355
-
356
-
357
-
358
-
359
  def setup_input() -> None:
360
  '''
361
  Set up the user input handling (files and metadata)
@@ -424,7 +477,7 @@ def add_input_UI_elements() -> None:
424
  # which are not created in the same order.
425
 
426
  st.divider()
427
- st.title("Input image and data")
428
 
429
  # create and style a container for the file uploader/other one-off inputs
430
  st.markdown('<style>.st-key-container_file_uploader_id { border: 1px solid skyblue; border-radius: 5px; }</style>', unsafe_allow_html=True)
 
5
  import os
6
 
7
  import streamlit as st
8
+ #from streamlit.delta_generator import DeltaGenerator
9
  from streamlit.runtime.uploaded_file_manager import UploadedFile
10
 
11
  import cv2
 
202
  m_logger.warning("[W] `container_metadata_inputs` is None, using sidebar")
203
 
204
 
205
+ # logic for the precedence of lat/lon values (descending importance)
206
+ # 1) if something was already entered, take that value (can have arrived from 2 or 3 in previous round)
207
+ # 2) if file metadata, take that value
208
+ # 3) if spoof metadata flag is up, take that value
209
+ # 4) else, empty (None)
210
+ # - and similarly for date/time
211
+
212
  author_email = st.session_state["input_author_email"]
213
  filename = file.name
214
  image_datetime_raw = get_image_datetime(file)
 
217
  msg = f"[D] {filename}: lat, lon from image metadata: {latitude0}, {longitude0}"
218
  m_logger.debug(msg)
219
 
220
+ # let's see if there was a value that was already entered for latitude and/or longitude
221
+ key_lon=f"input_longitude_{image_hash}"
222
+ key_lat=f"input_latitude_{image_hash}"
223
+ present_lat = key_lat in st.session_state
224
+ present_lon = key_lon in st.session_state
225
+
226
+ latitude_prior = st.session_state.get(key_lat, None)
227
+ longitude_prior = st.session_state.get(key_lon, None)
228
+
229
+ m_logger.debug(f"[D] {key_lat}: key present? {int(present_lat)} | prior value: {latitude_prior} | metadata value: {latitude0}")
230
+ m_logger.debug(f"[D] {key_lon}: key present? {int(present_lon)} | prior value: {longitude_prior} | metadata value: {longitude0}")
231
+
232
+ if latitude_prior is not None:
233
+ latitude0 = latitude_prior
234
+ if longitude_prior is not None:
235
+ longitude0 = longitude_prior
236
+
237
  if spoof_metadata:
238
  if latitude0 is None: # get some default values if not found in exifdata
239
  latitude0:float = spoof_metadata.get('latitude', 0) + dbg_ix
 
242
 
243
  image = st.session_state.images.get(image_hash, None)
244
  # add the UI elements
 
245
  viewcontainer = _viewcontainer.expander(f"Metadata for {file.name}", expanded=True)
246
 
 
 
 
 
 
247
 
248
  # 3. Latitude Entry Box
249
  latitude = viewcontainer.text_input(
250
  "Latitude for " + filename,
251
  latitude0,
252
+ disabled=st.session_state.get("input_disabled", False),
253
+ key=f"input_latitude_anchor_{image_hash}",
254
+ )
255
  if latitude and not is_valid_number(latitude):
256
  viewcontainer.error("Please enter a valid latitude (numerical only).")
257
  m_logger.error(f"Invalid latitude entered: {latitude}.")
 
259
  longitude = viewcontainer.text_input(
260
  "Longitude for " + filename,
261
  longitude0,
262
+ disabled=st.session_state.get("input_disabled", False),
263
+ key=f"input_longitude_anchor_{image_hash}",
264
+ )
265
  if longitude and not is_valid_number(longitude):
266
  viewcontainer.error("Please enter a valid longitude (numerical only).")
267
  m_logger.error(f"Invalid latitude entered: {latitude}.")
268
+
269
+ # now store the latitude and longitude into the session state (persists across page switches)
270
+ st.session_state[key_lat] = latitude
271
+ st.session_state[key_lon] = longitude
272
+
273
 
274
  # 5. Date/time
275
+ ## first from state, if previously set/modified
276
+ key_date = f"input_date_{image_hash}"
277
+ key_time = f"input_time_{image_hash}"
278
+ present_date = key_date in st.session_state
279
+ present_time = key_time in st.session_state
280
+ date_prior:datetime.date = st.session_state.get(key_date, None)
281
+ time_prior:datetime.time = st.session_state.get(key_time, None)
282
+
283
+ m_logger.debug(f"[D] {key_date}: key present? {int(present_date)} | prior value: {date_prior} | metadata value: {image_datetime_raw}")
284
+ m_logger.debug(f"[D] {key_time}: key present? {int(present_time)} | prior value: {time_prior} | metadata value: {image_datetime_raw}")
285
+
286
+
287
+ if date_prior is not None and time_prior is not None:
288
+ # we should use these values
289
+ dt = datetime.datetime.combine(date_prior, time_prior)
290
  date_value = dt.date()
291
  time_value = dt.time()
 
 
 
292
  else:
293
+ ## second from image metadata
294
+ if image_datetime_raw is not None:
295
+ # if we have a timezone let's use it (but only if we also have datetime)
296
+ time_fmt = '%Y:%m:%d %H:%M:%S'
297
+ if image_timezone_raw is not None:
298
+ image_datetime_raw += f" {image_timezone_raw}"
299
+ time_fmt += ' %z'
300
+ #
301
+ dt = datetime.datetime.strptime(image_datetime_raw, time_fmt)
302
+ date_value = dt.date()
303
+ time_value = dt.time()
304
+
305
+ #time_value = datetime.datetime.strptime(image_datetime_raw, '%Y:%m:%d %H:%M:%S').time()
306
+ #date_value = datetime.datetime.strptime(image_datetime_raw, '%Y:%m:%d %H:%M:%S').date()
307
+ else:
308
+ # get current time, with user timezone (or is it server timezone?! TODO: test with different zones)
309
+ dt = datetime.datetime.now().astimezone().replace(microsecond=0)
310
+ time_value = dt.time()
311
+ date_value = dt.date()
312
 
313
  ## either way, give user the option to enter manually (or correct, e.g. if camera has no rtc clock)
314
+ date = viewcontainer.date_input(
315
+ "Date for "+filename, value=date_value,
316
+ key=f"input_date_anchor_{image_hash}",
317
+ disabled=st.session_state.get("input_disabled", False), )
318
+ time = viewcontainer.time_input(
319
+ "Time for "+filename, time_value,
320
+ key=f"input_time_anchor_{image_hash}",
321
+ disabled=st.session_state.get("input_disabled", False),)
322
+
323
+ # now store the date and time into the session state (persists across page switches)
324
+ st.session_state[key_date] = date
325
+ st.session_state[key_time] = time
326
+
327
  tz_str = dt.strftime('%z') # this is numeric, otherwise the info isn't consistent.
328
 
329
  observation = InputObservation(image=image, latitude=latitude, longitude=longitude,
 
389
 
390
  with container_file_uploader:
391
  # 1. Input the author email
392
+ text0 = st.session_state.get("input_author_email", "None")
393
+ #print(f"[D] author email: {text0}")
394
+ author_email = st.text_input("Author Email",
395
+ value=st.session_state.get("input_author_email", None),
396
+ disabled=st.session_state.get("input_disabled", False),
397
+ )
398
+ # store the email in session state
399
+ st.session_state["input_author_email"] = author_email
400
+
401
  if author_email and not is_valid_email(author_email):
402
  st.error("Please enter a valid email address.")
403
 
 
405
  st.file_uploader(
406
  "Upload one or more images", type=["png", 'jpg', 'jpeg', 'webp'],
407
  accept_multiple_files=True,
408
+ disabled=st.session_state.get("input_disabled", False),
409
  key="file_uploader_data", on_change=buffer_uploaded_files)
410
 
411
 
 
 
 
 
 
412
  def setup_input() -> None:
413
  '''
414
  Set up the user input handling (files and metadata)
 
477
  # which are not created in the same order.
478
 
479
  st.divider()
480
+ st.title("Input your images")
481
 
482
  # create and style a container for the file uploader/other one-off inputs
483
  st.markdown('<style>.st-key-container_file_uploader_id { border: 1px solid skyblue; border-radius: 5px; }</style>', unsafe_allow_html=True)
src/main.py DELETED
@@ -1,319 +0,0 @@
1
- import logging
2
- import os
3
-
4
- import pandas as pd
5
- import streamlit as st
6
- import folium
7
- from streamlit_folium import st_folium
8
-
9
- from transformers import pipeline
10
- from transformers import AutoModelForImageClassification
11
-
12
- from maps.obs_map import add_obs_map_header
13
- from classifier.classifier_image import add_classifier_header
14
- from datasets import disable_caching
15
- disable_caching()
16
-
17
- import whale_gallery as gallery
18
- import whale_viewer as viewer
19
- from input.input_handling import setup_input, check_inputs_are_set
20
- from input.input_handling import init_input_container_states, add_input_UI_elements, init_input_data_session_states
21
- from input.input_handling import dbg_show_observation_hashes
22
-
23
- from maps.alps_map import present_alps_map
24
- from maps.obs_map import present_obs_map
25
- from utils.st_logs import parse_log_buffer, init_logging_session_states
26
- from utils.workflow_ui import refresh_progress_display, init_workflow_viz, init_workflow_session_states
27
- from hf_push_observations import push_all_observations
28
-
29
- from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results, init_classifier_session_states
30
- from classifier.classifier_hotdog import hotdog_classify
31
-
32
-
33
- # setup for the ML model on huggingface (our wrapper)
34
- os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
35
- #classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
36
- classifier_revision = 'main' # default/latest version
37
- # and the dataset of observations (hf dataset in our space)
38
- dataset_id = "Saving-Willy/temp_dataset"
39
- data_files = "data/train-00000-of-00001.parquet"
40
-
41
- USE_BASIC_MAP = False
42
- DEV_SIDEBAR_LIB = True
43
-
44
- # one toggle for all the extra debug text
45
- if "MODE_DEV_STATEFUL" not in st.session_state:
46
- st.session_state.MODE_DEV_STATEFUL = False
47
-
48
-
49
- # get a global var for logger accessor in this module
50
- LOG_LEVEL = logging.DEBUG
51
- g_logger = logging.getLogger(__name__)
52
- g_logger.setLevel(LOG_LEVEL)
53
-
54
- st.set_page_config(layout="wide")
55
-
56
- # initialise various session state variables
57
- init_logging_session_states() # logging init should be early
58
- init_workflow_session_states()
59
- init_input_data_session_states()
60
- init_input_container_states()
61
- init_workflow_viz()
62
- init_classifier_session_states()
63
-
64
-
65
- def main() -> None:
66
- """
67
- Main entry point to set up the streamlit UI and run the application.
68
-
69
- The organisation is as follows:
70
-
71
- 1. observation input (a new observations) is handled in the sidebar
72
- 2. the rest of the interface is organised in tabs:
73
-
74
- - cetean classifier
75
- - hotdog classifier
76
- - map to present the obersvations
77
- - table of recent log entries
78
- - gallery of whale images
79
-
80
- The majority of the tabs are instantiated from modules. Currently the two
81
- classifiers are still in-line here.
82
-
83
- """
84
-
85
- g_logger.info("App started.")
86
- g_logger.warning(f"[D] Streamlit version: {st.__version__}. Python version: {os.sys.version}")
87
-
88
- #g_logger.debug("debug message")
89
- #g_logger.info("info message")
90
- #g_logger.warning("warning message")
91
-
92
- # Streamlit app
93
- tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
94
- st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
95
-
96
- # put this early so the progress indicator is at the top (also refreshed at end)
97
- refresh_progress_display()
98
-
99
- # create a sidebar, and parse all the input (returned as `observations` object)
100
- with st.sidebar:
101
- # layout handling
102
- add_input_UI_elements()
103
- # input elements (file upload, text input, etc)
104
- setup_input()
105
-
106
-
107
- with tab_map:
108
- # visual structure: a couple of toggles at the top, then the map inlcuding a
109
- # dropdown for tileset selection.
110
- add_obs_map_header()
111
- tab_map_ui_cols = st.columns(2)
112
- with tab_map_ui_cols[0]:
113
- show_db_points = st.toggle("Show Points from DB", True)
114
- with tab_map_ui_cols[1]:
115
- dbg_show_extra = st.toggle("Show Extra points (test)", False)
116
-
117
- if show_db_points:
118
- # show a nicer map, observations marked, tileset selectable.
119
- st_observation = present_obs_map(
120
- dataset_id=dataset_id, data_files=data_files,
121
- dbg_show_extra=dbg_show_extra)
122
-
123
- else:
124
- # development map.
125
- st_observation = present_alps_map()
126
-
127
-
128
- with tab_log:
129
- handler = st.session_state['handler']
130
- if handler is not None:
131
- records = parse_log_buffer(handler.buffer)
132
- st.dataframe(records[::-1], use_container_width=True,)
133
- st.info(f"Length of records: {len(records)}")
134
- else:
135
- st.error("⚠️ No log handler found!")
136
-
137
-
138
-
139
- with tab_coords:
140
- # the goal of this tab is to allow selection of the new obsvation's location by map click/adjust.
141
- st.markdown("Coming later! :construction:")
142
- st.markdown(
143
- """*The goal is to allow interactive definition for the coordinates of a new
144
- observation, by click/drag points on the map.*""")
145
-
146
-
147
- st.write("Click on the map to capture a location.")
148
- #m = folium.Map(location=visp_loc, zoom_start=7)
149
- mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
150
- folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
151
- ).add_to(mm)
152
-
153
- st_data2 = st_folium(mm, width=725)
154
- st.write("below the map...")
155
- if st_data2['last_clicked'] is not None:
156
- print(st_data2)
157
- st.info(st_data2['last_clicked'])
158
-
159
-
160
- with tab_gallery:
161
- # here we make a container to allow filtering css properties
162
- # specific to the gallery (otherwise we get side effects)
163
- tg_cont = st.container(key="swgallery")
164
- with tg_cont:
165
- gallery.render_whale_gallery(n_cols=4)
166
-
167
-
168
- # state handling re data_entry phases
169
- # 0. no data entered yet -> display the file uploader thing
170
- # 1. we have some images, but not all the metadata fields are done -> validate button shown, disabled
171
- # 2. all data entered -> validate button enabled
172
- # 3. validation button pressed, validation done -> enable the inference button.
173
- # - at this point do we also want to disable changes to the metadata selectors?
174
- # anyway, simple first.
175
-
176
- if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
177
- # can we advance state? - only when all inputs are set for all uploaded files
178
- all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
179
- if all_inputs_set:
180
- st.session_state.workflow_fsm.complete_current_state()
181
- # -> data_entry_complete
182
- else:
183
- # button, disabled; no state change yet.
184
- st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")
185
-
186
-
187
- if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
188
- # can we advance state? - only when the validate button is pressed
189
- if st.sidebar.button(":white_check_mark:[**Validate**]"):
190
- # create a dictionary with the submitted observation
191
- tab_log.info(f"{st.session_state.observations}")
192
- df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
193
- #df = pd.DataFrame(st.session_state.observations, index=[0])
194
- with tab_coords:
195
- st.table(df)
196
- # there doesn't seem to be any actual validation here?? TODO: find validator function (each element is validated by the input box, but is there something at the whole image level?)
197
- # hmm, maybe it should actually just be "I'm done with data entry"
198
- st.session_state.workflow_fsm.complete_current_state()
199
- # -> data_entry_validated
200
-
201
- # state handling re inference phases (tab_inference)
202
- # 3. validation button pressed, validation done -> enable the inference button.
203
- # 4. inference button pressed -> ML started. | let's cut this one out, since it would only
204
- # make sense if we did it as an async action
205
- # 5. ML done -> show results, and manual validation options
206
- # 6. manual validation done -> enable the upload buttons
207
- #
208
- with tab_inference:
209
- # inside the inference tab, on button press we call the model (on huggingface hub)
210
- # which will be run locally.
211
- # - the model predicts the top 3 most likely species from the input image
212
- # - these species are shown
213
- # - the user can override the species prediction using the dropdown
214
- # - an observation is uploaded if the user chooses.
215
-
216
-
217
- if st.session_state.MODE_DEV_STATEFUL:
218
- dbg_show_observation_hashes()
219
-
220
- add_classifier_header()
221
- # if we are before data_entry_validated, show the button, disabled.
222
- if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
223
- tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True,
224
- help="Please validate inputs before proceeding",
225
- key="button_infer_ceteans")
226
-
227
- if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
228
- # show the button, enabled. If pressed, we start the ML model (And advance state)
229
- if tab_inference.button("Identify with cetacean classifier",
230
- key="button_infer_ceteans"):
231
- cetacean_classifier = AutoModelForImageClassification.from_pretrained(
232
- "Saving-Willy/cetacean-classifier",
233
- revision=classifier_revision,
234
- trust_remote_code=True)
235
-
236
- cetacean_just_classify(cetacean_classifier)
237
- st.session_state.workflow_fsm.complete_current_state()
238
- # trigger a refresh too (refreshhing the prog indicator means the script reruns and
239
- # we can enter the next state - visualising the results / review)
240
- # ok it doesn't if done programmatically. maybe interacting with teh button? check docs.
241
- refresh_progress_display()
242
- #TODO: validate this doesn't harm performance adversely.
243
- st.rerun()
244
-
245
- elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
246
- # show the results, and allow manual validation
247
- st.markdown("""### Inference results and manual validation/adjustment """)
248
- if st.session_state.MODE_DEV_STATEFUL:
249
- s = ""
250
- for k, v in st.session_state.whale_prediction1.items():
251
- s += f"* Image {k}: {v}\n"
252
-
253
- st.markdown(s)
254
-
255
- # add a button to advance the state
256
- if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
257
- st.session_state.workflow_fsm.complete_current_state()
258
- # -> manual_inspection_completed
259
- st.rerun()
260
-
261
- cetacean_show_results_and_review()
262
-
263
- elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
264
- # show the ML results, and allow the user to upload the observation
265
- st.markdown("""### Inference Results (after manual validation) """)
266
-
267
-
268
- if st.button("Upload all observations to THE INTERNET!"):
269
- # let this go through to the push_all func, since it just reports to log for now.
270
- push_all_observations(enable_push=False)
271
- st.session_state.workflow_fsm.complete_current_state()
272
- # -> data_uploaded
273
- st.rerun()
274
-
275
- cetacean_show_results()
276
-
277
- elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
278
- # the data has been sent. Lets show the observations again
279
- # but no buttons to upload (or greyed out ok)
280
- st.markdown("""### Observation(s) uploaded - thank you!""")
281
- cetacean_show_results()
282
-
283
- st.divider()
284
- #df = pd.DataFrame(st.session_state.observations, index=[0])
285
- df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
286
- st.table(df)
287
-
288
- # didn't decide what the next state is here - I think we are in the terminal state.
289
- #st.session_state.workflow_fsm.complete_current_state()
290
-
291
-
292
- # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
293
- # purposes, an hotdog image classifier) which will be run locally.
294
- # - this model predicts if the image is a hotdog or not, and returns probabilities
295
- # - the input image is the same as for the ceteacean classifier - defined in the sidebar
296
- tab_hotdogs.title("Hot Dog? Or Not?")
297
- tab_hotdogs.write("""
298
- *Run alternative classifer on input images. Here we are using
299
- a binary classifier - hotdog or not - from
300
- huggingface.co/julien-c/hotdog-not-hotdog.*""")
301
-
302
- if tab_hotdogs.button("Get Hotdog Prediction"):
303
-
304
- pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
305
-
306
- if st.session_state.image is None:
307
- st.info("Please upload an image first.")
308
- #st.info(str(observations.to_dict()))
309
-
310
- else:
311
- hotdog_classify(pipeline_hot_dog, tab_hotdogs)
312
-
313
-
314
- # after all other processing, we can show the stage/state
315
- refresh_progress_display()
316
-
317
-
318
- if __name__ == "__main__":
319
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/maps/obs_map.py CHANGED
@@ -1,18 +1,13 @@
1
  from typing import Tuple
2
  import logging
3
 
4
- import pandas as pd
5
- from datasets import load_dataset
6
- from datasets import DatasetDict, Dataset
7
-
8
- import time
9
-
10
  import streamlit as st
11
  import folium
12
  from streamlit_folium import st_folium
13
 
14
  import whale_viewer as viewer
15
  from utils.fix_tabrender import js_show_zeroheight_iframe
 
16
 
17
  m_logger = logging.getLogger(__name__)
18
  # we can set the log level locally for funcs in this module
@@ -66,13 +61,6 @@ _colors = [
66
 
67
  whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
68
 
69
- presentation_data_schema = {
70
- 'lat': 'float',
71
- 'lon': 'float',
72
- 'species': 'str',
73
- }
74
-
75
-
76
  def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
77
  """
78
  Create a folium map with the specified tile layer
@@ -124,48 +112,8 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
124
  #folium.LayerControl().add_to(m)
125
  return m
126
 
127
- def try_download_dataset(dataset_id:str, data_files:str) -> dict:
128
- """
129
- Attempts to download a dataset from Hugging Face, catching any errors that occur.
130
-
131
- Args:
132
- dataset_id (str): The ID of the dataset to download.
133
- data_files (str): The data files associated with the dataset.
134
- Returns:
135
- dict: A dictionary containing the dataset metadata if the download is successful,
136
- or an empty dictionary if an error occurs.
137
-
138
- """
139
-
140
- m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
141
- t1 = time.time()
142
- try:
143
- metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
144
- t2 = time.time(); elap = t2 - t1
145
- except ValueError as e:
146
- t2 = time.time(); elap = t2 - t1
147
- msg = f"Error downloading dataset: {e}. (after {elap:.2f}s)."
148
- st.error(msg)
149
- m_logger.error(msg)
150
- metadata = {}
151
- except Exception as e:
152
- # catch all (other) exceptions and log them, handle them once isolated
153
- t2 = time.time(); elap = t2 - t1
154
- msg = f"!!Unknown Error!! downloading dataset: {e}. (after {elap:.2f}s)."
155
- st.error(msg)
156
- m_logger.error(msg)
157
- metadata = {}
158
-
159
-
160
- msg = f"Downloaded dataset: (after {elap:.2f}s). "
161
- m_logger.info(msg)
162
- st.write(msg)
163
- return metadata
164
-
165
 
166
- def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
167
- data_files:str = "data/train-00000-of-00001.parquet",
168
- dbg_show_extra:bool = False) -> dict:
169
  """
170
  Render map plus tile selector, with markers for whale observations
171
 
@@ -186,20 +134,8 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
186
 
187
  """
188
 
189
- # load/download data from huggingface dataset
190
- metadata = try_download_dataset(dataset_id, data_files)
191
-
192
- if not metadata:
193
- # create an empty, but compliant dataframe
194
- _df = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
195
- else:
196
- # make a pandas df that is compliant with folium/streamlit maps
197
- _df = pd.DataFrame({
198
- 'lat': metadata["train"]["latitude"],
199
- 'lon': metadata["train"]["longitude"],
200
- 'species': metadata["train"]["predicted_class"],}
201
- )
202
-
203
  if dbg_show_extra:
204
  # add a few samples to visualise colours
205
  _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}
 
1
  from typing import Tuple
2
  import logging
3
 
 
 
 
 
 
 
4
  import streamlit as st
5
  import folium
6
  from streamlit_folium import st_folium
7
 
8
  import whale_viewer as viewer
9
  from utils.fix_tabrender import js_show_zeroheight_iframe
10
+ from dataset.download import get_dataset
11
 
12
  m_logger = logging.getLogger(__name__)
13
  # we can set the log level locally for funcs in this module
 
61
 
62
  whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
63
 
 
 
 
 
 
 
 
64
  def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
65
  """
66
  Create a folium map with the specified tile layer
 
112
  #folium.LayerControl().add_to(m)
113
  return m
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
+ def present_obs_map(dbg_show_extra:bool = False) -> dict:
 
 
117
  """
118
  Render map plus tile selector, with markers for whale observations
119
 
 
134
 
135
  """
136
 
137
+ _df = get_dataset()
138
+ print(_df)
 
 
 
 
 
 
 
 
 
 
 
 
139
  if dbg_show_extra:
140
  # add a few samples to visualise colours
141
  _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}
src/old_main.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+
4
+ import pandas as pd
5
+ import streamlit as st
6
+ import folium
7
+ from streamlit_folium import st_folium
8
+
9
+ # from transformers import pipeline
10
+ # from transformers import AutoModelForImageClassification
11
+
12
+ # from maps.obs_map import add_obs_map_header
13
+
14
+ # from datasets import disable_caching
15
+ # disable_caching()
16
+
17
+ # import whale_gallery as gallery
18
+ # import whale_viewer as viewer
19
+ # from input.input_handling import setup_input, check_inputs_are_set
20
+ # from input.input_handling import init_input_container_states, add_input_UI_elements, init_input_data_session_states
21
+ # from input.input_handling import dbg_show_observation_hashes
22
+
23
+ # from maps.alps_map import present_alps_map
24
+ # from maps.obs_map import present_obs_map
25
+ # from utils.st_logs import parse_log_buffer, init_logging_session_states
26
+ # from utils.workflow_ui import refresh_progress_display, init_workflow_viz, init_workflow_session_states
27
+ # from hf_push_observations import push_all_observations
28
+
29
+ # from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results, init_classifier_session_states
30
+ # from classifier.classifier_hotdog import hotdog_classify
31
+
32
+
33
+ # # setup for the ML model on huggingface (our wrapper)
34
+ # os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
35
+ #classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
36
+ # classifier_revision = 'main' # default/latest version
37
+ # # and the dataset of observations (hf dataset in our space)
38
+ # dataset_id = "Saving-Willy/temp_dataset"
39
+ # data_files = "data/train-00000-of-00001.parquet"
40
+
41
+ # USE_BASIC_MAP = False
42
+ # DEV_SIDEBAR_LIB = True
43
+
44
+ # # one toggle for all the extra debug text
45
+ # if "MODE_DEV_STATEFUL" not in st.session_state:
46
+ # st.session_state.MODE_DEV_STATEFUL = False
47
+
48
+
49
+ # get a global var for logger accessor in this module
50
+ # LOG_LEVEL = logging.DEBUG
51
+ # g_logger = logging.getLogger(__name__)
52
+ # g_logger.setLevel(LOG_LEVEL)
53
+
54
+ # st.set_page_config(layout="wide")
55
+
56
+
57
+
58
+
59
+ def main() -> None:
60
+ """
61
+ Main entry point to set up the streamlit UI and run the application.
62
+
63
+ The organisation is as follows:
64
+
65
+ 1. observation input (a new observations) is handled in the sidebar
66
+ 2. the rest of the interface is organised in tabs:
67
+
68
+ - cetacean classifier
69
+ - hotdog classifier
70
+ - map to present the observations
71
+ - table of recent log entries
72
+ - gallery of whale images
73
+
74
+ The majority of the tabs are instantiated from modules. Currently the two
75
+ classifiers are still in-line here.
76
+
77
+ """
78
+
79
+ # g_logger.info("App started.")
80
+ # g_logger.warning(f"[D] Streamlit version: {st.__version__}. Python version: {os.sys.version}")
81
+
82
+ #g_logger.debug("debug message")
83
+ #g_logger.info("info message")
84
+ #g_logger.warning("warning message")
85
+
86
+ # Streamlit app
87
+ # tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
88
+ # st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
89
+
90
+ # # put this early so the progress indicator is at the top (also refreshed at end)
91
+ # refresh_progress_display()
92
+
93
+ # # create a sidebar, and parse all the input (returned as `observations` object)
94
+ # with st.sidebar:
95
+ # # layout handling
96
+ # add_input_UI_elements()
97
+ # # input elements (file upload, text input, etc)
98
+ # setup_input()
99
+
100
+
101
+ # with tab_map:
102
+ # # visual structure: a couple of toggles at the top, then the map inlcuding a
103
+ # # dropdown for tileset selection.
104
+ # add_obs_map_header()
105
+ # tab_map_ui_cols = st.columns(2)
106
+ # with tab_map_ui_cols[0]:
107
+ # show_db_points = st.toggle("Show Points from DB", True)
108
+ # with tab_map_ui_cols[1]:
109
+ # dbg_show_extra = st.toggle("Show Extra points (test)", False)
110
+
111
+ # if show_db_points:
112
+ # # show a nicer map, observations marked, tileset selectable.
113
+ # st_observation = present_obs_map(
114
+ # dataset_id=dataset_id, data_files=data_files,
115
+ # dbg_show_extra=dbg_show_extra)
116
+
117
+ # else:
118
+ # # development map.
119
+ # st_observation = present_alps_map()
120
+
121
+
122
+ # with tab_log:
123
+ # handler = st.session_state['handler']
124
+ # if handler is not None:
125
+ # records = parse_log_buffer(handler.buffer)
126
+ # st.dataframe(records[::-1], use_container_width=True,)
127
+ # st.info(f"Length of records: {len(records)}")
128
+ # else:
129
+ # st.error("⚠️ No log handler found!")
130
+
131
+
132
+
133
+ # with tab_coords:
134
+ # # the goal of this tab is to allow selection of the new obsvation's location by map click/adjust.
135
+ # st.markdown("Coming later! :construction:")
136
+ # st.markdown(
137
+ # """*The goal is to allow interactive definition for the coordinates of a new
138
+ # observation, by click/drag points on the map.*""")
139
+
140
+
141
+ # st.write("Click on the map to capture a location.")
142
+ # #m = folium.Map(location=visp_loc, zoom_start=7)
143
+ # mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
144
+ # folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
145
+ # ).add_to(mm)
146
+
147
+ # st_data2 = st_folium(mm, width=725)
148
+ # st.write("below the map...")
149
+ # if st_data2['last_clicked'] is not None:
150
+ # print(st_data2)
151
+ # st.info(st_data2['last_clicked'])
152
+
153
+
154
+ # with tab_gallery:
155
+ # # here we make a container to allow filtering css properties
156
+ # # specific to the gallery (otherwise we get side effects)
157
+ # tg_cont = st.container(key="swgallery")
158
+ # with tg_cont:
159
+ # gallery.render_whale_gallery(n_cols=4)
160
+
161
+
162
+ # state handling re data_entry phases
163
+ # 0. no data entered yet -> display the file uploader thing
164
+ # 1. we have some images, but not all the metadata fields are done -> validate button shown, disabled
165
+ # 2. all data entered -> validate button enabled
166
+ # 3. validation button pressed, validation done -> enable the inference button.
167
+ # - at this point do we also want to disable changes to the metadata selectors?
168
+ # anyway, simple first.
169
+
170
+ # if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
171
+ # # can we advance state? - only when all inputs are set for all uploaded files
172
+ # all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
173
+ # if all_inputs_set:
174
+ # st.session_state.workflow_fsm.complete_current_state()
175
+ # # -> data_entry_complete
176
+ # else:
177
+ # # button, disabled; no state change yet.
178
+ # st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")
179
+
180
+
181
+ # if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
182
+ # # can we advance state? - only when the validate button is pressed
183
+ # if st.sidebar.button(":white_check_mark:[**Validate**]"):
184
+ # # create a dictionary with the submitted observation
185
+ # tab_log.info(f"{st.session_state.observations}")
186
+ # df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
187
+ # #df = pd.DataFrame(st.session_state.observations, index=[0])
188
+ # with tab_coords:
189
+ # st.table(df)
190
+ # # there doesn't seem to be any actual validation here?? TODO: find validator function (each element is validated by the input box, but is there something at the whole image level?)
191
+ # # hmm, maybe it should actually just be "I'm done with data entry"
192
+ # st.session_state.workflow_fsm.complete_current_state()
193
+ # # -> data_entry_validated
194
+
195
+ # state handling re inference phases (tab_inference)
196
+ # 3. validation button pressed, validation done -> enable the inference button.
197
+ # 4. inference button pressed -> ML started. | let's cut this one out, since it would only
198
+ # make sense if we did it as an async action
199
+ # 5. ML done -> show results, and manual validation options
200
+ # 6. manual validation done -> enable the upload buttons
201
+ #
202
+ # with tab_inference:
203
+ # # inside the inference tab, on button press we call the model (on huggingface hub)
204
+ # # which will be run locally.
205
+ # # - the model predicts the top 3 most likely species from the input image
206
+ # # - these species are shown
207
+ # # - the user can override the species prediction using the dropdown
208
+ # # - an observation is uploaded if the user chooses.
209
+
210
+
211
+ # if st.session_state.MODE_DEV_STATEFUL:
212
+ # dbg_show_observation_hashes()
213
+
214
+ # add_classifier_header()
215
+ # # if we are before data_entry_validated, show the button, disabled.
216
+ # if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
217
+ # tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True,
218
+ # help="Please validate inputs before proceeding",
219
+ # key="button_infer_ceteans")
220
+
221
+ # if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
222
+ # # show the button, enabled. If pressed, we start the ML model (And advance state)
223
+ # if tab_inference.button("Identify with cetacean classifier",
224
+ # key="button_infer_ceteans"):
225
+ # cetacean_classifier = AutoModelForImageClassification.from_pretrained(
226
+ # "Saving-Willy/cetacean-classifier",
227
+ # revision=classifier_revision,
228
+ # trust_remote_code=True)
229
+
230
+ # cetacean_just_classify(cetacean_classifier)
231
+ # st.session_state.workflow_fsm.complete_current_state()
232
+ # # trigger a refresh too (refreshhing the prog indicator means the script reruns and
233
+ # # we can enter the next state - visualising the results / review)
234
+ # # ok it doesn't if done programmatically. maybe interacting with teh button? check docs.
235
+ # refresh_progress_display()
236
+ # #TODO: validate this doesn't harm performance adversely.
237
+ # st.rerun()
238
+
239
+ # elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
240
+ # # show the results, and allow manual validation
241
+ # st.markdown("""### Inference results and manual validation/adjustment """)
242
+ # if st.session_state.MODE_DEV_STATEFUL:
243
+ # s = ""
244
+ # for k, v in st.session_state.whale_prediction1.items():
245
+ # s += f"* Image {k}: {v}\n"
246
+
247
+ # st.markdown(s)
248
+
249
+ # # add a button to advance the state
250
+ # if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
251
+ # st.session_state.workflow_fsm.complete_current_state()
252
+ # # -> manual_inspection_completed
253
+ # st.rerun()
254
+
255
+ # cetacean_show_results_and_review()
256
+
257
+ # elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
258
+ # # show the ML results, and allow the user to upload the observation
259
+ # st.markdown("""### Inference Results (after manual validation) """)
260
+
261
+
262
+ # if st.button("Upload all observations to THE INTERNET!"):
263
+ # # let this go through to the push_all func, since it just reports to log for now.
264
+ # push_all_observations(enable_push=False)
265
+ # st.session_state.workflow_fsm.complete_current_state()
266
+ # # -> data_uploaded
267
+ # st.rerun()
268
+
269
+ # cetacean_show_results()
270
+
271
+ # elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
272
+ # # the data has been sent. Lets show the observations again
273
+ # # but no buttons to upload (or greyed out ok)
274
+ # st.markdown("""### Observation(s) uploaded - thank you!""")
275
+ # cetacean_show_results()
276
+
277
+ # st.divider()
278
+ # #df = pd.DataFrame(st.session_state.observations, index=[0])
279
+ # df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
280
+ # st.table(df)
281
+
282
+ # # didn't decide what the next state is here - I think we are in the terminal state.
283
+ # #st.session_state.workflow_fsm.complete_current_state()
284
+
285
+
286
+ # # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
287
+ # # purposes, an hotdog image classifier) which will be run locally.
288
+ # # - this model predicts if the image is a hotdog or not, and returns probabilities
289
+ # # - the input image is the same as for the ceteacean classifier - defined in the sidebar
290
+ # tab_hotdogs.title("Hot Dog? Or Not?")
291
+ # tab_hotdogs.write("""
292
+ # *Run alternative classifer on input images. Here we are using
293
+ # a binary classifier - hotdog or not - from
294
+ # huggingface.co/julien-c/hotdog-not-hotdog.*""")
295
+
296
+ # if tab_hotdogs.button("Get Hotdog Prediction"):
297
+
298
+ # pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
299
+
300
+ # if st.session_state.image is None:
301
+ # st.info("Please upload an image first.")
302
+ # #st.info(str(observations.to_dict()))
303
+
304
+ # else:
305
+ # hotdog_classify(pipeline_hot_dog, tab_hotdogs)
306
+
307
+
308
+ # # after all other processing, we can show the stage/state
309
+ # refresh_progress_display()
310
+
311
+
312
+ if __name__ == "__main__":
313
+ main()
src/pages/1_🐋_about.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.set_page_config(
4
+ page_title="About",
5
+ page_icon="🐋",
6
+ )
7
+
8
+ st.markdown(
9
+ """
10
+ # About
11
+ We created this web app in [a hackathon](https://sdsc-hackathons.ch/projectPage?projectRef=vUt8BfDJXaAs0UfOesXI|XyWLFpqjq3CX3zrM4uz8).
12
+
13
+ This interface is a Proof of Concept of a Community-driven Research Data Infrastructure for the Cetacean Conservation Community.
14
+
15
+ Please reach out on [the project Github issues](https://github.com/sdsc-ordes/saving-willy/issues) for feedback, suggestions, or if you want to join the project.
16
+
17
+ # Open Source Resources
18
+
19
+ ## UI Code
20
+ - The [space is hosted on Hugging Face](https://huggingface.co/spaces/Saving-Willy/saving-willy-space).
21
+ - The [UI code is available on Github](https://github.com/sdsc-ordes/saving-willy).
22
+ - The [development space](https://huggingface.co/spaces/Saving-Willy/saving-willy-dev) is also hosted publically on Hugging Face.
23
+
24
+ ## The Machine Learning Models
25
+ - The [model](https://huggingface.co/Saving-Willy/cetacean-classifier) is hosted on Hugging Face.
26
+ - The [original Kaggle model code](https://github.com/knshnb/kaggle-happywhale-1st-place) is open on Github as well.
27
+
28
+ ## The Data
29
+
30
+ (temporary setup, a more stable database is probably desired.)
31
+ - The dataset is hosted on Hugging Face.
32
+ - The [dataset syncing code](https://github.com/vancauwe/saving-willy-data-sync) is available on Github.
33
+
34
+ # Credits and Thanks
35
+
36
+ ## Developers
37
+ - [Rob Mills](https://github.com/rmm-ch)
38
+ - [Laure Vancauwenberghe](https://github.com/vancauwe)
39
+
40
+ ## Special Thanks
41
+ - [EDMAKTUB](https://edmaktub.org) for their advice.
42
+ - [Swiss Data Science Center](https://www.datascience.ch) for [the hackathon that started the project](https://sdsc-hackathons.ch/projectPage?projectRef=vUt8BfDJXaAs0UfOesXI|XyWLFpqjq3CX3zrM4uz8).
43
+ - [HappyWhale](https://happywhale.com) for launching [the Kaggle challenge that led to model development](https://www.kaggle.com/competitions/happy-whale-and-dolphin).
44
+
45
+ """
46
+ )
src/pages/2_🌍_map.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import logging
3
+ from datasets import disable_caching
4
+ disable_caching()
5
+
6
+ st.set_page_config(
7
+ page_title="About",
8
+ page_icon="🌍",
9
+ layout="wide",
10
+ )
11
+
12
+ from maps.obs_map import add_obs_map_header
13
+ from maps.alps_map import present_alps_map
14
+ from maps.obs_map import present_obs_map
15
+
16
+ ############################################################
17
+ g_logger = logging.getLogger(__name__)
18
+ USE_BASIC_MAP = False
19
+ DEV_SIDEBAR_LIB = True
20
+ ############################################################
21
+
22
+ # visual structure: a couple of toggles at the top, then the map inlcuding a
23
+ # dropdown for tileset selection.
24
+ add_obs_map_header()
25
+ tab_map_ui_cols = st.columns(2)
26
+ with tab_map_ui_cols[0]:
27
+ show_db_points = st.toggle("Show Points from DB", True)
28
+ with tab_map_ui_cols[1]:
29
+ dbg_show_extra = st.toggle("Show Extra points (test)", False)
30
+
31
+ if show_db_points:
32
+ # show a nicer map, observations marked, tileset selectable.
33
+ st_observation = present_obs_map(dbg_show_extra=dbg_show_extra)
34
+ else:
35
+ # development map.
36
+ st_observation = present_alps_map()
src/pages/3_🤝_data requests.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.set_page_config(
4
+ page_title="Requests",
5
+ page_icon="🤝",
6
+ )
7
+
8
+ from dataset.data_requests import data_prep, show_new_data_view
9
+
10
+ st.title("Data Requests")
11
+ st.write("This page is ensure findability of data across the community.")
12
+ st.write("You can filter the metadata by longitude, latitude and date. You can select data from multiple actors, for multiple species and make a grouped request. " \
13
+ "The request for the relevant data will be adressed individually to each owner. ")
14
+
15
+ # Initialize the default data view
16
+ df = data_prep()
17
+
18
+ if 'checkbox_states' not in st.session_state:
19
+ st.session_state.checkbox_states = {}
20
+
21
+ if 'lat_range' not in st.session_state:
22
+ st.session_state.lat_range = (float(df['lat'].min()), float(df['lat'].max()))
23
+
24
+ if 'lon_range' not in st.session_state:
25
+ st.session_state.lon_range = (df['lon'].min(), df['lon'].max())
26
+
27
+ if 'date_range' not in st.session_state:
28
+ st.session_state.date_range = (df['date'].min(), df['date'].max())
29
+
30
+ # Request button at the bottom
31
+ if st.button("REQUEST DATA",
32
+ type="primary",
33
+ icon="🐚"):
34
+ selected = [k for k, v in st.session_state.checkbox_states.items() if v]
35
+ if selected:
36
+ st.success(f"Request submitted for: the specie {', '.join(selected)}")
37
+ else:
38
+ st.warning("No selections made.")
39
+
40
+ # Latitude range filter
41
+ lat_min, lat_max = float(df['lat'].min()), float(df['lat'].max())
42
+ lat_range = st.sidebar.slider(
43
+ "Latitude range",
44
+ min_value=float(df['lat'].min()),
45
+ max_value=float(df['lat'].max()),
46
+ value=st.session_state.get("lat_range", (df['lat'].min(), df['lat'].max()))
47
+ )
48
+ st.session_state.lat_range = lat_range
49
+
50
+ # Longitude range filter
51
+ lon_min, lon_max = float(df['lon'].min()), float(df['lon'].max())
52
+ lon_range = st.sidebar.slider(
53
+ "Longitude range",
54
+ min_value=float(df['lon'].min()),
55
+ max_value=float(df['lon'].max()),
56
+ value=st.session_state.get("lon_range", (df['lon'].min(), df['lon'].max()))
57
+ )
58
+ st.session_state.lon_range = lon_range
59
+ # Date range filter
60
+ date_range = st.sidebar.date_input(
61
+ "Date range",
62
+ value=st.session_state.get("date_range", (df['date'].min(), df['date'].max())),
63
+ min_value=df['date'].min(),
64
+ max_value=df['date'].max()
65
+ )
66
+ st.session_state.date_range = date_range
67
+
68
+ # Show authors per specie
69
+ show_new_data_view(df)
70
+
71
+
72
+
73
+
src/pages/4_🔥_classifiers.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import pandas as pd
4
+ import logging
5
+
6
+ st.set_page_config(
7
+ page_title="ML Models",
8
+ page_icon="πŸ”₯",
9
+ )
10
+
11
+ from utils.st_logs import init_logging_session_states
12
+
13
+ from transformers import pipeline
14
+ from transformers import AutoModelForImageClassification
15
+ from classifier.classifier_image import add_classifier_header
16
+
17
+ from input.input_handling import setup_input, check_inputs_are_set
18
+ from input.input_handling import init_input_container_states, add_input_UI_elements, init_input_data_session_states
19
+ from input.input_handling import dbg_show_observation_hashes
20
+
21
+ from utils.workflow_ui import refresh_progress_display, init_workflow_viz, init_workflow_session_states
22
+ from dataset.hf_push_observations import push_all_observations
23
+
24
+ from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results, init_classifier_session_states
25
+ from classifier.classifier_hotdog import hotdog_classify
26
+
27
+ ############################################################
28
+ classifier_name = "Saving-Willy/cetacean-classifier"
29
+ #classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
30
+ classifier_revision = 'main' # default/latest version
31
+ ############################################################
32
+
33
+ g_logger = logging.getLogger(__name__)
34
+ # setup for the ML model on huggingface (our wrapper)
35
+ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
36
+ # one toggle for all the extra debug text
37
+ if "MODE_DEV_STATEFUL" not in st.session_state:
38
+ st.session_state.MODE_DEV_STATEFUL = False
39
+
40
+ ############################################################
41
+
42
+
43
+ # Streamlit app
44
+ tab_inference, tab_hotdogs= \
45
+ st.tabs(["Cetecean classifier", "Hotdog classifier"])
46
+
47
+ # initialise various session state variables
48
+ init_logging_session_states() # logging init should be early
49
+ init_workflow_session_states()
50
+ init_input_data_session_states()
51
+ init_input_container_states()
52
+ init_workflow_viz()
53
+ init_classifier_session_states()
54
+
55
+ # put this early so the progress indicator is at the top (also refreshed at end)
56
+ refresh_progress_display()
57
+
58
+ # create a sidebar, and parse all the input (returned as `observations` object)
59
+ with st.sidebar:
60
+ # layout handling
61
+ add_input_UI_elements()
62
+ # input elements (file upload, text input, etc)
63
+ setup_input()
64
+
65
+ with tab_inference:
66
+ if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
67
+ # can we advance state? - only when all inputs are set for all uploaded files
68
+ all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
69
+ if all_inputs_set:
70
+ st.session_state.workflow_fsm.complete_current_state()
71
+ # -> data_entry_complete
72
+ else:
73
+ # button, disabled; no state change yet.
74
+ st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")
75
+
76
+
77
+ if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
78
+ # can we advance state? - only when the validate button is pressed
79
+ if st.sidebar.button(":white_check_mark:[**Validate**]"):
80
+ # create a dictionary with the submitted observation
81
+
82
+ g_logger.info(f"{st.session_state.observations}")
83
+
84
+ df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
85
+ # with tab_coords:
86
+ # st.table(df)
87
+
88
+ # now disable all the input boxes / widgets
89
+ st.session_state.input_disabled = True
90
+
91
+ # there doesn't seem to be any actual validation here?? TODO: find validator function (each element is validated by the input box, but is there something at the whole image level?)
92
+ # hmm, maybe it should actually just be "I'm done with data entry"
93
+ st.session_state.workflow_fsm.complete_current_state()
94
+ # -> data_entry_validated
95
+ st.rerun() # refresh so the input widgets are immediately disabled
96
+
97
+ if st.session_state.MODE_DEV_STATEFUL:
98
+ dbg_show_observation_hashes()
99
+
100
+ add_classifier_header()
101
+ # if we are before data_entry_validated, show the button, disabled.
102
+ if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
103
+ tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True,
104
+ help="Please validate inputs before proceeding",
105
+ key="button_infer_ceteans")
106
+
107
+ if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
108
+ # show the button, enabled. If pressed, we start the ML model (And advance state)
109
+ if tab_inference.button("Identify with cetacean classifier",
110
+ key="button_infer_ceteans"):
111
+ cetacean_classifier = AutoModelForImageClassification.from_pretrained(
112
+ classifier_name,
113
+ revision=classifier_revision,
114
+ trust_remote_code=True)
115
+
116
+ cetacean_just_classify(cetacean_classifier)
117
+ st.session_state.workflow_fsm.complete_current_state()
118
+ # trigger a refresh too (refreshhing the prog indicator means the script reruns and
119
+ # we can enter the next state - visualising the results / review)
120
+ # ok it doesn't if done programmatically. maybe interacting with teh button? check docs.
121
+ refresh_progress_display()
122
+ #TODO: validate this doesn't harm performance adversely.
123
+ st.rerun()
124
+
125
+ elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
126
+ # show the results, and allow manual validation
127
+ st.markdown("""### Inference results and manual validation/adjustment """)
128
+ if st.session_state.MODE_DEV_STATEFUL:
129
+ s = ""
130
+ for k, v in st.session_state.whale_prediction1.items():
131
+ s += f"* Image {k}: {v}\n"
132
+
133
+ st.markdown(s)
134
+
135
+ # add a button to advance the state
136
+ if st.button("I have looked over predictions and confirm correct species", icon= "👀",
137
+ type="primary",
138
+ help="Confirm that all species are selected correctly"):
139
+ st.session_state.workflow_fsm.complete_current_state()
140
+ # -> manual_inspection_completed
141
+ st.rerun()
142
+
143
+ cetacean_show_results_and_review()
144
+
145
+ elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
146
+ # show the ML results, and allow the user to upload the observation
147
+ st.markdown("""### Inference Results (after manual validation) """)
148
+
149
+
150
+ if st.button("Upload all observations to THE INTERNET!", icon= "⬆️",
151
+ type="primary",):
152
+ # let this go through to the push_all func, since it just reports to log for now.
153
+ push_all_observations(enable_push=False)
154
+ st.session_state.workflow_fsm.complete_current_state()
155
+ # -> data_uploaded
156
+ st.rerun()
157
+
158
+ cetacean_show_results()
159
+
160
+ elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
161
+ # the data has been sent. Lets show the observations again
162
+ # but no buttons to upload (or greyed out ok)
163
+ st.markdown("""### Observation(s) uploaded - thank you!""")
164
+ cetacean_show_results()
165
+
166
+ st.divider()
167
+ df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
168
+ st.table(df)
169
+
170
+ # didn't decide what the next state is here - I think we are in the terminal state.
171
+ #st.session_state.workflow_fsm.complete_current_state()
172
+
173
+
174
+ with tab_hotdogs:
175
+ # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
176
+ # purposes, an hotdog image classifier) which will be run locally.
177
+ # - this model predicts if the image is a hotdog or not, and returns probabilities
178
+ # - the input image is the same as for the ceteacean classifier - defined in the sidebar
179
+ tab_hotdogs.title("Hot Dog? Or Not?")
180
+ tab_hotdogs.write("""
181
+ *Run alternative classifer on input images. Here we are using
182
+ a binary classifier - hotdog or not - from
183
+ huggingface.co/julien-c/hotdog-not-hotdog.*""")
184
+
185
+ if tab_hotdogs.button("Get Hotdog Prediction"):
186
+
187
+ pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
188
+
189
+ if st.session_state.image is None:
190
+ st.info("Please upload an image first.")
191
+ #st.info(str(observations.to_dict()))
192
+
193
+ else:
194
+ hotdog_classify(pipeline_hot_dog, tab_hotdogs)
195
+
196
+
197
+ # after all other processing, we can show the stage/state
198
+ refresh_progress_display()
src/pages/5_πŸ“_benchmarking.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.set_page_config(
4
+ page_title="Benchmarking",
5
+ page_icon="πŸ“",
6
+ layout="wide",
7
+ )
8
+
9
+ st.title("Benchmark of ML models")
10
+
11
+ st.write("All credits go to the original Leaderboard on hugging face: https://huggingface.co/spaces/opencompass/opencompass-llm-leaderboard"
12
+ )
13
+ st.write("This image serves as a pure placeholder to illustrate benchmarking possibilities.")
14
+
15
+ st.image("src/images/design/leaderboard.png", caption="Benchmarking models")
src/pages/6_🏆_challenges.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.set_page_config(
4
+ page_title="Challenges",
5
+ page_icon="🏆",
6
+ layout="wide",
7
+ )
8
+
9
+ st.title("Research Challenges (Kaggle)")
10
+
11
+ st.write("Working together to innovate is essential. Here are the current and past challenges on Kaggle organized around cetacean conservation.")
12
+
13
+ st.link_button("Click here for the full challenge.",
14
+ url = "https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.kaggle.com/competitions/happy-whale-and-dolphin&ved=2ahUKEwiIoPjCicaMAxVrzgIHHaDYH6MQFnoECAoQAQ&usg=AOvVaw3Cl2cK7ZwU_jTyDeA5Yg1m"
15
+ )
16
+ st.image("src/images/design/challenge2.png",
17
+ caption= "Ted Cheeseman, Ken Southerland, Walter Reade, and Addison Howard. Happywhale - Whale and Dolphin Identification. https://kaggle.com/competitions/happy-whale-and-dolphin, 2022. Kaggle.")
18
+
19
+
20
+ st.link_button("Click here for the full challenge.",
21
+ url="https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.kaggle.com/competitions/humpback-whale-identification&ved=2ahUKEwiIoPjCicaMAxVrzgIHHaDYH6MQFnoECB8QAQ&usg=AOvVaw0IdiKQC3GpODtI-fBt-yV3"
22
+ )
23
+ st.image("src/images/design/challenge1.png",
24
+ caption ="Addison Howard, inversion, Ken Southerland, and Ted Cheeseman. Humpback Whale Identification. https://kaggle.com/competitions/humpback-whale-identification, 2018. Kaggle.")
src/pages/7_🌊_gallery.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.set_page_config(
4
+ page_title="ML Models",
5
+ page_icon="🌊",
6
+ layout="wide",
7
+ )
8
+ from utils.st_logs import parse_log_buffer, init_logging_session_states
9
+
10
+ import whale_gallery as gallery
11
+ import whale_viewer as viewer
12
+
13
+ # here we make a container to allow filtering css properties
14
+ # specific to the gallery (otherwise we get side effects)
15
+ tg_cont = st.container(key="swgallery")
16
+ with tg_cont:
17
+ gallery.render_whale_gallery(n_cols=4)
src/pages/8_🚧_coordinates.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import folium
3
+ from streamlit_folium import st_folium
4
+
5
+ st.set_page_config(
6
+ page_title="Coordinates",
7
+ page_icon="🚧",
8
+ layout="wide",
9
+ )
10
+
11
+ # the goal of this tab is to allow selection of the new observation's location by map click/adjust.
12
+ st.markdown("Coming later! :construction:")
13
+ st.markdown(
14
+ """*The goal is to allow interactive definition for the coordinates of a new
15
+ observation, by click/drag points on the map.*""")
16
+
17
+
18
+ st.write("Click on the map to capture a location.")
19
+ #m = folium.Map(location=visp_loc, zoom_start=7)
20
+ mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
21
+ folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
22
+ ).add_to(mm)
23
+
24
+ st_data2 = st_folium(mm, width=725)
25
+ st.write("below the map...")
26
+ if st_data2['last_clicked'] is not None:
27
+ print(st_data2)
28
+ st.info(st_data2['last_clicked'])
src/pages/📊_logs.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+
4
+ st.set_page_config(
5
+ page_title="Logs",
6
+ page_icon="📊",
7
+ )
8
+
9
+ from utils.st_logs import parse_log_buffer
10
+
11
+ handler = st.session_state['handler']
12
+ if handler is not None:
13
+ records = parse_log_buffer(handler.buffer)
14
+ st.dataframe(records[::-1], use_container_width=True,)
15
+ st.info(f"Length of records: {len(records)}")
16
+ else:
17
+ st.error("⚠️ No log handler found!")
src/utils/metadata_handler.py CHANGED
@@ -11,10 +11,11 @@ def metadata2md(image_hash:str, debug:bool=False) -> str:
11
  str: Markdown-formatted key-value list of metadata
12
 
13
  """
 
14
  markdown_str = "\n"
15
  keys_to_print = ["author_email", "latitude", "longitude", "date", "time"]
16
  if debug:
17
- keys_to_print += ["iamge_md5", "selected_class", "top_prediction", "class_overriden"]
18
 
19
  observation = st.session_state.public_observations.get(image_hash, {})
20
 
 
11
  str: Markdown-formatted key-value list of metadata
12
 
13
  """
14
+ print(debug)
15
  markdown_str = "\n"
16
  keys_to_print = ["author_email", "latitude", "longitude", "date", "time"]
17
  if debug:
18
+ keys_to_print += ["image_md5", "selected_class", "top_prediction", "class_overriden"]
19
 
20
  observation = st.session_state.public_observations.get(image_hash, {})
21
 
src/utils/workflow_ui.py CHANGED
@@ -9,6 +9,11 @@ def init_workflow_session_states():
9
  if "workflow_fsm" not in st.session_state:
10
  # create and init the state machine
11
  st.session_state.workflow_fsm = WorkflowFSM(FSM_STATES)
 
 
 
 
 
12
 
13
  def refresh_progress_display() -> None:
14
  """
 
9
  if "workflow_fsm" not in st.session_state:
10
  # create and init the state machine
11
  st.session_state.workflow_fsm = WorkflowFSM(FSM_STATES)
12
+
13
+ if "input_disabled" not in st.session_state:
14
+ # after workflow reaches some stage, disable chance to change inputs
15
+ st.session_state.input_disabled = False
16
+
17
 
18
  def refresh_progress_display() -> None:
19
  """
src/whale_viewer.py CHANGED
@@ -157,4 +157,6 @@ def display_whale(whale_classes:List[str], i:int, viewcontainer:DeltaGenerator=N
157
  image_path = os.path.join(current_dir, "src/images/references/")
158
  image = Image.open(image_path + df_whale_img_ref.loc[whale_classes[i], "WHALE_IMAGES"])
159
 
160
- viewcontainer.image(image, caption=df_whale_img_ref.loc[whale_classes[i], "WHALE_REFERENCES"], use_column_width=True)
 
 
 
157
  image_path = os.path.join(current_dir, "src/images/references/")
158
  image = Image.open(image_path + df_whale_img_ref.loc[whale_classes[i], "WHALE_IMAGES"])
159
 
160
+ viewcontainer.image(image,
161
+ caption=df_whale_img_ref.loc[whale_classes[i], "WHALE_REFERENCES"],
162
+ use_column_width=True)
tests/{test_obs_map.py β†’ test_dataset_download.py} RENAMED
@@ -1,6 +1,6 @@
1
  import pytest
2
  from unittest.mock import patch, MagicMock
3
- from maps.obs_map import try_download_dataset
4
 
5
  # tests for try_download_dataset
6
  # - the main aim here is to mock the function load_dataset which makes external HTTP requests,
@@ -9,10 +9,11 @@ from maps.obs_map import try_download_dataset
9
  # is the return value, which should have similar form but change according to if an exception was raised or not
10
  # since this function uses st and m_logger to keep track of the download status, we need to mock them too
11
 
12
- @patch('maps.obs_map.load_dataset')
13
- @patch('maps.obs_map.st')
14
- @patch('maps.obs_map.m_logger')
15
- def test_try_download_dataset_success(mock_logger, mock_st, mock_load_dataset):
 
16
  # Mock the return value of load_dataset
17
  mock_load_dataset.return_value = {'train': {'latitude': [1], 'longitude': [2], 'predicted_class': ['whale']}}
18
 
@@ -25,13 +26,11 @@ def test_try_download_dataset_success(mock_logger, mock_st, mock_load_dataset):
25
  mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
26
  assert result == {'train': {'latitude': [1], 'longitude': [2], 'predicted_class': ['whale']}}
27
  mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
28
- mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
29
 
30
 
31
- @patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
32
- @patch('maps.obs_map.st')
33
- @patch('maps.obs_map.m_logger')
34
- def test_try_download_dataset_failure_known(mock_logger, mock_st, mock_load_dataset):
35
  # testing the case where we've found (can reproduce by removing network connection)
36
  dataset_id = "test_dataset"
37
  data_files = "test_file"
@@ -41,15 +40,12 @@ def test_try_download_dataset_failure_known(mock_logger, mock_st, mock_load_data
41
  mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
42
  mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
43
  mock_logger.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s).")
44
- mock_st.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s).")
45
  assert result == {}
46
  mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
47
- mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
48
 
49
- @patch('maps.obs_map.load_dataset', side_effect=Exception("Download engine corrupt"))
50
- @patch('maps.obs_map.st')
51
- @patch('maps.obs_map.m_logger')
52
- def test_try_download_dataset_failure_unknown(mock_logger, mock_st, mock_load_dataset):
53
  # the cases we haven't found, but should still be handled (maybe network error, etc)
54
  dataset_id = "test_dataset"
55
  data_files = "test_file"
@@ -59,7 +55,5 @@ def test_try_download_dataset_failure_unknown(mock_logger, mock_st, mock_load_da
59
  mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
60
  mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
61
  mock_logger.error.assert_called_with("!!Unknown Error!! downloading dataset: Download engine corrupt. (after 0.00s).")
62
- mock_st.error.assert_called_with("!!Unknown Error!! downloading dataset: Download engine corrupt. (after 0.00s).")
63
  assert result == {}
64
  mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
65
- mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
 
1
  import pytest
2
  from unittest.mock import patch, MagicMock
3
+ from dataset.download import try_download_dataset
4
 
5
  # tests for try_download_dataset
6
  # - the main aim here is to mock the function load_dataset which makes external HTTP requests,
 
9
  # is the return value, which should have similar form but change according to if an exception was raised or not
10
  # since this function uses st and m_logger to keep track of the download status, we need to mock them too
11
 
12
+ #@patch('maps.obs_map.load_dataset')
13
+ #@patch('maps.obs_map.m_logger')
14
+ @patch('dataset.download.load_dataset')
15
+ @patch('dataset.download.m_logger')
16
+ def test_try_download_dataset_success(mock_logger, mock_load_dataset):
17
  # Mock the return value of load_dataset
18
  mock_load_dataset.return_value = {'train': {'latitude': [1], 'longitude': [2], 'predicted_class': ['whale']}}
19
 
 
26
  mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
27
  assert result == {'train': {'latitude': [1], 'longitude': [2], 'predicted_class': ['whale']}}
28
  mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
 
29
 
30
 
31
+ @patch('dataset.download.load_dataset', side_effect=ValueError("Download failed"))
32
+ @patch('dataset.download.m_logger')
33
+ def test_try_download_dataset_failure_known(mock_logger, mock_load_dataset):
 
34
  # testing the case where we've found (can reproduce by removing network connection)
35
  dataset_id = "test_dataset"
36
  data_files = "test_file"
 
40
  mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
41
  mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
42
  mock_logger.error.assert_called_with("Error downloading dataset: Download failed. (after 0.00s).")
 
43
  assert result == {}
44
  mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
 
45
 
46
+ @patch('dataset.download.load_dataset', side_effect=Exception("Download engine corrupt"))
47
+ @patch('dataset.download.m_logger')
48
+ def test_try_download_dataset_failure_unknown(mock_logger, mock_load_dataset):
 
49
  # the cases we haven't found, but should still be handled (maybe network error, etc)
50
  dataset_id = "test_dataset"
51
  data_files = "test_file"
 
55
  mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
56
  mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
57
  mock_logger.error.assert_called_with("!!Unknown Error!! downloading dataset: Download engine corrupt. (after 0.00s).")
 
58
  assert result == {}
59
  mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
 
tests/test_demo_input_sidebar.py CHANGED
@@ -262,10 +262,10 @@ def test_two_input_files_realdata(mock_file_rv: MagicMock, mock_uploadedFile_Lis
262
  # and then there are plenty of visual elements, based on the image hashes.
263
  for hash in at.session_state.image_hashes:
264
  # check that each of the 4 inputs is present
265
- assert at.sidebar.text_input(key=f"input_latitude_{hash}") is not None
266
- assert at.sidebar.text_input(key=f"input_longitude_{hash}") is not None
267
- assert at.sidebar.date_input(key=f"input_date_{hash}") is not None
268
- assert at.sidebar.time_input(key=f"input_time_{hash}") is not None
269
 
270
  if 'demo_input_sidebar' in SCRIPT_UNDER_TEST:
271
  verify_metadata_in_demo_display(at, num_files)
 
262
  # and then there are plenty of visual elements, based on the image hashes.
263
  for hash in at.session_state.image_hashes:
264
  # check that each of the 4 inputs is present
265
+ assert at.sidebar.text_input(key=f"input_latitude_anchor_{hash}") is not None
266
+ assert at.sidebar.text_input(key=f"input_longitude_anchor_{hash}") is not None
267
+ assert at.sidebar.date_input(key=f"input_date_anchor_{hash}") is not None
268
+ assert at.sidebar.time_input(key=f"input_time_anchor_{hash}") is not None
269
 
270
  if 'demo_input_sidebar' in SCRIPT_UNDER_TEST:
271
  verify_metadata_in_demo_display(at, num_files)