vancauwe commited on
Commit
2f87aad
Β·
1 Parent(s): 828f42d

feat: split content into pages; gallery and map pages good to go

Browse files
requirements.txt CHANGED
@@ -28,8 +28,13 @@ pillow==10.4.0
28
  opencv-python-headless==4.5.5.64
29
  albumentations==1.1.0
30
 
 
 
 
31
  # documentation: mkdocs
32
  mkdocs~=1.6.0
33
  mkdocstrings[python]>=0.25.1
34
  mkdocs-material~=9.5.27
35
  mkdocs-homepage-copier~=1.0.0
 
 
 
28
  opencv-python-headless==4.5.5.64
29
  albumentations==1.1.0
30
 
31
+ # for states
32
+ transitions~=0.9.0
33
+
34
  # documentation: mkdocs
35
  mkdocs~=1.6.0
36
  mkdocstrings[python]>=0.25.1
37
  mkdocs-material~=9.5.27
38
  mkdocs-homepage-copier~=1.0.0
39
+
40
+
src/home.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit entry page: initialises session state and shows the welcome screen."""
import streamlit as st
import os  # kept: part of the file's existing import surface
import sys

import logging

# Streamlit requires set_page_config to be the first st.* call in the script.
st.set_page_config(
    page_title="Home",
    page_icon="🐳",
)

# one toggle for all the extra debug text
if "MODE_DEV_STATEFUL" not in st.session_state:
    st.session_state.MODE_DEV_STATEFUL = False

# get a global var for logger accessor in this module
LOG_LEVEL = logging.DEBUG
g_logger = logging.getLogger(__name__)
g_logger.setLevel(LOG_LEVEL)

# NOTE: project imports are deliberately deferred until after set_page_config,
# since the imported modules may issue streamlit calls of their own.
from utils.st_logs import init_logging_session_states
from utils.workflow_ui import init_workflow_session_states, init_workflow_viz
from input.input_handling import init_input_container_states, init_input_data_session_states
from classifier.classifier_image import init_classifier_session_states

# initialise various session state variables
init_logging_session_states() # logging init should be early
init_workflow_session_states()
init_input_data_session_states()
init_input_container_states()
init_workflow_viz()
init_classifier_session_states()

st.write("# Welcome to Cetacean Research Data Infrastructure! πŸ¬ΛšΛ–π“’Φ΄ΰ»‹ πŸ‹βœ§Λš.⋆")

st.sidebar.success("Here are the pages.")

st.markdown(
    """
    About: blablabla
    """
)

g_logger.info("App started.")
# fix: use sys.version directly; `os.sys` is an accidental re-export of the
# sys module inside os, not part of the public os API.
# (lazy %-args keep formatting off the hot path per logging best practice)
g_logger.warning("[D] Streamlit version: %s. Python version: %s",
                 st.__version__, sys.version)

#g_logger.debug("debug message")
#g_logger.info("info message")
#g_logger.warning("warning message")
src/input/input_handling.py CHANGED
@@ -424,7 +424,7 @@ def add_input_UI_elements() -> None:
424
  # which are not created in the same order.
425
 
426
  st.divider()
427
- st.title("Input image and data")
428
 
429
  # create and style a container for the file uploader/other one-off inputs
430
  st.markdown('<style>.st-key-container_file_uploader_id { border: 1px solid skyblue; border-radius: 5px; }</style>', unsafe_allow_html=True)
 
424
  # which are not created in the same order.
425
 
426
  st.divider()
427
+ st.title("Input your images")
428
 
429
  # create and style a container for the file uploader/other one-off inputs
430
  st.markdown('<style>.st-key-container_file_uploader_id { border: 1px solid skyblue; border-radius: 5px; }</style>', unsafe_allow_html=True)
src/main.py DELETED
@@ -1,16 +0,0 @@
1
- import streamlit as st
2
-
3
- st.set_page_config(
4
- page_title="Home",
5
- page_icon="🐳",
6
- )
7
-
8
- st.write("# Welcome to Cetacean Research Data Infrastructure! πŸ¬ΛšΛ–π“’Φ΄ΰ»‹ πŸ‹βœ§Λš.⋆")
9
-
10
- st.sidebar.success("Here are the pages.")
11
-
12
- st.markdown(
13
- """
14
- About: blablabla
15
- """
16
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/maps/obs_map.py CHANGED
@@ -197,7 +197,7 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
197
  _df = pd.DataFrame({
198
  'lat': metadata["train"]["latitude"],
199
  'lon': metadata["train"]["longitude"],
200
- 'species': metadata["train"]["predicted_class"],}
201
  )
202
 
203
  if dbg_show_extra:
 
197
  _df = pd.DataFrame({
198
  'lat': metadata["train"]["latitude"],
199
  'lon': metadata["train"]["longitude"],
200
+ 'species': metadata["train"]["selected_class"],}
201
  )
202
 
203
  if dbg_show_extra:
src/old_main.py CHANGED
@@ -6,60 +6,54 @@ import streamlit as st
6
  import folium
7
  from streamlit_folium import st_folium
8
 
9
- from transformers import pipeline
10
- from transformers import AutoModelForImageClassification
11
 
12
- from maps.obs_map import add_obs_map_header
13
- from classifier.classifier_image import add_classifier_header
14
- from datasets import disable_caching
15
- disable_caching()
16
 
17
- import whale_gallery as gallery
18
- import whale_viewer as viewer
19
- from input.input_handling import setup_input, check_inputs_are_set
20
- from input.input_handling import init_input_container_states, add_input_UI_elements, init_input_data_session_states
21
- from input.input_handling import dbg_show_observation_hashes
22
 
23
- from maps.alps_map import present_alps_map
24
- from maps.obs_map import present_obs_map
25
- from utils.st_logs import parse_log_buffer, init_logging_session_states
26
- from utils.workflow_ui import refresh_progress_display, init_workflow_viz, init_workflow_session_states
27
- from hf_push_observations import push_all_observations
28
 
29
- from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results, init_classifier_session_states
30
- from classifier.classifier_hotdog import hotdog_classify
 
 
 
31
 
 
 
32
 
33
- # setup for the ML model on huggingface (our wrapper)
34
- os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
 
35
  #classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
36
- classifier_revision = 'main' # default/latest version
37
- # and the dataset of observations (hf dataset in our space)
38
- dataset_id = "Saving-Willy/temp_dataset"
39
- data_files = "data/train-00000-of-00001.parquet"
40
 
41
- USE_BASIC_MAP = False
42
- DEV_SIDEBAR_LIB = True
43
 
44
- # one toggle for all the extra debug text
45
- if "MODE_DEV_STATEFUL" not in st.session_state:
46
- st.session_state.MODE_DEV_STATEFUL = False
47
 
48
 
49
  # get a global var for logger accessor in this module
50
- LOG_LEVEL = logging.DEBUG
51
- g_logger = logging.getLogger(__name__)
52
- g_logger.setLevel(LOG_LEVEL)
 
 
53
 
54
- st.set_page_config(layout="wide")
55
 
56
- # initialise various session state variables
57
- init_logging_session_states() # logging init should be early
58
- init_workflow_session_states()
59
- init_input_data_session_states()
60
- init_input_container_states()
61
- init_workflow_viz()
62
- init_classifier_session_states()
63
 
64
 
65
  def main() -> None:
@@ -82,87 +76,87 @@ def main() -> None:
82
 
83
  """
84
 
85
- g_logger.info("App started.")
86
- g_logger.warning(f"[D] Streamlit version: {st.__version__}. Python version: {os.sys.version}")
87
 
88
  #g_logger.debug("debug message")
89
  #g_logger.info("info message")
90
  #g_logger.warning("warning message")
91
 
92
  # Streamlit app
93
- tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
94
- st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
95
 
96
- # put this early so the progress indicator is at the top (also refreshed at end)
97
- refresh_progress_display()
98
 
99
- # create a sidebar, and parse all the input (returned as `observations` object)
100
- with st.sidebar:
101
- # layout handling
102
- add_input_UI_elements()
103
- # input elements (file upload, text input, etc)
104
- setup_input()
105
 
106
 
107
- with tab_map:
108
- # visual structure: a couple of toggles at the top, then the map inlcuding a
109
- # dropdown for tileset selection.
110
- add_obs_map_header()
111
- tab_map_ui_cols = st.columns(2)
112
- with tab_map_ui_cols[0]:
113
- show_db_points = st.toggle("Show Points from DB", True)
114
- with tab_map_ui_cols[1]:
115
- dbg_show_extra = st.toggle("Show Extra points (test)", False)
116
 
117
- if show_db_points:
118
- # show a nicer map, observations marked, tileset selectable.
119
- st_observation = present_obs_map(
120
- dataset_id=dataset_id, data_files=data_files,
121
- dbg_show_extra=dbg_show_extra)
122
 
123
- else:
124
- # development map.
125
- st_observation = present_alps_map()
126
 
127
 
128
- with tab_log:
129
- handler = st.session_state['handler']
130
- if handler is not None:
131
- records = parse_log_buffer(handler.buffer)
132
- st.dataframe(records[::-1], use_container_width=True,)
133
- st.info(f"Length of records: {len(records)}")
134
- else:
135
- st.error("⚠️ No log handler found!")
136
 
137
 
138
 
139
- with tab_coords:
140
- # the goal of this tab is to allow selection of the new obsvation's location by map click/adjust.
141
- st.markdown("Coming later! :construction:")
142
- st.markdown(
143
- """*The goal is to allow interactive definition for the coordinates of a new
144
- observation, by click/drag points on the map.*""")
145
 
146
 
147
- st.write("Click on the map to capture a location.")
148
- #m = folium.Map(location=visp_loc, zoom_start=7)
149
- mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
150
- folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
151
- ).add_to(mm)
152
-
153
- st_data2 = st_folium(mm, width=725)
154
- st.write("below the map...")
155
- if st_data2['last_clicked'] is not None:
156
- print(st_data2)
157
- st.info(st_data2['last_clicked'])
158
-
159
-
160
- with tab_gallery:
161
- # here we make a container to allow filtering css properties
162
- # specific to the gallery (otherwise we get side effects)
163
- tg_cont = st.container(key="swgallery")
164
- with tg_cont:
165
- gallery.render_whale_gallery(n_cols=4)
166
 
167
 
168
  # state handling re data_entry phases
@@ -173,30 +167,30 @@ def main() -> None:
173
  # - at this point do we also want to disable changes to the metadata selectors?
174
  # anyway, simple first.
175
 
176
- if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
177
- # can we advance state? - only when all inputs are set for all uploaded files
178
- all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
179
- if all_inputs_set:
180
- st.session_state.workflow_fsm.complete_current_state()
181
- # -> data_entry_complete
182
- else:
183
- # button, disabled; no state change yet.
184
- st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")
185
 
186
 
187
- if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
188
- # can we advance state? - only when the validate button is pressed
189
- if st.sidebar.button(":white_check_mark:[**Validate**]"):
190
- # create a dictionary with the submitted observation
191
- tab_log.info(f"{st.session_state.observations}")
192
- df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
193
- #df = pd.DataFrame(st.session_state.observations, index=[0])
194
- with tab_coords:
195
- st.table(df)
196
- # there doesn't seem to be any actual validation here?? TODO: find validator function (each element is validated by the input box, but is there something at the whole image level?)
197
- # hmm, maybe it should actually just be "I'm done with data entry"
198
- st.session_state.workflow_fsm.complete_current_state()
199
- # -> data_entry_validated
200
 
201
  # state handling re inference phases (tab_inference)
202
  # 3. validation button pressed, validation done -> enable the inference button.
@@ -205,114 +199,114 @@ def main() -> None:
205
  # 5. ML done -> show results, and manual validation options
206
  # 6. manual validation done -> enable the upload buttons
207
  #
208
- with tab_inference:
209
- # inside the inference tab, on button press we call the model (on huggingface hub)
210
- # which will be run locally.
211
- # - the model predicts the top 3 most likely species from the input image
212
- # - these species are shown
213
- # - the user can override the species prediction using the dropdown
214
- # - an observation is uploaded if the user chooses.
215
 
216
 
217
- if st.session_state.MODE_DEV_STATEFUL:
218
- dbg_show_observation_hashes()
219
-
220
- add_classifier_header()
221
- # if we are before data_entry_validated, show the button, disabled.
222
- if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
223
- tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True,
224
- help="Please validate inputs before proceeding",
225
- key="button_infer_ceteans")
226
 
227
- if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
228
- # show the button, enabled. If pressed, we start the ML model (And advance state)
229
- if tab_inference.button("Identify with cetacean classifier",
230
- key="button_infer_ceteans"):
231
- cetacean_classifier = AutoModelForImageClassification.from_pretrained(
232
- "Saving-Willy/cetacean-classifier",
233
- revision=classifier_revision,
234
- trust_remote_code=True)
235
-
236
- cetacean_just_classify(cetacean_classifier)
237
- st.session_state.workflow_fsm.complete_current_state()
238
- # trigger a refresh too (refreshhing the prog indicator means the script reruns and
239
- # we can enter the next state - visualising the results / review)
240
- # ok it doesn't if done programmatically. maybe interacting with teh button? check docs.
241
- refresh_progress_display()
242
- #TODO: validate this doesn't harm performance adversely.
243
- st.rerun()
244
 
245
- elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
246
- # show the results, and allow manual validation
247
- st.markdown("""### Inference results and manual validation/adjustment """)
248
- if st.session_state.MODE_DEV_STATEFUL:
249
- s = ""
250
- for k, v in st.session_state.whale_prediction1.items():
251
- s += f"* Image {k}: {v}\n"
252
 
253
- st.markdown(s)
254
 
255
- # add a button to advance the state
256
- if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
257
- st.session_state.workflow_fsm.complete_current_state()
258
- # -> manual_inspection_completed
259
- st.rerun()
260
 
261
- cetacean_show_results_and_review()
262
 
263
- elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
264
- # show the ML results, and allow the user to upload the observation
265
- st.markdown("""### Inference Results (after manual validation) """)
266
 
267
 
268
- if st.button("Upload all observations to THE INTERNET!"):
269
- # let this go through to the push_all func, since it just reports to log for now.
270
- push_all_observations(enable_push=False)
271
- st.session_state.workflow_fsm.complete_current_state()
272
- # -> data_uploaded
273
- st.rerun()
274
-
275
- cetacean_show_results()
276
 
277
- elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
278
- # the data has been sent. Lets show the observations again
279
- # but no buttons to upload (or greyed out ok)
280
- st.markdown("""### Observation(s) uploaded - thank you!""")
281
- cetacean_show_results()
282
-
283
- st.divider()
284
- #df = pd.DataFrame(st.session_state.observations, index=[0])
285
- df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
286
- st.table(df)
287
-
288
- # didn't decide what the next state is here - I think we are in the terminal state.
289
- #st.session_state.workflow_fsm.complete_current_state()
290
 
291
 
292
- # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
293
- # purposes, an hotdog image classifier) which will be run locally.
294
- # - this model predicts if the image is a hotdog or not, and returns probabilities
295
- # - the input image is the same as for the ceteacean classifier - defined in the sidebar
296
- tab_hotdogs.title("Hot Dog? Or Not?")
297
- tab_hotdogs.write("""
298
- *Run alternative classifer on input images. Here we are using
299
- a binary classifier - hotdog or not - from
300
- huggingface.co/julien-c/hotdog-not-hotdog.*""")
301
-
302
- if tab_hotdogs.button("Get Hotdog Prediction"):
303
 
304
- pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
305
 
306
- if st.session_state.image is None:
307
- st.info("Please upload an image first.")
308
- #st.info(str(observations.to_dict()))
309
 
310
- else:
311
- hotdog_classify(pipeline_hot_dog, tab_hotdogs)
312
 
313
 
314
- # after all other processing, we can show the stage/state
315
- refresh_progress_display()
316
 
317
 
318
  if __name__ == "__main__":
 
6
  import folium
7
  from streamlit_folium import st_folium
8
 
9
+ # from transformers import pipeline
10
+ # from transformers import AutoModelForImageClassification
11
 
12
+ # from maps.obs_map import add_obs_map_header
 
 
 
13
 
14
+ # from datasets import disable_caching
15
+ # disable_caching()
 
 
 
16
 
17
+ # import whale_gallery as gallery
18
+ # import whale_viewer as viewer
19
+ # from input.input_handling import setup_input, check_inputs_are_set
20
+ # from input.input_handling import init_input_container_states, add_input_UI_elements, init_input_data_session_states
21
+ # from input.input_handling import dbg_show_observation_hashes
22
 
23
+ # from maps.alps_map import present_alps_map
24
+ # from maps.obs_map import present_obs_map
25
+ # from utils.st_logs import parse_log_buffer, init_logging_session_states
26
+ # from utils.workflow_ui import refresh_progress_display, init_workflow_viz, init_workflow_session_states
27
+ # from hf_push_observations import push_all_observations
28
 
29
+ # from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results, init_classifier_session_states
30
+ # from classifier.classifier_hotdog import hotdog_classify
31
 
32
+
33
+ # # setup for the ML model on huggingface (our wrapper)
34
+ # os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
35
  #classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
36
+ # classifier_revision = 'main' # default/latest version
37
+ # # and the dataset of observations (hf dataset in our space)
38
+ # dataset_id = "Saving-Willy/temp_dataset"
39
+ # data_files = "data/train-00000-of-00001.parquet"
40
 
41
+ # USE_BASIC_MAP = False
42
+ # DEV_SIDEBAR_LIB = True
43
 
44
+ # # one toggle for all the extra debug text
45
+ # if "MODE_DEV_STATEFUL" not in st.session_state:
46
+ # st.session_state.MODE_DEV_STATEFUL = False
47
 
48
 
49
  # get a global var for logger accessor in this module
50
+ # LOG_LEVEL = logging.DEBUG
51
+ # g_logger = logging.getLogger(__name__)
52
+ # g_logger.setLevel(LOG_LEVEL)
53
+
54
+ # st.set_page_config(layout="wide")
55
 
 
56
 
 
 
 
 
 
 
 
57
 
58
 
59
  def main() -> None:
 
76
 
77
  """
78
 
79
+ # g_logger.info("App started.")
80
+ # g_logger.warning(f"[D] Streamlit version: {st.__version__}. Python version: {os.sys.version}")
81
 
82
  #g_logger.debug("debug message")
83
  #g_logger.info("info message")
84
  #g_logger.warning("warning message")
85
 
86
  # Streamlit app
87
+ # tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
88
+ # st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
89
 
90
+ # # put this early so the progress indicator is at the top (also refreshed at end)
91
+ # refresh_progress_display()
92
 
93
+ # # create a sidebar, and parse all the input (returned as `observations` object)
94
+ # with st.sidebar:
95
+ # # layout handling
96
+ # add_input_UI_elements()
97
+ # # input elements (file upload, text input, etc)
98
+ # setup_input()
99
 
100
 
101
+ # with tab_map:
102
+ # # visual structure: a couple of toggles at the top, then the map inlcuding a
103
+ # # dropdown for tileset selection.
104
+ # add_obs_map_header()
105
+ # tab_map_ui_cols = st.columns(2)
106
+ # with tab_map_ui_cols[0]:
107
+ # show_db_points = st.toggle("Show Points from DB", True)
108
+ # with tab_map_ui_cols[1]:
109
+ # dbg_show_extra = st.toggle("Show Extra points (test)", False)
110
 
111
+ # if show_db_points:
112
+ # # show a nicer map, observations marked, tileset selectable.
113
+ # st_observation = present_obs_map(
114
+ # dataset_id=dataset_id, data_files=data_files,
115
+ # dbg_show_extra=dbg_show_extra)
116
 
117
+ # else:
118
+ # # development map.
119
+ # st_observation = present_alps_map()
120
 
121
 
122
+ # with tab_log:
123
+ # handler = st.session_state['handler']
124
+ # if handler is not None:
125
+ # records = parse_log_buffer(handler.buffer)
126
+ # st.dataframe(records[::-1], use_container_width=True,)
127
+ # st.info(f"Length of records: {len(records)}")
128
+ # else:
129
+ # st.error("⚠️ No log handler found!")
130
 
131
 
132
 
133
+ # with tab_coords:
134
+ # # the goal of this tab is to allow selection of the new obsvation's location by map click/adjust.
135
+ # st.markdown("Coming later! :construction:")
136
+ # st.markdown(
137
+ # """*The goal is to allow interactive definition for the coordinates of a new
138
+ # observation, by click/drag points on the map.*""")
139
 
140
 
141
+ # st.write("Click on the map to capture a location.")
142
+ # #m = folium.Map(location=visp_loc, zoom_start=7)
143
+ # mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
144
+ # folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
145
+ # ).add_to(mm)
146
+
147
+ # st_data2 = st_folium(mm, width=725)
148
+ # st.write("below the map...")
149
+ # if st_data2['last_clicked'] is not None:
150
+ # print(st_data2)
151
+ # st.info(st_data2['last_clicked'])
152
+
153
+
154
+ # with tab_gallery:
155
+ # # here we make a container to allow filtering css properties
156
+ # # specific to the gallery (otherwise we get side effects)
157
+ # tg_cont = st.container(key="swgallery")
158
+ # with tg_cont:
159
+ # gallery.render_whale_gallery(n_cols=4)
160
 
161
 
162
  # state handling re data_entry phases
 
167
  # - at this point do we also want to disable changes to the metadata selectors?
168
  # anyway, simple first.
169
 
170
+ # if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
171
+ # # can we advance state? - only when all inputs are set for all uploaded files
172
+ # all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
173
+ # if all_inputs_set:
174
+ # st.session_state.workflow_fsm.complete_current_state()
175
+ # # -> data_entry_complete
176
+ # else:
177
+ # # button, disabled; no state change yet.
178
+ # st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")
179
 
180
 
181
+ # if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
182
+ # # can we advance state? - only when the validate button is pressed
183
+ # if st.sidebar.button(":white_check_mark:[**Validate**]"):
184
+ # # create a dictionary with the submitted observation
185
+ # tab_log.info(f"{st.session_state.observations}")
186
+ # df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
187
+ # #df = pd.DataFrame(st.session_state.observations, index=[0])
188
+ # with tab_coords:
189
+ # st.table(df)
190
+ # # there doesn't seem to be any actual validation here?? TODO: find validator function (each element is validated by the input box, but is there something at the whole image level?)
191
+ # # hmm, maybe it should actually just be "I'm done with data entry"
192
+ # st.session_state.workflow_fsm.complete_current_state()
193
+ # # -> data_entry_validated
194
 
195
  # state handling re inference phases (tab_inference)
196
  # 3. validation button pressed, validation done -> enable the inference button.
 
199
  # 5. ML done -> show results, and manual validation options
200
  # 6. manual validation done -> enable the upload buttons
201
  #
202
+ # with tab_inference:
203
+ # # inside the inference tab, on button press we call the model (on huggingface hub)
204
+ # # which will be run locally.
205
+ # # - the model predicts the top 3 most likely species from the input image
206
+ # # - these species are shown
207
+ # # - the user can override the species prediction using the dropdown
208
+ # # - an observation is uploaded if the user chooses.
209
 
210
 
211
+ # if st.session_state.MODE_DEV_STATEFUL:
212
+ # dbg_show_observation_hashes()
213
+
214
+ # add_classifier_header()
215
+ # # if we are before data_entry_validated, show the button, disabled.
216
+ # if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
217
+ # tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True,
218
+ # help="Please validate inputs before proceeding",
219
+ # key="button_infer_ceteans")
220
 
221
+ # if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
222
+ # # show the button, enabled. If pressed, we start the ML model (And advance state)
223
+ # if tab_inference.button("Identify with cetacean classifier",
224
+ # key="button_infer_ceteans"):
225
+ # cetacean_classifier = AutoModelForImageClassification.from_pretrained(
226
+ # "Saving-Willy/cetacean-classifier",
227
+ # revision=classifier_revision,
228
+ # trust_remote_code=True)
229
+
230
+ # cetacean_just_classify(cetacean_classifier)
231
+ # st.session_state.workflow_fsm.complete_current_state()
232
+ # # trigger a refresh too (refreshhing the prog indicator means the script reruns and
233
+ # # we can enter the next state - visualising the results / review)
234
+ # # ok it doesn't if done programmatically. maybe interacting with teh button? check docs.
235
+ # refresh_progress_display()
236
+ # #TODO: validate this doesn't harm performance adversely.
237
+ # st.rerun()
238
 
239
+ # elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
240
+ # # show the results, and allow manual validation
241
+ # st.markdown("""### Inference results and manual validation/adjustment """)
242
+ # if st.session_state.MODE_DEV_STATEFUL:
243
+ # s = ""
244
+ # for k, v in st.session_state.whale_prediction1.items():
245
+ # s += f"* Image {k}: {v}\n"
246
 
247
+ # st.markdown(s)
248
 
249
+ # # add a button to advance the state
250
+ # if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
251
+ # st.session_state.workflow_fsm.complete_current_state()
252
+ # # -> manual_inspection_completed
253
+ # st.rerun()
254
 
255
+ # cetacean_show_results_and_review()
256
 
257
+ # elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
258
+ # # show the ML results, and allow the user to upload the observation
259
+ # st.markdown("""### Inference Results (after manual validation) """)
260
 
261
 
262
+ # if st.button("Upload all observations to THE INTERNET!"):
263
+ # # let this go through to the push_all func, since it just reports to log for now.
264
+ # push_all_observations(enable_push=False)
265
+ # st.session_state.workflow_fsm.complete_current_state()
266
+ # # -> data_uploaded
267
+ # st.rerun()
268
+
269
+ # cetacean_show_results()
270
 
271
+ # elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
272
+ # # the data has been sent. Lets show the observations again
273
+ # # but no buttons to upload (or greyed out ok)
274
+ # st.markdown("""### Observation(s) uploaded - thank you!""")
275
+ # cetacean_show_results()
276
+
277
+ # st.divider()
278
+ # #df = pd.DataFrame(st.session_state.observations, index=[0])
279
+ # df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
280
+ # st.table(df)
281
+
282
+ # # didn't decide what the next state is here - I think we are in the terminal state.
283
+ # #st.session_state.workflow_fsm.complete_current_state()
284
 
285
 
286
+ # # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
287
+ # # purposes, an hotdog image classifier) which will be run locally.
288
+ # # - this model predicts if the image is a hotdog or not, and returns probabilities
289
+ # # - the input image is the same as for the ceteacean classifier - defined in the sidebar
290
+ # tab_hotdogs.title("Hot Dog? Or Not?")
291
+ # tab_hotdogs.write("""
292
+ # *Run alternative classifer on input images. Here we are using
293
+ # a binary classifier - hotdog or not - from
294
+ # huggingface.co/julien-c/hotdog-not-hotdog.*""")
295
+
296
+ # if tab_hotdogs.button("Get Hotdog Prediction"):
297
 
298
+ # pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
299
 
300
+ # if st.session_state.image is None:
301
+ # st.info("Please upload an image first.")
302
+ # #st.info(str(observations.to_dict()))
303
 
304
+ # else:
305
+ # hotdog_classify(pipeline_hot_dog, tab_hotdogs)
306
 
307
 
308
+ # # after all other processing, we can show the stage/state
309
+ # refresh_progress_display()
310
 
311
 
312
  if __name__ == "__main__":
src/pages/1_home.py DELETED
File without changes
src/pages/1_πŸ‹_about.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit "About" page: static project description, resources, and credits."""
import streamlit as st

# must be the first streamlit call in the script
st.set_page_config(
    page_title="About",
    page_icon="πŸ‹",
)

# the whole page is a single static markdown body
st.markdown(
    """
    # About
    We created this web app in a hackathon.
    This interface is a Proof of Concept of a Community-driven Research Data Infrastructure for the Cetacean Conservation Community.

    Please reach out for feedback, suggestions, or if you want to join the project.

    # Open Source Resources

    The space is hosted on Hugging Face.
    The code is available on Github.
    All model codes are open.

    # Credits and Thanks

    Developers:
    - Rob Mills
    - Laure Vancauwenberghe

    Thanks to:
    - EDMAKTUB for their advice.
    - SDSC for the hackathon that started the project.

    """
)
src/pages/2_classifiers.py DELETED
File without changes
src/pages/2_🌍_map.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit "Map" page: presents observations on a map, with a dev fallback."""
import streamlit as st

# must be the first streamlit call in the script
st.set_page_config(
    page_title="Map",  # fix: was "About" — copy-paste from the about page
    page_icon="🌍",
    layout="wide",
)

# NOTE(review): parse_log_buffer / init_logging_session_states are imported but
# not used on this page — confirm against siblings before removing.
from utils.st_logs import parse_log_buffer, init_logging_session_states

from maps.obs_map import add_obs_map_header
from maps.alps_map import present_alps_map
from maps.obs_map import present_obs_map

from datasets import disable_caching
disable_caching()

############################################################
# TODO: MAKE ENV FILE
# the dataset of observations (hf dataset in our space)
dataset_id = "Saving-Willy/temp_dataset"
data_files = "data/train-00000-of-00001.parquet"
USE_BASIC_MAP = False
DEV_SIDEBAR_LIB = True
############################################################

# visual structure: a couple of toggles at the top, then the map including a
# dropdown for tileset selection.
add_obs_map_header()
tab_map_ui_cols = st.columns(2)
with tab_map_ui_cols[0]:
    show_db_points = st.toggle("Show Points from DB", True)
with tab_map_ui_cols[1]:
    dbg_show_extra = st.toggle("Show Extra points (test)", False)

if show_db_points:
    # show a nicer map, observations marked, tileset selectable.
    st_observation = present_obs_map(
        dataset_id=dataset_id, data_files=data_files,
        dbg_show_extra=dbg_show_extra)

else:
    # development map.
    st_observation = present_alps_map()
src/pages/3_benchmarking.py DELETED
File without changes
src/pages/3_πŸ”₯_classifiers.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import pandas as pd
4
+
5
+ st.set_page_config(
6
+ page_title="ML Models",
7
+ page_icon="πŸ”₯",
8
+ )
9
+
10
+ from utils.st_logs import parse_log_buffer, init_logging_session_states
11
+
12
+ from transformers import pipeline
13
+ from transformers import AutoModelForImageClassification
14
+ from classifier.classifier_image import add_classifier_header
15
+
16
+ from input.input_handling import setup_input, check_inputs_are_set
17
+ from input.input_handling import init_input_container_states, add_input_UI_elements, init_input_data_session_states
18
+ from input.input_handling import dbg_show_observation_hashes
19
+
20
+ from utils.workflow_ui import refresh_progress_display, init_workflow_viz, init_workflow_session_states
21
+ from hf_push_observations import push_all_observations
22
+
23
+ from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results, init_classifier_session_states
24
+ from classifier.classifier_hotdog import hotdog_classify
25
+
26
+ # setup for the ML model on huggingface (our wrapper)
27
+ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
28
+
29
+ ############################################################
30
+ # TO- DO: MAKE ENV FILE
31
+ #classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
32
+ classifier_revision = 'main' # default/latest version
33
+ # and the dataset of observations (hf dataset in our space)
34
+ dataset_id = "Saving-Willy/temp_dataset"
35
+ data_files = "data/train-00000-of-00001.parquet"
36
+ ############################################################
37
+
38
+ st.sidebar.success("classifiers")
39
+
40
+ # Streamlit app
41
+ tab_inference, tab_hotdogs= \
42
+ st.tabs(["Cetecean classifier", "Hotdog classifier"])
43
+
44
+ # put this early so the progress indicator is at the top (also refreshed at end)
45
+ refresh_progress_display()
46
+
47
+ # create a sidebar, and parse all the input (returned as `observations` object)
48
+ with st.sidebar:
49
+ # layout handling
50
+ add_input_UI_elements()
51
+ # input elements (file upload, text input, etc)
52
+ setup_input()
53
+
54
+ if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
55
+ # can we advance state? - only when all inputs are set for all uploaded files
56
+ all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
57
+ if all_inputs_set:
58
+ st.session_state.workflow_fsm.complete_current_state()
59
+ # -> data_entry_complete
60
+ else:
61
+ # button, disabled; no state change yet.
62
+ st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")
63
+
64
+
65
+ if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
66
+ # can we advance state? - only when the validate button is pressed
67
+ if st.sidebar.button(":white_check_mark:[**Validate**]"):
68
+ # create a dictionary with the submitted observation
69
+
70
+ # TO-DO NEED TO ADAPT.
71
+ #tab_log.info(f"{st.session_state.observations}")
72
+
73
+ df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
74
+ #df = pd.DataFrame(st.session_state.observations, index=[0])
75
+ # with tab_coords:
76
+ # st.table(df)
77
+ # there doesn't seem to be any actual validation here?? TODO: find validator function (each element is validated by the input box, but is there something at the whole image level?)
78
+ # hmm, maybe it should actually just be "I'm done with data entry"
79
+ st.session_state.workflow_fsm.complete_current_state()
80
+ # -> data_entry_validated
81
+
82
+ if st.session_state.MODE_DEV_STATEFUL:
83
+ dbg_show_observation_hashes()
84
+
85
+ add_classifier_header()
86
+ # if we are before data_entry_validated, show the button, disabled.
87
+ if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
88
+ tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True,
89
+ help="Please validate inputs before proceeding",
90
+ key="button_infer_ceteans")
91
+
92
+ if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
93
+ # show the button, enabled. If pressed, we start the ML model (And advance state)
94
+ if tab_inference.button("Identify with cetacean classifier",
95
+ key="button_infer_ceteans"):
96
+ cetacean_classifier = AutoModelForImageClassification.from_pretrained(
97
+ "Saving-Willy/cetacean-classifier",
98
+ revision=classifier_revision,
99
+ trust_remote_code=True)
100
+
101
+ cetacean_just_classify(cetacean_classifier)
102
+ st.session_state.workflow_fsm.complete_current_state()
103
+ # trigger a refresh too (refreshhing the prog indicator means the script reruns and
104
+ # we can enter the next state - visualising the results / review)
105
+ # ok it doesn't if done programmatically. maybe interacting with teh button? check docs.
106
+ refresh_progress_display()
107
+ #TODO: validate this doesn't harm performance adversely.
108
+ st.rerun()
109
+
110
+ elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
111
+ # show the results, and allow manual validation
112
+ st.markdown("""### Inference results and manual validation/adjustment """)
113
+ if st.session_state.MODE_DEV_STATEFUL:
114
+ s = ""
115
+ for k, v in st.session_state.whale_prediction1.items():
116
+ s += f"* Image {k}: {v}\n"
117
+
118
+ st.markdown(s)
119
+
120
+ # add a button to advance the state
121
+ if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
122
+ st.session_state.workflow_fsm.complete_current_state()
123
+ # -> manual_inspection_completed
124
+ st.rerun()
125
+
126
+ cetacean_show_results_and_review()
127
+
128
+ elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
129
+ # show the ML results, and allow the user to upload the observation
130
+ st.markdown("""### Inference Results (after manual validation) """)
131
+
132
+
133
+ if st.button("Upload all observations to THE INTERNET!"):
134
+ # let this go through to the push_all func, since it just reports to log for now.
135
+ push_all_observations(enable_push=False)
136
+ st.session_state.workflow_fsm.complete_current_state()
137
+ # -> data_uploaded
138
+ st.rerun()
139
+
140
+ cetacean_show_results()
141
+
142
+ elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
143
+ # the data has been sent. Lets show the observations again
144
+ # but no buttons to upload (or greyed out ok)
145
+ st.markdown("""### Observation(s) uploaded - thank you!""")
146
+ cetacean_show_results()
147
+
148
+ st.divider()
149
+ #df = pd.DataFrame(st.session_state.observations, index=[0])
150
+ df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
151
+ st.table(df)
152
+
153
+ # didn't decide what the next state is here - I think we are in the terminal state.
154
+ #st.session_state.workflow_fsm.complete_current_state()
155
+
156
+
157
+ # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
158
+ # purposes, a hotdog image classifier) which will be run locally.
159
+ # - this model predicts if the image is a hotdog or not, and returns probabilities
160
+ # - the input image is the same as for the cetacean classifier - defined in the sidebar
161
+ tab_hotdogs.title("Hot Dog? Or Not?")
162
+ tab_hotdogs.write("""
163
+ *Run alternative classifer on input images. Here we are using
164
+ a binary classifier - hotdog or not - from
165
+ huggingface.co/julien-c/hotdog-not-hotdog.*""")
166
+
167
+ if tab_hotdogs.button("Get Hotdog Prediction"):
168
+
169
+ pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
170
+
171
+ if st.session_state.image is None:
172
+ st.info("Please upload an image first.")
173
+ #st.info(str(observations.to_dict()))
174
+
175
+ else:
176
+ hotdog_classify(pipeline_hot_dog, tab_hotdogs)
177
+
178
+
179
+ # after all other processing, we can show the stage/state
180
+ refresh_progress_display()
src/pages/4_requests.py DELETED
File without changes
src/pages/4_πŸ†_benchmarking.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.set_page_config(
4
+ page_title="Benchmarking",
5
+ page_icon="πŸ†",
6
+ )
7
+
8
+ from utils.st_logs import parse_log_buffer, init_logging_session_states
src/pages/5_🀝_requests.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.set_page_config(
4
+ page_title="Requests",
5
+ page_icon="🀝",
6
+ )
7
+
8
+ from utils.st_logs import parse_log_buffer, init_logging_session_states
src/pages/6_🌊_gallery.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ st.set_page_config(
4
+ page_title="ML Models",
5
+ page_icon="🌊",
6
+ layout="wide",
7
+ )
8
+ from utils.st_logs import parse_log_buffer, init_logging_session_states
9
+
10
+ import whale_gallery as gallery
11
+ import whale_viewer as viewer
12
+
13
+ # here we make a container to allow filtering css properties
14
+ # specific to the gallery (otherwise we get side effects)
15
+ tg_cont = st.container(key="swgallery")
16
+ with tg_cont:
17
+ gallery.render_whale_gallery(n_cols=4)
src/pages/7_🚧_coordinates.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import folium
3
+ from streamlit_folium import st_folium
4
+
5
+ st.set_page_config(
6
+ page_title="Coordinates",
7
+ page_icon="🚧",
8
+ layout="wide",
9
+ )
10
+
11
+ # the goal of this tab is to allow selection of the new obsvation's location by map click/adjust.
12
+ st.markdown("Coming later! :construction:")
13
+ st.markdown(
14
+ """*The goal is to allow interactive definition for the coordinates of a new
15
+ observation, by click/drag points on the map.*""")
16
+
17
+
18
+ st.write("Click on the map to capture a location.")
19
+ #m = folium.Map(location=visp_loc, zoom_start=7)
20
+ mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
21
+ folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
22
+ ).add_to(mm)
23
+
24
+ st_data2 = st_folium(mm, width=725)
25
+ st.write("below the map...")
26
+ if st_data2['last_clicked'] is not None:
27
+ print(st_data2)
28
+ st.info(st_data2['last_clicked'])
src/pages/πŸ“Š_logs.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+
4
+ st.set_page_config(
5
+ page_title="Logs",
6
+ page_icon="πŸ“Š",
7
+ )
8
+
9
+ from utils.st_logs import parse_log_buffer
10
+
11
+ handler = st.session_state['handler']
12
+ if handler is not None:
13
+ records = parse_log_buffer(handler.buffer)
14
+ st.dataframe(records[::-1], use_container_width=True,)
15
+ st.info(f"Length of records: {len(records)}")
16
+ else:
17
+ st.error("⚠️ No log handler found!")