Spaces:

Saving-Willy
/

saving-willy-dev

Running

App Files Files Community

rmm committed on Jan 31

Commit

8c4b1f7

1 Parent(s): aa998c4

chore: tidy up of workflow and debug clutter

Browse files

- basically all phases seem ok, almost ready for validation

Files changed (4) hide show

src/classifier/classifier_image.py +14 -8
src/hf_push_observations.py +18 -17
src/input/input_observation.py +1 -7
src/main.py +24 -21

src/classifier/classifier_image.py CHANGED Viewed

@@ -39,9 +39,10 @@ def cetacean_just_classify(cetacean_classifier):
         msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
         g_logger.info(msg)
-        # TODO: what is the difference between public and regular; and why is this not array-ready?
         st.session_state.public_observations[hash] = observation
-        st.write(f"*[D] Observation {hash} classified as {st.session_state.whale_prediction1[hash]}*")
 # func to show results and allow review
@@ -70,7 +71,7 @@ def cetacean_show_results_and_review():
             else:
                 pred1 = st.session_state.whale_prediction1[hash]
                 # get index of pred1 from WHALE_CLASSES, none if not present
-                print(f"[D] pred1: {pred1}")
                 ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
                 selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
@@ -79,7 +80,7 @@ def cetacean_show_results_and_review():
                 observation['class_overriden'] = selected_class # TODO: this should be boolean!
             st.session_state.public_observations[hash] = observation
-            st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
             # TODO: the metadata only fills properly if `validate` was clicked.
             st.markdown(metadata2md(hash))
@@ -91,7 +92,7 @@ def cetacean_show_results_and_review():
             whale_classes = observations[hash].top_predictions
             # render images for the top 3 (that is what the model api returns)
             n = len(whale_classes)
-            st.markdown(f"Top {n} Predictions for observation {str(o)}")
             for i in range(n):
                 viewer.display_whale(whale_classes, i)
         o += 1
@@ -134,9 +135,14 @@ def cetacean_show_results():
             #     observation['class_overriden'] = selected_class # TODO: this should be boolean!
             # st.session_state.public_observation = observation
-            st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
-            # TODO: the metadata only fills properly if `validate` was clicked.
             st.markdown(metadata2md(hash))
             st.markdown(f"- **hash**: {hash}")
             msg = f"[D] full observation after inference: {observation}"
@@ -147,7 +153,7 @@ def cetacean_show_results():
             whale_classes = observations[hash].top_predictions
             # render images for the top 3 (that is what the model api returns)
             n = len(whale_classes)
-            st.markdown(f"Top {n} Predictions for observation {str(o)}")
             for i in range(n):
                 viewer.display_whale(whale_classes, i)
         o += 1

         msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
         g_logger.info(msg)
+        # store the elements of the observation that will be transmitted (not image)
         st.session_state.public_observations[hash] = observation
+        if st.session_state.MODE_DEV_STATEFUL:
+            st.write(f"*[D] Observation {hash} classified as {st.session_state.whale_prediction1[hash]}*")
 # func to show results and allow review
             else:
                 pred1 = st.session_state.whale_prediction1[hash]
                 # get index of pred1 from WHALE_CLASSES, none if not present
+                print(f"[D] {o:3} pred1: {pred1:30} | {hash}")
                 ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
                 selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
                 observation['class_overriden'] = selected_class # TODO: this should be boolean!
             st.session_state.public_observations[hash] = observation
+            #st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
             # TODO: the metadata only fills properly if `validate` was clicked.
             st.markdown(metadata2md(hash))
             whale_classes = observations[hash].top_predictions
             # render images for the top 3 (that is what the model api returns)
             n = len(whale_classes)
+            st.markdown(f"**Top {n} Predictions for observation {str(o)}**")
             for i in range(n):
                 viewer.display_whale(whale_classes, i)
         o += 1
             #     observation['class_overriden'] = selected_class # TODO: this should be boolean!
             # st.session_state.public_observation = observation
+            #st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
+            #
             st.markdown(metadata2md(hash))
+            # TODO: FIXME: this is the data that will get pushed -- it DOESN'T reflect any adjustments
+            # made via the dropdown on the last step!!!!
+            #st.markdown(f"- **selected species**: {observation['predicted_class']}")
+            st.markdown(f"- **selected species**: {st.session_state.whale_prediction1[hash]}")
             st.markdown(f"- **hash**: {hash}")
             msg = f"[D] full observation after inference: {observation}"
             whale_classes = observations[hash].top_predictions
             # render images for the top 3 (that is what the model api returns)
             n = len(whale_classes)
+            st.markdown(f"**Top {n} Predictions for observation {str(o)}**")
             for i in range(n):
                 viewer.display_whale(whale_classes, i)
         o += 1

src/hf_push_observations.py CHANGED Viewed

@@ -13,7 +13,7 @@ LOG_LEVEL = logging.DEBUG
 g_logger = logging.getLogger(__name__)
 g_logger.setLevel(LOG_LEVEL)
-def push_observation(image_hash:str, api:HfApi) -> CommitInfo:
     '''
     push one observation to the Hugging Face dataset
@@ -38,32 +38,33 @@ def push_observation(image_hash:str, api:HfApi) -> CommitInfo:
     f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
     f.write(metadata_str)
     f.close()
-    st.info(f"temp file: {f.name} with metadata written...")
-    # observation['author_email']
-    # observation['image_md5']
     path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
     msg = f"fname: {f.name} | path: {path_in_repo}"
     print(msg)
     st.warning(msg)
-    rv = None # temp don't send anything
-    # rv = api.upload_file(
-    #     path_or_fileobj=f.name,
-    #     path_in_repo=path_in_repo,
-    #     repo_id="Saving-Willy/temp_dataset",
-    #     repo_type="dataset",
-    # )
-    # print(rv)
-    # msg = f"observation attempted tx to repo happy walrus: {rv}"
-    g_logger.info(msg)
-    st.info(msg)
     return rv
-def push_all_observations():
     '''
     open an API connection to Hugging Face, and push all observation one by one
     '''
@@ -74,7 +75,7 @@ def push_all_observations():
     # iterate over the list of observations
     for hash in st.session_state.public_observations.keys():
-        rv = push_observation(hash, api)

 g_logger = logging.getLogger(__name__)
 g_logger.setLevel(LOG_LEVEL)
+def push_observation(image_hash:str, api:HfApi, enable_push:False) -> CommitInfo:
     '''
     push one observation to the Hugging Face dataset
     f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
     f.write(metadata_str)
     f.close()
+    #st.info(f"temp file: {f.name} with metadata written...")
     path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
     msg = f"fname: {f.name} | path: {path_in_repo}"
     print(msg)
     st.warning(msg)
+    if enable_push:
+        rv = api.upload_file(
+            path_or_fileobj=f.name,
+            path_in_repo=path_in_repo,
+            repo_id="Saving-Willy/temp_dataset",
+            repo_type="dataset",
+        )
+        print(rv)
+        msg = f"observation attempted tx to repo happy walrus: {rv}"
+        g_logger.info(msg)
+        st.info(msg)
+    else:
+        rv = None # temp don't send anything
     return rv
+def push_all_observations(enable_push:bool=False):
     '''
     open an API connection to Hugging Face, and push all observation one by one
     '''
     # iterate over the list of observations
     for hash in st.session_state.public_observations.keys():
+        rv = push_observation(hash, api, enable_push=enable_push)

src/input/input_observation.py CHANGED Viewed

@@ -92,13 +92,7 @@ class InputObservation:
         raise DeprecationWarning("This method is deprecated. hash is a required constructor argument.")
         if not self.image_md5:
             self.image_md5 = hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5()
-			# new comment / hybj hunk
-            self._cprint(f"[D] Assigned image md5: {self.image_md5} for {self.uploaded_file}")
-    def _cprint(self, msg:str, color:str=OKGREEN):
-        """Print colored message"""
-        print(f"{color}{msg}{ENDC}")
     def __str__(self):
         _im_str = "None" if self.image is None else f"image dims: {self.image.shape}"

         raise DeprecationWarning("This method is deprecated. hash is a required constructor argument.")
         if not self.image_md5:
             self.image_md5 = hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5()
+            m_logger.debug(f"[D] Assigned image md5: {self.image_md5} for {self.uploaded_file}")
     def __str__(self):
         _im_str = "None" if self.image is None else f"image dims: {self.image.shape}"

src/main.py CHANGED Viewed

@@ -44,6 +44,11 @@ data_files = "data/train-00000-of-00001.parquet"
 USE_BASIC_MAP = False
 DEV_SIDEBAR_LIB = True
 # get a global var for logger accessor in this module
 LOG_LEVEL = logging.DEBUG
 g_logger = logging.getLogger(__name__)
@@ -249,7 +254,8 @@ def main() -> None:
     #
     with tab_inference:
-        dbg_show_obs_hashes()
         add_classifier_header()
         # if we are before data_entry_validated, show the button, disabled.
@@ -277,17 +283,16 @@ def main() -> None:
         elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
             # show the results, and allow manual validation
-            s = ""
-            for k, v in st.session_state.whale_prediction1.items():
-                s += f"* Image {k}: {v}\n"
-            st.markdown("""
-                        ### Inference Results and manual validation/adjustment
-                        :construction: for now we just show the num images processed.
-                        """)
-            st.markdown(s)
             # add a button to advance the state
-            if st.button("mock: manual validation done."):
                 st.session_state.workflow_fsm.complete_current_state()
                 # -> manual_inspection_completed
                 st.rerun()
@@ -296,27 +301,25 @@ def main() -> None:
         elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
             # show the ML results, and allow the user to upload the observation
-            st.markdown("""
-                        ### Inference Results (after manual validation)
-                        :construction: for now we just show the button.
-                        """)
-            if st.button("(nooop) Upload observation to THE INTERNET!"):
                 # let this go through to the push_all func, since it just reports to log for now.
-                push_all_observations()
                 st.session_state.workflow_fsm.complete_current_state()
                 # -> data_uploaded
             cetacean_show_results()
         elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
             # the data has been sent. Lets show the observations again
             # but no buttons to upload (or greyed out ok)
-            st.markdown("""
-                        ### Observation(s) uploaded
-                        :construction: for now we just show the observations.
-                        """)
             df = pd.DataFrame(st.session_state.observations, index=[0])
             st.table(df)

 USE_BASIC_MAP = False
 DEV_SIDEBAR_LIB = True
+# one toggle for all the extra debug text
+if "MODE_DEV_STATEFUL" not in st.session_state:
+    st.session_state.MODE_DEV_STATEFUL = False
 # get a global var for logger accessor in this module
 LOG_LEVEL = logging.DEBUG
 g_logger = logging.getLogger(__name__)
     #
     with tab_inference:
+        if st.session_state.MODE_DEV_STATEFUL:
+            dbg_show_obs_hashes()
         add_classifier_header()
         # if we are before data_entry_validated, show the button, disabled.
         elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
             # show the results, and allow manual validation
+            st.markdown("""### Inference results and manual validation/adjustment """)
+            if st.session_state.MODE_DEV_STATEFUL:
+                s = ""
+                for k, v in st.session_state.whale_prediction1.items():
+                    s += f"* Image {k}: {v}\n"
+                st.markdown(s)
             # add a button to advance the state
+            if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
                 st.session_state.workflow_fsm.complete_current_state()
                 # -> manual_inspection_completed
                 st.rerun()
         elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
             # show the ML results, and allow the user to upload the observation
+            st.markdown("""### Inference Results (after manual validation) """)
+            if st.button("Upload all observations to THE INTERNET!"):
                 # let this go through to the push_all func, since it just reports to log for now.
+                push_all_observations(enable_push=False)
                 st.session_state.workflow_fsm.complete_current_state()
                 # -> data_uploaded
+                st.rerun()
             cetacean_show_results()
         elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
             # the data has been sent. Lets show the observations again
             # but no buttons to upload (or greyed out ok)
+            st.markdown("""### Observation(s) uploaded - thank you!""")
+            cetacean_show_results()
+            st.divider()
             df = pd.DataFrame(st.session_state.observations, index=[0])
             st.table(df)