rmm committed on
Commit
8c4b1f7
·
1 Parent(s): aa998c4

chore: tidy up of workflow and debug clutter

Browse files

- basically all phases seem ok, almost ready for validation

src/classifier/classifier_image.py CHANGED
@@ -39,9 +39,10 @@ def cetacean_just_classify(cetacean_classifier):
39
  msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
40
  g_logger.info(msg)
41
 
42
- # TODO: what is the difference between public and regular; and why is this not array-ready?
43
  st.session_state.public_observations[hash] = observation
44
- st.write(f"*[D] Observation {hash} classified as {st.session_state.whale_prediction1[hash]}*")
 
45
 
46
 
47
  # func to show results and allow review
@@ -70,7 +71,7 @@ def cetacean_show_results_and_review():
70
  else:
71
  pred1 = st.session_state.whale_prediction1[hash]
72
  # get index of pred1 from WHALE_CLASSES, none if not present
73
- print(f"[D] pred1: {pred1}")
74
  ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
75
  selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
76
 
@@ -79,7 +80,7 @@ def cetacean_show_results_and_review():
79
  observation['class_overriden'] = selected_class # TODO: this should be boolean!
80
 
81
  st.session_state.public_observations[hash] = observation
82
- st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
83
  # TODO: the metadata only fills properly if `validate` was clicked.
84
  st.markdown(metadata2md(hash))
85
 
@@ -91,7 +92,7 @@ def cetacean_show_results_and_review():
91
  whale_classes = observations[hash].top_predictions
92
  # render images for the top 3 (that is what the model api returns)
93
  n = len(whale_classes)
94
- st.markdown(f"Top {n} Predictions for observation {str(o)}")
95
  for i in range(n):
96
  viewer.display_whale(whale_classes, i)
97
  o += 1
@@ -134,9 +135,14 @@ def cetacean_show_results():
134
  # observation['class_overriden'] = selected_class # TODO: this should be boolean!
135
 
136
  # st.session_state.public_observation = observation
137
- st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
138
- # TODO: the metadata only fills properly if `validate` was clicked.
 
139
  st.markdown(metadata2md(hash))
 
 
 
 
140
  st.markdown(f"- **hash**: {hash}")
141
 
142
  msg = f"[D] full observation after inference: {observation}"
@@ -147,7 +153,7 @@ def cetacean_show_results():
147
  whale_classes = observations[hash].top_predictions
148
  # render images for the top 3 (that is what the model api returns)
149
  n = len(whale_classes)
150
- st.markdown(f"Top {n} Predictions for observation {str(o)}")
151
  for i in range(n):
152
  viewer.display_whale(whale_classes, i)
153
  o += 1
 
39
  msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
40
  g_logger.info(msg)
41
 
42
+ # store the elements of the observation that will be transmitted (not image)
43
  st.session_state.public_observations[hash] = observation
44
+ if st.session_state.MODE_DEV_STATEFUL:
45
+ st.write(f"*[D] Observation {hash} classified as {st.session_state.whale_prediction1[hash]}*")
46
 
47
 
48
  # func to show results and allow review
 
71
  else:
72
  pred1 = st.session_state.whale_prediction1[hash]
73
  # get index of pred1 from WHALE_CLASSES, none if not present
74
+ print(f"[D] {o:3} pred1: {pred1:30} | {hash}")
75
  ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
76
  selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
77
 
 
80
  observation['class_overriden'] = selected_class # TODO: this should be boolean!
81
 
82
  st.session_state.public_observations[hash] = observation
83
+ #st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
84
  # TODO: the metadata only fills properly if `validate` was clicked.
85
  st.markdown(metadata2md(hash))
86
 
 
92
  whale_classes = observations[hash].top_predictions
93
  # render images for the top 3 (that is what the model api returns)
94
  n = len(whale_classes)
95
+ st.markdown(f"**Top {n} Predictions for observation {str(o)}**")
96
  for i in range(n):
97
  viewer.display_whale(whale_classes, i)
98
  o += 1
 
135
  # observation['class_overriden'] = selected_class # TODO: this should be boolean!
136
 
137
  # st.session_state.public_observation = observation
138
+
139
+ #st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
140
+ #
141
  st.markdown(metadata2md(hash))
142
+ # TODO: FIXME: this is the data that will get pushed -- it DOESN'T reflect any adjustments
143
+ # # made via the dropdown on the last step!!!!
144
+ #st.markdown(f"- **selected species**: {observation['predicted_class']}")
145
+ st.markdown(f"- **selected species**: {st.session_state.whale_prediction1[hash]}")
146
  st.markdown(f"- **hash**: {hash}")
147
 
148
  msg = f"[D] full observation after inference: {observation}"
 
153
  whale_classes = observations[hash].top_predictions
154
  # render images for the top 3 (that is what the model api returns)
155
  n = len(whale_classes)
156
+ st.markdown(f"**Top {n} Predictions for observation {str(o)}**")
157
  for i in range(n):
158
  viewer.display_whale(whale_classes, i)
159
  o += 1
src/hf_push_observations.py CHANGED
@@ -13,7 +13,7 @@ LOG_LEVEL = logging.DEBUG
13
  g_logger = logging.getLogger(__name__)
14
  g_logger.setLevel(LOG_LEVEL)
15
 
16
- def push_observation(image_hash:str, api:HfApi) -> CommitInfo:
17
  '''
18
  push one observation to the Hugging Face dataset
19
 
@@ -38,32 +38,33 @@ def push_observation(image_hash:str, api:HfApi) -> CommitInfo:
38
  f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
39
  f.write(metadata_str)
40
  f.close()
41
- st.info(f"temp file: {f.name} with metadata written...")
42
 
43
- # observation['author_email']
44
- # observation['image_md5']
45
  path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
46
 
47
  msg = f"fname: {f.name} | path: {path_in_repo}"
48
  print(msg)
49
  st.warning(msg)
50
- rv = None # temp don't send anything
51
- # rv = api.upload_file(
52
- # path_or_fileobj=f.name,
53
- # path_in_repo=path_in_repo,
54
- # repo_id="Saving-Willy/temp_dataset",
55
- # repo_type="dataset",
56
- # )
57
- # print(rv)
58
- # msg = f"observation attempted tx to repo happy walrus: {rv}"
59
- g_logger.info(msg)
60
- st.info(msg)
 
 
 
61
 
62
  return rv
63
 
64
 
65
 
66
- def push_all_observations():
67
  '''
68
  open an API connection to Hugging Face, and push all observation one by one
69
  '''
@@ -74,7 +75,7 @@ def push_all_observations():
74
 
75
  # iterate over the list of observations
76
  for hash in st.session_state.public_observations.keys():
77
- rv = push_observation(hash, api)
78
 
79
 
80
 
 
13
  g_logger = logging.getLogger(__name__)
14
  g_logger.setLevel(LOG_LEVEL)
15
 
16
+ def push_observation(image_hash:str, api:HfApi, enable_push:False) -> CommitInfo:
17
  '''
18
  push one observation to the Hugging Face dataset
19
 
 
38
  f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
39
  f.write(metadata_str)
40
  f.close()
41
+ #st.info(f"temp file: {f.name} with metadata written...")
42
 
 
 
43
  path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
44
 
45
  msg = f"fname: {f.name} | path: {path_in_repo}"
46
  print(msg)
47
  st.warning(msg)
48
+
49
+ if enable_push:
50
+ rv = api.upload_file(
51
+ path_or_fileobj=f.name,
52
+ path_in_repo=path_in_repo,
53
+ repo_id="Saving-Willy/temp_dataset",
54
+ repo_type="dataset",
55
+ )
56
+ print(rv)
57
+ msg = f"observation attempted tx to repo happy walrus: {rv}"
58
+ g_logger.info(msg)
59
+ st.info(msg)
60
+ else:
61
+ rv = None # temp don't send anything
62
 
63
  return rv
64
 
65
 
66
 
67
+ def push_all_observations(enable_push:bool=False):
68
  '''
69
  open an API connection to Hugging Face, and push all observation one by one
70
  '''
 
75
 
76
  # iterate over the list of observations
77
  for hash in st.session_state.public_observations.keys():
78
+ rv = push_observation(hash, api, enable_push=enable_push)
79
 
80
 
81
 
src/input/input_observation.py CHANGED
@@ -92,13 +92,7 @@ class InputObservation:
92
  raise DeprecationWarning("This method is deprecated. hash is a required constructor argument.")
93
  if not self.image_md5:
94
  self.image_md5 = hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5()
95
-
96
- # new comment / hybj hunk
97
- self._cprint(f"[D] Assigned image md5: {self.image_md5} for {self.uploaded_file}")
98
-
99
- def _cprint(self, msg:str, color:str=OKGREEN):
100
- """Print colored message"""
101
- print(f"{color}{msg}{ENDC}")
102
 
103
  def __str__(self):
104
  _im_str = "None" if self.image is None else f"image dims: {self.image.shape}"
 
92
  raise DeprecationWarning("This method is deprecated. hash is a required constructor argument.")
93
  if not self.image_md5:
94
  self.image_md5 = hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5()
95
+ m_logger.debug(f"[D] Assigned image md5: {self.image_md5} for {self.uploaded_file}")
 
 
 
 
 
 
96
 
97
  def __str__(self):
98
  _im_str = "None" if self.image is None else f"image dims: {self.image.shape}"
src/main.py CHANGED
@@ -44,6 +44,11 @@ data_files = "data/train-00000-of-00001.parquet"
44
  USE_BASIC_MAP = False
45
  DEV_SIDEBAR_LIB = True
46
 
 
 
 
 
 
47
  # get a global var for logger accessor in this module
48
  LOG_LEVEL = logging.DEBUG
49
  g_logger = logging.getLogger(__name__)
@@ -249,7 +254,8 @@ def main() -> None:
249
  #
250
  with tab_inference:
251
 
252
- dbg_show_obs_hashes()
 
253
 
254
  add_classifier_header()
255
  # if we are before data_entry_validated, show the button, disabled.
@@ -277,17 +283,16 @@ def main() -> None:
277
 
278
  elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
279
  # show the results, and allow manual validation
280
- s = ""
281
- for k, v in st.session_state.whale_prediction1.items():
282
- s += f"* Image {k}: {v}\n"
283
-
284
- st.markdown("""
285
- ### Inference Results and manual validation/adjustment
286
- :construction: for now we just show the num images processed.
287
- """)
288
- st.markdown(s)
289
  # add a button to advance the state
290
- if st.button("mock: manual validation done."):
291
  st.session_state.workflow_fsm.complete_current_state()
292
  # -> manual_inspection_completed
293
  st.rerun()
@@ -296,27 +301,25 @@ def main() -> None:
296
 
297
  elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
298
  # show the ML results, and allow the user to upload the observation
299
- st.markdown("""
300
- ### Inference Results (after manual validation)
301
- :construction: for now we just show the button.
302
- """)
303
 
304
 
305
- if st.button("(nooop) Upload observation to THE INTERNET!"):
306
  # let this go through to the push_all func, since it just reports to log for now.
307
- push_all_observations()
308
  st.session_state.workflow_fsm.complete_current_state()
309
  # -> data_uploaded
 
310
 
311
  cetacean_show_results()
312
 
313
  elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
314
  # the data has been sent. Lets show the observations again
315
  # but no buttons to upload (or greyed out ok)
316
- st.markdown("""
317
- ### Observation(s) uploaded
318
- :construction: for now we just show the observations.
319
- """)
320
  df = pd.DataFrame(st.session_state.observations, index=[0])
321
  st.table(df)
322
 
 
44
  USE_BASIC_MAP = False
45
  DEV_SIDEBAR_LIB = True
46
 
47
+ # one toggle for all the extra debug text
48
+ if "MODE_DEV_STATEFUL" not in st.session_state:
49
+ st.session_state.MODE_DEV_STATEFUL = False
50
+
51
+
52
  # get a global var for logger accessor in this module
53
  LOG_LEVEL = logging.DEBUG
54
  g_logger = logging.getLogger(__name__)
 
254
  #
255
  with tab_inference:
256
 
257
+ if st.session_state.MODE_DEV_STATEFUL:
258
+ dbg_show_obs_hashes()
259
 
260
  add_classifier_header()
261
  # if we are before data_entry_validated, show the button, disabled.
 
283
 
284
  elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
285
  # show the results, and allow manual validation
286
+ st.markdown("""### Inference results and manual validation/adjustment """)
287
+ if st.session_state.MODE_DEV_STATEFUL:
288
+ s = ""
289
+ for k, v in st.session_state.whale_prediction1.items():
290
+ s += f"* Image {k}: {v}\n"
291
+
292
+ st.markdown(s)
293
+
 
294
  # add a button to advance the state
295
+ if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
296
  st.session_state.workflow_fsm.complete_current_state()
297
  # -> manual_inspection_completed
298
  st.rerun()
 
301
 
302
  elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
303
  # show the ML results, and allow the user to upload the observation
304
+ st.markdown("""### Inference Results (after manual validation) """)
 
 
 
305
 
306
 
307
+ if st.button("Upload all observations to THE INTERNET!"):
308
  # let this go through to the push_all func, since it just reports to log for now.
309
+ push_all_observations(enable_push=False)
310
  st.session_state.workflow_fsm.complete_current_state()
311
  # -> data_uploaded
312
+ st.rerun()
313
 
314
  cetacean_show_results()
315
 
316
  elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
317
  # the data has been sent. Lets show the observations again
318
  # but no buttons to upload (or greyed out ok)
319
+ st.markdown("""### Observation(s) uploaded - thank you!""")
320
+ cetacean_show_results()
321
+
322
+ st.divider()
323
  df = pd.DataFrame(st.session_state.observations, index=[0])
324
  st.table(df)
325