Spaces:
Running
Running
rmm
committed on
Commit
·
8c4b1f7
1
Parent(s):
aa998c4
chore: tidy up of workflow and debug clutter
Browse files
- basically all phases seem ok, almost ready for validation
- src/classifier/classifier_image.py +14 -8
- src/hf_push_observations.py +18 -17
- src/input/input_observation.py +1 -7
- src/main.py +24 -21
src/classifier/classifier_image.py
CHANGED
@@ -39,9 +39,10 @@ def cetacean_just_classify(cetacean_classifier):
|
|
39 |
msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
|
40 |
g_logger.info(msg)
|
41 |
|
42 |
-
#
|
43 |
st.session_state.public_observations[hash] = observation
|
44 |
-
|
|
|
45 |
|
46 |
|
47 |
# func to show results and allow review
|
@@ -70,7 +71,7 @@ def cetacean_show_results_and_review():
|
|
70 |
else:
|
71 |
pred1 = st.session_state.whale_prediction1[hash]
|
72 |
# get index of pred1 from WHALE_CLASSES, none if not present
|
73 |
-
print(f"[D] pred1: {pred1}")
|
74 |
ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
|
75 |
selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
|
76 |
|
@@ -79,7 +80,7 @@ def cetacean_show_results_and_review():
|
|
79 |
observation['class_overriden'] = selected_class # TODO: this should be boolean!
|
80 |
|
81 |
st.session_state.public_observations[hash] = observation
|
82 |
-
st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
83 |
# TODO: the metadata only fills properly if `validate` was clicked.
|
84 |
st.markdown(metadata2md(hash))
|
85 |
|
@@ -91,7 +92,7 @@ def cetacean_show_results_and_review():
|
|
91 |
whale_classes = observations[hash].top_predictions
|
92 |
# render images for the top 3 (that is what the model api returns)
|
93 |
n = len(whale_classes)
|
94 |
-
st.markdown(f"Top {n} Predictions for observation {str(o)}")
|
95 |
for i in range(n):
|
96 |
viewer.display_whale(whale_classes, i)
|
97 |
o += 1
|
@@ -134,9 +135,14 @@ def cetacean_show_results():
|
|
134 |
# observation['class_overriden'] = selected_class # TODO: this should be boolean!
|
135 |
|
136 |
# st.session_state.public_observation = observation
|
137 |
-
|
138 |
-
#
|
|
|
139 |
st.markdown(metadata2md(hash))
|
|
|
|
|
|
|
|
|
140 |
st.markdown(f"- **hash**: {hash}")
|
141 |
|
142 |
msg = f"[D] full observation after inference: {observation}"
|
@@ -147,7 +153,7 @@ def cetacean_show_results():
|
|
147 |
whale_classes = observations[hash].top_predictions
|
148 |
# render images for the top 3 (that is what the model api returns)
|
149 |
n = len(whale_classes)
|
150 |
-
st.markdown(f"Top {n} Predictions for observation {str(o)}")
|
151 |
for i in range(n):
|
152 |
viewer.display_whale(whale_classes, i)
|
153 |
o += 1
|
|
|
39 |
msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
|
40 |
g_logger.info(msg)
|
41 |
|
42 |
+
# store the elements of the observation that will be transmitted (not image)
|
43 |
st.session_state.public_observations[hash] = observation
|
44 |
+
if st.session_state.MODE_DEV_STATEFUL:
|
45 |
+
st.write(f"*[D] Observation {hash} classified as {st.session_state.whale_prediction1[hash]}*")
|
46 |
|
47 |
|
48 |
# func to show results and allow review
|
|
|
71 |
else:
|
72 |
pred1 = st.session_state.whale_prediction1[hash]
|
73 |
# get index of pred1 from WHALE_CLASSES, none if not present
|
74 |
+
print(f"[D] {o:3} pred1: {pred1:30} | {hash}")
|
75 |
ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
|
76 |
selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
|
77 |
|
|
|
80 |
observation['class_overriden'] = selected_class # TODO: this should be boolean!
|
81 |
|
82 |
st.session_state.public_observations[hash] = observation
|
83 |
+
#st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
84 |
# TODO: the metadata only fills properly if `validate` was clicked.
|
85 |
st.markdown(metadata2md(hash))
|
86 |
|
|
|
92 |
whale_classes = observations[hash].top_predictions
|
93 |
# render images for the top 3 (that is what the model api returns)
|
94 |
n = len(whale_classes)
|
95 |
+
st.markdown(f"**Top {n} Predictions for observation {str(o)}**")
|
96 |
for i in range(n):
|
97 |
viewer.display_whale(whale_classes, i)
|
98 |
o += 1
|
|
|
135 |
# observation['class_overriden'] = selected_class # TODO: this should be boolean!
|
136 |
|
137 |
# st.session_state.public_observation = observation
|
138 |
+
|
139 |
+
#st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
140 |
+
#
|
141 |
st.markdown(metadata2md(hash))
|
142 |
+
# TODO: FIXME: this is the data that will get pushed -- it DOESN'T reflect any adjustments
|
143 |
+
# # made via the dropdown on the last step!!!!
|
144 |
+
#st.markdown(f"- **selected species**: {observation['predicted_class']}")
|
145 |
+
st.markdown(f"- **selected species**: {st.session_state.whale_prediction1[hash]}")
|
146 |
st.markdown(f"- **hash**: {hash}")
|
147 |
|
148 |
msg = f"[D] full observation after inference: {observation}"
|
|
|
153 |
whale_classes = observations[hash].top_predictions
|
154 |
# render images for the top 3 (that is what the model api returns)
|
155 |
n = len(whale_classes)
|
156 |
+
st.markdown(f"**Top {n} Predictions for observation {str(o)}**")
|
157 |
for i in range(n):
|
158 |
viewer.display_whale(whale_classes, i)
|
159 |
o += 1
|
src/hf_push_observations.py
CHANGED
@@ -13,7 +13,7 @@ LOG_LEVEL = logging.DEBUG
|
|
13 |
g_logger = logging.getLogger(__name__)
|
14 |
g_logger.setLevel(LOG_LEVEL)
|
15 |
|
16 |
-
def push_observation(image_hash:str, api:HfApi) -> CommitInfo:
|
17 |
'''
|
18 |
push one observation to the Hugging Face dataset
|
19 |
|
@@ -38,32 +38,33 @@ def push_observation(image_hash:str, api:HfApi) -> CommitInfo:
|
|
38 |
f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
|
39 |
f.write(metadata_str)
|
40 |
f.close()
|
41 |
-
st.info(f"temp file: {f.name} with metadata written...")
|
42 |
|
43 |
-
# observation['author_email']
|
44 |
-
# observation['image_md5']
|
45 |
path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
|
46 |
|
47 |
msg = f"fname: {f.name} | path: {path_in_repo}"
|
48 |
print(msg)
|
49 |
st.warning(msg)
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
61 |
|
62 |
return rv
|
63 |
|
64 |
|
65 |
|
66 |
-
def push_all_observations():
|
67 |
'''
|
68 |
open an API connection to Hugging Face, and push all observations one by one
|
69 |
'''
|
@@ -74,7 +75,7 @@ def push_all_observations():
|
|
74 |
|
75 |
# iterate over the list of observations
|
76 |
for hash in st.session_state.public_observations.keys():
|
77 |
-
rv = push_observation(hash, api)
|
78 |
|
79 |
|
80 |
|
|
|
13 |
g_logger = logging.getLogger(__name__)
|
14 |
g_logger.setLevel(LOG_LEVEL)
|
15 |
|
16 |
+
def push_observation(image_hash:str, api:HfApi, enable_push:False) -> CommitInfo:
|
17 |
'''
|
18 |
push one observation to the Hugging Face dataset
|
19 |
|
|
|
38 |
f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
|
39 |
f.write(metadata_str)
|
40 |
f.close()
|
41 |
+
#st.info(f"temp file: {f.name} with metadata written...")
|
42 |
|
|
|
|
|
43 |
path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
|
44 |
|
45 |
msg = f"fname: {f.name} | path: {path_in_repo}"
|
46 |
print(msg)
|
47 |
st.warning(msg)
|
48 |
+
|
49 |
+
if enable_push:
|
50 |
+
rv = api.upload_file(
|
51 |
+
path_or_fileobj=f.name,
|
52 |
+
path_in_repo=path_in_repo,
|
53 |
+
repo_id="Saving-Willy/temp_dataset",
|
54 |
+
repo_type="dataset",
|
55 |
+
)
|
56 |
+
print(rv)
|
57 |
+
msg = f"observation attempted tx to repo happy walrus: {rv}"
|
58 |
+
g_logger.info(msg)
|
59 |
+
st.info(msg)
|
60 |
+
else:
|
61 |
+
rv = None # temp don't send anything
|
62 |
|
63 |
return rv
|
64 |
|
65 |
|
66 |
|
67 |
+
def push_all_observations(enable_push:bool=False):
|
68 |
'''
|
69 |
open an API connection to Hugging Face, and push all observations one by one
|
70 |
'''
|
|
|
75 |
|
76 |
# iterate over the list of observations
|
77 |
for hash in st.session_state.public_observations.keys():
|
78 |
+
rv = push_observation(hash, api, enable_push=enable_push)
|
79 |
|
80 |
|
81 |
|
src/input/input_observation.py
CHANGED
@@ -92,13 +92,7 @@ class InputObservation:
|
|
92 |
raise DeprecationWarning("This method is deprecated. hash is a required constructor argument.")
|
93 |
if not self.image_md5:
|
94 |
self.image_md5 = hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5()
|
95 |
-
|
96 |
-
# new comment / hybj hunk
|
97 |
-
self._cprint(f"[D] Assigned image md5: {self.image_md5} for {self.uploaded_file}")
|
98 |
-
|
99 |
-
def _cprint(self, msg:str, color:str=OKGREEN):
|
100 |
-
"""Print colored message"""
|
101 |
-
print(f"{color}{msg}{ENDC}")
|
102 |
|
103 |
def __str__(self):
|
104 |
_im_str = "None" if self.image is None else f"image dims: {self.image.shape}"
|
|
|
92 |
raise DeprecationWarning("This method is deprecated. hash is a required constructor argument.")
|
93 |
if not self.image_md5:
|
94 |
self.image_md5 = hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5()
|
95 |
+
m_logger.debug(f"[D] Assigned image md5: {self.image_md5} for {self.uploaded_file}")
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
def __str__(self):
|
98 |
_im_str = "None" if self.image is None else f"image dims: {self.image.shape}"
|
src/main.py
CHANGED
@@ -44,6 +44,11 @@ data_files = "data/train-00000-of-00001.parquet"
|
|
44 |
USE_BASIC_MAP = False
|
45 |
DEV_SIDEBAR_LIB = True
|
46 |
|
|
|
|
|
|
|
|
|
|
|
47 |
# get a global var for logger accessor in this module
|
48 |
LOG_LEVEL = logging.DEBUG
|
49 |
g_logger = logging.getLogger(__name__)
|
@@ -249,7 +254,8 @@ def main() -> None:
|
|
249 |
#
|
250 |
with tab_inference:
|
251 |
|
252 |
-
|
|
|
253 |
|
254 |
add_classifier_header()
|
255 |
# if we are before data_entry_validated, show the button, disabled.
|
@@ -277,17 +283,16 @@ def main() -> None:
|
|
277 |
|
278 |
elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
|
279 |
# show the results, and allow manual validation
|
280 |
-
|
281 |
-
|
282 |
-
s
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
st.markdown(s)
|
289 |
# add a button to advance the state
|
290 |
-
if st.button("
|
291 |
st.session_state.workflow_fsm.complete_current_state()
|
292 |
# -> manual_inspection_completed
|
293 |
st.rerun()
|
@@ -296,27 +301,25 @@ def main() -> None:
|
|
296 |
|
297 |
elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
|
298 |
# show the ML results, and allow the user to upload the observation
|
299 |
-
st.markdown("""
|
300 |
-
### Inference Results (after manual validation)
|
301 |
-
:construction: for now we just show the button.
|
302 |
-
""")
|
303 |
|
304 |
|
305 |
-
if st.button("
|
306 |
# let this go through to the push_all func, since it just reports to log for now.
|
307 |
-
push_all_observations()
|
308 |
st.session_state.workflow_fsm.complete_current_state()
|
309 |
# -> data_uploaded
|
|
|
310 |
|
311 |
cetacean_show_results()
|
312 |
|
313 |
elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
|
314 |
# the data has been sent. Lets show the observations again
|
315 |
# but no buttons to upload (or greyed out ok)
|
316 |
-
st.markdown("""
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
df = pd.DataFrame(st.session_state.observations, index=[0])
|
321 |
st.table(df)
|
322 |
|
|
|
44 |
USE_BASIC_MAP = False
|
45 |
DEV_SIDEBAR_LIB = True
|
46 |
|
47 |
+
# one toggle for all the extra debug text
|
48 |
+
if "MODE_DEV_STATEFUL" not in st.session_state:
|
49 |
+
st.session_state.MODE_DEV_STATEFUL = False
|
50 |
+
|
51 |
+
|
52 |
# get a global var for logger accessor in this module
|
53 |
LOG_LEVEL = logging.DEBUG
|
54 |
g_logger = logging.getLogger(__name__)
|
|
|
254 |
#
|
255 |
with tab_inference:
|
256 |
|
257 |
+
if st.session_state.MODE_DEV_STATEFUL:
|
258 |
+
dbg_show_obs_hashes()
|
259 |
|
260 |
add_classifier_header()
|
261 |
# if we are before data_entry_validated, show the button, disabled.
|
|
|
283 |
|
284 |
elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
|
285 |
# show the results, and allow manual validation
|
286 |
+
st.markdown("""### Inference results and manual validation/adjustment """)
|
287 |
+
if st.session_state.MODE_DEV_STATEFUL:
|
288 |
+
s = ""
|
289 |
+
for k, v in st.session_state.whale_prediction1.items():
|
290 |
+
s += f"* Image {k}: {v}\n"
|
291 |
+
|
292 |
+
st.markdown(s)
|
293 |
+
|
|
|
294 |
# add a button to advance the state
|
295 |
+
if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
|
296 |
st.session_state.workflow_fsm.complete_current_state()
|
297 |
# -> manual_inspection_completed
|
298 |
st.rerun()
|
|
|
301 |
|
302 |
elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
|
303 |
# show the ML results, and allow the user to upload the observation
|
304 |
+
st.markdown("""### Inference Results (after manual validation) """)
|
|
|
|
|
|
|
305 |
|
306 |
|
307 |
+
if st.button("Upload all observations to THE INTERNET!"):
|
308 |
# let this go through to the push_all func, since it just reports to log for now.
|
309 |
+
push_all_observations(enable_push=False)
|
310 |
st.session_state.workflow_fsm.complete_current_state()
|
311 |
# -> data_uploaded
|
312 |
+
st.rerun()
|
313 |
|
314 |
cetacean_show_results()
|
315 |
|
316 |
elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
|
317 |
# the data has been sent. Lets show the observations again
|
318 |
# but no buttons to upload (or greyed out ok)
|
319 |
+
st.markdown("""### Observation(s) uploaded - thank you!""")
|
320 |
+
cetacean_show_results()
|
321 |
+
|
322 |
+
st.divider()
|
323 |
df = pd.DataFrame(st.session_state.observations, index=[0])
|
324 |
st.table(df)
|
325 |
|