File size: 8,865 Bytes
2f87aad
 
 
581fadf
2f87aad
 
 
 
 
 
85c8b0f
2f87aad
 
 
 
 
 
 
 
 
 
aba41f2
2f87aad
 
 
 
 
85c8b0f
2f87aad
 
 
 
581fadf
 
 
 
 
 
 
 
 
 
2f87aad
 
 
 
5c07215
 
 
 
 
 
 
 
2f87aad
 
 
 
 
 
 
 
 
 
53eb5bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
958b383
53eb5bc
 
 
 
8609001
 
 
 
53eb5bc
 
2f87aad
53eb5bc
8609001
53eb5bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f87aad
53eb5bc
 
 
 
 
 
 
 
 
 
 
 
2f87aad
53eb5bc
 
 
 
 
 
 
 
 
2f87aad
53eb5bc
 
 
 
2f87aad
85c8b0f
53eb5bc
 
 
 
 
 
 
 
 
 
 
 
2f87aad
53eb5bc
 
 
 
 
 
 
 
 
2f87aad
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import streamlit as st
import os
import pandas as pd
import logging

# Page-level configuration (browser tab title and icon).
# The icon has to be an actual emoji character: the previous literal was the
# UTF-8 bytes of the fire emoji decoded with the wrong codec (mojibake).
# NOTE(review): this call sits ahead of the project imports, presumably so it
# runs before any other Streamlit command - confirm before moving it.
st.set_page_config(
    page_title="ML Models",
    page_icon="🔥",
)

from utils.st_logs import init_logging_session_states

from transformers import pipeline
from transformers import AutoModelForImageClassification
from classifier.classifier_image import add_classifier_header 

from input.input_handling import setup_input, check_inputs_are_set
from input.input_handling import init_input_container_states, add_input_UI_elements, init_input_data_session_states
from input.input_handling import dbg_show_observation_hashes

from utils.workflow_ui import refresh_progress_display, init_workflow_viz, init_workflow_session_states
from dataset.hf_push_observations import push_all_observations

from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results, init_classifier_session_states
from classifier.classifier_hotdog import hotdog_classify

# Module-level logger for this script.
g_logger = logging.getLogger(__name__)

# Setup for the ML model on Hugging Face (our wrapper): select the
# pure-Python protocol-buffers implementation.
# NOTE(review): `transformers` is already imported above this line; confirm
# the variable is still picked up when set at this point.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# ---------------------------------------------------------------------------
# Hugging Face repository and revision loaded by the cetacean classifier tab.
# To pin an exact revision instead of the default/latest one, use e.g.
#   classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
# ---------------------------------------------------------------------------
classifier_name = "Saving-Willy/cetacean-classifier"
classifier_revision = "main"

# A single toggle for all the extra debug text. It is only given a value when
# the key is absent, so a value already held in the session state is kept.
if "MODE_DEV_STATEFUL" not in st.session_state:
    st.session_state.MODE_DEV_STATEFUL = False

############################################################


 # Streamlit app
# The two top-level tabs; the `with tab_...:` sections further down fill them.
tab_inference, tab_hotdogs = st.tabs(["Cetecean classifier", "Hotdog classifier"])

# Initialise the various session-state variables. The calls run in exactly
# this order; logging comes first because logging init should be early.
for _init_session_state in (
    init_logging_session_states,
    init_workflow_session_states,
    init_input_data_session_states,
    init_input_container_states,
    init_workflow_viz,
    init_classifier_session_states,
):
    _init_session_state()

# Draw the progress indicator early so that it sits at the top of the page
# (it is refreshed once more at the very end of the script).
refresh_progress_display()

# Sidebar: all user input is collected here. The return value of
# setup_input() is not used at this level; the code below reads the
# observations from st.session_state.
with st.sidebar:
    add_input_UI_elements()  # layout handling
    setup_input()  # input elements (file upload, text input, etc)

with tab_inference:
    # Cetacean-classifier tab. What is shown depends on the workflow state
    # machine held in st.session_state.workflow_fsm. States tested below:
    #   doing_data_entry, data_entry_complete, data_entry_validated,
    #   ml_classification_completed, manual_inspection_completed, data_uploaded
    # (the FSM itself is defined elsewhere; the "# -> state" comments record
    # the transition expected from each complete_current_state() call).

    if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
        # can we advance state? - only when all inputs are set for all uploaded files
        all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
        if all_inputs_set:
            st.session_state.workflow_fsm.complete_current_state()
            # -> data_entry_complete
        else:
            # button, disabled; no state change yet.
            st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")

    # A separate `if` rather than `elif`: a transition made just above is
    # acted on within the same script run.
    if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
        # can we advance state? - only when the validate button is pressed
        if st.sidebar.button(":white_check_mark:[**Validate**]"):
            # record the submitted observations in the log
            g_logger.info(f"{st.session_state.observations}")

            # now disable all the input boxes / widgets
            st.session_state.input_disabled = True

            # TODO: there doesn't seem to be any actual validation here. Find a
            # validator function (each element is validated by its input box, but
            # is there something at the whole-image level?). Maybe this step
            # should really just be "I'm done with data entry".
            st.session_state.workflow_fsm.complete_current_state()
            # -> data_entry_validated
            st.rerun()  # refresh so the input widgets are immediately disabled

    if st.session_state.MODE_DEV_STATEFUL:
        dbg_show_observation_hashes()

    add_classifier_header()
    # if we are before data_entry_validated, show the button, disabled.
    # (It shares its key with the enabled button below; the two conditions
    # cannot both hold in one run, so only one of them is created.)
    if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
        tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True,
                             help="Please validate inputs before proceeding",
                             key="button_infer_ceteans")

    if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
        # show the button, enabled. If pressed, we start the ML model (and advance state)
        if tab_inference.button("Identify with cetacean classifier",
                                key="button_infer_ceteans"):
            # The model is loaded from the hub on each press (not cached here).
            cetacean_classifier = AutoModelForImageClassification.from_pretrained(
                classifier_name,
                revision=classifier_revision,
                trust_remote_code=True)

            cetacean_just_classify(cetacean_classifier)
            st.session_state.workflow_fsm.complete_current_state()
            # Refreshing the progress indicator programmatically does not by
            # itself trigger a script rerun, so rerun explicitly in order to
            # enter the next state (visualising the results / review).
            refresh_progress_display()
            # TODO: validate this doesn't harm performance adversely.
            st.rerun()

    elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
        # show the results, and allow manual validation
        st.markdown("""### Inference results and manual validation/adjustment """)
        if st.session_state.MODE_DEV_STATEFUL:
            # dev-only dump: one markdown bullet per image prediction
            prediction_lines = "".join(
                f"* Image {k}: {v}\n"
                for k, v in st.session_state.whale_prediction1.items()
            )
            st.markdown(prediction_lines)

        # add a button to advance the state
        # (icon restored to the real "eyes" emoji; the previous literal was mojibake)
        if st.button("I have looked over predictions and confirm correct species", icon="👀",
                     type="primary",
                     help="Confirm that all species are selected correctly"):
            st.session_state.workflow_fsm.complete_current_state()
            # -> manual_inspection_completed
            st.rerun()

        cetacean_show_results_and_review()

    elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
        # show the ML results, and allow the user to upload the observation
        st.markdown("""### Inference Results (after manual validation) """)

        # (icon restored to the real "up arrow" emoji; the previous literal was mojibake)
        if st.button("Upload all observations to THE INTERNET!", icon="⬆️",
                     type="primary",):
            # let this go through to the push_all func, since it just reports to log for now.
            push_all_observations(enable_push=False)
            st.session_state.workflow_fsm.complete_current_state()
            # -> data_uploaded
            st.rerun()

        cetacean_show_results()

    elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
        # the data has been sent. Let's show the observations again,
        # but with no buttons to upload (or greyed out ok)
        st.markdown("""### Observation(s) uploaded - thank you!""")
        cetacean_show_results()

        st.divider()
        # tabulate every observation, one row per observation
        df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
        st.table(df)

        # No follow-on state has been decided yet - this is treated as the
        # terminal state, so the FSM is not advanced here.
        #st.session_state.workflow_fsm.complete_current_state()
  
with tab_hotdogs:
    # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
    # purposes, a hotdog image classifier) which will be run locally.
    # - this model predicts if the image is a hotdog or not, and returns probabilities
    # - the input image is the same as for the cetacean classifier - defined in the sidebar
    tab_hotdogs.title("Hot Dog? Or Not?")
    tab_hotdogs.write("""
                *Run alternative classifer on input images. Here we are using
                a binary classifier - hotdog or not - from
                huggingface.co/julien-c/hotdog-not-hotdog.*""")

    if tab_hotdogs.button("Get Hotdog Prediction"):
        if st.session_state.image is None:
            # Nothing to classify: tell the user, and skip building the model.
            st.info("Please upload an image first.")
        else:
            # Build the pipeline only once we know there is an image, so a
            # press without an upload does not pay for loading the model.
            pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
            hotdog_classify(pipeline_hot_dog, tab_hotdogs)
            
        
# After all other processing, show the stage/state again. This is the second
# call in the script (the first is made before the tabs are processed), so any
# workflow state change made during this run is presumably reflected here.
refresh_progress_display()