Spaces:

ebrowne
/

retrieval-study

Sleeping

App Files Files Community

elibrowne committed on Aug 27, 2024

Commit

61aa64b

1 Parent(s): 70590fe

State?

Browse files

Files changed (1) hide show

app.py +149 -61

app.py CHANGED Viewed

@@ -8,13 +8,18 @@ from huggingface_hub import hf_hub_download, file_exists, HfApi
 from random import shuffle
 from markdown import markdown
-# Global variables which interact with loading and unloading
 user_data = {}
 current_response = {}
 current_question = {} # read-only within gradio blocks
 user_id = "no_id"
-qIDs = ["mbe_46", "mbe_132", "mbe_287", "mbe_326", "mbe_334", "mbe_389", "mbe_563", "mbe_614", "mbe_642", "mbe_747", "mbe_779", "mbe_826", "mbe_845", "mbe_1042", "mbe_1134"]
-mode_options = ["e5", "colbert"]
 # Control global variables
 step = 0
 mode = 1
@@ -99,6 +104,8 @@ def load_current_question():
         current_question = all_questions[qid]
         reset_current_response(user_data["order"][q_index])
 # THEMING: colors and styles (Gradio native)
 theme = gr.themes.Soft(
@@ -111,6 +118,92 @@ theme = gr.themes.Soft(
 # BLOCKS: main user interface
 with gr.Blocks(theme = theme) as user_eval:
     # Title text introducing study
     forward_btn = gr.Textbox("unchanged", visible = False, elem_id = "togglebutton") # used for toggling windows
     gr.HTML("""
@@ -165,15 +258,12 @@ with gr.Blocks(theme = theme) as user_eval:
             eval_satisfied = gr.Slider(1, 5, step = 0.5, label = "User Satisfaction", value = 3)
             btn_g = gr.Button("Next")
-        def next_p(e0, e1, e2, e3):
-            global step
-            global mode
-            global current_response
-            step += 1
             # Add user data to the current response
             current_response["e5_scores"].append([e0, e1, e2, e3])
             # Next item
-            if step == len(current_question["top10_e5"]): # should always be 10
                 # Step 10: all sources
                 collapsible_string = "<h2> Set of Passages </h2>\n"
                 for i, passage in enumerate(current_question["top10_e5"]):
@@ -188,7 +278,10 @@ with gr.Blocks(theme = theme) as user_eval:
                     eval_0: gr.Radio(value = None),
                     eval_1: gr.Slider(value = 3),
                     eval_2: gr.Slider(value = 3),
-                    eval_3: gr.Slider(value = 0)
                 }
             else:
                 return {
@@ -198,15 +291,14 @@ with gr.Blocks(theme = theme) as user_eval:
                     eval_0: gr.Radio(value = None),
                     eval_1: gr.Slider(value = 3),
                     eval_2: gr.Slider(value = 3),
-                    eval_3: gr.Slider(value = 0)
                 }
-        def next_g(e_h, e_s):
-            global step
-            global mode
-            global user_data
-            global current_response
-            step += 1
             if step == 11:
                 # Step 11: guaranteed to be generation
@@ -218,22 +310,28 @@ with gr.Blocks(theme = theme) as user_eval:
                         <h2> Autogenerated Response </h2>
                         <p>""" + markdown(current_question["generation_e5"]) + "</p>"),
                     eval_helps: gr.Slider(value = 0),
-                    eval_satisfied: gr.Slider(value = 3)
                 }
             # Steps 12 and 13 are gold passage + gold passage generation IF it is applicable
             if step > 11: # and not current_question["top10_contains_gold_passage"]
                 # When mode is 0 -> reset with mode = 1
                 if mode == 0:
                     # The user just evaluated a generation for mode 0
                     current_response[user_data["modes"][user_data["current"]][mode] + "_generation"] = [e_h, e_s]
                     return {
-                        selection: gr.HTML("""
                                 <h2> Retrieved Passage </h2>
-                                <p> """ + current_question["top10_" + user_data["modes"][user_data["current"]][1]][0] + "</p>"), # hard coded: first passage (0) of mode 2 (1),
                         forward_btn: gr.Textbox("load new data"),
                         eval_helps: gr.Slider(value = 0),
                         eval_satisfied: gr.Slider(value = 3)
                     }
                 # When mode is 1 -> display GP and GP generation, then switch
                 if step == 12:
                     # The user just evaluated a generation for mode 1
@@ -245,7 +343,11 @@ with gr.Blocks(theme = theme) as user_eval:
                             <p> """ + current_question["gold_passage"] + "</p>"),
                         forward_btn: gr.Textbox(),
                         eval_helps: gr.Slider(value = 0),
-                        eval_satisfied: gr.Slider(value = 3)
                     }
                 elif step == 13:
                    # The user just evaluated the gold passage
@@ -256,7 +358,11 @@ with gr.Blocks(theme = theme) as user_eval:
                             <p> """ + markdown(current_question["gold_passage_generation"]) + "</p>"),
                         forward_btn: gr.Textbox(),
                         eval_helps: gr.Slider(value = 0),
-                        eval_satisfied: gr.Slider(value = 3)
                    }
                 else: # step = 14
                     # The user just evaluated the gold passage generation
@@ -269,37 +375,15 @@ with gr.Blocks(theme = theme) as user_eval:
                         selection: gr.Markdown("Advancing to the next question..."),
                         forward_btn: gr.Textbox("changed" + str(user_data["current"])), # current forces event to trigger always
                         eval_helps: gr.Slider(value = 0),
-                        eval_satisfied: gr.Slider(value = 3)
                     }
-            # VERY UNCLEAN CODE: for practical purposes, this else block is unreachable: not current_question["top10_contains_gold_passage"] will always be True
-            """
-            else:
-                # When mode is 0 -> reset with mode = 1
-                if mode == 0:
-                    return {
-                        selection: gr.HTML(\"""
-                                <h2> Retrieved Passage </h2>
-                                <p> \""" + current_question["top10_" + user_data["modes"][user_data["current"]][1]][0] + "</p>"), # hard coded: first passage (0) of mode 2 (1)
-                        forward_btn: gr.Textbox("load new data"),
-                        eval_helps: gr.Slider(value = 1),
-                        eval_satisfied: gr.Slider(value = 1)
-                    }
-                # When mode is 1 -> change question
-                user_data["current"] += 1
-                user_data["responses"].append(current_response) # adds new answers to current list of responses
-                # Update stored data with new current, additional data
-                update_huggingface(user_id)
-                load_current_question()
-                return {
-                    selection: gr.Markdown("Advancing to the next question..."),
-                    forward_btn: gr.Textbox("changed"),
-                    eval_helps: gr.Slider(value = 1),
-                    eval_satisfied: gr.Slider(value = 1)
-                }
-                """
-        btn_p.click(fn = next_p, inputs = [eval_0, eval_1, eval_2, eval_3], outputs = [selection, scores_p, scores_g, eval_0, eval_1, eval_2, eval_3])
-        btn_g.click(fn = next_g, inputs = [eval_helps, eval_satisfied], outputs = [selection, forward_btn, eval_helps, eval_satisfied])
     # Question and answering dynamics
     with gr.Row(equal_height = False, visible = False) as question:
@@ -346,9 +430,7 @@ with gr.Blocks(theme = theme) as user_eval:
             c.click(fn = answer_c, outputs = [question, evals])
             d.click(fn = answer_d, outputs = [question, evals])
-    def toggle():
-        global step
-        global mode
         step = 0
         if mode == 0: # temporarily disabled — will never be mode 0
             mode = 1 # update mode to 1, will restart with same Q, next set of Ps
@@ -358,6 +440,8 @@ with gr.Blocks(theme = theme) as user_eval:
                 scores_g: gr.Column(visible = False),
                 evals: gr.Row(visible = True),
                 question: gr.Row(visible = False),
             }
         else:
             # reset mode to 0, will restart with new Q (set up new Q), first set of Ps
@@ -388,10 +472,12 @@ with gr.Blocks(theme = theme) as user_eval:
                         + """ + new_answers[3]),
                 selection: gr.HTML("""
                     <h2> Retrieved Passage </h2>
-                    <p> """ + current_question["top10_e5"][0] + "</p>")
             }                                   # note change from "top10_" + user_data["modes"][user_data["current"]][mode]][0]
-    forward_btn.change(fn = toggle, inputs = None, outputs = [scores_p, scores_g, evals, question, q_text, a, b, c, d, passage_display, selection])
     with gr.Row() as login:
         with gr.Column():
@@ -408,11 +494,10 @@ with gr.Blocks(theme = theme) as user_eval:
             email.change(fn = sanitize_login, inputs = [email], outputs = [s])
             def submit_email(email):
-                global user_id
                 user_id = email
-                load_user_data(user_id) # calls login, downloads data, initializes session
                 # After loading user data, update with current question
-                load_current_question()
                 new_answers = current_question["answers"].copy()
                 new_answers[current_question["correct_answer_index"]] = "**" + current_question["answers"][current_question["correct_answer_index"]] + "** ✅"
                 return {
@@ -437,9 +522,12 @@ with gr.Blocks(theme = theme) as user_eval:
                     a: gr.Button(current_question["answers"][0]),
                     b: gr.Button(current_question["answers"][1]),
                     c: gr.Button(current_question["answers"][2]),
-                    d: gr.Button(current_question["answers"][3])
                 }
-            s.click(fn = submit_email, inputs = [email], outputs = [question, login, selection, passage_display, q_text, a, b, c, d])
 # Starts on question, switches to evaluation after the user answers
 user_eval.launch()

 from random import shuffle
 from markdown import markdown
+# Read-only reference variables
+qIDs = ["mbe_46", "mbe_132", "mbe_287", "mbe_326", "mbe_334", "mbe_389", "mbe_563", "mbe_614", "mbe_642", "mbe_747", "mbe_779", "mbe_826", "mbe_845", "mbe_1042", "mbe_1134"]
+mode_options = ["e5", "colbert"]
+with open("question_data.json", "r") as f:
+    all_questions = json.load(f)
+"""
+# State variables which interact with loading and unloading
 user_data = {}
 current_response = {}
 current_question = {} # read-only within gradio blocks
 user_id = "no_id"
 # Control global variables
 step = 0
 mode = 1
         current_question = all_questions[qid]
         reset_current_response(user_data["order"][q_index])
+"""
 # THEMING: colors and styles (Gradio native)
 theme = gr.themes.Soft(
 # BLOCKS: main user interface
 with gr.Blocks(theme = theme) as user_eval:
+    # ALL VARIABLES AND LOADING
+    # State variables which interact with loading and unloading
+    user_data = gr.State({})
+    current_response = gr.State({})
+    current_question = gr.State({}) # read-only within gradio blocks
+    user_id = gr.State("no_id")
+    # Control global variables
+    step = gr.State(0)
+    mode = 1 # mode is always 1 for now
+    def load_user_data(id):
+        filename = id.replace('@', '_AT_').replace('.', '_DOT_')
+        if file_exists(filename = "users/" + filename + ".json", repo_id = "ebrowne/test-data", repo_type = "dataset", token = os.getenv("HF_TOKEN")):
+            print("File exists, downloading data.")
+            # If the ID exists, download the file from HuggingFace
+            path = hf_hub_download(repo_id = "ebrowne/test-data", token = os.getenv("HF_TOKEN"), filename = "users/" + filename + ".json", repo_type = "dataset")
+            # Add their current status to user_data
+            with open(path, "r") as f:
+                return json.load(f)
+        else:
+            # If the ID doesn't exist, create a format for the file and upload it to HuggingFace
+            print("File does not exist, creating user.")
+            shuffle(qIDs)
+            modes = []
+            for i in range(len(qIDs)):
+                temp = mode_options[:]
+                shuffle(temp)
+                modes.append(temp)
+            # This is the format for a user's file on HuggingFace
+            return {
+                "user_id": id, # original in email format, which was passed here
+                "order": qIDs, # randomized order for each user
+                "modes": modes, # randomized order for each user
+                "current": 0, # user starts on first question
+                "responses": [] # formatted as a list of current_responses
+            }
+            # No longer uploading after first creation: user must answer question for that.
+    def update_huggingface(id, data):
+        print("Updating data...")
+        filename = id.replace('@', '_AT_').replace('.', '_DOT_')
+        # Create a local file that will be uploaded to HuggingFace
+        with open(filename + ".json", "w") as f:
+            json.dump(data, f)
+        # Upload to hub (overwriting existing files...)
+        api = HfApi()
+        api.upload_file(
+            path_or_fileobj=filename + ".json",
+            path_in_repo="users/" + filename + ".json",
+            repo_id="ebrowne/test-data",
+            repo_type="dataset",
+            token = os.getenv("HF_TOKEN")
+        )
+    def reset_current_response(qid, user_id):
+        return {
+            current_response : {
+                "user_id": user_id,
+                "question_id": qid,
+                "user_answer": 0,
+                "e5_scores": [], # list of ten [score, score, score, score]
+                "e5_set": [], # two values
+                "e5_generation": [], # two values
+                "colbert_scores": [],
+                "colbert_set": [],
+                "colbert_generation": [],
+                "gold_set": [],
+                "gold_generation": []
+            }
+        }
+    # Loads the user's current question — this is the first question that the user has not made any progress on.
+    def load_current_question():
+        q_index = user_data["current"]
+        if q_index >= len(all_questions):
+            print("Done")
+            gr.Info("You've finished — thank you so much! There are no more questions. :)")
+            reset_current_response("USER FINISHED")
+            return {"question": "You're done! Thanks so much for your help.", "answers": ["I want to log out now.", "I want to keep answering questions.","I want to keep answering questions.", "I want to keep answering questions."], "correct_answer_index": 0, "top10_e5": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_e5": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_colbert": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_colbert": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_contains_gold_passage": False, "gold_passage": "GOLD PASSAGE: LOG OFF!", "gold_passage_generation": "what do you gain"}
+        else:
+            qid = user_data["order"][q_index]
+            reset_current_response(user_data["order"][q_index])
+            return all_questions[qid]
     # Title text introducing study
     forward_btn = gr.Textbox("unchanged", visible = False, elem_id = "togglebutton") # used for toggling windows
     gr.HTML("""
             eval_satisfied = gr.Slider(1, 5, step = 0.5, label = "User Satisfaction", value = 3)
             btn_g = gr.Button("Next")
+        def next_p(e0, e1, e2, e3, cur_step, mode, current_response):
+            step = cur_step + 1
             # Add user data to the current response
             current_response["e5_scores"].append([e0, e1, e2, e3])
             # Next item
+            if step >= len(current_question["top10_e5"]): # should always be 10 (DEBUG: >= to avoid out of bounds)
                 # Step 10: all sources
                 collapsible_string = "<h2> Set of Passages </h2>\n"
                 for i, passage in enumerate(current_question["top10_e5"]):
                     eval_0: gr.Radio(value = None),
                     eval_1: gr.Slider(value = 3),
                     eval_2: gr.Slider(value = 3),
+                    eval_3: gr.Slider(value = 0),
+                    step: step,
+                    mode: 1,
+                    current_response: current_response
                 }
             else:
                 return {
                     eval_0: gr.Radio(value = None),
                     eval_1: gr.Slider(value = 3),
                     eval_2: gr.Slider(value = 3),
+                    eval_3: gr.Slider(value = 0),
+                    step: step,
+                    mode: 1,
+                    current_response: current_response
                 }
+        def next_g(e_h, e_s, cur_step, mode, user_data, current_response):
+            step = cur_step + 1
             if step == 11:
                 # Step 11: guaranteed to be generation
                         <h2> Autogenerated Response </h2>
                         <p>""" + markdown(current_question["generation_e5"]) + "</p>"),
                     eval_helps: gr.Slider(value = 0),
+                    eval_satisfied: gr.Slider(value = 3),
+                    step: step,
+                    mode: mode,
+                    user_data: user_data,
+                    current_response: current_response
                 }
             # Steps 12 and 13 are gold passage + gold passage generation IF it is applicable
             if step > 11: # and not current_question["top10_contains_gold_passage"]
                 # When mode is 0 -> reset with mode = 1
+                """
                 if mode == 0:
                     # The user just evaluated a generation for mode 0
                     current_response[user_data["modes"][user_data["current"]][mode] + "_generation"] = [e_h, e_s]
                     return {
+                        selection: gr.HTML(\"""
                                 <h2> Retrieved Passage </h2>
+                                <p> \""" + current_question["top10_" + user_data["modes"][user_data["current"]][1]][0] + "</p>"), # hard coded: first passage (0) of mode 2 (1),
                         forward_btn: gr.Textbox("load new data"),
                         eval_helps: gr.Slider(value = 0),
                         eval_satisfied: gr.Slider(value = 3)
                     }
+                """
                 # When mode is 1 -> display GP and GP generation, then switch
                 if step == 12:
                     # The user just evaluated a generation for mode 1
                             <p> """ + current_question["gold_passage"] + "</p>"),
                         forward_btn: gr.Textbox(),
                         eval_helps: gr.Slider(value = 0),
+                        eval_satisfied: gr.Slider(value = 3),
+                        step: step,
+                        mode: mode,
+                        user_data: user_data,
+                        current_response: current_response
                     }
                 elif step == 13:
                    # The user just evaluated the gold passage
                             <p> """ + markdown(current_question["gold_passage_generation"]) + "</p>"),
                         forward_btn: gr.Textbox(),
                         eval_helps: gr.Slider(value = 0),
+                        eval_satisfied: gr.Slider(value = 3),
+                        step: step,
+                        mode: mode,
+                        user_data: user_data,
+                        current_response: current_response
                    }
                 else: # step = 14
                     # The user just evaluated the gold passage generation
                         selection: gr.Markdown("Advancing to the next question..."),
                         forward_btn: gr.Textbox("changed" + str(user_data["current"])), # current forces event to trigger always
                         eval_helps: gr.Slider(value = 0),
+                        eval_satisfied: gr.Slider(value = 3),
+                        step: step,
+                        mode: mode,
+                        user_data: user_data,
+                        current_response: current_response
                     }
+        btn_p.click(fn = next_p, inputs = [eval_0, eval_1, eval_2, eval_3, step, mode, current_response], outputs = [selection, scores_p, scores_g, eval_0, eval_1, eval_2, eval_3, step, mode, current_response])
+        btn_g.click(fn = next_g, inputs = [eval_helps, eval_satisfied, step, mode, user_data, current_response], outputs = [selection, forward_btn, eval_helps, eval_satisfied, step, mode, user_data, current_response])
     # Question and answering dynamics
     with gr.Row(equal_height = False, visible = False) as question:
             c.click(fn = answer_c, outputs = [question, evals])
             d.click(fn = answer_d, outputs = [question, evals])
+    def toggle(step, mode):
         step = 0
         if mode == 0: # temporarily disabled — will never be mode 0
             mode = 1 # update mode to 1, will restart with same Q, next set of Ps
                 scores_g: gr.Column(visible = False),
                 evals: gr.Row(visible = True),
                 question: gr.Row(visible = False),
+                step: step,
+                mode: mode
             }
         else:
             # reset mode to 0, will restart with new Q (set up new Q), first set of Ps
                         + """ + new_answers[3]),
                 selection: gr.HTML("""
                     <h2> Retrieved Passage </h2>
+                    <p> """ + current_question["top10_e5"][0] + "</p>"),
+                step: step,
+                mode: mode
             }                                   # note change from "top10_" + user_data["modes"][user_data["current"]][mode]][0]
+    forward_btn.change(fn = toggle, inputs = [step, mode], outputs = [scores_p, scores_g, evals, question, q_text, a, b, c, d, passage_display, selection, step, mode])
     with gr.Row() as login:
         with gr.Column():
             email.change(fn = sanitize_login, inputs = [email], outputs = [s])
             def submit_email(email):
                 user_id = email
+                user_data = load_user_data(user_id) # calls login, downloads data, initializes session
                 # After loading user data, update with current question
+                current_question = load_current_question()
                 new_answers = current_question["answers"].copy()
                 new_answers[current_question["correct_answer_index"]] = "**" + current_question["answers"][current_question["correct_answer_index"]] + "** ✅"
                 return {
                     a: gr.Button(current_question["answers"][0]),
                     b: gr.Button(current_question["answers"][1]),
                     c: gr.Button(current_question["answers"][2]),
+                    d: gr.Button(current_question["answers"][3]),
+                    user_id: user_id,
+                    user_data: user_data,
+                    current_question: current_question
                 }
+            s.click(fn = submit_email, inputs = [email], outputs = [question, login, selection, passage_display, q_text, a, b, c, d, user_id, user_data, current_question])
 # Starts on question, switches to evaluation after the user answers
 user_eval.launch()