Spaces:
Sleeping
Sleeping
elibrowne
committed on
Commit
·
8861533
1
Parent(s):
df99dda
Persistence and data collection?
Browse files
app.py
CHANGED
@@ -4,25 +4,9 @@ import os
|
|
4 |
# PERSISTENT DATA STORAGE: this code is used to make commits
|
5 |
|
6 |
import json
|
7 |
-
from
|
8 |
-
from pathlib import Path
|
9 |
-
from uuid import uuid4
|
10 |
-
from huggingface_hub import CommitScheduler, hf_hub_download, file_exists, HfApi
|
11 |
from random import shuffle
|
12 |
|
13 |
-
JSON_DATASET_DIR = Path("json_dataset")
|
14 |
-
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
|
15 |
-
|
16 |
-
JSON_DATASET_PATH = JSON_DATASET_DIR / f"train-{uuid4()}.json"
|
17 |
-
|
18 |
-
scheduler = CommitScheduler(
|
19 |
-
repo_id="ebrowne/test-data",
|
20 |
-
repo_type="dataset",
|
21 |
-
folder_path=JSON_DATASET_DIR,
|
22 |
-
path_in_repo="data",
|
23 |
-
token = os.getenv("HF_TOKEN")
|
24 |
-
)
|
25 |
-
|
26 |
# Global variables which interact with loading and unloading
|
27 |
user_data = {}
|
28 |
current_response = {}
|
@@ -81,11 +65,11 @@ def update_huggingface(id):
|
|
81 |
token = os.getenv("HF_TOKEN")
|
82 |
)
|
83 |
|
84 |
-
def reset_current_response():
|
85 |
global current_response
|
86 |
current_response = {
|
87 |
"user_id": user_id,
|
88 |
-
"question_id":
|
89 |
"user_answer": 0,
|
90 |
"e5_scores": [], # list of ten [score, score, score, score]
|
91 |
"e5_set": [], # two values
|
@@ -108,17 +92,11 @@ def load_current_question():
|
|
108 |
print("Done")
|
109 |
gr.Info("You've finished — thank you so much! There are no more questions. :)")
|
110 |
current_question = {"question": "You're done! Thanks so much for your help.", "answers": ["I want to log out now.", "I want to keep answering questions.","I want to keep answering questions.", "I want to keep answering questions."], "correct_answer_index": 0, "top10_e5": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_e5": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_colbert": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_colbert": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_contains_gold_passage": False, "gold_passage": "GOLD PASSAGE: LOG OFF!", "gold_passage_generation": "what do you gain"}
|
|
|
111 |
else:
|
112 |
qid = user_data["order"][q_index]
|
113 |
current_question = all_questions[qid]
|
114 |
-
|
115 |
-
# This method is being used to save each set of individual scores (in case the main files have issues, the data should be saved)
|
116 |
-
def commit_current_and_reset():
|
117 |
-
with scheduler.lock:
|
118 |
-
with JSON_DATASET_PATH.open("a") as f:
|
119 |
-
json.dump(current_response, f)
|
120 |
-
f.write("\n")
|
121 |
-
reset_current_response()
|
122 |
|
123 |
# THEMING: colors and styles (Gradio native)
|
124 |
|
@@ -189,9 +167,11 @@ with gr.Blocks(theme = theme) as user_eval:
|
|
189 |
def next_p(e0, e1, e2, e3):
|
190 |
global step
|
191 |
global mode
|
|
|
192 |
step += 1
|
193 |
-
|
194 |
-
|
|
|
195 |
if step == len(current_question["top10_" + user_data["modes"][user_data["current"]][mode]]): # should always be 10
|
196 |
# Step 10: all sources
|
197 |
collapsible_string = ""
|
@@ -224,10 +204,13 @@ with gr.Blocks(theme = theme) as user_eval:
|
|
224 |
global step
|
225 |
global mode
|
226 |
global user_data
|
|
|
227 |
step += 1
|
228 |
-
|
229 |
if step == 11:
|
230 |
# Step 11: guaranteed to be generation
|
|
|
|
|
231 |
return {
|
232 |
selection: gr.HTML("""
|
233 |
<h2> Autogenerated Response </h2>
|
@@ -236,9 +219,11 @@ with gr.Blocks(theme = theme) as user_eval:
|
|
236 |
eval_satisfied: gr.Slider(value = 1)
|
237 |
}
|
238 |
# Steps 12 and 13 are gold passage + gold passage generation IF it is applicable
|
239 |
-
if step > 11 and not current_question["top10_contains_gold_passage"]
|
240 |
# When mode is 0 -> reset with mode = 1
|
241 |
if mode == 0:
|
|
|
|
|
242 |
return {
|
243 |
selection: gr.HTML("""
|
244 |
<h2> Retrieved Passage </h2>
|
@@ -249,6 +234,8 @@ with gr.Blocks(theme = theme) as user_eval:
|
|
249 |
}
|
250 |
# When mode is 1 -> display GP and GP generation, then switch
|
251 |
if step == 12:
|
|
|
|
|
252 |
return {
|
253 |
selection: gr.HTML("""
|
254 |
<h2> Retrieved Passage </h2>
|
@@ -258,6 +245,8 @@ with gr.Blocks(theme = theme) as user_eval:
|
|
258 |
eval_satisfied: gr.Slider(value = 1)
|
259 |
}
|
260 |
elif step == 13:
|
|
|
|
|
261 |
return {
|
262 |
selection: gr.HTML("""
|
263 |
<h2> Autogenerated Response </h2>
|
@@ -266,9 +255,11 @@ with gr.Blocks(theme = theme) as user_eval:
|
|
266 |
eval_helps: gr.Slider(value = 1),
|
267 |
eval_satisfied: gr.Slider(value = 1)
|
268 |
}
|
269 |
-
else:
|
|
|
|
|
270 |
user_data["current"] += 1
|
271 |
-
#
|
272 |
update_huggingface(user_id) # persistence — update progress online, save answers
|
273 |
load_current_question()
|
274 |
return {
|
@@ -277,19 +268,24 @@ with gr.Blocks(theme = theme) as user_eval:
|
|
277 |
eval_helps: gr.Slider(value = 1),
|
278 |
eval_satisfied: gr.Slider(value = 1)
|
279 |
}
|
|
|
|
|
|
|
280 |
else:
|
281 |
# When mode is 0 -> reset with mode = 1
|
282 |
if mode == 0:
|
283 |
return {
|
284 |
-
selection: gr.HTML("""
|
285 |
<h2> Retrieved Passage </h2>
|
286 |
-
<p> """ + current_question["top10_" + user_data["modes"][user_data["current"]][1]][0] + "</p>"), # hard coded: first passage (0) of mode 2 (1)
|
287 |
forward_btn: gr.Textbox("load new data"),
|
288 |
eval_helps: gr.Slider(value = 1),
|
289 |
eval_satisfied: gr.Slider(value = 1)
|
290 |
}
|
291 |
# When mode is 1 -> change question
|
292 |
user_data["current"] += 1
|
|
|
|
|
293 |
update_huggingface(user_id)
|
294 |
load_current_question()
|
295 |
return {
|
@@ -298,6 +294,7 @@ with gr.Blocks(theme = theme) as user_eval:
|
|
298 |
eval_helps: gr.Slider(value = 1),
|
299 |
eval_satisfied: gr.Slider(value = 1)
|
300 |
}
|
|
|
301 |
btn_p.click(fn = next_p, inputs = [eval_0, eval_1, eval_2, eval_3], outputs = [selection, scores_p, scores_g, eval_0, eval_1, eval_2, eval_3])
|
302 |
btn_g.click(fn = next_g, inputs = [eval_helps, eval_satisfied], outputs = [selection, forward_btn, eval_helps, eval_satisfied])
|
303 |
|
|
|
4 |
# PERSISTENT DATA STORAGE: this code is used to make commits
|
5 |
|
6 |
import json
|
7 |
+
from huggingface_hub import hf_hub_download, file_exists, HfApi
|
|
|
|
|
|
|
8 |
from random import shuffle
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
# Global variables which interact with loading and unloading
|
11 |
user_data = {}
|
12 |
current_response = {}
|
|
|
65 |
token = os.getenv("HF_TOKEN")
|
66 |
)
|
67 |
|
68 |
+
def reset_current_response(qid):
|
69 |
global current_response
|
70 |
current_response = {
|
71 |
"user_id": user_id,
|
72 |
+
"question_id": qid,
|
73 |
"user_answer": 0,
|
74 |
"e5_scores": [], # list of ten [score, score, score, score]
|
75 |
"e5_set": [], # two values
|
|
|
92 |
print("Done")
|
93 |
gr.Info("You've finished — thank you so much! There are no more questions. :)")
|
94 |
current_question = {"question": "You're done! Thanks so much for your help.", "answers": ["I want to log out now.", "I want to keep answering questions.","I want to keep answering questions.", "I want to keep answering questions."], "correct_answer_index": 0, "top10_e5": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_e5": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_colbert": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_colbert": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_contains_gold_passage": False, "gold_passage": "GOLD PASSAGE: LOG OFF!", "gold_passage_generation": "what do you gain"}
|
95 |
+
reset_current_response("USER FINISHED")
|
96 |
else:
|
97 |
qid = user_data["order"][q_index]
|
98 |
current_question = all_questions[qid]
|
99 |
+
reset_current_response(user_data["order"][q_index])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
# THEMING: colors and styles (Gradio native)
|
102 |
|
|
|
167 |
def next_p(e0, e1, e2, e3):
|
168 |
global step
|
169 |
global mode
|
170 |
+
global current_response
|
171 |
step += 1
|
172 |
+
# Add user data to the current response
|
173 |
+
current_response[user_data["modes"][user_data["current"]][mode] + "_scores"].append([e0, e1, e2, e3])
|
174 |
+
# Next item
|
175 |
if step == len(current_question["top10_" + user_data["modes"][user_data["current"]][mode]]): # should always be 10
|
176 |
# Step 10: all sources
|
177 |
collapsible_string = ""
|
|
|
204 |
global step
|
205 |
global mode
|
206 |
global user_data
|
207 |
+
global current_response
|
208 |
step += 1
|
209 |
+
|
210 |
if step == 11:
|
211 |
# Step 11: guaranteed to be generation
|
212 |
+
# Add user data to the current response as SET evaluation, which comes before the generation
|
213 |
+
current_response[user_data["modes"][user_data["current"]][mode] + "_set"] = [e_h, e_s]
|
214 |
return {
|
215 |
selection: gr.HTML("""
|
216 |
<h2> Autogenerated Response </h2>
|
|
|
219 |
eval_satisfied: gr.Slider(value = 1)
|
220 |
}
|
221 |
# Steps 12 and 13 are gold passage + gold passage generation IF it is applicable
|
222 |
+
if step > 11: # and not current_question["top10_contains_gold_passage"]
|
223 |
# When mode is 0 -> reset with mode = 1
|
224 |
if mode == 0:
|
225 |
+
# The user just evaluated a generation for mode 0
|
226 |
+
current_response[user_data["modes"][user_data["current"]][mode] + "_generation"] = [e_h, e_s]
|
227 |
return {
|
228 |
selection: gr.HTML("""
|
229 |
<h2> Retrieved Passage </h2>
|
|
|
234 |
}
|
235 |
# When mode is 1 -> display GP and GP generation, then switch
|
236 |
if step == 12:
|
237 |
+
# The user just evaluated a generation for mode 1
|
238 |
+
current_response[user_data["modes"][user_data["current"]][mode] + "_generation"] = [e_h, e_s]
|
239 |
return {
|
240 |
selection: gr.HTML("""
|
241 |
<h2> Retrieved Passage </h2>
|
|
|
245 |
eval_satisfied: gr.Slider(value = 1)
|
246 |
}
|
247 |
elif step == 13:
|
248 |
+
# The user just evaluated the gold passage
|
249 |
+
current_response["gold_set"] = [e_h, e_s]
|
250 |
return {
|
251 |
selection: gr.HTML("""
|
252 |
<h2> Autogenerated Response </h2>
|
|
|
255 |
eval_helps: gr.Slider(value = 1),
|
256 |
eval_satisfied: gr.Slider(value = 1)
|
257 |
}
|
258 |
+
else: # step = 14
|
259 |
+
# The user just evaluated the gold passage generation
|
260 |
+
current_response["gold_generation"] = [e_h, e_s]
|
261 |
user_data["current"] += 1
|
262 |
+
user_data["responses"].append(current_response) # adds new answers to current list of responses
|
263 |
update_huggingface(user_id) # persistence — update progress online, save answers
|
264 |
load_current_question()
|
265 |
return {
|
|
|
268 |
eval_helps: gr.Slider(value = 1),
|
269 |
eval_satisfied: gr.Slider(value = 1)
|
270 |
}
|
271 |
+
|
272 |
+
# VERY UNCLEAN CODE: for practical purposes, this else block is unreachable: not current_question["top10_contains_gold_passage"] will always be True
|
273 |
+
"""
|
274 |
else:
|
275 |
# When mode is 0 -> reset with mode = 1
|
276 |
if mode == 0:
|
277 |
return {
|
278 |
+
selection: gr.HTML(\"""
|
279 |
<h2> Retrieved Passage </h2>
|
280 |
+
<p> \""" + current_question["top10_" + user_data["modes"][user_data["current"]][1]][0] + "</p>"), # hard coded: first passage (0) of mode 2 (1)
|
281 |
forward_btn: gr.Textbox("load new data"),
|
282 |
eval_helps: gr.Slider(value = 1),
|
283 |
eval_satisfied: gr.Slider(value = 1)
|
284 |
}
|
285 |
# When mode is 1 -> change question
|
286 |
user_data["current"] += 1
|
287 |
+
user_data["responses"].append(current_response) # adds new answers to current list of responses
|
288 |
+
# Update stored data with new current, additional data
|
289 |
update_huggingface(user_id)
|
290 |
load_current_question()
|
291 |
return {
|
|
|
294 |
eval_helps: gr.Slider(value = 1),
|
295 |
eval_satisfied: gr.Slider(value = 1)
|
296 |
}
|
297 |
+
"""
|
298 |
btn_p.click(fn = next_p, inputs = [eval_0, eval_1, eval_2, eval_3], outputs = [selection, scores_p, scores_g, eval_0, eval_1, eval_2, eval_3])
|
299 |
btn_g.click(fn = next_g, inputs = [eval_helps, eval_satisfied], outputs = [selection, forward_btn, eval_helps, eval_satisfied])
|
300 |
|