Spaces:
Runtime error
Runtime error
Anon Anon
committed on
Commit
·
570c959
1
Parent(s):
20ff6da
formatting and minor text changes
Browse files
app.py
CHANGED
@@ -51,7 +51,8 @@ GENDERED_LIST = [
|
|
51 |
|
52 |
# %%
|
53 |
# Fire up the models
|
54 |
-
models = {m
|
|
|
55 |
|
56 |
# %%
|
57 |
# Get the winogender sentences
|
@@ -60,6 +61,8 @@ occs = sorted(list({sentence_id.split('_')[0]
|
|
60 |
for sentence_id in winogender_sentences}))
|
61 |
|
62 |
# %%
|
|
|
|
|
63 |
def get_gendered_token_ids():
|
64 |
male_gendered_tokens = [list[0] for list in GENDERED_LIST]
|
65 |
female_gendered_tokens = [list[1] for list in GENDERED_LIST]
|
@@ -107,7 +110,8 @@ def get_figure(df, model_name, occ):
|
|
107 |
ax.axis('tight')
|
108 |
ax.set_xlabel("Sentence number")
|
109 |
ax.set_ylabel("Uncertainty metric")
|
110 |
-
ax.set_title(
|
|
|
111 |
return fig
|
112 |
|
113 |
|
@@ -127,8 +131,8 @@ def predict_gender_pronouns(
|
|
127 |
|
128 |
# For debugging
|
129 |
print('input_texts', texts)
|
130 |
-
|
131 |
-
if model_name is None or model_name == '':
|
132 |
model_name = MODEL_NAMES[0]
|
133 |
model = models[model_name]
|
134 |
elif model_name == OWN_MODEL_NAME:
|
@@ -213,10 +217,9 @@ with demo:
|
|
213 |
we are able to identify likely spurious correlations and exploit them in \
|
214 |
the scenario of gender underspecified tasks. (Note that introspecting softmax probabilities alone is insufficient, as in the sentences \
|
215 |
below, LLMs may report a softmax prob of ~0.9 despite the task being underspecified.)")
|
216 |
-
|
217 |
gr.Markdown("We extend the [Winogender Schemas](https://github.com/rudinger/winogender-schemas) evaluation set to produce\
|
218 |
eight syntactically similar sentences. However semantically, \
|
219 |
-
only two of the sentences are
|
220 |
gr.Markdown("If a model can reliably tell us when it is uncertain about its predictions, one can replace only those uncertain predictions with\
|
221 |
an appropriate heuristic.")
|
222 |
|
@@ -241,11 +244,11 @@ with demo:
|
|
241 |
lines=2,
|
242 |
label=f"...If you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
|
243 |
to include a single MASK-ed out pronoun. \
|
244 |
-
If unsure on the required format, click an occupation above instead, to see some example input texts for this round."
|
245 |
)
|
246 |
|
247 |
with gr.Row():
|
248 |
-
get_text_btn = gr.Button("Load input texts")
|
249 |
|
250 |
get_text_btn.click(
|
251 |
fn=display_input_texts,
|
@@ -256,7 +259,7 @@ with demo:
|
|
256 |
)
|
257 |
|
258 |
with gr.Row():
|
259 |
-
uncertain_btn = gr.Button("Get uncertainty results!")
|
260 |
gr.Markdown(
|
261 |
"If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
|
262 |
|
@@ -277,4 +280,4 @@ with demo:
|
|
277 |
|
278 |
demo.launch(debug=True)
|
279 |
|
280 |
-
# %%
|
|
|
51 |
|
52 |
# %%
|
53 |
# Fire up the models
|
54 |
+
models = {m: pipeline("fill-mask", model=m)
|
55 |
+
for m in MODEL_NAMES if m != OWN_MODEL_NAME}
|
56 |
|
57 |
# %%
|
58 |
# Get the winogender sentences
|
|
|
61 |
for sentence_id in winogender_sentences}))
|
62 |
|
63 |
# %%
|
64 |
+
|
65 |
+
|
66 |
def get_gendered_token_ids():
|
67 |
male_gendered_tokens = [list[0] for list in GENDERED_LIST]
|
68 |
female_gendered_tokens = [list[1] for list in GENDERED_LIST]
|
|
|
110 |
ax.axis('tight')
|
111 |
ax.set_xlabel("Sentence number")
|
112 |
ax.set_ylabel("Uncertainty metric")
|
113 |
+
ax.set_title(
|
114 |
+
f"{MODEL_NAME_DICT[model_name]} gender pronoun uncertainty in '{occ}' sentences")
|
115 |
return fig
|
116 |
|
117 |
|
|
|
131 |
|
132 |
# For debugging
|
133 |
print('input_texts', texts)
|
134 |
+
|
135 |
+
if model_name is None or model_name == '':
|
136 |
model_name = MODEL_NAMES[0]
|
137 |
model = models[model_name]
|
138 |
elif model_name == OWN_MODEL_NAME:
|
|
|
217 |
we are able to identify likely spurious correlations and exploit them in \
|
218 |
the scenario of gender underspecified tasks. (Note that introspecting softmax probabilities alone is insufficient, as in the sentences \
|
219 |
below, LLMs may report a softmax prob of ~0.9 despite the task being underspecified.)")
|
|
|
220 |
gr.Markdown("We extend the [Winogender Schemas](https://github.com/rudinger/winogender-schemas) evaluation set to produce\
|
221 |
eight syntactically similar sentences. However semantically, \
|
222 |
+
only two of the sentences are well-specified while the rest remain underspecified.")
|
223 |
gr.Markdown("If a model can reliably tell us when it is uncertain about its predictions, one can replace only those uncertain predictions with\
|
224 |
an appropriate heuristic.")
|
225 |
|
|
|
244 |
lines=2,
|
245 |
label=f"...If you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
|
246 |
to include a single MASK-ed out pronoun. \
|
247 |
+
If unsure on the required format, click an occupation above instead, to see some example input texts for this round."
|
248 |
)
|
249 |
|
250 |
with gr.Row():
|
251 |
+
get_text_btn = gr.Button("1) Load input texts")
|
252 |
|
253 |
get_text_btn.click(
|
254 |
fn=display_input_texts,
|
|
|
259 |
)
|
260 |
|
261 |
with gr.Row():
|
262 |
+
uncertain_btn = gr.Button("2) Get uncertainty results!")
|
263 |
gr.Markdown(
|
264 |
"If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
|
265 |
|
|
|
280 |
|
281 |
demo.launch(debug=True)
|
282 |
|
283 |
+
# %%
|