Anon Anon committed on
Commit
570c959
·
1 Parent(s): 20ff6da

formatting and minor text changes

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -51,7 +51,8 @@ GENDERED_LIST = [
51
 
52
  # %%
53
  # Fire up the models
54
- models = {m : pipeline("fill-mask", model=m) for m in MODEL_NAMES if m != OWN_MODEL_NAME}
 
55
 
56
  # %%
57
  # Get the winogender sentences
@@ -60,6 +61,8 @@ occs = sorted(list({sentence_id.split('_')[0]
60
  for sentence_id in winogender_sentences}))
61
 
62
  # %%
 
 
63
  def get_gendered_token_ids():
64
  male_gendered_tokens = [list[0] for list in GENDERED_LIST]
65
  female_gendered_tokens = [list[1] for list in GENDERED_LIST]
@@ -107,7 +110,8 @@ def get_figure(df, model_name, occ):
107
  ax.axis('tight')
108
  ax.set_xlabel("Sentence number")
109
  ax.set_ylabel("Uncertainty metric")
110
- ax.set_title(f"{MODEL_NAME_DICT[model_name]} gender pronoun uncertainty in '{occ}' sentences")
 
111
  return fig
112
 
113
 
@@ -127,8 +131,8 @@ def predict_gender_pronouns(
127
 
128
  # For debugging
129
  print('input_texts', texts)
130
-
131
- if model_name is None or model_name == '':
132
  model_name = MODEL_NAMES[0]
133
  model = models[model_name]
134
  elif model_name == OWN_MODEL_NAME:
@@ -213,10 +217,9 @@ with demo:
213
  we are able to identify likely spurious correlations and exploit them in \
214
  the scenario of gender underspecified tasks. (Note that introspecting softmax probabilities alone is insufficient, as in the sentences \
215
  below, LLMs may report a softmax prob of ~0.9 despite the task being underspecified.)")
216
-
217
  gr.Markdown("We extend the [Winogender Schemas](https://github.com/rudinger/winogender-schemas) evaluation set to produce\
218
  eight syntactically similar sentences. However semantically, \
219
- only two of the sentences are gender-specified while the rest remain gender-underspecified")
220
  gr.Markdown("If a model can reliably tell us when it is uncertain about its predictions, one can replace only those uncertain predictions with\
221
  an appropriate heuristic.")
222
 
@@ -241,11 +244,11 @@ with demo:
241
  lines=2,
242
  label=f"...If you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
243
  to include a single MASK-ed out pronoun. \
244
- If unsure on the required format, click an occupation above instead, to see some example input texts for this round.",
245
  )
246
 
247
  with gr.Row():
248
- get_text_btn = gr.Button("Load input texts")
249
 
250
  get_text_btn.click(
251
  fn=display_input_texts,
@@ -256,7 +259,7 @@ with demo:
256
  )
257
 
258
  with gr.Row():
259
- uncertain_btn = gr.Button("Get uncertainty results!")
260
  gr.Markdown(
261
  "If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
262
 
@@ -277,4 +280,4 @@ with demo:
277
 
278
  demo.launch(debug=True)
279
 
280
- # %%
 
51
 
52
  # %%
53
  # Fire up the models
54
+ models = {m: pipeline("fill-mask", model=m)
55
+ for m in MODEL_NAMES if m != OWN_MODEL_NAME}
56
 
57
  # %%
58
  # Get the winogender sentences
 
61
  for sentence_id in winogender_sentences}))
62
 
63
  # %%
64
+
65
+
66
  def get_gendered_token_ids():
67
  male_gendered_tokens = [list[0] for list in GENDERED_LIST]
68
  female_gendered_tokens = [list[1] for list in GENDERED_LIST]
 
110
  ax.axis('tight')
111
  ax.set_xlabel("Sentence number")
112
  ax.set_ylabel("Uncertainty metric")
113
+ ax.set_title(
114
+ f"{MODEL_NAME_DICT[model_name]} gender pronoun uncertainty in '{occ}' sentences")
115
  return fig
116
 
117
 
 
131
 
132
  # For debugging
133
  print('input_texts', texts)
134
+
135
+ if model_name is None or model_name == '':
136
  model_name = MODEL_NAMES[0]
137
  model = models[model_name]
138
  elif model_name == OWN_MODEL_NAME:
 
217
  we are able to identify likely spurious correlations and exploit them in \
218
  the scenario of gender underspecified tasks. (Note that introspecting softmax probabilities alone is insufficient, as in the sentences \
219
  below, LLMs may report a softmax prob of ~0.9 despite the task being underspecified.)")
 
220
  gr.Markdown("We extend the [Winogender Schemas](https://github.com/rudinger/winogender-schemas) evaluation set to produce\
221
  eight syntactically similar sentences. However semantically, \
222
+ only two of the sentences are well-specified while the rest remain underspecified.")
223
  gr.Markdown("If a model can reliably tell us when it is uncertain about its predictions, one can replace only those uncertain predictions with\
224
  an appropriate heuristic.")
225
 
 
244
  lines=2,
245
  label=f"...If you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
246
  to include a single MASK-ed out pronoun. \
247
+ If unsure on the required format, click an occupation above instead, to see some example input texts for this round."
248
  )
249
 
250
  with gr.Row():
251
+ get_text_btn = gr.Button("1) Load input texts")
252
 
253
  get_text_btn.click(
254
  fn=display_input_texts,
 
259
  )
260
 
261
  with gr.Row():
262
+ uncertain_btn = gr.Button("2) Get uncertainty results!")
263
  gr.Markdown(
264
  "If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
265
 
 
280
 
281
  demo.launch(debug=True)
282
 
283
+ # %%