anonymousauthors committed
Commit d0408b3 · 1 Parent(s): 67efd29

Update app.py

Files changed (1):
  1. app.py +12 -5
app.py CHANGED
@@ -109,9 +109,9 @@ def get_figure(df, model_name, occ):
     ax.bar(xs, ys)
     ax.axis('tight')
     ax.set_xlabel("Sentence number")
-    ax.set_ylabel("Underspecification Metric")
+    ax.set_ylabel("Specification Metric")
     ax.set_title(
-        f"Task Underspecification Metric on {MODEL_NAME_DICT[model_name]} for '{occ}' sentences")
+        f"Task Specification Metric on {MODEL_NAME_DICT[model_name]} for '{occ}' sentences")
     return fig
 
 
@@ -122,7 +122,7 @@ def predict_gender_pronouns(
     texts,
     occ,
 ):
-    """Run inference on input_text for selected model type, returning Task Underspecification metric results.
+    """Run inference on input_text for selected model type, returning Task Specification metric results.
     """
 
     # TODO: make these selectable by user
@@ -209,6 +209,7 @@ def predict_gender_pronouns(
 demo = gr.Blocks()
 with demo:
     input_texts = gr.Variable([])
+    gr.Markdown("**Detect Task Specification at Inference-time.**")
     # gr.Markdown("LLMs are pretty good at reporting task underspecification. We just need to ask the right way.")
     # gr.Markdown("Using our Underspecification Metric informed by applying causal inference techniques, \
     #     we are able to identify likely spurious correlations and exploit them in \
@@ -219,10 +220,16 @@ with demo:
     #     only two of the sentences are well-specified while the rest remain underspecified.")
     # gr.Markdown("If a model can reliably report the underspecification of an inference-time task, an AI systems can replace only those task predictions with\
     #     an appropriate heuristic or information retrieval process.")
-    gr.Markdown("Follow the numbered steps below to test out one of the pre-loaded options. Once you get the hang of it, you can load a new model and/or provide your own input texts.")
-
+    gr.Markdown("*Follow the numbered steps below to test one of the pre-loaded options.* Once you get the hang of it, you can load a new model and/or provide your own input texts.")
+
+    gr.Markdown("1) Pick a preloaded BERT-like model (note: RoBERTa-large performance is best).")
+    gr.Markdown(f"2) Pick an Occupation type from the Winogender Schemas evaluation set, or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).")
+    gr.Markdown("3) Click button to load input texts. Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.")
+    gr.Markdown("4) Click button to get Task Specification Metric results!")
 
 
+
+
     with gr.Row():
         model_name = gr.Radio(
             MODEL_NAMES,
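
For context on the first hunk: `get_figure` is a small matplotlib helper that plots one bar per sentence. Below is a minimal runnable sketch of its post-commit shape; the `metric` column name, the DataFrame layout, and the `MODEL_NAME_DICT` contents are assumptions for illustration, not taken from `app.py`.

```python
import matplotlib.pyplot as plt
import pandas as pd

# Assumed stand-in; the real app defines its own model-name mapping.
MODEL_NAME_DICT = {"roberta-large": "RoBERTa-large"}

def get_figure(df, model_name, occ):
    xs = df.index          # one bar per sentence
    ys = df["metric"]      # assumed column holding the per-sentence metric
    fig, ax = plt.subplots()
    ax.bar(xs, ys)
    ax.axis('tight')
    ax.set_xlabel("Sentence number")
    ax.set_ylabel("Specification Metric")
    ax.set_title(
        f"Task Specification Metric on {MODEL_NAME_DICT[model_name]} for '{occ}' sentences")
    return fig

# Example call with dummy data:
fig = get_figure(pd.DataFrame({"metric": [0.1, 0.9, 0.2]}), "roberta-large", "doctor")
```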
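The later hunks extend a `gr.Blocks` layout with the numbered-steps `gr.Markdown` calls. A stripped-down sketch of that pattern follows; the `MODEL_NAMES` and `PICK_YOUR_OWN_LABEL` values are assumed stand-ins, and the sketch uses `gr.State`, the current name for the older `gr.Variable` API that the diff itself calls.

```python
import gradio as gr

# Assumed stand-ins; the real app defines its own values.
MODEL_NAMES = ["bert-base-uncased", "roberta-large"]
PICK_YOUR_OWN_LABEL = "Pick your own!"

demo = gr.Blocks()
with demo:
    input_texts = gr.State([])  # the diff uses gr.Variable, the older name for gr.State
    gr.Markdown("**Detect Task Specification at Inference-time.**")
    gr.Markdown("1) Pick a preloaded BERT-like model.")
    gr.Markdown(f"2) Pick an Occupation type, or select '{PICK_YOUR_OWN_LABEL}'.")
    with gr.Row():
        model_name = gr.Radio(MODEL_NAMES, label="BERT-like model")

demo.launch()
```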