anonymousauthorsanonymous committed on
Commit
2217075
·
1 Parent(s): cfe2e2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -19
app.py CHANGED
@@ -210,24 +210,15 @@ demo = gr.Blocks()
210
  with demo:
211
  input_texts = gr.Variable([])
212
  gr.Markdown("**Detect Task Specification at Inference-time.**")
213
- # gr.Markdown("LLMs are pretty good at reporting task underspecification. We just need to ask the right way.")
214
- # gr.Markdown("Using our Underspecification Metric informed by applying causal inference techniques, \
215
- # we are able to identify likely spurious correlations and exploit them in \
216
- # the scenario of gender underspecified tasks. (Note that introspecting softmax probabilities alone is insufficient, as in the sentences \
217
- # below, LLMs may report a softmax prob of ~0.9 despite the task being underspecified.)")
218
- # gr.Markdown("We extend the [Winogender Schemas](https://github.com/rudinger/winogender-schemas) evaluation set to produce\
219
- # eight syntactically similar sentences. However semantically, \
220
- # only two of the sentences are well-specified while the rest remain underspecified.")
221
- # gr.Markdown("If a model can reliably report the underspecification of an inference-time task, an AI system can replace only those task predictions with\
222
- # an appropriate heuristic or information retrieval process.")
223
- gr.Markdown("*Follow the numbered steps below to test one of the pre-loaded options.* Once you get the hang of it, you can load a new model and/or provide your own input texts.")
224
- gr.Markdown(f"""1) Pick a preloaded BERT-like model.
225
- Note: RoBERTa-large performance is best.
226
- 2) Pick an Occupation type from the Winogender Schemas evaluation set.
227
- Or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).
228
- 3) Click button to load input texts.
229
- Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.
230
- 4) Click button to get Task Specification Metric results!
231
  """)
232
 
233
 
@@ -258,7 +249,7 @@ with demo:
258
  )
259
 
260
  with gr.Row():
261
- get_text_btn = gr.Button("3) Click to load input texts.\n(Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.)")
262
 
263
  get_text_btn.click(
264
  fn=display_input_texts,
 
210
  with demo:
211
  input_texts = gr.Variable([])
212
  gr.Markdown("**Detect Task Specification at Inference-time.**")
213
+
214
+ gr.Markdown("**Follow the numbered steps below to test one of the pre-loaded options.** Once you get the hang of it, you can load a new model and/or provide your own input texts.")
215
+ gr.Markdown(f"""1) Pick a preloaded BERT-like model.
216
+ *Note: RoBERTa-large performance is best.*
217
+ 2) Pick an Occupation type from the Winogender Schemas evaluation set.
218
+ *Or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).*
219
+ 3) Click button to load input texts.
220
+ *Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.*
221
+ 4) Click button to get Task Specification Metric results!
 
 
 
 
 
 
 
 
 
222
  """)
223
 
224
 
 
249
  )
250
 
251
  with gr.Row():
252
+ get_text_btn = gr.Button("3) Click to load input texts.")
253
 
254
  get_text_btn.click(
255
  fn=display_input_texts,