Commit d0408b3 (1 parent: 67efd29)
Update app.py
app.py CHANGED
@@ -109,9 +109,9 @@ def get_figure(df, model_name, occ):
     ax.bar(xs, ys)
     ax.axis('tight')
     ax.set_xlabel("Sentence number")
-    ax.set_ylabel("
+    ax.set_ylabel("Specification Metric")
     ax.set_title(
-        f"Task
+        f"Task Specification Metric on {MODEL_NAME_DICT[model_name]} for '{occ}' sentences")
     return fig
 
 
@@ -122,7 +122,7 @@ def predict_gender_pronouns(
     texts,
     occ,
 ):
-    """Run inference on input_text for selected model type, returning Task
+    """Run inference on input_text for selected model type, returning Task Specification metric results.
     """
 
     # TODO: make these selectable by user
@@ -209,6 +209,7 @@ def predict_gender_pronouns(
 demo = gr.Blocks()
 with demo:
     input_texts = gr.Variable([])
+    gr.Markdown("**Detect Task Specification at Inference-time.**")
     # gr.Markdown("LLMs are pretty good at reporting task underspecification. We just need to ask the right way.")
     # gr.Markdown("Using our Underspecification Metric informed by applying causal inference techniques, \
     # we are able to identify likely spurious correlations and exploit them in \
@@ -219,10 +220,16 @@ with demo:
     # only two of the sentences are well-specified while the rest remain underspecified.")
     # gr.Markdown("If a model can reliably report the underspecification of an inference-time task, an AI systems can replace only those task predictions with\
     # an appropriate heuristic or information retrieval process.")
-    gr.Markdown("Follow the numbered steps below to test
-
+    gr.Markdown("*Follow the numbered steps below to test one of the pre-loaded options.* Once you get the hang of it, you can load a new model and/or provide your own input texts.")
+
+    gr.Markdown("1) Pick a preloaded BERT-like model (note: RoBERTa-large performance is best).")
+    gr.Markdown(f"2) Pick an Occupation type from the Winogender Schemas evaluation set, or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).")
+    gr.Markdown("3) Click button to load input texts. Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.")
+    gr.Markdown("4) Click button to get Task Specification Metric results!")
 
 
+
+
     with gr.Row():
         model_name = gr.Radio(
             MODEL_NAMES,
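For readers unfamiliar with the Gradio API this commit leans on, here is a minimal, self-contained sketch of the Blocks layout the diff converges on. MODEL_NAMES and PICK_YOUR_OWN_LABEL are placeholder values (app.py defines the real ones elsewhere), the radio label is assumed, and the step-3/step-4 buttons and the figure output are omitted; this is a sketch of the pattern, not the full app.

```python
# Minimal sketch of the Blocks layout this commit builds up; not the full app.
# MODEL_NAMES and PICK_YOUR_OWN_LABEL are placeholders -- app.py defines the
# real values elsewhere.
import gradio as gr

MODEL_NAMES = ["bert-base-uncased", "roberta-large"]  # placeholder choices
PICK_YOUR_OWN_LABEL = "Pick your own"                 # placeholder label

demo = gr.Blocks()
with demo:
    # gr.Variable holds per-session state in the Gradio version this app
    # targets (later Gradio releases renamed it gr.State).
    input_texts = gr.Variable([])

    gr.Markdown("**Detect Task Specification at Inference-time.**")
    gr.Markdown(
        "*Follow the numbered steps below to test one of the pre-loaded options.* "
        "Once you get the hang of it, you can load a new model and/or provide "
        "your own input texts."
    )
    gr.Markdown("1) Pick a preloaded BERT-like model (note: RoBERTa-large performance is best).")
    gr.Markdown(
        f"2) Pick an Occupation type from the Winogender Schemas evaluation set, "
        f"or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation)."
    )
    gr.Markdown("3) Click button to load input texts.")
    gr.Markdown("4) Click button to get Task Specification Metric results!")

    with gr.Row():
        # Radio choices come first positionally; the label here is assumed.
        model_name = gr.Radio(MODEL_NAMES, label="Model")

demo.launch()
```

Interleaving gr.Markdown calls with input components inside a single gr.Blocks context is what lets the commit place the numbered instructions directly above the controls they describe, which gr.Interface's fixed layout would not allow.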