Spaces:

anonymousauthorsanonymous
/

uncertainty

Runtime error

App Files Files Community

Anon Anon committed on Nov 11, 2022

Commit

8eee1b1

1 Parent(s): 43d49fa

Create app.py

Browse files

Files changed (1) hide show

app.py +280 -0

app.py ADDED Viewed

	@@ -0,0 +1,280 @@

+# %%
+import gradio as gr
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import random
+from matplotlib.ticker import MaxNLocator
+from transformers import pipeline
+from winogender_sentences import get_sentences
# Radio value that means "build a pipeline from the user-supplied model name".
OWN_MODEL_NAME = 'add-a-model'
# Radio value that means "use the sentences typed into the free-text box".
PICK_YOUR_OWN_LABEL = 'pick-your-own'
# Model id -> display name used in the plot title.
MODEL_NAME_DICT = {
    "roberta-large": "RoBERTa-large",
    "bert-large-uncased": "BERT-large",
    "roberta-base": "RoBERTa-base",
    "bert-base-uncased": "BERT-base",
    OWN_MODEL_NAME: "Your model's",
}
MODEL_NAMES = list(MODEL_NAME_DICT)
DECIMAL_PLACES = 1
EPS = 1e-5  # to avoid /0 errors
# How many dates are taken from each end of DATES when probing a sentence.
NUM_PTS_TO_AVERAGE = 2
# Example date constants
DATE_SPLIT_KEY = "DATE"
START_YEAR = 1901
STOP_YEAR = 2016
NUM_PTS = 30
# NUM_PTS evenly spaced years from START_YEAR to STOP_YEAR, truncated to
# ints and stored as strings.
DATES = [
    str(year)
    for year in np.linspace(START_YEAR, STOP_YEAR, NUM_PTS).astype(int).tolist()
]
# [male, female] word pairs; index 0 is the male form, index 1 the female.
GENDERED_LIST = [
    ['he', 'she'],
    ['him', 'her'],
    ['his', 'hers'],
    ["himself", "herself"],
    ['male', 'female'],
    # ['man', 'woman']  Explicitly added in winogender extended sentences
    ['men', 'women'],
    ["husband", "wife"],
    ['father', 'mother'],
    ['boyfriend', 'girlfriend'],
    ['brother', 'sister'],
    ["actor", "actress"],
]
+# %%
# Fire up the models
# One fill-mask pipeline is built up front for every entry of MODEL_NAMES
# except the OWN_MODEL_NAME placeholder; that pipeline is only built on
# request inside predict_gender_pronouns.
models = {
    name: pipeline("fill-mask", model=name)
    for name in MODEL_NAMES
    if name != OWN_MODEL_NAME
}
# %%
# Get the winogender sentences
winogender_sentences = get_sentences()
# Sorted, de-duplicated id prefixes (the text before the first '_').
occs = sorted({
    sentence_id.partition('_')[0] for sentence_id in winogender_sentences
})
+# %%
def get_gendered_token_ids():
    """Split GENDERED_LIST into parallel lists of male and female words.

    Despite the name, the returned items are the word strings stored in
    GENDERED_LIST, not tokenizer ids.

    Returns:
        A ``(male_gendered_tokens, female_gendered_tokens)`` tuple of two
        lists, in the same order as the pairs in GENDERED_LIST.
    """
    male_gendered_tokens = [pair[0] for pair in GENDERED_LIST]
    female_gendered_tokens = [pair[1] for pair in GENDERED_LIST]
    return male_gendered_tokens, female_gendered_tokens
def get_winogender_texts(occ):
    """Return the Winogender sentences that belong to ``occ``.

    A sentence belongs to ``occ`` when the part of its id before the first
    underscore equals ``occ``.

    Args:
        occ: Id prefix to select (one of the entries of ``occs``).

    Returns:
        List of matching sentence texts, in ``winogender_sentences`` order;
        empty when nothing matches.
    """
    return [
        text
        for sentence_id, text in winogender_sentences.items()
        if sentence_id.split('_')[0] == occ
    ]
def display_input_texts(occ, alt_text):
    """Collect the sentences to evaluate and a numbered preview of them.

    Args:
        occ: Selected radio value; PICK_YOUR_OWN_LABEL means "use alt_text",
            anything else is looked up with get_winogender_texts.
        alt_text: Newline-delimited user sentences (may be empty or None).

    Returns:
        A ``(preview, texts)`` tuple: ``preview`` is the sentences joined by
        newlines and prefixed ``"1) "``, ``"2) "``, ...; ``texts`` is the
        plain list of sentences.
    """
    if occ == PICK_YOUR_OWN_LABEL:
        # Drop blank lines (e.g. a trailing newline or an empty textbox) so
        # no empty, mask-less "sentence" is handed on for inference.
        stripped_lines = (line.strip() for line in (alt_text or "").splitlines())
        texts = [line for line in stripped_lines if line]
    else:
        texts = get_winogender_texts(occ)
    display_texts = [f"{i}) {text}" for i, text in enumerate(texts, start=1)]
    return "\n".join(display_texts), texts
def get_avg_prob_from_pipeline_outputs(pipeline_preds, gendered_tokens, num_preds):
    """Return the percentage of predicted score that falls on gendered tokens.

    Args:
        pipeline_preds: List with one entry per mask; each entry is a list of
            prediction dicts carrying ``"token_str"`` and ``"score"``.
        gendered_tokens: Lower-case words that count as a match.
        num_preds: Divisor applied to the summed score (EPS is added to it).

    Returns:
        The summed matching score divided by ``EPS + num_preds``, times 100,
        rounded to DECIMAL_PLACES.
    """
    total_score = 0
    for top_preds in pipeline_preds:
        mask_score = 0
        for pred in top_preds:
            # Predicted tokens can carry surrounding whitespace and casing.
            if pred["token_str"].strip().lower() in gendered_tokens:
                mask_score += pred["score"]
        total_score += mask_score
    return round(total_score / (EPS + num_preds) * 100, DECIMAL_PLACES)
def is_top_pred_gendered(pipeline_preds, gendered_tokens):
    """Tell whether the first prediction for the first mask is a gendered word.

    Args:
        pipeline_preds: List with one entry per mask; each entry is a list of
            prediction dicts carrying ``"token_str"``.
        gendered_tokens: Lower-case words that count as gendered.

    Returns:
        True when the first-listed token, stripped and lower-cased, is in
        ``gendered_tokens``; False otherwise, including when there are no
        predictions at all.
    """
    # No predictions means there is no gendered top prediction either.
    if not pipeline_preds or not pipeline_preds[0]:
        return False
    top_token = pipeline_preds[0][0]['token_str']
    return top_token.strip().lower() in gendered_tokens
+# %%
def get_figure(df, model_name, occ):
    """Build a bar chart of the per-sentence uncertainty metric.

    Args:
        df: DataFrame whose first column holds the sentence labels (x axis)
            and whose second column holds the metric values (bar heights).
        model_name: Key into MODEL_NAME_DICT; its display name goes in the
            title.
        occ: Label quoted in the title.

    Returns:
        The matplotlib figure containing the chart.

    Raises:
        KeyError: If ``model_name`` is not a key of MODEL_NAME_DICT.
    """
    xs = df[df.columns[0]]
    ys = df[df.columns[1]]
    fig, ax = plt.subplots()
    ax.bar(xs, ys)
    ax.axis('tight')
    ax.set_xlabel("Sentence number")
    ax.set_ylabel("Uncertainty metric")
    ax.set_title(f"{MODEL_NAME_DICT[model_name]} gender pronoun uncertainty in '{occ}' sentences")
    # pyplot keeps every figure it creates alive until it is closed; this
    # function runs once per request, so release pyplot's reference here.
    # The returned Figure object itself stays usable for rendering.
    plt.close(fig)
    return fig
+# %%
def predict_gender_pronouns(
    model_name,
    own_model_name,
    texts,
    occ,
):
    """Run fill-mask inference on ``texts`` and score each sentence's uncertainty.

    Each sentence is prefixed with ``"In <year>: "`` for the first and the
    last NUM_PTS_TO_AVERAGE entries of DATES. For every such prompt the score
    on female words is normalised by the score on all gendered words. The
    uncertainty metric of a sentence is the absolute difference between the
    late-year and early-year normalised female shares, averaged over
    NUM_PTS_TO_AVERAGE.

    Args:
        model_name: Key of a preloaded pipeline in ``models``, or
            OWN_MODEL_NAME to build one from ``own_model_name``. ``None`` or
            ``''`` selects the first entry of MODEL_NAMES.
        own_model_name: Model name passed to ``pipeline("fill-mask", ...)``;
            only used when ``model_name`` is OWN_MODEL_NAME.
        texts: Sentences to evaluate; the literal ``MASK`` in each one is
            replaced by the model's own mask token.
        occ: Label shown in the plot title.

    Returns:
        A ``(target_text, uncertain_df, figure)`` tuple: the last prompt fed
        to the model (``""`` when ``texts`` is empty), a DataFrame with the
        columns ``Sentence number`` and ``Uncertainty metric``, and the bar
        chart from get_figure. A sentence number ends in ``*`` when at least
        one of its top predictions was not a gendered word.

    Raises:
        ValueError: If OWN_MODEL_NAME is selected but no model name is given.
    """
    # TODO: make these selectable by user
    indie_vars = ', '.join(DATES)
    num_ave = NUM_PTS_TO_AVERAGE
    num_preds = 1  # By design
    # For debugging
    print('input_texts', texts)
    if not model_name:
        model_name = MODEL_NAMES[0]
    if model_name == OWN_MODEL_NAME:
        own_model_name = (own_model_name or '').strip()
        if not own_model_name:
            raise ValueError(
                f"Enter a Hugging Face model name to use the '{OWN_MODEL_NAME}' option.")
        model = pipeline("fill-mask", model=own_model_name)
    else:
        model = models[model_name]
    mask_token = model.tokenizer.mask_token
    # Strip each entry: splitting on ',' alone leaves a leading space on every
    # value but the first, which would put a double space into the prompt.
    indie_vars_list = [var.strip() for var in indie_vars.split(',')]
    # Only the earliest and the latest values are probed.
    probe_vars = indie_vars_list[:num_ave] + indie_vars_list[-num_ave:]
    male_gendered_tokens, female_gendered_tokens = get_gendered_token_ids()
    all_gendered_tokens = female_gendered_tokens + male_gendered_tokens
    masked_texts = [text.replace('MASK', mask_token) for text in texts or []]
    all_uncertainty_f = {}
    # Pre-set so the return below is valid even when there is nothing to run.
    target_text = ""
    for i, text in enumerate(masked_texts, start=1):
        female_pronoun_preds = []
        male_pronoun_preds = []
        top_pred_gendered = True  # Assume true unless told otherwise
        print(f"{i}) {text}")
        for indie_var in probe_vars:
            target_text = f"In {indie_var}: {text}"
            pipeline_preds = model(target_text)
            # The pipeline returns a flat list of dicts for a single mask and
            # a list of lists for several; normalise to the nested shape.
            if not isinstance(pipeline_preds[0], list):
                pipeline_preds = [pipeline_preds]
            # If top-pred not gendered, record as such
            if not is_top_pred_gendered(pipeline_preds, all_gendered_tokens):
                top_pred_gendered = False
            female_pronoun_preds.append(get_avg_prob_from_pipeline_outputs(
                pipeline_preds,
                female_gendered_tokens,
                num_preds,
            ))
            male_pronoun_preds.append(get_avg_prob_from_pipeline_outputs(
                pipeline_preds,
                male_gendered_tokens,
                num_preds,
            ))
        # Normalizing by all gendered predictions
        total_gendered_probs = np.add(
            female_pronoun_preds, male_pronoun_preds)
        norm_female_pronoun_preds = np.around(
            np.divide(female_pronoun_preds, total_gendered_probs + EPS) * 100,
            decimals=DECIMAL_PLACES,
        )
        early_total = sum(norm_female_pronoun_preds[:num_ave])
        late_total = sum(norm_female_pronoun_preds[-num_ave:])
        sent_idx = f"{i}" if top_pred_gendered else f"{i}*"
        all_uncertainty_f[sent_idx] = round(
            abs((late_total - early_total) / num_ave), DECIMAL_PLACES)
    uncertain_df = pd.DataFrame.from_dict(
        all_uncertainty_f, orient='index', columns=['Uncertainty metric'])
    uncertain_df = uncertain_df.reset_index().rename(
        columns={'index': 'Sentence number'})
    return (
        target_text,
        uncertain_df,
        get_figure(uncertain_df, model_name, occ),
    )
# Build the Gradio UI. Components appear on the page in creation order, so
# the statements below must stay in this sequence.
with gr.Blocks() as demo:
    # Hidden per-session holder for the sentence list chosen by the user.
    input_texts = gr.Variable([])
    gr.Markdown("## Are you certain?")
    gr.Markdown(
        "#### LLMs are pretty good at reporting their uncertainty. We just need to ask the right way.")
    gr.Markdown("Using our uncertainty metric informed by applying causal inference techniques in \
        [Our ICLR paper under review](https://openreview.net/pdf?id=25VgHaPz0l4), \
        we are able to identify likely spurious correlations and exploit them in \
        the scenario of gender underspecified tasks. (Note that introspecting softmax probabilities alone is insufficient, as in the sentences \
        below, LLMs may report a softmax prob of ~0.9 despite the task being underspecified.)")
    gr.Markdown("We extend the [Winogender Schemas](https://github.com/rudinger/winogender-schemas) evaluation set to produce\
        eight syntactically similar sentences. However semantically, \
        only two of the sentences are gender-specified while the rest remain gender-underspecified")
    gr.Markdown("If a model can reliably tell us when it is uncertain about its predictions, one can replace only those uncertain predictions with\
        an appropriate heuristic.")

    # Model selection: a preloaded model, or a user-supplied model name.
    with gr.Row():
        model_name = gr.Radio(
            MODEL_NAMES,
            type="value",
            label="Pick a preloaded BERT-like model for uncertainty evaluation (note: BERT-base performance least consistent)...",
        )
        own_model_name = gr.Textbox(
            label=f"...Or, if you selected an '{OWN_MODEL_NAME}' model, put any Hugging Face pipeline model name \
            (that supports the `fill-mask` task (see list at https://huggingface.co/models?pipeline_tag=fill-mask).",
        )

    # Sentence selection: a Winogender occupation, or free text.
    with gr.Row():
        occ_box = gr.Radio(
            occs + [PICK_YOUR_OWN_LABEL],
            label=f"Pick an Occupation type from the Winogender Schemas evaluation set, or select '{PICK_YOUR_OWN_LABEL}'\
                 (it need not be about an occupation).",
        )
    with gr.Row():
        alt_input_texts = gr.Textbox(
            lines=2,
            label=f"...If you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
            to include a single MASK-ed out pronoun. \
            If unsure on the required format, click an occupation above instead, to see some example input texts for this round.",
        )
    with gr.Row():
        get_text_btn = gr.Button("Load input texts")

    # Created here, right before it is wired up, so it keeps its position
    # directly below the "Load input texts" button.
    numbered_texts_box = gr.Textbox(
        label='Numbered sentences for evaluation. Number below corresponds to number in x-axis of plot.')
    get_text_btn.click(
        fn=display_input_texts,
        inputs=[occ_box, alt_input_texts],
        outputs=[numbered_texts_box, input_texts],
    )

    # Results: plot, table, and a sample of the text sent to the model.
    with gr.Row():
        uncertain_btn = gr.Button("Get uncertainty results!")
    gr.Markdown(
        "If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
    with gr.Row():
        female_fig = gr.Plot(type="auto")
    with gr.Row():
        female_df = gr.Dataframe()
    with gr.Row():
        display_text = gr.Textbox(
            type="auto", label="Sample of text fed to model")
    uncertain_btn.click(
        fn=predict_gender_pronouns,
        inputs=[model_name, own_model_name, input_texts, occ_box],
        outputs=[display_text, female_df, female_fig],
    )
demo.launch(debug=True)
+# %%