Spaces:
Runtime error
Runtime error
File size: 3,213 Bytes
f4a1b77 38c1d39 3b1a0aa f4a1b77 3b1a0aa f4a1b77 3b1a0aa 27717dd 2a965c2 f4a1b77 3b1a0aa 2a965c2 3b1a0aa 2a965c2 3b1a0aa e9d81ac 3b1a0aa e9d81ac 3b1a0aa 1b9dc66 e9d81ac 3b1a0aa 1b9dc66 e9d81ac 3b1a0aa f4a1b77 55d104b f4a1b77 3b1a0aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import torch
import datasets
import gradio
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
class CrowSPairsDataset(object):
    """Stereotypical sentence pairs from the multilingual CrowS-Pairs dataset.

    On construction the "test" split of
    ``BigScienceBiasEval/crows_pairs_multilingual`` is loaded, converted to a
    pandas DataFrame, filtered to the rows whose ``stereo_antistereo`` column
    equals ``"stereo"``, and that column is then dropped. The resulting frame
    is stored in ``self.df``.
    """

    def __init__(self):
        super().__init__()
        self.df = (
            datasets
            .load_dataset("BigScienceBiasEval/crows_pairs_multilingual")["test"]
            .to_pandas()
            .query('stereo_antistereo == "stereo"')
            .drop(columns="stereo_antistereo")
        )

    def sample(self, bias_type, n=10):
        """Return up to ``n`` randomly chosen rows of the given bias type.

        Args:
            bias_type: Value to match against the ``bias_type`` column.
            n: Maximum number of rows to draw (without replacement).

        Returns:
            A DataFrame with ``min(n, number of matching rows)`` rows. It is
            empty when no row matches ``bias_type``.
        """
        matching = self.df[self.df["bias_type"] == bias_type]
        if matching.empty:
            return matching
        # Sampling without replacement raises ValueError when n exceeds the
        # population, so cap the request at the number of available rows.
        return matching.sample(n=min(n, len(matching)))

    def bias_types(self):
        """Return the distinct ``bias_type`` values as a list."""
        return self.df["bias_type"].unique().tolist()
def _perplexity(sentence):
    """Return the module-level model's perplexity for ``sentence`` as a float."""
    input_ids = tokenizer(sentence, return_tensors="pt")["input_ids"].to(device)
    with torch.no_grad():
        output = model(input_ids, labels=input_ids.clone())
    # Perplexity is the exponential of the language-modelling loss.
    return torch.exp(output["loss"]).item()


def _highlight_alpha(higher, lower):
    """Map the relative gap between two perplexities to an alpha in [0.17, 1]."""
    relative_gap = abs((higher - lower) / higher)
    # The 0.2 offset keeps even tiny differences visibly shaded; dividing by
    # 1.2 rescales the result so a relative gap of 1 maps to full opacity.
    return round((relative_gap + 0.2) / 1.2, 2)


def run(bias_type):
    """Render an HTML table comparing perplexities for sampled sentence pairs.

    A sample of pairs for ``bias_type`` is drawn from the module-level
    ``dataset``. For each pair the perplexity of both sentences is computed,
    and the cell holding the sentence with the lower perplexity is given a
    background colour whose opacity grows with the relative difference:
    magenta on the "less stereotypical" column, cyan on the
    "more stereotypical" column (also used on a tie).

    Args:
        bias_type: Value of the ``bias_type`` column to sample from.

    Returns:
        An HTML string containing the table, or a short hint when no bias
        type has been selected.
    """
    import html  # local import: the file's import block does not provide it

    if not bias_type:
        # Nothing selected yet; there is nothing to sample.
        return "<p>Please select a bias type first.</p>"

    plain_style = "padding: 0 1em;"
    parts = [
        "<table><tr style='color: white; background-color: #555'>"
        "<th>index</th><th>more stereotypical</th><th>less stereotypical</th></tr>"
    ]
    for index, row in dataset.sample(bias_type).iterrows():
        sent_more = row["sent_more"]
        sent_less = row["sent_less"]
        perplexity_more = _perplexity(sent_more)
        perplexity_less = _perplexity(sent_less)

        if perplexity_more > perplexity_less:
            alpha = _highlight_alpha(perplexity_more, perplexity_less)
            more_style = plain_style
            less_style = f"{plain_style} background-color: rgba(255,0,255,{alpha})"
        else:
            alpha = _highlight_alpha(perplexity_less, perplexity_more)
            more_style = f"{plain_style} background-color: rgba(0,255,255,{alpha})"
            less_style = plain_style

        # Escape the sentences so characters such as "<" or "&" in the data
        # cannot break the generated markup.
        parts.append(
            f"<tr><td>{index}</td>"
            f"<td style='{more_style}'>{html.escape(sent_more, quote=False)}</td>"
            f"<td style='{less_style}'>{html.escape(sent_less, quote=False)}</td></tr>"
        )
    parts.append("</table>")
    return "".join(parts)
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Language model and tokenizer used to score sentences.
model_id = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_id).to(device)
tokenizer = GPT2TokenizerFast.from_pretrained(model_id)

dataset = CrowSPairsDataset()
bias_type_choices = dataset.bias_types()

# NOTE(review): this dropdown is not referenced anywhere else in the visible
# code (the UI below builds its own); kept so the module-level name survives.
bias_type_sel = gradio.Dropdown(label="Bias Type", choices=bias_type_choices)

# Read the Markdown texts with an explicit encoding so the result does not
# depend on the platform's default locale.
with open("description.md", encoding="utf-8") as fh:
    desc = fh.read()
with open("notice.md", encoding="utf-8") as fh:
    notice = fh.read()

# NOTE(review): the nesting below was reconstructed from the statements'
# syntax; confirm the intended layout against the running app.
with gradio.Blocks() as iface:
    gradio.Markdown(desc)
    with gradio.Row(equal_height=True):
        with gradio.Column(scale=4):
            inp = gradio.Dropdown(label="Bias Type", choices=bias_type_choices)
        with gradio.Column(scale=1):
            but = gradio.Button("Sample")
    out = gradio.HTML()
    # Clicking the button scores a fresh sample for the selected bias type.
    but.click(run, inp, out)
    with gradio.Accordion("A note about explainability models"):
        gradio.Markdown(notice)

iface.launch()
|