Spaces:
Sleeping
Sleeping
Martijn van Beers
committed on
Commit
•
40d472f
1
Parent(s):
50fec15
Compare original GPT-2 with a debiased model
Browse files
app.py
CHANGED
@@ -25,33 +25,53 @@ class CrowSPairsDataset(object):
|
|
25 |
|
26 |
|
27 |
def run(df):
|
28 |
-
result = "<table><tr style='color: white; background-color: #555'><th>index</th><th>more stereotypical</th><th>less stereotypical<th></tr>"
|
29 |
for i, row in df.iterrows():
|
30 |
-
result += f"<tr><td>{i}</td>"
|
31 |
more = row["sent_more"]
|
32 |
|
33 |
more = tokenizer(more, return_tensors="pt")["input_ids"].to(device)
|
34 |
with torch.no_grad():
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
38 |
|
39 |
less = row["sent_less"]
|
40 |
less = tokenizer(less, return_tensors="pt")["input_ids"].to(device)
|
41 |
with torch.no_grad():
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
result += "</table>"
|
56 |
return result
|
57 |
|
@@ -73,7 +93,8 @@ else:
|
|
73 |
device = torch.device("cpu")
|
74 |
|
75 |
model_id = "gpt2"
|
76 |
-
|
|
|
77 |
tokenizer = GPT2TokenizerFast.from_pretrained(model_id)
|
78 |
dataset = CrowSPairsDataset()
|
79 |
|
|
|
25 |
|
26 |
|
27 |
def _perplexity(model, input_ids):
    """Return the perplexity of `input_ids` under `model` as a Python float.

    The model is called with the input ids as labels and the resulting
    ``"loss"`` entry is exponentiated. Gradients are disabled because the
    model is only being scored.
    """
    with torch.no_grad():
        # The labels are not modified by this code, so the same tensor is
        # passed for both arguments instead of a clone.
        output = model(input_ids, labels=input_ids)
    return torch.exp(output["loss"]).item()


def _diff_cell(perplexity_more, perplexity_less):
    """Return one `<td>` showing the relative perplexity gap of a pair.

    The gap is ``|higher - lower| / higher``, rounded to two decimals. The
    cell is tinted cyan when the "more stereotypical" sentence has the higher
    perplexity and magenta otherwise (matching the gradient colours used for
    the two sentence columns). The tint's alpha grows with the gap.
    """
    if perplexity_more > perplexity_less:
        higher, lower, rgb = perplexity_more, perplexity_less, "0,255,255"
    else:
        higher, lower, rgb = perplexity_less, perplexity_more, "255,0,255"
    # Round before deriving the shade in BOTH cases so the displayed number
    # and the colour intensity always agree.
    diff = round(abs((higher - lower) / higher), 2)
    # Offset so that even a zero gap still gets a faintly visible tint.
    shade = (diff + 0.2) / 1.2
    return f"<td style='background-color: rgba({rgb},{shade})'>{diff:.2f}</td>"


def run(df):
    """Render an HTML table comparing GPT-2 and the debiased model on `df`.

    For every row, the ``sent_more`` and ``sent_less`` sentences are
    tokenized and scored by both ``model_gpt`` and ``model_custom``. Each row
    of the table holds: the row index, the more stereotypical sentence, the
    GPT-2 gap cell, the debiased-model gap cell, and the less stereotypical
    sentence.

    Args:
        df: DataFrame with ``sent_more`` and ``sent_less`` columns.

    Returns:
        The table as a single HTML string.
    """
    # Local import keeps this block self-contained; only `escape` is needed.
    from html import escape

    parts = [
        "<table><tr style='color: white; background-color: #555'>"
        "<th>index</th><th>more stereotypical</th><th>gpt-2</th>"
        "<th>debiased</th><th>less stereotypical</th></tr>"
    ]
    for i, row in df.iterrows():
        sent_more = row["sent_more"]
        sent_less = row["sent_less"]
        more_ids = tokenizer(sent_more, return_tensors="pt")["input_ids"].to(device)
        less_ids = tokenizer(sent_less, return_tensors="pt")["input_ids"].to(device)

        # Sentences are text content, so `<`, `>` and `&` must be escaped to
        # keep the table markup well-formed.
        parts.append(
            f"<tr><td>{i}</td><td style='padding: 0 1em; background-image: "
            "linear-gradient(90deg, rgba(0,255,255,0.2) 0%, rgba(255,255,255,1) 100%)'>"
            f"{escape(sent_more, quote=False)}</td>"
        )
        parts.append(
            _diff_cell(
                _perplexity(model_gpt, more_ids),
                _perplexity(model_gpt, less_ids),
            )
        )
        parts.append(
            _diff_cell(
                _perplexity(model_custom, more_ids),
                _perplexity(model_custom, less_ids),
            )
        )
        parts.append(
            "<td style='padding: 0 1em; background-image: "
            "linear-gradient(90deg, rgba(255,255,255,1) 0%, rgba(255,0,255,0.2) 100%)'>"
            f"{escape(sent_less, quote=False)}</td></tr>"
        )
    parts.append("</table>")
    return "".join(parts)
|
77 |
|
|
|
93 |
device = torch.device("cpu")
|
94 |
|
95 |
model_id = "gpt2"
# Reference model: the stock pretrained GPT-2, moved to the selected device.
model_gpt = GPT2LMHeadModel.from_pretrained(model_id).to(device)
# Debiased model, loaded as a whole pickled object from a local checkpoint.
# `map_location` places its tensors on the same device as the inputs and the
# reference model, wherever the checkpoint was originally saved.
# NOTE(review): torch.load unpickles arbitrary objects -- only load
# checkpoint files from a trusted source.
model_custom = torch.load(
    "./gpt2_attn_heads_dm_top10_seed_1.pt", map_location=device
)
# A pickled module keeps whatever train/eval flag it was saved with; force
# eval mode so scoring is deterministic.
# NOTE(review): assumes the checkpoint holds an nn.Module -- confirm.
model_custom.eval()
tokenizer = GPT2TokenizerFast.from_pretrained(model_id)
dataset = CrowSPairsDataset()
|
100 |
|