File size: 2,954 Bytes
589bce0
5e94579
2f7cc88
d39e4f6
b64f334
d39e4f6
5e94579
 
8a2bdbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d1a91b
 
d39e4f6
ad47b8d
 
d39e4f6
 
ad47b8d
 
 
d39e4f6
 
 
 
 
a1a1ad8
d39e4f6
 
 
 
5e94579
 
 
a1a1ad8
 
 
d39e4f6
5e94579
a1a1ad8
 
 
5e94579
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import urllib.request
import gradio as gr
import numpy as np
from utils import similarity
import scipy



def load_glove(path, dim=300):
    """Load GloVe-style text embeddings from ``path``.

    Every non-blank line must consist of a token followed by ``dim``
    space-separated numbers.

    Args:
        path: Path of the embedding text file.
        dim: Expected number of vector components per line. Defaults to 300,
            the value that used to be hard-coded.

    Returns:
        A ``(wv, w2i, vocab)`` tuple where ``wv`` is a float ndarray with one
        row per token, ``w2i`` maps each token to its row index (the last
        occurrence wins if a token is repeated), and ``vocab`` lists the
        tokens in file order.

    Raises:
        ValueError: If a line does not hold exactly ``dim + 1`` fields, or a
            vector component cannot be parsed as a float.
    """
    wv = []
    vocab = []
    # Explicit UTF-8 so decoding does not depend on the platform's default
    # encoding. Iterating the handle streams the file instead of holding a
    # second full copy of it in memory the way readlines() does.
    with open(path, encoding="utf-8") as f:
        for lineno, line in enumerate(f, start=1):
            tokens = line.strip().split(" ")
            if tokens == [""]:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # A real exception instead of `assert`, which is stripped by -O.
            if len(tokens) != dim + 1:
                raise ValueError(
                    f"{path}:{lineno}: expected {dim + 1} fields, "
                    f"got {len(tokens)}"
                )
            vocab.append(tokens[0])
            wv.append([float(elem) for elem in tokens[1:]])

    w2i = {w: i for i, w in enumerate(vocab)}
    # reshape keeps the result 2-D even when the file holds no vectors.
    wv = np.array(wv, dtype=float).reshape(-1, dim)
    print(len(vocab), wv.shape, len(w2i))

    return wv, w2i, vocab

def _fetch_if_missing(url, dest):
    """Download ``url`` to ``dest`` unless ``dest`` already exists.

    The payload is first written to ``dest + ".part"`` and renamed into place
    only after the download finishes, so an interrupted transfer never leaves
    a truncated file behind that a later run would mistake for a complete one.
    """
    import os  # function-scope import keeps this block self-contained

    if os.path.exists(dest):
        return
    partial = dest + ".part"
    urllib.request.urlretrieve(url, partial)
    os.replace(partial, dest)


# Embedding files are fetched once and reused on later starts instead of being
# re-downloaded on every import of this module.
# NOTE(review): the URL paths look content-addressed (hash-like segments), so a
# cached copy should not go stale — confirm before relying on this.
_fetch_if_missing("https://cdn-lfs.huggingface.co/repos/51/d0/51d02f0735de2187e78af7db593c8d71efbcddfe9f5e45cfa1c77904daf0dfdf/20dac974c73413d4b4f463f9b28eabfb326793d2732c108a9cd1262f2a055dc3?response-content-disposition=attachment%3B%20filename%3D%22glove_debiased_300d.txt%22", "glove_debiased_300d.txt")
# NOTE(review): this URL ends in "%222" where the one above ends in "%22"; the
# trailing "2" looks like a typo. Left unchanged because it cannot be verified
# from here.
_fetch_if_missing("https://cdn-lfs.huggingface.co/repos/51/d0/51d02f0735de2187e78af7db593c8d71efbcddfe9f5e45cfa1c77904daf0dfdf/91125602f730fea7ca768736c6f442e668b49db095682bf2aad375db061c21ed?response-content-disposition=attachment%3B%20filename%3D%22glove.6B.300d.txt%222", "glove_300d.txt")

# Both embedding tables are loaded once at import time and shared by every
# request handled by debias().
original_wv, original_w2i, original_vocab = load_glove('glove_300d.txt')
wv_debiased, w2i_debiased, vocab_debiased = load_glove('glove_debiased_300d.txt')

def debias(gendered_word1, gendered_word2, occupation, model):
    """Compare how differently two words relate to an occupation.

    For each embedding table (debiased and original) this computes
    ``abs(similarity(gendered_word1, occupation) -
    similarity(gendered_word2, occupation))``.

    Args:
        gendered_word1: First word of the pair (e.g. "man").
        gendered_word2: Second word of the pair (e.g. "woman").
        occupation: Word both inputs are compared against (e.g. "nurse").
        model: Embedding choice coming from the UI. Currently unused: only the
            module-level GloVe tables are loaded, so it is accepted purely to
            keep the callback signature stable.

    Returns:
        A 3-tuple ``(debiased_gap, original_gap, explanation_html)``.
    """
    # Surrounding whitespace typed into the text boxes would otherwise make the
    # lookup key differ from the stored token; load_glove strips each line and
    # splits on spaces, so stored tokens never carry it.
    word1 = (gendered_word1 or "").strip()
    word2 = (gendered_word2 or "").strip()
    job = (occupation or "").strip()

    def _gap(wv, w2i):
        # Absolute difference between the two words' similarity to the job.
        return abs(similarity(word1, job, wv, w2i) - similarity(word2, job, wv, w2i))

    explanation = "<h3>How the difference is measured?</h3> <p>abs(similarity(gendered_word1,occupation)-similarity(gendered_word2,occupation))</p>"

    return _gap(wv_debiased, w2i_debiased), _gap(original_wv, original_w2i), explanation




# Gradio UI: three free-text inputs plus an embedding selector feed debias();
# its three return values fill the two result boxes and the explanation panel,
# in that order.
demo = gr.Interface(
    debias,
    inputs=[
        gr.Textbox(placeholder="Gendered Word 1(Exp. man)"),
        gr.Textbox(placeholder="Gendered Word 2(Exp. woman)"),
        gr.Textbox(placeholder="Occupation(Exp. nurse)"),
        # Only one embedding set is offered, so it is pre-selected.
        gr.Radio(choices=['Glove-300d'], value='Glove-300d'),
    ],
    outputs=[
        # Labels previously misspelled "difference" as "differece".
        gr.Textbox(label="Absolute similarity difference in the debiased version"),
        gr.Textbox(label="Absolute similarity difference in the original version"),
        gr.Markdown(
            value="<h3>How the difference is measured?</h3> <p>abs(similarity(gendered_word1,occupation)-similarity(gendered_word2,occupation))</p>"
        ),
    ],
    description='<a href="https://aclanthology.org/2020.acl-main.484/">Double-Hard Debias: Tailoring Word Embeddings for Gender Bias Mitigation</a>',
)

if __name__ == '__main__':
    demo.launch()