christopher committed on
Commit
2623e85
·
1 Parent(s): c97076c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -0
app.py CHANGED
@@ -1 +1,24 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from faiss import IndexFlatIP
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
# Load the saved embedding matrix and register every row in an
# inner-product index whose width is the size of the matrix's last axis.
input_embeddings = np.load("bert_input_embeddings.npy")
index = IndexFlatIP(input_embeddings.shape[-1])
index.add(input_embeddings)

# NOTE(review): `tokenizer` is not defined or imported anywhere in the
# visible file -- confirm where it is supposed to come from.
# Swap the keys and values of the tokenizer's vocab mapping, then hold the
# result as a Series ordered by its index so entries can be taken by position.
vocab = {token_id: token for token, token_id in tokenizer.vocab.items()}
lookup_table = pd.Series(vocab)
lookup_table = lookup_table.sort_index()
11
+
12
def get_first_subword(word):
    """Return the vocabulary id for ``word``, or for its first subword.

    ``word`` is first looked up directly in ``tokenizer.vocab``. If it is
    not a key there, ``word`` is run through ``tokenizer`` with special
    tokens disabled and the first entry of the resulting ``'input_ids'``
    is returned instead.

    NOTE(review): ``tokenizer`` is a module-level name that is not defined
    in the visible file; presumably a Hugging Face tokenizer -- confirm.

    Args:
        word: The string to look up.

    Returns:
        The value stored under ``word`` in ``tokenizer.vocab``, or the
        first element of ``tokenizer(word, ...)['input_ids']``.
    """
    try:
        return tokenizer.vocab[word]
    except KeyError:
        # Only a missing vocabulary key triggers the fallback; any other
        # failure (including KeyboardInterrupt) now propagates unchanged.
        return tokenizer(word, add_special_tokens=False)['input_ids'][0]
17
+
18
# Look up the nearest neighbours of a single vocabulary entry in the index.
word_to_lookup = "##lity"
num_neighbors = 20

i = get_first_subword(word_to_lookup)
# Slice with i:i+1 (not a plain index) so the query keeps a leading axis of
# length one; the first value returned by the search is not used.
_, I = index.search(input_embeddings[i:i+1], num_neighbors)
# I[0] holds the result positions for the single query row; use them to pull
# the matching entries out of the lookup table.
hits = lookup_table.take(I[0])
# NOTE(review): bare expression -- its value is only shown in a REPL or
# notebook; when run as a script the result is discarded.
hits.values