# NOTE(review): the lines that were here were non-Python scrape residue from
# the Hugging Face Spaces page (build status "Build error", file size
# "1,979 Bytes", commit hashes, and line-number gutters). They were not part
# of the program and made the file unparseable; preserved as this comment.
import gradio as gr
from faiss import IndexFlatIP, IndexFlatL2
import pandas as pd
import numpy as np
from transformers import AutoTokenizer
# BERT tokenizer: maps user-entered tokens/words to vocabulary ids.
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased")
# Precomputed BERT input-embedding matrices, one row per vocabulary token:
# an L2-normalized copy (for cosine similarity via inner product) and the
# raw, unnormalized copy (for Euclidean distance).
normalized_input_embeddings = np.load("normalized_bert_input_embeddings.npy")
unnormalized_input_embeddings = np.load("unnormalized_bert_input_embeddings.npy")
# Exact (flat) L2-distance index over the raw embeddings.
# NOTE(review): index_L2 is built but not used anywhere visible in this file;
# search() only queries the inner-product index below.
index_L2 = IndexFlatL2(unnormalized_input_embeddings.shape[-1])
index_L2.add(unnormalized_input_embeddings)
# Exact (flat) inner-product index over the normalized embeddings; inner
# product on unit-length vectors equals cosine similarity.
index_IP_normalized = IndexFlatIP(normalized_input_embeddings.shape[-1])
index_IP_normalized.add(normalized_input_embeddings)
# Reverse mapping id -> token, as a pandas Series sorted by vocabulary id so
# positional .take() on FAISS result indices returns the right tokens.
vocab = {v:k for k,v in tokenizer.vocab.items()}
lookup_table = pd.Series(vocab).sort_index()
def get_first_subword(word):
    """Return the vocabulary id for *word*, or of its first WordPiece subword.

    Fast path: the word exists verbatim as a whole-word vocabulary entry.
    Fallback: tokenize it and take the id of the first resulting subword.
    """
    try:
        return tokenizer.vocab[word]
    # Catch only the missing-key case; the original bare `except:` would also
    # have swallowed KeyboardInterrupt and unrelated programming errors.
    except KeyError:
        return tokenizer(word, add_special_tokens=False)['input_ids'][0]
def search(token_to_lookup, num_neighbors=50):
    """Return the nearest vocabulary tokens to *token_to_lookup*.

    Looks up the token's (first-subword) embedding and queries the
    cosine-similarity FAISS index for its nearest neighbors.

    Parameters:
        token_to_lookup: token or word entered by the user.
        num_neighbors: how many neighbors to retrieve (default 50 — the UI
            only supplies one input, so a default is required; without it
            every call from the Gradio interface raised TypeError).

    Returns:
        A pair of lists: (whole-word neighbors, "##" subword neighbors).
    """
    i = get_first_subword(token_to_lookup)
    # Inner product on the normalized embeddings == cosine similarity.
    _, I = index_IP_normalized.search(normalized_input_embeddings[i:i+1], num_neighbors)
    hits = lookup_table.take(I[0])
    # Drop the query token itself (always the top hit) and [unusedNN] slots.
    results = [r for r in hits.values[1:] if "[unused" not in r]
    whole_words = [r for r in results if "##" not in r]
    subwords = [r for r in results if "##" in r]
    return whole_words, subwords
# Gradio UI: one textbox in, two textboxes out (whole-word neighbors and
# "##" subword neighbors). The examples are single-element lists because the
# interface has exactly one input component.
iface = gr.Interface(
    fn=search,
    inputs=gr.Textbox(lines=1, label="Vocabulary Token", placeholder="Enter token..."),
    outputs=[gr.Textbox(label="Nearest tokens"), gr.Textbox(label="Nearest subwords")],
    examples=[
        ["##logy"],
        ["##ness"],
        ["##nity"],
        ["responded"],
        ["queen"],
        ["king"],
        ["hospital"],
        ["disease"],
        ["grammar"],
        ["philosophy"],
        ["aristotle"],
        ["##ting"],
        ["woman"],
        ["man"],
    ],
)
# Fix: the original line ended with a stray " |" (scrape residue) which made
# this file a SyntaxError — the likely cause of the Space's "Build error".
iface.launch(enable_queue=True, debug=True, show_error=True)