Spaces:
Configuration error
Configuration error
File size: 1,870 Bytes
a779273 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import time
import operator
from tqdm import tqdm
from annoy import AnnoyIndex
from memory_profiler import profile
class TicToc:
    """Minimal stopwatch: prints the time elapsed between start() and stop()."""

    def __init__(self):
        # Reading taken by the most recent start(); None until start() is called.
        self.i = None

    def start(self):
        """Mark the beginning of the interval to be measured."""
        # perf_counter() is monotonic and high-resolution, so the measured
        # duration cannot be distorted by system clock adjustments the way a
        # difference of time.time() values can.
        self.i = time.perf_counter()

    def stop(self):
        """Print the seconds elapsed since the last start() and return them.

        Returns:
            The elapsed time in seconds as a float.

        Raises:
            TypeError: if start() was never called (self.i is still None).
        """
        elapsed = time.perf_counter() - self.i
        print(elapsed, "seg.")
        return elapsed
class Ann:
    """Approximate nearest-neighbour word lookup backed by an Annoy index.

    Words and vectors are matched by position: the vector at index ``i`` is
    added to the index under item id ``i``, and neighbour ids returned by the
    index are translated back to words through ``self.words[i]``.
    """

    def __init__(self, words, vectors, coord):
        """Store the vocabulary, its vectors and its coordinates as lists.

        Args:
            words: object exposing ``to_list()`` that yields the vocabulary
                (presumably a pandas Series -- confirm against the caller).
            vectors: object exposing ``to_list()`` that yields one vector per
                word, in the same order as ``words``.
            coord: object exposing ``to_list()``; stored on the instance but
                not read by any method visible in this class.
        """
        self.words = words.to_list()
        self.vectors = vectors.to_list()
        self.coord = coord.to_list()
        self.tree = None  # created by init(); get() needs it to be built
        self.tt = TicToc()

    @profile
    def init(self, n_trees=10, metric='angular', n_jobs=-1):
        """Populate and build the Annoy index, printing the time of each phase.

        The method is wrapped in ``memory_profiler``'s ``profile`` decorator.

        Args:
            n_trees: number of trees passed to ``AnnoyIndex.build``.
            metric: distance metric; one of "angular", "euclidean",
                "manhattan", "hamming" or "dot".
            n_jobs: number of build threads; -1 uses all available CPUs.

        Raises:
            IndexError: if ``self.vectors`` is empty, because the vector
                dimension is read from its first element.
        """
        print("Init tree...")
        self.tt.start()
        # The index dimensionality is taken from the first vector.
        self.tree = AnnoyIndex(len(self.vectors[0]), metric=metric)
        for i, v in tqdm(enumerate(self.vectors), total=len(self.vectors)):
            self.tree.add_item(i, v)
        self.tt.stop()

        print("Build tree...")
        self.tt.start()
        self.tree.build(n_trees=n_trees, n_jobs=n_jobs)
        self.tt.stop()

    def __getWordId(self, word):
        """Return the position of ``word`` in the vocabulary, or None if absent."""
        try:
            return self.words.index(word)
        except ValueError:
            # list.index signals a missing element with ValueError; any other
            # exception is a real error and is allowed to propagate.
            return None

    def get(self, word, n_neighbors=10):
        """Return the words closest to ``word`` according to the index.

        Args:
            word: vocabulary entry to look up.
            n_neighbors: number of neighbours requested from the index.

        Returns:
            A tuple with the neighbouring words (always a tuple, including
            when it holds zero or one element), or None when ``word`` is not
            in the vocabulary, in which case a message is printed.
        """
        word_id = self.__getWordId(word)
        if word_id is None:
            print(f"The word '{word}' does not exist")
            return None

        neighbor_ids = self.tree.get_nns_by_item(word_id, n_neighbors)
        # Index explicitly rather than through operator.itemgetter: itemgetter
        # returns a bare element for a single id and fails for zero ids, which
        # made the return shape depend on the number of neighbours.
        return tuple(self.words[i] for i in neighbor_ids)