Spaces:
Configuration error
Configuration error
import time | |
import operator | |
from tqdm import tqdm | |
from annoy import AnnoyIndex | |
from memory_profiler import profile | |
class TicToc:
    """Minimal wall-clock stopwatch that prints the elapsed time on stop."""

    def __init__(self):
        # Timestamp of the most recent start(); None until start() is called.
        self.i = None

    def start(self):
        """Record the current wall-clock time as the start of the interval."""
        self.i = time.time()

    def stop(self):
        """Print the time elapsed since the last start(), followed by "seg.".

        The value printed is a difference of two ``time.time()`` readings,
        i.e. seconds ("seg." is presumably short for "segundos").
        """
        elapsed = time.time() - self.i
        print(elapsed, "seg.")
class Ann:
    """Approximate nearest-neighbour lookup over word vectors using Annoy.

    Typical use: construct with the vocabulary and its vectors, call
    ``init()`` once to populate and build the index, then call ``get()``
    to retrieve the words closest to a given word.
    """

    def __init__(self, words, vectors, coord):
        """Store the input columns as plain Python lists.

        Args:
            words: Object exposing ``to_list()`` that yields the vocabulary
                (presumably a pandas Series -- confirm against the caller).
            vectors: Object exposing ``to_list()`` that yields one vector per
                word, in the same order as ``words``.
            coord: Object exposing ``to_list()``; stored as ``self.coord``
                but not otherwise used by this class.
        """
        self.words = words.to_list()
        self.vectors = vectors.to_list()
        self.coord = coord.to_list()
        self.tree = None  # AnnoyIndex, created by init()
        self.tt = TicToc()

    def init(self, n_trees=10, metric='angular', n_jobs=-1):
        """Populate the Annoy index with every vector and build it.

        Progress and timings for both phases are printed to stdout.

        Args:
            n_trees: Number of trees passed to ``AnnoyIndex.build``.
            metric: Distance metric; one of "angular", "euclidean",
                "manhattan", "hamming" or "dot".
            n_jobs: Number of build threads; -1 uses all available CPUs.
        """
        print("Init tree...")
        self.tt.start()
        # The index dimensionality is taken from the first vector.
        self.tree = AnnoyIndex(len(self.vectors[0]), metric=metric)
        for i, v in tqdm(enumerate(self.vectors), total=len(self.vectors)):
            self.tree.add_item(i, v)
        self.tt.stop()

        print("Build tree...")
        self.tt.start()
        self.tree.build(n_trees=n_trees, n_jobs=n_jobs)
        self.tt.stop()

    def __getWordId(self, word):
        """Return the position of ``word`` in ``self.words``, or None if absent."""
        try:
            return self.words.index(word)
        except ValueError:
            # list.index raises ValueError when the word is not present.
            return None

    def get(self, word, n_neighbors=10):
        """Return the words nearest to ``word`` according to the built index.

        Args:
            word: Word to look up; must be present in ``self.words``.
            n_neighbors: Number of neighbours requested from the index.

        Returns:
            A tuple of neighbouring words in the order returned by the
            index, or None (after printing a message) when ``word`` is not
            in the vocabulary.
        """
        word_id = self.__getWordId(word)
        if word_id is None:
            print(f"The word '{word}' does not exist")
            return None

        neighbor_ids = self.tree.get_nns_by_item(word_id, n_neighbors)
        # Build the tuple by indexing so the result is always a tuple, even
        # when the index returns a single id or none at all.
        return tuple(self.words[i] for i in neighbor_ids)