felipekitamura committed on
Commit
b065d7a
·
verified ·
1 Parent(s): a367dae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -6
app.py CHANGED
@@ -4,10 +4,29 @@ import numpy as np
4
  import matplotlib.pyplot as plt
5
  from sklearn.decomposition import PCA
6
  from sklearn.manifold import TSNE
7
# Word-embedding lookup table: maps each word (str) to its embedding vector.
# allow_pickle must be enabled to load a pickled dict; pass the bool True,
# not the string 'TRUE' (the string only worked because it happens to be truthy).
model = np.load('gpt2-1k-words.npy', allow_pickle=True).item()

# Path of the pre-rendered plot image; inference() returns it for Gradio to display.
cache = "/home/user/app/d.jpg"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # Function to reduce dimensions
12
  def reduce_dimensions(data, method='PCA'):
13
  if method == 'PCA':
@@ -62,18 +81,18 @@ sp = gr.Image()
62
 
63
def inference(word1, word2, word3):
    """Plot the PCA projection of word3 + (word2 - word1) alongside the three
    input words and the model's nearest words, then return the cached image path.

    All three words must be valid keys of the module-level `model`.
    """
    transform = model[word3] + model[word2] - model[word1]
    output = model.similar_by_vector(transform)
    print(output)
    word_list = [word1, word2, word3]
    # Keep the first element (the word) of each of the top-4 (word, score) pairs.
    word_list.extend(x for x, y in output[:4])
    words = {key: model[key] for key in word_list}
    words[f"{word3} + ({word2} - {word1})"] = transform
    # Stack each embedding as a row so `data` is (num_words, embedding_dim).
    data = np.concatenate([vec[np.newaxis, :] for vec in words.values()], axis=0)
    print(data.shape)
    labels = words.keys()
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    print(reduced_data_pca.shape)
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache
78
 
79
  examples = [
 
4
  import matplotlib.pyplot as plt
5
  from sklearn.decomposition import PCA
6
  from sklearn.manifold import TSNE
7
# Word-embedding lookup table: maps each word (str) to its embedding vector.
# allow_pickle must be enabled to load a pickled dict; pass the bool True,
# not the string 'TRUE' (the string only worked because it happens to be truthy).
model = np.load('gpt2-red-1k-words.npy', allow_pickle=True).item()

# Build the (M, N) lookup matrix used by find_most_similar_vectors.
# np.vstack keeps one row per word even when each stored embedding is a 1-D
# vector — np.concatenate on 1-D values would flatten everything into a single
# long vector, breaking the row-wise distance computation.
data = np.vstack(list(model.values()))
# Row index -> word, aligned with the rows of `data` (dicts preserve insertion order).
keys = list(model)

# Path of the pre-rendered plot image; inference() returns it for Gradio to display.
cache = "/home/user/app/d.jpg"
12
 
13
def find_most_similar_vectors(vector, lookup_table, top_k=3):
    """Find the indices of the vectors in the lookup table most similar to `vector`.

    Similarity is measured by Euclidean distance (smaller distance = more similar).

    :param vector: A 1xN (or length-N) numpy array — the query vector
    :param lookup_table: An MxN numpy array — one candidate vector per row
    :param top_k: Number of nearest vectors to return; defaults to 3 to
        preserve the original hard-coded behavior
    :return: A list of `top_k` row indices into `lookup_table`, closest first
    """
    # Euclidean distance from the query to every row of the table.
    distances = np.linalg.norm(lookup_table - vector, axis=1)

    # Indices of the `top_k` smallest distances, in ascending distance order.
    indices_of_smallest = np.argsort(distances)[:top_k]

    return indices_of_smallest.tolist()
28
+
29
+
30
  # Function to reduce dimensions
31
  def reduce_dimensions(data, method='PCA'):
32
  if method == 'PCA':
 
81
 
82
def inference(word1, word2, word3):
    """Compute the analogy vector word3 + (word2 - word1), find its nearest
    neighbours in the embedding table, plot all involved vectors, and return
    the cached plot image path for Gradio.

    :param word1: word subtracted from word2 (must be a key of `model`)
    :param word2: word added to the analogy (must be a key of `model`)
    :param word3: base word of the analogy (must be a key of `model`)
    :return: path to the plot image (`cache`)
    :raises KeyError: if any input word is not in `model`
    """
    transform = model[word3] + model[word2] - model[word1]
    # find_most_similar_vectors returns a LIST of row indices; map each index
    # back to its word. Indexing `keys` directly with that list (the previous
    # code: keys[find_most_similar_vectors(...)]) raises TypeError.
    neighbours = [keys[i] for i in find_most_similar_vectors(transform, data)]
    print(neighbours)
    word_list = [word1, word2, word3]
    word_list.extend(neighbours)
    words = {key: model[key] for key in word_list}
    words[word3 + " + (" + word2 + " - " + word1 + ")"] = transform
    # Use a name distinct from the module-level `data`: assigning to `data`
    # inside this function would make it local for the WHOLE function body,
    # so the read above would raise UnboundLocalError before reaching here.
    points = np.concatenate([x[np.newaxis, :] for x in words.values()], axis=0)
    print(points.shape)
    labels = words.keys()
    plot_reduced_data(points, labels, 'PCA Results')
    return cache
97
 
98
  examples = [