felipekitamura committed on
Commit
b065d7a
·
verified ·
1 Parent(s): a367dae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -6
app.py CHANGED
@@ -4,10 +4,29 @@ import numpy as np
4
  import matplotlib.pyplot as plt
5
  from sklearn.decomposition import PCA
6
  from sklearn.manifold import TSNE
7
# Word-embedding lookup table: maps each word (str) to its embedding vector.
# allow_pickle must be enabled to load a pickled dict; pass the bool True,
# not the string 'TRUE' (the string only worked because it happens to be truthy).
model = np.load('gpt2-1k-words.npy', allow_pickle=True).item()

# Path of the pre-rendered plot image; inference() returns it for Gradio to display.
cache = "/home/user/app/d.jpg"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # Function to reduce dimensions
12
  def reduce_dimensions(data, method='PCA'):
13
  if method == 'PCA':
@@ -62,18 +81,18 @@ sp = gr.Image()
62
 
63
def inference(word1, word2, word3):
    """Plot the PCA projection of word3 + (word2 - word1) alongside the three
    input words and the model's nearest words, then return the cached image path.

    All three words must be valid keys of the module-level `model`.
    """
    transform = model[word3] + model[word2] - model[word1]
    output = model.similar_by_vector(transform)
    print(output)
    word_list = [word1, word2, word3]
    # Keep the first element (the word) of each of the top-4 (word, score) pairs.
    word_list.extend(x for x, y in output[:4])
    words = {key: model[key] for key in word_list}
    words[f"{word3} + ({word2} - {word1})"] = transform
    # Stack each embedding as a row so `data` is (num_words, embedding_dim).
    data = np.concatenate([vec[np.newaxis, :] for vec in words.values()], axis=0)
    print(data.shape)
    labels = words.keys()
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    print(reduced_data_pca.shape)
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache
78
 
79
  examples = [
 
4
  import matplotlib.pyplot as plt
5
  from sklearn.decomposition import PCA
6
  from sklearn.manifold import TSNE
7
# Word-embedding lookup table: maps each word (str) to its embedding vector.
# allow_pickle must be enabled to load a pickled dict; pass the bool True,
# not the string 'TRUE' (the string only worked because it happens to be truthy).
model = np.load('gpt2-red-1k-words.npy', allow_pickle=True).item()

# Build the (M, N) lookup matrix used by find_most_similar_vectors.
# np.vstack keeps one row per word even when each stored embedding is a 1-D
# vector — np.concatenate on 1-D values would flatten everything into a single
# long vector, breaking the row-wise distance computation.
data = np.vstack(list(model.values()))
# Row index -> word, aligned with the rows of `data` (dicts preserve insertion order).
keys = list(model)

# Path of the pre-rendered plot image; inference() returns it for Gradio to display.
cache = "/home/user/app/d.jpg"
12
 
13
def find_most_similar_vectors(vector, lookup_table, top_k=3):
    """Find the indices of the vectors in the lookup table most similar to `vector`.

    Similarity is measured by Euclidean distance (smaller distance = more similar).

    :param vector: A 1xN (or length-N) numpy array — the query vector
    :param lookup_table: An MxN numpy array — one candidate vector per row
    :param top_k: Number of nearest vectors to return; defaults to 3 to
        preserve the original hard-coded behavior
    :return: A list of `top_k` row indices into `lookup_table`, closest first
    """
    # Euclidean distance from the query to every row of the table.
    distances = np.linalg.norm(lookup_table - vector, axis=1)

    # Indices of the `top_k` smallest distances, in ascending distance order.
    indices_of_smallest = np.argsort(distances)[:top_k]

    return indices_of_smallest.tolist()
28
+
29
+
30
  # Function to reduce dimensions
31
  def reduce_dimensions(data, method='PCA'):
32
  if method == 'PCA':
 
81
 
82
def inference(word1, word2, word3):
    """Compute the analogy vector word3 + (word2 - word1), find its nearest
    neighbours in the embedding table, plot all involved vectors, and return
    the cached plot image path for Gradio.

    :param word1: word subtracted from word2 (must be a key of `model`)
    :param word2: word added to the analogy (must be a key of `model`)
    :param word3: base word of the analogy (must be a key of `model`)
    :return: path to the plot image (`cache`)
    :raises KeyError: if any input word is not in `model`
    """
    transform = model[word3] + model[word2] - model[word1]
    # find_most_similar_vectors returns a LIST of row indices; map each index
    # back to its word. Indexing `keys` directly with that list (the previous
    # code: keys[find_most_similar_vectors(...)]) raises TypeError.
    neighbours = [keys[i] for i in find_most_similar_vectors(transform, data)]
    print(neighbours)
    word_list = [word1, word2, word3]
    word_list.extend(neighbours)
    words = {key: model[key] for key in word_list}
    words[word3 + " + (" + word2 + " - " + word1 + ")"] = transform
    # Use a name distinct from the module-level `data`: assigning to `data`
    # inside this function would make it local for the WHOLE function body,
    # so the read above would raise UnboundLocalError before reaching here.
    points = np.concatenate([x[np.newaxis, :] for x in words.values()], axis=0)
    print(points.shape)
    labels = words.keys()
    plot_reduced_data(points, labels, 'PCA Results')
    return cache
97
 
98
  examples = [