Arko Banik committed on
Commit
7cf9102
·
1 Parent(s): 78a8cf5

change function calls, add initial attempt

Browse files
Files changed (1) hide show
  1. app.py +43 -6
app.py CHANGED
@@ -21,7 +21,7 @@ def cosine_similarity(x, y):
21
  ##################################
22
  ### TODO: Add code here ##########
23
  ##################################
24
- pass
25
 
26
 
27
  # Function to Load Glove Embeddings
@@ -125,6 +125,18 @@ def averaged_glove_embeddings_gdrive(sentence, word_index_dict, embeddings, mode
125
  ##################################
126
  ##### TODO: Add code here ########
127
  ##################################
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
 
130
  def get_category_embeddings(embeddings_metadata):
@@ -176,6 +188,9 @@ def get_sorted_cosine_similarity(embeddings_metadata):
176
  ##########################################
177
  ## TODO: Get embeddings for categories ###
178
  ##########################################
 
 
 
179
 
180
  else:
181
  model_name = embeddings_metadata["model_name"]
@@ -190,13 +205,33 @@ def get_sorted_cosine_similarity(embeddings_metadata):
190
  else:
191
  input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search)
192
  for index in range(len(categories)):
193
- pass
194
  ##########################################
195
  # TODO: Compute cosine similarity between input sentence and categories
196
  # TODO: Update category embeddings if category not found
197
  ##########################################
198
-
199
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
 
202
  def plot_piechart(sorted_cosine_scores_items):
@@ -354,7 +389,8 @@ if st.session_state.text_search:
354
  }
355
  with st.spinner("Obtaining Cosine similarity for Glove..."):
356
  sorted_cosine_sim_glove = get_sorted_cosine_similarity(
357
- st.session_state.text_search, embeddings_metadata
 
358
  )
359
 
360
  # Sentence transformer embeddings
@@ -362,7 +398,8 @@ if st.session_state.text_search:
362
  embeddings_metadata = {"embedding_model": "transformers", "model_name": ""}
363
  with st.spinner("Obtaining Cosine similarity for 384d sentence transformer..."):
364
  sorted_cosine_sim_transformer = get_sorted_cosine_similarity(
365
- st.session_state.text_search, embeddings_metadata
 
366
  )
367
 
368
  # Results and Plot Pie Chart for Glove
 
21
  ##################################
22
  ### TODO: Add code here ##########
23
  ##################################
24
+ return np.exp(np.dot(x,y)/(np.linalg.norm(x)*np.linalg.norm(y)))
25
 
26
 
27
  # Function to Load Glove Embeddings
 
125
  ##################################
126
  ##### TODO: Add code here ########
127
  ##################################
128
+ for word in sentence:
129
+ #print(sentence)
130
+ words = [word.strip('.,?!').lower() for word in sentence.split()]
131
+ total = 0
132
+ for w in words:
133
+ if w in embeddings:
134
+ embed += embeddings[w]
135
+ total +=1
136
+ if total != 0:
137
+ embed = embed/total
138
+
139
+ return embed
140
 
141
 
142
  def get_category_embeddings(embeddings_metadata):
 
188
  ##########################################
189
  ## TODO: Get embeddings for categories ###
190
  ##########################################
191
+ category_embeddings = []
192
+ for cat in categories:
193
+ category_embeddings.append(get_glove_embeddings(cat))
194
 
195
  else:
196
  model_name = embeddings_metadata["model_name"]
 
205
  else:
206
  input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search)
207
  for index in range(len(categories)):
208
+
209
  ##########################################
210
  # TODO: Compute cosine similarity between input sentence and categories
211
  # TODO: Update category embeddings if category not found
212
  ##########################################
213
+ cat_scores = []
214
+ cat_idx = 0
215
+ for cat_embed in category_embeddings:
216
+ # Calc cosine sim
217
+ cat_scores.append((cat_idx, np.dot(input,cat_embed)))
218
+ # Store doc_id and score as a tuple
219
+ cat_idx +=1
220
+
221
+
222
+ sorted_list = sorted(cat_scores, key=lambda x: x[1])
223
+
224
+ sorted_cats = [element[0] for element in sorted_list]
225
+
226
+ #flip sorting order
227
+ sorted_cats = sorted_cats[::-1]
228
+ # Add list to Map
229
+ result = sorted_cats[0]
230
+ selected_cat = categories[result]
231
+ print(selected_cat)
232
+
233
+
234
+ return selected_cat
235
 
236
 
237
  def plot_piechart(sorted_cosine_scores_items):
 
389
  }
390
  with st.spinner("Obtaining Cosine similarity for Glove..."):
391
  sorted_cosine_sim_glove = get_sorted_cosine_similarity(
392
+ # st.session_state.text_search,
393
+ embeddings_metadata
394
  )
395
 
396
  # Sentence transformer embeddings
 
398
  embeddings_metadata = {"embedding_model": "transformers", "model_name": ""}
399
  with st.spinner("Obtaining Cosine similarity for 384d sentence transformer..."):
400
  sorted_cosine_sim_transformer = get_sorted_cosine_similarity(
401
+ # st.session_state.text_search,
402
+ embeddings_metadata
403
  )
404
 
405
  # Results and Plot Pie Chart for Glove