Spaces:
Sleeping
Sleeping
Arko Banik
committed on
Commit
·
6de238b
1
Parent(s):
7cf9102
fix attempt
Browse files
app.py
CHANGED
@@ -125,18 +125,17 @@ def averaged_glove_embeddings_gdrive(sentence, word_index_dict, embeddings, mode
|
|
125 |
##################################
|
126 |
##### TODO: Add code here ########
|
127 |
##################################
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
embed = embed/total
|
138 |
|
139 |
-
return
|
140 |
|
141 |
|
142 |
def get_category_embeddings(embeddings_metadata):
|
@@ -204,34 +203,31 @@ def get_sorted_cosine_similarity(embeddings_metadata):
|
|
204 |
input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search, model_name=model_name)
|
205 |
else:
|
206 |
input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search)
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
cat_idx +=1
|
220 |
-
|
221 |
-
|
222 |
-
sorted_list = sorted(cat_scores, key=lambda x: x[1])
|
223 |
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
|
|
233 |
|
234 |
-
return
|
235 |
|
236 |
|
237 |
def plot_piechart(sorted_cosine_scores_items):
|
|
|
125 |
##################################
|
126 |
##### TODO: Add code here ########
|
127 |
##################################
|
128 |
+
|
129 |
+
words = [word.strip('.,?!').lower() for word in sentence.split()]
|
130 |
+
total = 0
|
131 |
+
for w in words:
|
132 |
+
if w in word_index_dict:
|
133 |
+
embedding += embeddings[word_index_dict[w]]
|
134 |
+
total +=1
|
135 |
+
if total != 0:
|
136 |
+
embedding = embedding/total
|
|
|
137 |
|
138 |
+
return embedding
|
139 |
|
140 |
|
141 |
def get_category_embeddings(embeddings_metadata):
|
|
|
203 |
input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search, model_name=model_name)
|
204 |
else:
|
205 |
input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search)
|
206 |
+
|
207 |
+
cat_scores = []
|
208 |
+
for index in range(len(categories)):
|
209 |
+
##########################################
|
210 |
+
# TODO: Compute cosine similarity between input sentence and categories
|
211 |
+
# TODO: Update category embeddings if category not found
|
212 |
+
##########################################
|
213 |
+
cat_embed = category_embeddings[index]
|
214 |
+
cat = categories[index]
|
215 |
+
# Calc cosine sim
|
216 |
+
cat_scores.append((cat, np.dot(input_embedding,cat_embed)))
|
217 |
+
# Store doc_id and score as a tuple
|
|
|
|
|
|
|
|
|
218 |
|
219 |
+
|
220 |
+
sorted_list = sorted(cat_scores, key=lambda x: x[1])
|
221 |
+
|
222 |
+
sorted_cats = [element[0] for element in sorted_list]
|
223 |
+
|
224 |
+
#flip sorting order
|
225 |
+
sorted_cats = sorted_cats[::-1]
|
226 |
+
# Add list to Map
|
227 |
+
for cat_pair in sorted_cats:
|
228 |
+
cosine_sim[cat_pair[0]] = cat_pair[1]
|
229 |
|
230 |
+
return cosine_sim
|
231 |
|
232 |
|
233 |
def plot_piechart(sorted_cosine_scores_items):
|