Spaces:
Sleeping
Sleeping
Arko Banik
committed on
Commit
·
6de238b
1
Parent(s):
7cf9102
fix attempt
Browse files
app.py
CHANGED
|
@@ -125,18 +125,17 @@ def averaged_glove_embeddings_gdrive(sentence, word_index_dict, embeddings, mode
|
|
| 125 |
##################################
|
| 126 |
##### TODO: Add code here ########
|
| 127 |
##################################
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
embed = embed/total
|
| 138 |
|
| 139 |
-
return
|
| 140 |
|
| 141 |
|
| 142 |
def get_category_embeddings(embeddings_metadata):
|
|
@@ -204,34 +203,31 @@ def get_sorted_cosine_similarity(embeddings_metadata):
|
|
| 204 |
input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search, model_name=model_name)
|
| 205 |
else:
|
| 206 |
input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search)
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
cat_idx +=1
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
sorted_list = sorted(cat_scores, key=lambda x: x[1])
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
|
|
|
| 233 |
|
| 234 |
-
return
|
| 235 |
|
| 236 |
|
| 237 |
def plot_piechart(sorted_cosine_scores_items):
|
|
|
|
| 125 |
##################################
|
| 126 |
##### TODO: Add code here ########
|
| 127 |
##################################
|
| 128 |
+
|
| 129 |
+
words = [word.strip('.,?!').lower() for word in sentence.split()]
|
| 130 |
+
total = 0
|
| 131 |
+
for w in words:
|
| 132 |
+
if w in word_index_dict:
|
| 133 |
+
embedding += embeddings[word_index_dict[w]]
|
| 134 |
+
total +=1
|
| 135 |
+
if total != 0:
|
| 136 |
+
embedding = embedding/total
|
|
|
|
| 137 |
|
| 138 |
+
return embedding
|
| 139 |
|
| 140 |
|
| 141 |
def get_category_embeddings(embeddings_metadata):
|
|
|
|
| 203 |
input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search, model_name=model_name)
|
| 204 |
else:
|
| 205 |
input_embedding = get_sentence_transformer_embeddings(st.session_state.text_search)
|
| 206 |
+
|
| 207 |
+
cat_scores = []
|
| 208 |
+
for index in range(len(categories)):
|
| 209 |
+
##########################################
|
| 210 |
+
# TODO: Compute cosine similarity between input sentence and categories
|
| 211 |
+
# TODO: Update category embeddings if category not found
|
| 212 |
+
##########################################
|
| 213 |
+
cat_embed = category_embeddings[index]
|
| 214 |
+
cat = categories[index]
|
| 215 |
+
# Calc cosine sim
|
| 216 |
+
cat_scores.append((cat, np.dot(input_embedding,cat_embed)))
|
| 217 |
+
# Store doc_id and score as a tuple
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
|
| 219 |
+
|
| 220 |
+
sorted_list = sorted(cat_scores, key=lambda x: x[1])
|
| 221 |
+
|
| 222 |
+
sorted_cats = [element[0] for element in sorted_list]
|
| 223 |
+
|
| 224 |
+
#flip sorting order
|
| 225 |
+
sorted_cats = sorted_cats[::-1]
|
| 226 |
+
# Add list to Map
|
| 227 |
+
for cat_pair in sorted_cats:
|
| 228 |
+
cosine_sim[cat_pair[0]] = cat_pair[1]
|
| 229 |
|
| 230 |
+
return cosine_sim
|
| 231 |
|
| 232 |
|
| 233 |
def plot_piechart(sorted_cosine_scores_items):
|