File size: 2,191 Bytes
412b7ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import streamlit as st
import numpy as np
import numpy.linalg as la
import pickle 
#import streamlit_analytics


# Compute Cosine Similarity
def cosine_similarity(x, y):
    """Return the cosine similarity between two vectors.

    Args:
        x: First vector (anything ``np.array`` accepts, e.g. list or ndarray).
        y: Second vector, same length as ``x``.

    Returns:
        ``dot(x, y) / (||x|| * ||y||)``. When either vector has zero norm the
        angle is undefined; ``0.0`` is returned instead of dividing by zero
        (which would emit a RuntimeWarning and produce ``nan``).
    """
    x_arr = np.array(x)
    y_arr = np.array(y)
    denominator = la.norm(x_arr) * la.norm(y_arr)
    if denominator == 0:
        # Happens e.g. for the all-zero average produced when no word of a
        # query is in the vocabulary; treat it as "no similarity".
        return 0.0
    return np.dot(x_arr, y_arr) / denominator


# Function to Load Glove Embeddings

def load_glove_embeddings(file):
    """Parse a GloVe-style text file into a ``word -> vector`` dictionary.

    Every non-blank line is split on whitespace; the first token is taken as
    the word and the remaining tokens as its vector components.

    Args:
        file: Path of the embeddings text file (opened as UTF-8 text).

    Returns:
        dict mapping each word to a ``float32`` NumPy array of its components.
        If a word occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If a component cannot be converted to a float.
    """
    print("Loading Glove Model")
    glove_model = {}
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank or whitespace-only line (typically a trailing newline
                # at end of file); indexing values[0] would raise IndexError.
                continue
            word, *components = values
            glove_model[word] = np.asarray(components, dtype='float32')
    print("Loaded {} words".format(len(glove_model)))
    return glove_model

# Get Averaged Glove Embedding of a sentence
def averaged_glove_embeddings(sentence, embeddings_dict):
    """Average the embeddings of the words of ``sentence`` found in the vocabulary.

    Args:
        sentence: Text to embed; it is split on any run of whitespace.
        embeddings_dict: Mapping from word to its embedding vector. All
            vectors are expected to share the same length.

    Returns:
        NumPy array holding the element-wise mean of the vectors of the words
        present in ``embeddings_dict``. Words not in the mapping are ignored;
        if none are found the result is an all-zero vector.
    """
    # Take the dimensionality from the vocabulary instead of hard-coding 50,
    # so embeddings of other sizes work too. An empty mapping keeps the
    # historical 50-dimensional zero vector.
    if embeddings_dict:
        dimension = len(next(iter(embeddings_dict.values())))
    else:
        dimension = 50

    glove_embedding = np.zeros(dimension)
    count_words = 0
    # split() without arguments also copes with tabs, newlines and repeated
    # spaces, which split(" ") would leave attached to (or between) the words.
    for word in sentence.split():
        if word in embeddings_dict:
            glove_embedding += embeddings_dict[word]
            count_words += 1

    # max(..., 1) avoids dividing by zero when no word was recognised.
    return glove_embedding / max(count_words, 1)



# Gold standard words to search from
gold_words = ["flower","mountain","tree","car","building"]

# Text Search
#with streamlit_analytics.track():
st.title("Search Based Retrieval Demo")
st.subheader("Pass in an input word or even a sentence (e.g. jasmine or mount adams)")
text_search = st.text_input("", value="")

# Load glove embeddings
# Streamlit re-executes this whole script on every user interaction, so the
# parsed embeddings are cached to avoid re-reading the large GloVe file each
# time. `st.cache_resource` is only present in newer Streamlit releases; when
# it is missing we fall back to the plain (uncached) loader.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_embeddings = _cache_resource(load_glove_embeddings)
else:
    _load_embeddings = load_glove_embeddings
glove_embeddings = _load_embeddings('glove.6B.50d.txt')

# Find closest word to an input word
if text_search:
    # The glove.6B vocabulary is uncased, so lowercase the query; otherwise
    # inputs such as "Jasmine" would never match any entry.
    input_embedding = averaged_glove_embeddings(text_search.lower(), glove_embeddings)

    if not np.any(input_embedding):
        # No word of the query is in the vocabulary: the averaged embedding is
        # all zeros and every cosine similarity would be undefined, so report
        # that instead of presenting an arbitrary "closest" word.
        st.warning("None of the words in your input were found in the GloVe vocabulary. Please try a different input.")
    else:
        # Similarity of the query to each gold word, keyed by the word's index.
        cosine_sim = {
            index: cosine_similarity(input_embedding, glove_embeddings[word])
            for index, word in enumerate(gold_words)
        }

        print(cosine_sim)
        # Highest similarity first; element [0][0] is the index of the best match.
        sorted_cosine_sim = sorted(cosine_sim.items(), key = lambda x: x[1], reverse=True)

        st.write("(My search uses glove embeddings)")
        st.write("Closest word I have between flower, mountain, tree, car and building for your input is: ")
        st.subheader(gold_words[sorted_cosine_sim[0][0]] )

    st.write("")
    st.write("Demo developed by Dr. Karthik Mohan")