File size: 1,156 Bytes
d660b02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from sentence_transformers import SentenceTransformer

# Leverage the Poetry virtual environment to run the code:
# poetry run python code_snippets/08_text_embeddings.py

if __name__ == "__main__":
    # 1. Load a pretrained Sentence Transformer model.
    model = SentenceTransformer("all-MiniLM-L6-v2")

    # The sentences to encode.
    sentences = ["The dog sits outside waiting for a treat.", "I am going swimming.", "The dog is swimming."]

    # 2. Calculate embeddings.
    embeddings = model.encode(sentences)
    print(embeddings.shape)  # noqa
    # Output: [3, 384]

    # 3. Calculate the embedding similarities using cosine similarity.
    similarities = model.similarity(embeddings, embeddings)
    print(similarities)  # noqa
    # Output:
    # tensor([[ 1.0000, -0.0389,  0.2692],
    #     [-0.0389,  1.0000,  0.3837],
    #     [ 0.2692,  0.3837,  1.0000]])
    #
    # similarities[0, 0] = The similarity between the first sentence and itself.
    # similarities[0, 1] = The similarity between the first and second sentence.
    # similarities[2, 1] = The similarity between the third and second sentence.