File size: 1,194 Bytes
dc4b86a
ba0a0d3
dc4b86a
 
 
 
 
ba0a0d3
dc4b86a
 
 
 
 
 
 
 
ba0a0d3
f2b9b39
 
ba0a0d3
 
dc4b86a
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from z_utils import load_cache_embeddings 
from z_embedding import load_model, get_embeddings
import torch 
import numpy as np

# Module-level cache of the stored book-summary embeddings, loaded once at import
# time and reused by every call to computes_similarity_w_hypothetical below.
books_summaries_embs = load_cache_embeddings()

def computes_similarity_w_hypothetical(hypothetical_summaries: list[str], model = None) -> tuple[np.ndarray, np.ndarray]:
    '''Computes cosine similarity between stored book summaries and all hypothetical_summaries.

    Args:
        hypothetical_summaries: Candidate summary strings to embed and compare
            against the cached book-summary embeddings.
        model: Optional embedding model exposing ``.similarity()``; when None,
            one is loaded via ``load_model()``.

    Returns:
        A ``(similarity, ranks)`` tuple of 1-D numpy arrays:

        - ``similarity``: average cosine similarity between each stored book
          summary's embedding and all hypothetical-summary embeddings.
        - ``ranks``: indices of the book summaries ordered from most to least
          similar, i.e. ``ranks[0]`` is the best-matching book summary.
    '''
    # Fall back to the default model only when the caller did not supply one.
    model = model if model else load_model()

    hypothetical_summaries_embs = get_embeddings(hypothetical_summaries)
    # Pairwise matrix: rows = stored book summaries, cols = hypothetical summaries.
    # NOTE(review): assumes model.similarity returns cosine similarity — confirm
    # against the embedding model's configuration.
    similarity: torch.Tensor = model.similarity(books_summaries_embs, hypothetical_summaries_embs)

    # Average out the similarity across all hypothetical embeddings.
    similarity = torch.mean(similarity, dim=1)

    # Indices ordered by descending similarity (best match first).
    ranks = torch.argsort(similarity, descending=True)

    # .cpu() is a no-op for CPU tensors but keeps this safe if the model ever
    # produces CUDA tensors, where .numpy() would otherwise raise.
    return similarity.detach().cpu().numpy(), ranks.detach().cpu().numpy()