File size: 1,134 Bytes
57cf043
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd

from common.configuration import Configuration


class DocumentRanking:
    """Re-rank candidate documents by a combined score.

    For every candidate row the combined score is::

        scores[i] * len(text) ** beta + alpha * ||title_embedding - query_embedding||^2

    Candidates are ordered by ascending combined score and the first
    ``k_neighbors`` row positions are returned.
    """

    def __init__(self, df: pd.DataFrame, config: Configuration):
        """Keep the document table and read the ranker weights from *config*.

        Args:
            df: Document table; ``doc_ranking`` reads its ``TitleEmbedding``
                and ``Text`` columns.
            config: Configuration object. ``db_config.ranker.alpha`` and
                ``db_config.ranker.beta`` are read here;
                ``db_config.search.vector_search.k_neighbors`` is read on
                every ``doc_ranking`` call.
        """
        self.df = df
        self.config = config
        self.alpha = config.db_config.ranker.alpha
        self.beta = config.db_config.ranker.beta

    def doc_ranking(self, query_embedding, scores, indexes):
        """Return the row positions of the best candidates after re-ranking.

        Args:
            query_embedding: Embedding of the query. It is subtracted from
                each title embedding and the difference must support
                ``.dot`` (presumably NumPy arrays of equal shape -- TODO
                confirm against the caller).
            scores: Per-candidate scores, indexed positionally in the same
                order as *indexes* (presumably distances from the vector
                search, since smaller combined scores rank first -- verify
                against the caller).
            indexes: Positional row indexes into ``self.df`` (used with
                ``iloc``) identifying the candidates.

        Returns:
            A list with the entries of *indexes* sorted by ascending
            combined score, cut to the first
            ``config.db_config.search.vector_search.k_neighbors`` items.
        """
        # Select the candidate rows once; both columns come from the same rows.
        candidates = self.df.iloc[indexes]
        title_embeddings = candidates['TitleEmbedding'].to_list()
        texts = candidates['Text'].to_list()

        new_scores = []
        for pos, (title_embedding, text) in enumerate(zip(title_embeddings, texts)):
            # Squared Euclidean distance between the title and the query.
            diff = title_embedding - query_embedding
            title_distance = diff.dot(diff)
            # NOTE(review): with plain Python numbers an empty text combined
            # with a negative beta raises ZeroDivisionError -- confirm the
            # expected sign of beta and whether empty texts can occur.
            length_weight = len(text) ** self.beta
            new_scores.append(scores[pos] * length_weight + self.alpha * title_distance)

        metric_df = pd.DataFrame()
        metric_df['NewScores'] = new_scores
        metric_df['Indexes'] = indexes
        # Ascending order: the smallest combined score comes first.
        ranked = metric_df.sort_values(by=['NewScores'])

        k_neighbors = self.config.db_config.search.vector_search.k_neighbors
        return ranked['Indexes'].to_list()[:k_neighbors]