File size: 1,735 Bytes
58627fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# from colbert.indexing.codecs.residual import ResidualCodec
import colbert.indexing.codecs.residual_embeddings as residual_embeddings

from colbert.search.strided_tensor import StridedTensor

"""
import line_profiler
import atexit
profile = line_profiler.LineProfiler()
atexit.register(profile.print_stats)
"""

class ResidualEmbeddingsStrided:
    def __init__(self, codec, embeddings, doclens):
        self.codec = codec
        self.codes = embeddings.codes
        self.residuals = embeddings.residuals
        self.use_gpu = self.codec.use_gpu

        self.codes_strided = StridedTensor(self.codes, doclens, use_gpu=self.use_gpu)
        self.residuals_strided = StridedTensor(self.residuals, doclens, use_gpu=self.use_gpu)

    def lookup_eids(self, embedding_ids, codes=None, out_device='cuda'):
        codes = self.codes[embedding_ids] if codes is None else codes
        residuals = self.residuals[embedding_ids]

        return self.codec.decompress(residual_embeddings.ResidualEmbeddings(codes, residuals))

    # @profile
    def lookup_pids(self, passage_ids, out_device='cuda'):
        codes_packed, codes_lengths = self.codes_strided.lookup(passage_ids)#.as_packed_tensor()
        residuals_packed, _ = self.residuals_strided.lookup(passage_ids)#.as_packed_tensor()

        embeddings_packed = self.codec.decompress(residual_embeddings.ResidualEmbeddings(codes_packed, residuals_packed))

        return embeddings_packed, codes_lengths
        # return StridedTensor(embeddings_packed, codes_lengths).as_padded_tensor()
        # return StridedTensor.pad_packed(embeddings_packed, codes_lengths)

    def lookup_codes(self, passage_ids):
        return self.codes_strided.lookup(passage_ids)#.as_packed_tensor()