File size: 1,285 Bytes
7fea1f4
 
 
 
 
 
 
 
 
 
 
 
 
 
988c713
7fea1f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from src.model.doc import Doc
from src.model.block import Block


class Retriever:
    """Store the blocks of a document in a vector collection and search them.

    The collection is created and populated once, in ``__init__``;
    ``similarity_search`` then queries it and rebuilds ``Block`` objects from
    the metadata stored alongside each entry.
    """

    def __init__(self, db_client, plan_doc: Doc, content_doc: Doc, content_fr_doc: Doc, collection_name: str) -> None:
        """Merge the three documents block by block and index the result.

        Args:
            db_client: Object exposing ``create_collection(name=...)``
                (presumably a ChromaDB client -- confirm against the caller).
            plan_doc: Document whose block ``content`` is the indexed text and
                whose block ``index`` values are used as entry ids.
            content_doc: Document whose blocks are enriched in place and
                stored, via ``to_dict()``, as the metadata of each entry.
            content_fr_doc: Document whose block ``content`` and ``title`` are
                copied onto the content blocks as ``content_fr``/``title_fr``.
            collection_name: Name handed to ``create_collection``.
        """
        plan_blocks = plan_doc.blocks
        content_blocks = content_doc.blocks
        content_fr_blocks = content_fr_doc.blocks

        # NOTE(review): both zips pair blocks purely by position and stop at
        # the shorter sequence, so documents with differing block counts leave
        # the trailing content blocks un-enriched -- confirm that the three
        # documents are always aligned.
        for plan_block, content_block in zip(plan_blocks, content_blocks):
            content_block.specials = plan_block.specials
        for content_block, fr_block in zip(content_blocks, content_fr_blocks):
            content_block.content_fr = fr_block.content
            content_block.title_fr = fr_block.title

        self.collection = db_client.create_collection(name=collection_name)
        # The searchable text and the ids come from the plan blocks, while the
        # stored metadata is the enriched content block at the same position.
        self.collection.add(
            documents=[block.content for block in plan_blocks],
            ids=[block.index for block in plan_blocks],
            metadatas=[block.to_dict() for block in content_blocks]
        )

    def similarity_search(self, query: str) -> "list[Block]":
        """Return the blocks matching ``query``, each tagged with its distance.

        Args:
            query: Text passed to the collection as ``query_texts``.

        Returns:
            A list of ``Block`` objects rebuilt from the stored metadata, in
            the order the collection returned them, each with a ``distance``
            attribute set from the matching entry of the query result.
        """
        res = self.collection.query(query_texts=query)
        # Only the first entry of each result field is read; presumably the
        # results are grouped per query text and a single text is sent.
        block_dict_sources = res['metadatas'][0]
        distances = res['distances'][0]
        blocks = []
        for block_dict, distance in zip(block_dict_sources, distances):
            block = Block().from_dict(block_dict)
            block.distance = distance
            blocks.append(block)
        return blocks