File size: 878 Bytes
b93b2dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import faiss, json
from script.preprocessing_text import Preprocessor
from rank_bm25 import BM25Okapi
import numpy as np

class KnowledgeBase:
    """Retrieval facade combining a BM25 lexical index and a FAISS vector index.

    Both search methods return document positions (row numbers in the
    respective index), not the documents themselves.
    """

    def __init__(self, faiss_path, preprocessing_path) -> None:
        """Load both indexes from disk.

        Args:
            faiss_path: Path of a serialized FAISS index, read with
                ``faiss.read_index``.
            preprocessing_path: Path of a JSON file whose parsed content is
                handed to ``BM25Okapi`` as the corpus (presumably a list of
                token lists, one per document -- confirm against the script
                that writes this file).
        """
        self.BM25_model = BM25Okapi(self._load(preprocessing_path))
        self.vector_base = faiss.read_index(faiss_path)

    def _load(self, path):
        """Return the parsed content of the JSON file at *path*."""
        # Binary mode is deliberate: json.load detects UTF-8/16/32 on bytes.
        with open(path, 'rb') as file:
            return json.load(file)

    def search_by_BM25(self, query, k=5) -> list:
        """Return the positions of the best BM25 matches for *query*.

        Args:
            query: Raw query; it is passed through
                ``Preprocessor.preprocessing_text`` before scoring.
            k: Maximum number of results. Values below 1 give an empty list;
                ``None`` returns the full ranking.

        Returns:
            A list of corpus positions ordered by descending BM25 score.
            Documents with equal scores keep their corpus order.
        """
        # NOTE(review): a new Preprocessor is built per query; if its
        # construction is expensive and stateless it could be cached.
        preprocessor = Preprocessor()
        prep_query = preprocessor.preprocessing_text(query)
        doc_scores = self.BM25_model.get_scores(prep_query)
        # A stable sort makes the order of tied scores deterministic.
        ranking = np.argsort(-doc_scores, kind="stable")
        # Clamp so a negative k cannot turn into "all but the last |k|".
        limit = None if k is None else max(k, 0)
        return ranking[:limit].tolist()

    def search_by_embedding(self, embedding, k=5):
        """Return the positions of the *k* nearest vectors in the FAISS index.

        Args:
            embedding: One query vector (1-D) or a batch of query vectors
                (2-D, one per row). Any array-like is accepted and converted
                to a contiguous float32 matrix before the search.
            k: Number of neighbours to retrieve per query vector.

        Returns:
            The label array produced by ``Index.search``: one row per query
            vector, *k* columns. A single 1-D query yields a 1-row array.
        """
        # Index.search unpacks x.shape as (n, d), so a lone vector must be
        # promoted to a 1-row matrix; float32 is the dtype FAISS works in.
        queries = np.ascontiguousarray(np.atleast_2d(embedding), dtype=np.float32)
        _, indexes = self.vector_base.search(queries, k)
        return indexes