Spaces:
Sleeping
Sleeping
import faiss, json | |
from script.preprocessing_text import Preprocessor | |
from rank_bm25 import BM25Okapi | |
import numpy as np | |
class KnowledgeBase:
    """Retrieval backend combining a BM25 lexical model and a FAISS index.

    Attributes:
        BM25_model: ``BM25Okapi`` instance built from the JSON corpus.
        vector_base: FAISS index read from disk.
    """

    def __init__(self, faiss_path, preprocessing_path) -> None:
        """Load the BM25 corpus and the FAISS index from disk.

        Args:
            faiss_path: Path to a serialized FAISS index file.
            preprocessing_path: Path to a JSON file whose content is handed
                to ``BM25Okapi`` (presumably a list of tokenized documents
                -- TODO confirm against the preprocessing script).
        """
        self.BM25_model = BM25Okapi(self._load(preprocessing_path))
        self.vector_base = faiss.read_index(faiss_path)

    def _load(self, path):
        """Return the parsed content of the JSON file at ``path``."""
        # Binary mode is intentional: json.load detects UTF-8/16/32 itself.
        with open(path, 'rb') as file:
            return json.load(file)

    def search_by_BM25(self, query, k=5):
        """Return the positions of the ``k`` best BM25 matches for ``query``.

        Args:
            query: Raw query, preprocessed with ``Preprocessor`` before
                scoring.
            k: Maximum number of document positions to return.

        Returns:
            A list of int positions into the BM25 corpus, highest score
            first.
        """
        # NOTE(review): a Preprocessor is built on every call; if it is
        # stateless it could be created once in __init__ -- confirm first.
        preprocessor = Preprocessor()
        prep_query = preprocessor.preprocessing_text(query)
        doc_scores = self.BM25_model.get_scores(prep_query)
        # Negate so that argsort's ascending order yields best-first.
        ranked = np.argsort(-doc_scores)
        return ranked[:k].tolist()

    def search_by_embedding(self, embedding, k=5):
        """Return the ids of the ``k`` nearest vectors for each query row.

        Args:
            embedding: One query vector (1-D) or a batch of query vectors
                (2-D, one per row); any array-like convertible to float32.
            k: Number of neighbours to retrieve per query row.

        Returns:
            The id array produced by the index search, one row per query
            (a single 1-D query yields one row).
        """
        # FAISS expects a C-contiguous float32 matrix of shape (n, d);
        # normalise here so lists, float64 arrays and single vectors work.
        queries = np.ascontiguousarray(embedding, dtype=np.float32)
        if queries.ndim == 1:
            queries = queries.reshape(1, -1)
        _, indexes = self.vector_base.search(queries, k)
        return indexes