Spaces:
Sleeping
Sleeping
File size: 878 Bytes
b93b2dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import faiss, json
from script.preprocessing_text import Preprocessor
from rank_bm25 import BM25Okapi
import numpy as np
class KnowledgeBase:
    """Document retrieval over a BM25 lexical index and a FAISS vector index.

    Attributes are populated in ``__init__``:

    * ``BM25_model`` -- a ``BM25Okapi`` built from the JSON file at
      ``preprocessing_path``.
    * ``vector_base`` -- the index returned by ``faiss.read_index``.
    """

    def __init__(self, faiss_path, preprocessing_path) -> None:
        """Load both indexes from disk.

        Args:
            faiss_path: Path of a serialized FAISS index.
            preprocessing_path: Path of a JSON file whose content is handed
                to ``BM25Okapi`` as the corpus.
        """
        # NOTE(review): BM25Okapi presumably expects a list of tokenised
        # documents (list of lists of str) -- confirm against the file that
        # the preprocessing step writes.
        self.BM25_model = BM25Okapi(self._load(preprocessing_path))
        self.vector_base = faiss.read_index(faiss_path)

    def _load(self, path):
        """Return the parsed JSON content of the file at ``path``."""
        # Binary mode lets ``json.load`` detect the UTF-8/16/32 encoding
        # itself instead of depending on the platform's default codec.
        with open(path, 'rb') as file:
            return json.load(file)

    def search_by_BM25(self, query, k=5):
        """Rank the corpus against ``query`` with BM25.

        Args:
            query: Raw query, passed through ``Preprocessor.preprocessing_text``
                before scoring.
            k: Maximum number of results to return.

        Returns:
            A list of corpus positions ordered by decreasing BM25 score,
            truncated to the first ``k`` entries.
        """
        # NOTE(review): a new Preprocessor is built for every query; if it is
        # stateless and costly to construct it could be created once instead.
        preprocessor = Preprocessor()
        prep_query = preprocessor.preprocessing_text(query)
        doc_scores = self.BM25_model.get_scores(prep_query)
        # Negating the scores turns argsort's ascending order into
        # "best match first".
        ranked = np.argsort(-doc_scores)
        return ranked[:k].tolist()

    def search_by_embedding(self, embedding, k=5):
        """Query the FAISS index for the nearest neighbours of ``embedding``.

        Args:
            embedding: One query vector (1-D) or a batch of query vectors
                (2-D, one per row). Any array-like NumPy can convert is
                accepted.
            k: Number of neighbours requested per query vector; defaults to
                5, matching ``search_by_BM25``.

        Returns:
            The index array produced by ``vector_base.search`` (the distance
            array it also returns is discarded). A single 1-D query is
            treated as a batch of one.
        """
        # FAISS works on C-contiguous float32 matrices with one query per
        # row. Normalising here lets callers pass a single vector, a plain
        # list or a float64 array without tripping over that requirement.
        queries = np.ascontiguousarray(np.atleast_2d(embedding), dtype=np.float32)
        _, indexes = self.vector_base.search(queries, k)
        return indexes
|