|
import numpy as np
|
|
from sentence_transformers import SentenceTransformer
|
|
import faiss
|
|
import pandas as pd
|
|
|
|
def search_kg(query, index_path, dataset_path, top_k=5):
    """Search a FAISS index of news embeddings and return matching snippets.

    Parameters
    ----------
    query : str
        Free-text search query.
    index_path : str
        Path to a serialized FAISS index whose vectors are row-aligned
        with the dataset rows.
    dataset_path : str
        Path to a JSON-lines file with ``headline`` and
        ``short_description`` columns.
    top_k : int, optional
        Maximum number of neighbours to retrieve (default 5).

    Returns
    -------
    str
        The matched "headline. short_description" snippets joined by a
        single space; empty string when nothing valid is returned.
    """
    index = faiss.read_index(index_path)
    df = pd.read_json(dataset_path, lines=True)

    # NOTE(review): the model (and index/dataframe) are reloaded on every
    # call; callers issuing many queries may want to hoist these loads out.
    model = SentenceTransformer('all-MiniLM-L6-v2')

    # encode() produces float32, which FAISS expects; requesting a numpy
    # array directly avoids the tensor -> .cpu() -> .numpy() round-trip.
    query_embedding = model.encode([query], convert_to_numpy=True)

    distances, indices = index.search(query_embedding, top_k)

    results = []
    # FAISS pads with -1 when fewer than top_k neighbours exist, so iterate
    # the returned ids themselves and skip any out-of-range row.
    for idx in indices[0]:
        if 0 <= idx < len(df):
            row = df.iloc[idx]
            results.append(f"{row['headline']}. {row['short_description']}")

    return " ".join(results)