File size: 837 Bytes
ecbc596 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine
import numpy as np
import pandas as pd
from datasets import load_dataset
import pickle as pkl
def recommend(query: str, n: int = 5) -> pd.Series:
    """Return the titles of the articles most similar to ``query``.

    The query is embedded with the ``all-MiniLM-L6-v2`` sentence-transformer
    model and compared, by cosine similarity, against precomputed article
    embeddings read from ``data/articles_embeddings.pkl``.

    Args:
        query: Free-text query to find similar articles for.
        n: Maximum number of titles to return. Values ``<= 0`` yield an
            empty result.

    Returns:
        The ``title`` column of the selected dataset rows, ordered from the
        least to the most similar of the selected articles (the best match
        is last).

    Notes:
        * Only the first 1000 stored embeddings are considered as candidates.
        * Assumes the stored embeddings are positionally aligned with the
          rows of the ``Mohamed-BC/Articles`` train split -- TODO confirm.
        * The model, the dataset and the embeddings are loaded on every call.
    """
    # Load the model
    model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
    # Load the data
    data = load_dataset('Mohamed-BC/Articles')['train'].to_pandas()
    # Get the embeddings; the context manager guarantees the file handle is
    # closed even if unpickling fails.
    # NOTE(security): pickle can execute arbitrary code while loading, so
    # this file must only ever come from a trusted source.
    with open('data/articles_embeddings.pkl', 'rb') as emb_file:
        a_embeddings = pkl.load(emb_file)
    # Encode the query
    q_embedding = model.encode(query)
    # Calculate the cosine similarity (scipy's `cosine` is a distance, so
    # similarity is 1 - distance).
    cos_sim = np.array([1 - cosine(q_embedding, emb) for emb in a_embeddings[:1000]])
    # Get the top n recommendations. An explicit, non-negative start index is
    # used instead of `[-n:]`, because `[-0:]` would select every candidate
    # and a negative n would select an arbitrary tail.
    ranked = np.argsort(cos_sim)
    start = max(len(ranked) - n, 0)
    top_n = ranked[start:]
    return data.iloc[top_n]['title']
|