File size: 837 Bytes
ecbc596
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine
import numpy as np
import pandas as pd
from datasets import load_dataset
import pickle as pkl
def recommend(query, n=5):
    """Return the titles of the articles most similar to ``query``.

    The query is embedded with the ``all-MiniLM-L6-v2`` sentence-transformer
    model (on CPU) and compared, by cosine similarity, against precomputed
    article embeddings read from ``data/articles_embeddings.pkl``.

    Only the first 1000 stored embeddings are scored, so recommendations can
    only come from the first 1000 rows of the dataset.
    NOTE(review): this assumes embedding ``i`` belongs to dataset row ``i``;
    confirm against whatever script produced the pickle file.

    Args:
        query: Text to find similar articles for; passed unchanged to
            ``model.encode`` (presumably a string -- confirm with callers).
        n: Maximum number of titles to return. Values below 1 yield an
            empty result; values above the number of scored candidates
            return every candidate.

    Returns:
        The ``'title'`` column of the selected rows, ordered from least to
        most similar (the best match is last).
    """
    # Load the model
    model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
    # Load the data
    data = load_dataset('Mohamed-BC/Articles')['train'].to_pandas()
    # Get the precomputed embeddings; the context manager guarantees the
    # file handle is closed even if unpickling fails.
    # NOTE: unpickling can execute arbitrary code -- only load this file
    # from a trusted source.
    with open('data/articles_embeddings.pkl', 'rb') as f:
        a_embeddings = pkl.load(f)
    # Encode the query
    q_embedding = model.encode(query)
    # Calculate the cosine similarity (scipy's `cosine` is a distance, so
    # similarity is 1 - distance).
    cos_sim = np.array(
        [1 - cosine(q_embedding, emb) for emb in a_embeddings[:1000]]
    )
    # Get the top n recommendations. An explicit start index is used rather
    # than `[-n:]` because `[-0:]` would select *every* candidate for n == 0
    # and a negative n would slice from the wrong end.
    ranked = np.argsort(cos_sim)
    keep = min(max(n, 0), len(ranked))
    top_n = ranked[len(ranked) - keep:]
    return data.iloc[top_n]['title']