# File size: 1,986 Bytes
# ceee38b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Define create_matrix function
def create_matrix(df):
    """Build a sparse book-by-user rating matrix and its ID/index lookups.

    Args:
        df: DataFrame with the columns ``user_id_mapping``, ``book_id`` and
            ``Actual Rating``; each row is one rating.

    Returns:
        A tuple ``(X, user_mapper, book_mapper, user_inv_mapper,
        book_inv_mapper)`` where

        * ``X`` is a ``csr_matrix`` of shape ``(M, N)`` with one row per
          distinct book and one column per distinct user, holding the
          ``Actual Rating`` values.
        * ``user_mapper`` / ``book_mapper`` map an ID to its column / row
          index in ``X``.
        * ``user_inv_mapper`` / ``book_inv_mapper`` map an index back to
          the ID.

        Indices follow the sorted order of the distinct IDs.
    """
    # A single np.unique call per column yields both the sorted distinct IDs
    # and, via return_inverse, the position of every row's ID within them.
    # This replaces repeated unique() scans and per-row dict lookups.
    user_ids, user_index = np.unique(df["user_id_mapping"], return_inverse=True)
    book_ids, book_index = np.unique(df["book_id"], return_inverse=True)

    N = len(user_ids)
    M = len(book_ids)

    # Map IDs to indices
    user_mapper = dict(zip(user_ids, range(N)))
    book_mapper = dict(zip(book_ids, range(M)))

    # Map indices to IDs
    user_inv_mapper = dict(zip(range(N), user_ids))
    book_inv_mapper = dict(zip(range(M), book_ids))

    # Books are rows and users are columns, so each row of X is one book's
    # rating vector.
    # NOTE: scipy sums the values of repeated (book, user) pairs, so a user
    # who rated the same book more than once contributes the total.
    X = csr_matrix((df["Actual Rating"], (book_index, user_index)), shape=(M, N))

    return X, user_mapper, book_mapper, user_inv_mapper, book_inv_mapper


# Find similar books using KNN.
# Adapted from https://www.geeksforgeeks.org/recommendation-system-in-python/?ref=rp
def find_similar_books(book_id, X, k, book_mapper, book_inv_mapper, metric='cosine'):
    """Return the IDs of the books whose rows in ``X`` are nearest to a book.

    Args:
        book_id: ID of the query book; must be a key of ``book_mapper``.
        X: Matrix the nearest-neighbour model is fitted on. Row ``i`` is the
            vector of the book whose index is ``i``.
        k: Number of similar books wanted, not counting the query book.
        book_mapper: Mapping from book ID to row index in ``X``.
        book_inv_mapper: Mapping from row index in ``X`` back to book ID.
        metric: Distance metric name passed to ``NearestNeighbors``.

    Returns:
        A tuple ``(distances, neighbour_ids)``. ``neighbour_ids`` is a list
        of at most ``k`` book IDs, nearest first, never containing
        ``book_id`` itself. ``distances`` is the unmodified array returned
        by ``kneighbors``: it still includes the entry for the query row
        and is therefore not aligned one-to-one with ``neighbour_ids``.

    Raises:
        KeyError: If ``book_id`` is not in ``book_mapper``.
    """
    book_ind = book_mapper[book_id]
    book_vec = X[book_ind].reshape(1, -1)

    # The query row is part of the fitted data, so ask for one extra
    # neighbour to make room for it. Never ask for more neighbours than
    # there are rows, which would make kneighbors raise on small catalogues.
    n_neighbors = min(k + 1, X.shape[0])

    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)
    distances, indices = kNN.kneighbors(book_vec, return_distance=True)

    # Exclude the query by row index rather than by position: when several
    # rows are equally close (duplicate or all-zero vectors) the query is
    # not guaranteed to be the first result.
    neighbour_ids = [
        book_inv_mapper[n]
        for n in indices.ravel().tolist()
        if n != book_ind
    ]

    # If the query row was not among the results, one surplus neighbour
    # remains; trim so callers never get more than k.
    return distances, neighbour_ids[:k]