# Nechba's picture
# Upload 5 files
# ceee38b verified
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
# Define create_matrix function
def create_matrix(df):
    """Build a sparse book-by-user rating matrix plus ID/index lookup tables.

    Args:
        df: DataFrame with the columns ``user_id_mapping``, ``book_id`` and
            ``Actual Rating``; each row is one rating.

    Returns:
        A tuple ``(X, user_mapper, book_mapper, user_inv_mapper,
        book_inv_mapper)``:

        * ``X`` -- ``csr_matrix`` of shape ``(n_books, n_users)`` holding the
          ``Actual Rating`` values (rows are books, columns are users).
        * ``user_mapper`` / ``book_mapper`` -- ID -> matrix index.
        * ``user_inv_mapper`` / ``book_inv_mapper`` -- matrix index -> ID.

        Indices follow the sorted order of the distinct IDs.
    """
    # One np.unique call per column yields both the sorted distinct IDs and,
    # through return_inverse, each row's position in that sorted array. This
    # keeps the matrix dimensions and the mappers derived from the same data
    # and avoids a Python-level dict lookup for every rating.
    user_ids, user_index = np.unique(df["user_id_mapping"], return_inverse=True)
    book_ids, book_index = np.unique(df["book_id"], return_inverse=True)
    n_users = len(user_ids)
    n_books = len(book_ids)

    # Map IDs to indices
    user_mapper = dict(zip(user_ids, range(n_users)))
    book_mapper = dict(zip(book_ids, range(n_books)))

    # Map indices to IDs
    user_inv_mapper = dict(zip(range(n_users), user_ids))
    book_inv_mapper = dict(zip(range(n_books), book_ids))

    # NOTE: scipy sums the values of repeated (book, user) coordinates when it
    # builds the matrix, so duplicate ratings are added together.
    X = csr_matrix(
        (df["Actual Rating"], (book_index, user_index)),
        shape=(n_books, n_users),
    )
    return X, user_mapper, book_mapper, user_inv_mapper, book_inv_mapper
"""
Find similar books using KNN
"""
# Define find_similar_books function - Source - https://www.geeksforgeeks.org/recommendation-system-in-python/?ref=rp
def find_similar_books(book_id, X, k,book_mapper,book_inv_mapper, metric='cosine'):
    """Return up to ``k`` books whose rows in ``X`` are nearest to ``book_id``.

    Args:
        book_id: ID of the query book; must be a key of ``book_mapper``.
        X: Matrix with one row per book (e.g. the matrix returned by
            ``create_matrix``).
        k: Number of similar books wanted.
        book_mapper: Mapping from book ID to row index in ``X``.
        book_inv_mapper: Mapping from row index in ``X`` to book ID.
        metric: Distance metric passed to ``NearestNeighbors``.

    Returns:
        A tuple ``(distances, neighbour_ids)``. ``neighbour_ids`` lists at
        most ``k`` book IDs, nearest first, never including ``book_id``
        itself. ``distances`` is the raw array returned by ``kneighbors`` for
        the ``k + 1`` points that were queried (capped at the number of rows
        in ``X``); it still contains the entry for the query book whenever
        that book was among the hits, so it is not index-aligned with
        ``neighbour_ids``.

    Raises:
        KeyError: If ``book_id`` is not in ``book_mapper``.
    """
    book_ind = book_mapper[book_id]
    book_vec = X[book_ind].reshape(1, -1)

    # The query book is itself a row of X, so ask for one extra neighbour.
    # Cap at the number of rows so a large k returns every other book instead
    # of making NearestNeighbors raise.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)
    distances, indices = kNN.kneighbors(book_vec, return_distance=True)

    # Remove the query book by row index rather than by position: when other
    # books sit at the same distance (e.g. identical or parallel rating
    # vectors under cosine distance) the query is not guaranteed to be the
    # first hit, and dropping element 0 would discard a real neighbour while
    # returning the query book as similar to itself.
    neighbour_ids = [
        book_inv_mapper[n] for n in indices[0].tolist() if n != book_ind
    ]

    # If the query book was crowded out of the hits by ties, k + 1 candidates
    # remain; keep only the k nearest.
    return distances, neighbour_ids[:k]