|
import pandas as pd |
|
import numpy as np |
|
from zipfile import ZipFile |
|
import tensorflow as tf |
|
from tensorflow import keras |
|
from pathlib import Path |
|
import matplotlib.pyplot as plt |
|
import gradio as gr |
|
from huggingface_hub import from_pretrained_keras |
|
from datasets import load_dataset |
|
|
|
# Fetch the two CSV files of the collaborative-filtering dataset.
book_data_load = load_dataset(
    "hqasmei/collaborative-filtering-dataset",
    data_files="book_data.csv",
)
filtered_data_load = load_dataset(
    "hqasmei/collaborative-filtering-dataset",
    data_files="filtered_data.csv",
)

# Materialise every record of each 'train' split as a list of rows.
book_data_arr = list(book_data_load['train'])
filtered_data_arr = list(filtered_data_load['train'])

# One DataFrame per CSV: book_df is used for book lookups by book_id,
# filtered_df holds the (user_id, book_id, rating) interactions.
book_df = pd.DataFrame(book_data_arr)
filtered_df = pd.DataFrame(filtered_data_arr)
|
|
|
|
|
# Give every distinct user_id a dense integer index (0 .. num_users-1) and
# keep both directions of the mapping.
user_ids = filtered_df["user_id"].unique().tolist()
user_encoded2user = dict(enumerate(user_ids))
user2user_encoded = {raw_id: index for index, raw_id in user_encoded2user.items()}
filtered_df["user"] = filtered_df["user_id"].map(user2user_encoded)
num_users = len(user2user_encoded)

# Same scheme for book_id (0 .. num_books-1).
book_ids = filtered_df["book_id"].unique().tolist()
book_encoded2book = dict(enumerate(book_ids))
book2book_encoded = {raw_id: index for index, raw_id in book_encoded2book.items()}
filtered_df["book"] = filtered_df["book_id"].map(book2book_encoded)
num_books = len(book_encoded2book)

# Store ratings as 32-bit floats.
filtered_df["rating"] = filtered_df["rating"].values.astype(np.float32)

# Pretrained Keras model, loaded through huggingface_hub.
model = from_pretrained_keras('hqasmei/collaborative-filtering-model')
|
|
|
|
|
def update_user(id):
    """Return the two tables shown for one user.

    Args:
        id: Encoded user index (a key of ``user_encoded2user``). It may
            arrive as a float when it comes from a UI slider.

    Returns:
        A ``(top_rated_books, recommendations)`` tuple of DataFrames.
    """
    # Normalise once so both lookups (and the model input built from it)
    # see the same whole-number index even if the caller passed a float.
    user_index = int(round(id))
    top_rated = get_top_rated_books_from_user(user_index)
    recommendations = get_recommendations(user_index)
    return top_rated, recommendations
|
|
|
|
|
def get_top_rated_books_from_user(id):
    """Return the books a user rated highest, best first.

    Args:
        id: Encoded user index (a key of ``user_encoded2user``).

    Returns:
        The rows of ``book_df`` for the user's five highest-rated books,
        without the ``book_id`` column, ordered from highest to lowest
        rating. Empty when the index is unknown or the user has no ratings.
    """
    decoded_id = user_encoded2user.get(id)

    books_read_by_user = filtered_df[filtered_df.user_id == decoded_id]
    top_books_user = (
        books_read_by_user.sort_values(by="rating", ascending=False)
        .head(5)
        .book_id.values
    )

    # Position of each book in the rating-ordered list (first occurrence wins).
    rank_by_book = {}
    for position, book_id in enumerate(top_books_user):
        rank_by_book.setdefault(book_id, position)

    book_df_rows = book_df[book_df["book_id"].isin(top_books_user)]
    # isin() yields rows in book_df order, which discards the ranking;
    # restore it so the table really reads "highest rated first".
    book_df_rows = book_df_rows.sort_values(
        by="book_id",
        key=lambda ids: ids.map(rank_by_book),
        kind="stable",
    )
    return book_df_rows.drop('book_id', axis=1)
|
|
|
def random_user():
    """Return the tables of ``update_user`` for a randomly chosen user index.

    The index is drawn uniformly from ``0 .. num_users - 1``.
    """
    # np.random.randint excludes its upper bound, so pass num_users itself;
    # passing num_users - 1 would never select the last user and would
    # raise when there is exactly one user.
    return update_user(np.random.randint(0, num_users))
|
|
|
def get_recommendations(id):
    """Return up to ten unread books with the highest predicted rating.

    Args:
        id: Encoded user index (a key of ``user_encoded2user``); it is also
            the value fed to the model as the user input.

    Returns:
        Rows of ``book_df`` without the ``book_id`` column, ordered from the
        highest to the lowest predicted rating. Empty when there is no
        candidate book left to score.
    """
    decoded_id = user_encoded2user.get(id)

    books_read_by_user = filtered_df[filtered_df.user_id == decoded_id]
    already_read = books_read_by_user.book_id.values
    books_not_read = book_df[~book_df["book_id"].isin(already_read)]["book_id"]
    # Keep only books that have an encoded index in book2book_encoded.
    books_not_read = list(set(books_not_read).intersection(book2book_encoded))
    books_not_read = [[book2book_encoded.get(x)] for x in books_not_read]

    if not books_not_read:
        # Nothing to score: skip the model call (it cannot handle an empty
        # batch) and return an empty table with the usual columns.
        return book_df.iloc[0:0].drop('book_id', axis=1)

    encoded_id = id

    # One (user, book) row per candidate book.
    user_book_array = np.hstack(
        ([[encoded_id]] * len(books_not_read), books_not_read)
    )
    ratings = model.predict(user_book_array).flatten()

    # Indices of the ten largest predictions, best first.
    top_ratings_indices = ratings.argsort()[-10:][::-1]
    recommended_book_ids = [
        book_encoded2book.get(books_not_read[x][0]) for x in top_ratings_indices
    ]

    rank_by_book = {}
    for position, book_id in enumerate(recommended_book_ids):
        rank_by_book.setdefault(book_id, position)

    recommended_books = book_df[book_df["book_id"].isin(recommended_book_ids)]
    # isin() yields rows in book_df order; re-sort so the table keeps the
    # predicted-rating ranking.
    recommended_books = recommended_books.sort_values(
        by="book_id",
        key=lambda ids: ids.map(rank_by_book),
        kind="stable",
    )
    return recommended_books.drop('book_id', axis=1)
|
|
|
# Gradio UI: a user slider drives two read-only tables (the user's top rated
# books and the model's recommendations).
# NOTE(review): block nesting below is reconstructed - confirm against the
# deployed layout.
demo = gr.Blocks()

with demo:
    gr.Markdown("""
    <div>
    <h1 style='text-align: center'>Book Recommender</h1>
    Collaborative Filtering is used to predict the top 10 recommended books for a particular user from the dataset based on that user and previous books they have rated.

    Note: Currently there is a bug with sliders. If you "click and drag" on the slider it will not use the correct user. Please only "click" on the slider.
    </div>
    """)

    with gr.Box():
        gr.Markdown(
        """
        ### Input
        #### Select a user to get recommendations for.
        """)

        # Users are addressed by whole-number index, so pin the slider to
        # integer steps; without an explicit step the component chooses its
        # own increment, which may be fractional or skip indices.
        inp1 = gr.Slider(0, num_users-1, value=0, step=1, label='User')

        gr.Markdown(
        """
        <br>
        """)
        gr.Markdown(
        """
        #### Books with the Highest Ratings from this user
        """)
        df1 = gr.DataFrame(headers=["title"], datatype=["str"], interactive=False)

    with gr.Box():
        gr.Markdown(
        """
        ### Output
        #### Top 10 book recommendations
        """)
        df2 = gr.DataFrame(headers=["title"], datatype=["str"], interactive=False)

    # Refresh both tables whenever the selected user changes.
    inp1.change(fn=update_user,
                inputs=inp1,
                outputs=[df1, df2])

demo.launch(debug=True)