import pandas as pd import numpy as np from zipfile import ZipFile import tensorflow as tf from tensorflow import keras from pathlib import Path import matplotlib.pyplot as plt import gradio as gr from huggingface_hub import from_pretrained_keras from datasets import load_dataset book_data_load = load_dataset("hqasmei/collaborative-filtering-dataset", data_files="book_data.csv") filtered_data_load = load_dataset("hqasmei/collaborative-filtering-dataset", data_files="filtered_data.csv") book_data_arr = [] filtered_data_arr = [] for item in book_data_load['train']: book_data_arr.append(item) for item in filtered_data_load['train']: filtered_data_arr.append(item) book_df = pd.DataFrame(book_data_arr) filtered_df = pd.DataFrame(filtered_data_arr) # Make the encodings for users user_ids = filtered_df["user_id"].unique().tolist() user2user_encoded = {x: i for i, x in enumerate(user_ids)} user_encoded2user = {i: x for i, x in enumerate(user_ids)} filtered_df["user"] = filtered_df["user_id"].map(user2user_encoded) num_users = len(user2user_encoded) # Make the encodings for books book_ids = filtered_df["book_id"].unique().tolist() book2book_encoded = {x: i for i, x in enumerate(book_ids)} book_encoded2book = {i: x for i, x in enumerate(book_ids)} filtered_df["book"] = filtered_df["book_id"].map(book2book_encoded) num_books = len(book_encoded2book) # Set ratings type filtered_df["rating"] = filtered_df["rating"].values.astype(np.float32) # Load model model = from_pretrained_keras('hqasmei/collaborative-filtering-model') def update_user(id): return get_top_rated_books_from_user(id), get_recommendations(id) def get_top_rated_books_from_user(id): decoded_id = user_encoded2user.get(id) # Get the top rated books by this user books_read_by_user = filtered_df[filtered_df.user_id == decoded_id] top_books_user = (books_read_by_user.sort_values(by="rating", ascending=False).head(5).book_id.values) book_df_rows = book_df[book_df["book_id"].isin(top_books_user)] book_df_rows = book_df_rows.drop('book_id', axis=1) return book_df_rows def random_user(): return update_user(np.random.randint(0, num_users-1)) def get_recommendations(id): decoded_id = user_encoded2user.get(id) # Get the top 10 recommended books for this user books_read_by_user = filtered_df[filtered_df.user_id == decoded_id] books_not_read = book_df[~book_df["book_id"].isin(books_read_by_user.book_id.values)]["book_id"] books_not_read = list(set(books_not_read).intersection(set(book2book_encoded.keys()))) books_not_read = [[book2book_encoded.get(x)] for x in books_not_read] # Encoded user id encoded_id = id # Create data [[user_id, book_id],...] user_book_array = np.hstack(([[encoded_id]] * len(books_not_read), books_not_read)) # Predict ratings for books not read ratings = model.predict(user_book_array).flatten() # Get indices of top ten books top_ratings_indices = ratings.argsort()[-10:][::-1] # Decode each book recommended_book_ids = [book_encoded2book.get(books_not_read[x][0]) for x in top_ratings_indices] recommended_books = book_df[book_df["book_id"].isin(recommended_book_ids)] recommended_books = recommended_books.drop('book_id', axis=1) return recommended_books demo = gr.Blocks() with demo: gr.Markdown("""