{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "30252107",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "import operator\n",
    "import requests\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy import sparse\n",
    "import sys\n",
    "from surprise import Dataset, Reader\n",
    "from surprise import KNNBasic, SVD\n",
    "from surprise.model_selection import train_test_split\n",
    "from surprise import accuracy\n",
    "from surprise.dataset import DatasetAutoFolds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c40008b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the CSV paths with os.path.join: the previous hard-coded backslash separator\n",
    "# only worked on Windows and relied on an invalid string escape sequence.\n",
    "# NOTE(review): 'BX_Books.csv' uses an underscore while the other files use a hyphen -\n",
    "# the name is kept as-is; confirm it matches the file on disk.\n",
    "DATA_DIR = 'Book reviews'\n",
    "CSV_OPTIONS = {'sep': ';', 'encoding': 'ISO-8859-1'}\n",
    "\n",
    "df1 = pd.read_csv(os.path.join(DATA_DIR, 'BX-Users.csv'), **CSV_OPTIONS)\n",
    "df2 = pd.read_csv(os.path.join(DATA_DIR, 'BX_Books.csv'), **CSV_OPTIONS)\n",
    "# Only the first 20,000 ratings are loaded.\n",
    "df3 = pd.read_csv(os.path.join(DATA_DIR, 'BX-Book-Ratings.csv'), nrows=20_000, **CSV_OPTIONS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a422a310",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2180"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct user IDs in order of first appearance. Series.unique() is hash-based,\n",
    "# replacing the quadratic membership scan over a growing list.\n",
    "user_ids = df3['User-ID'].tolist()\n",
    "user_id = df3['User-ID'].unique().tolist()\n",
    "len(user_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "fea227ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Refer to the ratings frame under a neutral name (an alias, not a copy).\n",
    "data = df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "663d5ba4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     12660\n",
       "8      1694\n",
       "7      1526\n",
       "10     1272\n",
       "9      1105\n",
       "5       728\n",
       "6       663\n",
       "4       170\n",
       "3       108\n",
       "2        45\n",
       "1        29\n",
       "Name: Book-Rating, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How often each rating value occurs in the loaded ratings.\n",
    "df3.loc[:, 'Book-Rating'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c85ef134",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Row positions of df3 in a reproducibly shuffled order (fixed seed).\n",
    "n = df3.shape[0]\n",
    "random.seed(2023)\n",
    "N = [*range(n)]\n",
    "random.shuffle(N)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "beb6246d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>User-ID</th>\n",
       "      <th>ISBN</th>\n",
       "      <th>Book-Rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15849</th>\n",
       "      <td>2442</td>\n",
       "      <td>8845252906</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11349</th>\n",
       "      <td>712</td>\n",
       "      <td>3784419445</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1732</th>\n",
       "      <td>277427</td>\n",
       "      <td>0553579274</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18333</th>\n",
       "      <td>3363</td>\n",
       "      <td>0553213164</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11806</th>\n",
       "      <td>882</td>\n",
       "      <td>0553801945</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       User-ID        ISBN  Book-Rating\n",
       "15849     2442  8845252906            0\n",
       "11349      712  3784419445            8\n",
       "1732    277427  0553579274            0\n",
       "18333     3363  0553213164           10\n",
       "11806      882  0553801945            0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 80/20 split over the shuffled positions: the first four fifths go to train,\n",
    "# the remainder to test.\n",
    "cut = (n * 4) // 5\n",
    "train_positions, test_positions = N[:cut], N[cut:]\n",
    "train = data.iloc[train_positions]\n",
    "test = data.iloc[test_positions]\n",
    "train.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "f27ca18d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 9, 10, 2, 7, 5, 8, 6, 1, 4, 3]\n",
      "1912\n",
      "14033\n"
     ]
    }
   ],
   "source": [
    "# Sanity-check the training split: the rating values present, then the number of\n",
    "# distinct users and distinct books.\n",
    "print(train['Book-Rating'].unique().tolist())\n",
    "for column in ('User-ID', 'ISBN'):\n",
    "    print(len(train[column].unique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "94ebe1ac",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'surprise.trainset.Trainset'>\n"
     ]
    }
   ],
   "source": [
    "# Book-Rating contains 0 as well as 1-10, so the declared scale has to start at 0;\n",
    "# the previous (1, 10) scale did not match the data handed to Surprise.\n",
    "# NOTE(review): a rating of 0 presumably marks an implicit (unrated) interaction in this\n",
    "# dataset - confirm whether those rows should be filtered out before training instead.\n",
    "reader = Reader(rating_scale=(0, 10))\n",
    "trainset = Dataset.load_from_df(train[['User-ID', 'ISBN', 'Book-Rating']], reader).build_full_trainset()\n",
    "print(type(trainset))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "25d0a6ff",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<surprise.prediction_algorithms.knns.KNNBasic at 0x11a39f0a3d0>"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Use the KNNBasic algorithm to train the model\n",
    "algo = KNNBasic()\n",
    "#algo = SVD()\n",
    "algo.fit(trainset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "a4155aff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Held-out ratings as (user, item, rating) tuples. build_testset() returns the actual\n",
    "# ratings of the test rows; build_anti_testset() returned the pairs that were NOT rated,\n",
    "# each carrying a filler rating, which is not a usable test set.\n",
    "testset = Dataset.load_from_df(test[['User-ID', 'ISBN', 'Book-Rating']], reader).build_full_trainset().build_testset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "376eb001",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Candidate pairs for recommendation: the (user, book) combinations that have no\n",
    "# rating in the training set.\n",
    "items = trainset.build_anti_testset(fill=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "dc366b18",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ISBN -> title lookup, used to print readable book names.\n",
    "books = df2\n",
    "mapping_dict = dict(zip(books['ISBN'], books['Book-Title']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "2fe30c88",
   "metadata": {},
   "outputs": [],
   "source": [
    "# User IDs of the held-out rows, one entry per row (duplicates kept).\n",
    "users = test.loc[:, 'User-ID'].to_list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "afc45dd3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1928"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pick a random position in users. randrange excludes the upper bound, whereas\n",
    "# randint(0, len(users)) could return len(users) and make users[rd] raise IndexError.\n",
    "random.seed()  # re-seed non-deterministically so each run can pick a different user\n",
    "rd = random.randrange(len(users))\n",
    "users[rd]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "1fff04c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score every candidate book for the chosen user and rank by estimated rating,\n",
    "# highest first.\n",
    "user = users[rd]\n",
    "user_items = [pair for pair in items if pair[0] == user]\n",
    "recommendations = sorted(algo.test(user_items), key=operator.attrgetter('est'), reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "de2718ce",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "User 1928 Recommendation Top 5:\n",
      " [Item] Four Blind Mice, [Estimated Rating] 10\n",
      " [Item] KJV Giant Print Reference Bible, Personal Size Bronze Edition, [Estimated Rating] 10\n",
      " [Item] So You Want to Be a Wizard: The First Book in the Young Wizards Series, [Estimated Rating] 10\n",
      " [Item] The Princess Diaries, [Estimated Rating] 10\n",
      " [Item] Memoirs of a Geisha, [Estimated Rating] 10\n"
     ]
    }
   ],
   "source": [
    "print(f\"User {user} Recommendation Top 5:\")\n",
    "for r in recommendations[0:5]:\n",
    "    # Skip books that have no title in the lookup, but let any other error surface\n",
    "    # instead of hiding it behind a bare except.\n",
    "    try:\n",
    "        title = mapping_dict[r[1]]\n",
    "    except KeyError:\n",
    "        continue\n",
    "    print(f\" [Item] {title}, [Estimated Rating] {round(r[3],3)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf7c2fcb",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}