{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "30252107",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "import operator\n",
    "import requests\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy import sparse\n",
    "import sys\n",
    "from surprise import Dataset, Reader\n",
    "from surprise import KNNBasic, SVD\n",
    "from surprise.model_selection import train_test_split\n",
    "from surprise import accuracy\n",
    "from surprise.dataset import DatasetAutoFolds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c40008b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the three semicolon-separated, ISO-8859-1 encoded CSV files.\n",
    "# Paths are assembled with os.path.join so the separator matches the host OS\n",
    "# and no backslash ends up inside a Python string literal.\n",
    "DATA_DIR = 'Book reviews'\n",
    "\n",
    "df1 = pd.read_csv(os.path.join(DATA_DIR, 'BX-Users.csv'), sep=';', encoding='ISO-8859-1')\n",
    "df2 = pd.read_csv(os.path.join(DATA_DIR, 'BX_Books.csv'), sep=';', encoding='ISO-8859-1')\n",
    "# Only the first 20,000 rating rows are read.\n",
    "df3 = pd.read_csv(os.path.join(DATA_DIR, 'BX-Book-Ratings.csv'), sep=';', encoding='ISO-8859-1', nrows=20_000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a422a310",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2180"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# user_ids: every 'User-ID' value in df3, one entry per rating row.\n",
    "user_ids = df3['User-ID'].tolist()\n",
    "# user_id: the distinct ids in first-seen order. dict.fromkeys de-duplicates in\n",
    "# a single pass instead of scanning a growing list for every row.\n",
    "user_id = list(dict.fromkeys(user_ids))\n",
    "# Number of distinct users among the loaded ratings.\n",
    "len(user_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "fea227ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# data is a second name for the same DataFrame object as df3, not a copy.\n",
    "data = df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "663d5ba4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0 12660\n",
       "8 1694\n",
       "7 1526\n",
       "10 1272\n",
       "9 1105\n",
       "5 728\n",
       "6 663\n",
       "4 170\n",
       "3 108\n",
       "2 45\n",
       "1 29\n",
       "Name: Book-Rating, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How many loaded rows carry each 'Book-Rating' value, most frequent first.\n",
    "df3['Book-Rating'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c85ef134",
   "metadata": {},
   "outputs": [],
   "source": [
    "n=len(df3)\n",
    "N=list(range(n))\n",
"random.seed(2023)\n", "random.shuffle(N)" ] }, { "cell_type": "code", "execution_count": 7, "id": "beb6246d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | User-ID | \n", "ISBN | \n", "Book-Rating | \n", "
---|---|---|---|
15849 | \n", "2442 | \n", "8845252906 | \n", "0 | \n", "
11349 | \n", "712 | \n", "3784419445 | \n", "8 | \n", "
1732 | \n", "277427 | \n", "0553579274 | \n", "0 | \n", "
18333 | \n", "3363 | \n", "0553213164 | \n", "10 | \n", "
11806 | \n", "882 | \n", "0553801945 | \n", "0 | \n", "