Spaces:

ayoni02
/

Books_recommended_system

Runtime error

App Files Files Community

ayoni02 commited on May 11, 2023

Commit

6a8f68c

1 Parent(s): 5b4c17d

added needed files

Browse files

Files changed (6) hide show

Book reviews/BX-Book-Ratings.csv +0 -0
Book reviews/BX-Users.csv +0 -0
Book reviews/BX_Books.csv +0 -0
README.md +1 -1
app.py +58 -0
it works.ipynb +416 -0

Book reviews/BX-Book-Ratings.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

Book reviews/BX-Users.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

Book reviews/BX_Books.csv ADDED Viewed

Binary file (77.4 MB). View file

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 title: Books Recommended System
 emoji: 📈
-colorFrom: pink
 colorTo: green
 sdk: gradio
 sdk_version: 3.29.0

 ---
 title: Books Recommended System
 emoji: 📈
+colorFrom: red
 colorTo: green
 sdk: gradio
 sdk_version: 3.29.0

app.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import gradio as gr
+import random
+import operator
+import pandas as pd
+from surprise import Dataset, Reader
+from surprise import KNNBasic
+def opendata(a, nrows):
+    df = pd.read_csv(a, nrows=nrows, sep=';', encoding='ISO-8859-1')
+    return df
+def split(df):
+    n=len(df)
+    N=list(range(n))
+    random.seed(2023)
+    random.shuffle(N)
+    train=df.iloc[N[0:(n*4)//5]]
+    test=df.iloc[N[(n*4)//5:]]
+    return train, test
+def red(df):
+    reader = Reader(rating_scale=(1,10)) # rating scale range
+    trainset = Dataset.load_from_df(df[['User-ID','ISBN','Book-Rating']],reader).build_full_trainset()
+    items = trainset.build_anti_testset()
+    return trainset, items
+def mod(df, user, items):
+    algo = KNNBasic()
+    algo.fit(df)
+    user_items = list(filter(lambda x: x[0] == user, items))
+    recommendations = algo.test(user_items)
+    recommendations.sort(key=operator.itemgetter(3), reverse=True)
+    return recommendations
+def gl(num):
+    data = opendata('Book reviews\BX-Book-Ratings.csv', nrows=20_000)
+    books = opendata('Book reviews\BX_Books.csv', nrows=None)
+    mapping_dict = books.set_index("ISBN")["Book-Title"].to_dict()
+    train, test = split(data)
+    users=test['User-ID'].tolist()
+    trainset, items  = red(train)
+    user = users[int(num)]
+    recommendations = mod(trainset, user, items)
+    op = []
+    for r in recommendations[0:5]:
+        try:
+            op.append(f"{mapping_dict[r[1]]} with Estimated Rating {round(r[3],3)}")
+        except:
+            continue
+    return ('\n\n'.join(map(str, op)))
+text = gr.components.Number(label="pick a number between 1 and 1000")
+label = gr.components.Text(label="Picked User Top 5 Recommendations:")
+example = [2, 3]
+intf = gr.Interface(fn=gl, inputs=text, outputs=label, examples=example)
+intf.launch(inline=False)

it works.ipynb ADDED Viewed

	@@ -0,0 +1,416 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "30252107",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import random\n",
+    "import operator\n",
+    "import requests\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy import sparse\n",
+    "import sys\n",
+    "from surprise import Dataset, Reader\n",
+    "from surprise import KNNBasic, SVD\n",
+    "from surprise.model_selection import train_test_split\n",
+    "from surprise import accuracy\n",
+    "from surprise.dataset import DatasetAutoFolds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c40008b6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df1 = pd.read_csv('Book reviews\\BX-Users.csv', sep=';', encoding='ISO-8859-1')\n",
+    "df2 = pd.read_csv('Book reviews\\BX_Books.csv', sep=';', encoding='ISO-8859-1')\n",
+    "df3 = pd.read_csv('Book reviews\\BX-Book-Ratings.csv', sep=';', encoding='ISO-8859-1', nrows=20_000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a422a310",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2180"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "user_ids = df3['User-ID'].tolist()\n",
+    "user_id = []\n",
+    "for i in user_ids:\n",
+    "    if i in user_id:\n",
+    "        continue\n",
+    "    else:\n",
+    "        user_id.append(i)\n",
+    "len(user_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "fea227ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = df3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "663d5ba4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0     12660\n",
+       "8      1694\n",
+       "7      1526\n",
+       "10     1272\n",
+       "9      1105\n",
+       "5       728\n",
+       "6       663\n",
+       "4       170\n",
+       "3       108\n",
+       "2        45\n",
+       "1        29\n",
+       "Name: Book-Rating, dtype: int64"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df3['Book-Rating'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "c85ef134",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n=len(df3)\n",
+    "N=list(range(n))\n",
+    "random.seed(2023)\n",
+    "random.shuffle(N)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "beb6246d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>User-ID</th>\n",
+       "      <th>ISBN</th>\n",
+       "      <th>Book-Rating</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>15849</th>\n",
+       "      <td>2442</td>\n",
+       "      <td>8845252906</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11349</th>\n",
+       "      <td>712</td>\n",
+       "      <td>3784419445</td>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1732</th>\n",
+       "      <td>277427</td>\n",
+       "      <td>0553579274</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18333</th>\n",
+       "      <td>3363</td>\n",
+       "      <td>0553213164</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11806</th>\n",
+       "      <td>882</td>\n",
+       "      <td>0553801945</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       User-ID        ISBN  Book-Rating\n",
+       "15849     2442  8845252906            0\n",
+       "11349      712  3784419445            8\n",
+       "1732    277427  0553579274            0\n",
+       "18333     3363  0553213164           10\n",
+       "11806      882  0553801945            0"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train=data.iloc[N[0:(n*4)//5]]\n",
+    "test=data.iloc[N[(n*4)//5:]]\n",
+    "train.tail()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "f27ca18d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0, 9, 10, 2, 7, 5, 8, 6, 1, 4, 3]\n",
+      "1912\n",
+      "14033\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(train['Book-Rating'].unique().tolist())\n",
+    "print(len(train['User-ID'].unique().tolist()))\n",
+    "print(len(train['ISBN'].unique().tolist()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "94ebe1ac",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'surprise.trainset.Trainset'>\n"
+     ]
+    }
+   ],
+   "source": [
+    "reader = Reader(rating_scale=(1,10)) # rating scale range\n",
+    "trainset = Dataset.load_from_df(train[['User-ID','ISBN','Book-Rating']],reader).build_full_trainset()\n",
+    "print(type(trainset))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "25d0a6ff",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Computing the msd similarity matrix...\n",
+      "Done computing similarity matrix.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<surprise.prediction_algorithms.knns.KNNBasic at 0x11a39f0a3d0>"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Use the KNNBasic algorithm to train the model\n",
+    "algo = KNNBasic()\n",
+    "#algo = SVD()\n",
+    "algo.fit(trainset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "a4155aff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "testset = Dataset.load_from_df(test[['User-ID','ISBN','Book-Rating']],reader).build_full_trainset().build_anti_testset()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "376eb001",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "items = trainset.build_anti_testset()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "dc366b18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "books = df2\n",
+    "mapping_dict = books.set_index(\"ISBN\")[\"Book-Title\"].to_dict()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "2fe30c88",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "users=test['User-ID'].tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "afc45dd3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1928"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "random.seed()\n",
+    "rd = random.randint(0,len(users))\n",
+    "users[rd]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "1fff04c1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "user = users[rd]\n",
+    "user_items = list(filter(lambda x: x[0] == user, items))\n",
+    "recommendations = algo.test(user_items)\n",
+    "recommendations.sort(key=operator.itemgetter(3), reverse=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "de2718ce",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "User 1928 Recommendation Top 5:\n",
+      " [Item] Four Blind Mice, [Estimated Rating] 10\n",
+      " [Item] KJV Giant Print Reference Bible, Personal Size Bronze Edition, [Estimated Rating] 10\n",
+      " [Item] So You Want to Be a Wizard: The First Book in the Young Wizards Series, [Estimated Rating] 10\n",
+      " [Item] The Princess Diaries, [Estimated Rating] 10\n",
+      " [Item] Memoirs of a Geisha, [Estimated Rating] 10\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"User {user} Recommendation Top 5:\")\n",
+    "for r in recommendations[0:5]:\n",
+    "    try:  \n",
+    "        print(f\" [Item] {mapping_dict[r[1]]}, [Estimated Rating] {round(r[3],3)}\")\n",
+    "    except:\n",
+    "        continue"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf7c2fcb",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}