ayoni02 committed on
Commit
6a8f68c
·
1 Parent(s): 5b4c17d

added needed files

Browse files
Book reviews/BX-Book-Ratings.csv ADDED
The diff for this file is too large to render. See raw diff
 
Book reviews/BX-Users.csv ADDED
The diff for this file is too large to render. See raw diff
 
Book reviews/BX_Books.csv ADDED
Binary file (77.4 MB). View file
 
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  title: Books Recommended System
3
  emoji: 📈
4
- colorFrom: pink
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.29.0
 
1
  ---
2
  title: Books Recommended System
3
  emoji: 📈
4
+ colorFrom: red
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.29.0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import random
3
+ import operator
4
+ import pandas as pd
5
+ from surprise import Dataset, Reader
6
+ from surprise import KNNBasic
7
+
8
def opendata(a, nrows):
    """Read a semicolon-delimited, ISO-8859-1 encoded CSV into a DataFrame.

    Args:
        a: Path (or any other source ``pd.read_csv`` accepts) of the CSV file.
        nrows: Maximum number of data rows to read; ``None`` reads every row.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(
        a,
        sep=';',
        encoding='ISO-8859-1',
        nrows=nrows,
    )
11
+
12
def split(df):
    """Shuffle the rows of *df* reproducibly and split them 80/20.

    The row order is a fixed permutation (seed 2023), so repeated calls on
    frames of the same length always produce the same partition.

    Args:
        df: DataFrame whose rows are to be partitioned.

    Returns:
        A ``(train, test)`` tuple of DataFrames: the first ``(n * 4) // 5``
        shuffled rows and the remaining rows, respectively.
    """
    n = len(df)
    order = list(range(n))
    # A private generator seeded with 2023 yields the same permutation as
    # seeding the module-level generator did, but leaves the global `random`
    # state untouched for any other code that relies on it.
    random.Random(2023).shuffle(order)
    cut = (n * 4) // 5
    train = df.iloc[order[:cut]]
    test = df.iloc[order[cut:]]
    return train, test
20
+
21
def red(df):
    """Turn a ratings frame into a Surprise trainset plus its anti-testset.

    Args:
        df: DataFrame providing the 'User-ID', 'ISBN' and 'Book-Rating'
            columns (in that order they are handed to Surprise).

    Returns:
        A ``(trainset, items)`` tuple: the full trainset built from *df* and
        the result of ``trainset.build_anti_testset()``.
    """
    rating_columns = ['User-ID', 'ISBN', 'Book-Rating']
    # NOTE(review): the scale is declared as 1-10, but the ratings file also
    # appears to contain 0 values - confirm whether those rows should be
    # filtered out or the scale widened.
    reader = Reader(rating_scale=(1, 10))
    dataset = Dataset.load_from_df(df[rating_columns], reader)
    trainset = dataset.build_full_trainset()
    return trainset, trainset.build_anti_testset()
26
+
27
+
28
def mod(df, user, items):
    """Fit a KNNBasic model and rank one user's candidate items.

    Args:
        df: Object passed straight to ``KNNBasic.fit`` (the caller in this
            file supplies a Surprise trainset, not a DataFrame).
        user: User id; matched against the first element of each entry in
            *items*.
        items: Iterable of tuples whose first element is a user id.

    Returns:
        The list returned by ``algo.test`` for that user's entries, ordered
        by the value at index 3 of each prediction, highest first.
    """
    algo = KNNBasic()
    algo.fit(df)
    candidates = [entry for entry in items if entry[0] == user]
    predictions = algo.test(candidates)
    return sorted(predictions, key=operator.itemgetter(3), reverse=True)
35
+
36
def gl(num):
    """Return the top-5 book recommendations for the test user at index *num*.

    Loads the first 20,000 ratings and the full book catalogue, splits the
    ratings into train/test, picks the user found at position *num* of the
    test rows, fits the recommender on the training rows and formats that
    user's five highest-estimated books.

    NOTE: the data is re-read and the model re-fitted on every call.

    Args:
        num: Position (convertible with ``int``) into the list of test-row
            user ids.

    Returns:
        A string with one "<title> with Estimated Rating <value>" entry per
        recommendation, separated by blank lines. Recommendations whose ISBN
        is missing from the book catalogue are skipped. If *num* is missing,
        not a finite number, or out of range, a short message asking for a
        valid number is returned instead.
    """
    # Forward slashes resolve on both Windows and POSIX hosts; the former
    # backslash-separated literals only worked on Windows and relied on the
    # invalid escape sequence "\B" being kept verbatim.
    data = opendata('Book reviews/BX-Book-Ratings.csv', nrows=20_000)
    books = opendata('Book reviews/BX_Books.csv', nrows=None)
    mapping_dict = books.set_index("ISBN")["Book-Title"].to_dict()
    train, test = split(data)
    users = test['User-ID'].tolist()

    # Validate the requested index before the expensive model fit.
    try:
        user = users[int(num)]
    except (TypeError, ValueError, OverflowError, IndexError):
        return f"Please pick a whole number between 0 and {len(users) - 1}."

    trainset, items = red(train)
    recommendations = mod(trainset, user, items)

    op = []
    for r in recommendations[0:5]:
        title = mapping_dict.get(r[1])
        if title is None:
            # ISBN is not present in the catalogue: nothing to display.
            continue
        op.append(f"{title} with Estimated Rating {round(r[3],3)}")
    return '\n\n'.join(op)
52
+
53
# Gradio wiring: a numeric input selects the user, a text box shows the result.
text = gr.components.Number(label="pick a number between 1 and 1000")
label = gr.components.Text(label="Picked User Top 5 Recommendations:")
example = [2, 3]

intf = gr.Interface(
    fn=gl,
    inputs=text,
    outputs=label,
    examples=example,
)
intf.launch(inline=False)
it works.ipynb ADDED
@@ -0,0 +1,416 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "30252107",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os\n",
11
+ "import random\n",
12
+ "import operator\n",
13
+ "import requests\n",
14
+ "import numpy as np\n",
15
+ "import pandas as pd\n",
16
+ "from scipy import sparse\n",
17
+ "import sys\n",
18
+ "from surprise import Dataset, Reader\n",
19
+ "from surprise import KNNBasic, SVD\n",
20
+ "from surprise.model_selection import train_test_split\n",
21
+ "from surprise import accuracy\n",
22
+ "from surprise.dataset import DatasetAutoFolds"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 2,
28
+ "id": "c40008b6",
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "df1 = pd.read_csv('Book reviews\\BX-Users.csv', sep=';', encoding='ISO-8859-1')\n",
33
+ "df2 = pd.read_csv('Book reviews\\BX_Books.csv', sep=';', encoding='ISO-8859-1')\n",
34
+ "df3 = pd.read_csv('Book reviews\\BX-Book-Ratings.csv', sep=';', encoding='ISO-8859-1', nrows=20_000)"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 3,
40
+ "id": "a422a310",
41
+ "metadata": {},
42
+ "outputs": [
43
+ {
44
+ "data": {
45
+ "text/plain": [
46
+ "2180"
47
+ ]
48
+ },
49
+ "execution_count": 3,
50
+ "metadata": {},
51
+ "output_type": "execute_result"
52
+ }
53
+ ],
54
+ "source": [
55
+ "user_ids = df3['User-ID'].tolist()\n",
56
+ "user_id = []\n",
57
+ "for i in user_ids:\n",
58
+ " if i in user_id:\n",
59
+ " continue\n",
60
+ " else:\n",
61
+ " user_id.append(i)\n",
62
+ "len(user_id)"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": 4,
68
+ "id": "fea227ef",
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "data = df3"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": 5,
78
+ "id": "663d5ba4",
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "data": {
83
+ "text/plain": [
84
+ "0 12660\n",
85
+ "8 1694\n",
86
+ "7 1526\n",
87
+ "10 1272\n",
88
+ "9 1105\n",
89
+ "5 728\n",
90
+ "6 663\n",
91
+ "4 170\n",
92
+ "3 108\n",
93
+ "2 45\n",
94
+ "1 29\n",
95
+ "Name: Book-Rating, dtype: int64"
96
+ ]
97
+ },
98
+ "execution_count": 5,
99
+ "metadata": {},
100
+ "output_type": "execute_result"
101
+ }
102
+ ],
103
+ "source": [
104
+ "df3['Book-Rating'].value_counts()"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "execution_count": 6,
110
+ "id": "c85ef134",
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "n=len(df3)\n",
115
+ "N=list(range(n))\n",
116
+ "random.seed(2023)\n",
117
+ "random.shuffle(N)"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 7,
123
+ "id": "beb6246d",
124
+ "metadata": {},
125
+ "outputs": [
126
+ {
127
+ "data": {
128
+ "text/html": [
129
+ "<div>\n",
130
+ "<style scoped>\n",
131
+ " .dataframe tbody tr th:only-of-type {\n",
132
+ " vertical-align: middle;\n",
133
+ " }\n",
134
+ "\n",
135
+ " .dataframe tbody tr th {\n",
136
+ " vertical-align: top;\n",
137
+ " }\n",
138
+ "\n",
139
+ " .dataframe thead th {\n",
140
+ " text-align: right;\n",
141
+ " }\n",
142
+ "</style>\n",
143
+ "<table border=\"1\" class=\"dataframe\">\n",
144
+ " <thead>\n",
145
+ " <tr style=\"text-align: right;\">\n",
146
+ " <th></th>\n",
147
+ " <th>User-ID</th>\n",
148
+ " <th>ISBN</th>\n",
149
+ " <th>Book-Rating</th>\n",
150
+ " </tr>\n",
151
+ " </thead>\n",
152
+ " <tbody>\n",
153
+ " <tr>\n",
154
+ " <th>15849</th>\n",
155
+ " <td>2442</td>\n",
156
+ " <td>8845252906</td>\n",
157
+ " <td>0</td>\n",
158
+ " </tr>\n",
159
+ " <tr>\n",
160
+ " <th>11349</th>\n",
161
+ " <td>712</td>\n",
162
+ " <td>3784419445</td>\n",
163
+ " <td>8</td>\n",
164
+ " </tr>\n",
165
+ " <tr>\n",
166
+ " <th>1732</th>\n",
167
+ " <td>277427</td>\n",
168
+ " <td>0553579274</td>\n",
169
+ " <td>0</td>\n",
170
+ " </tr>\n",
171
+ " <tr>\n",
172
+ " <th>18333</th>\n",
173
+ " <td>3363</td>\n",
174
+ " <td>0553213164</td>\n",
175
+ " <td>10</td>\n",
176
+ " </tr>\n",
177
+ " <tr>\n",
178
+ " <th>11806</th>\n",
179
+ " <td>882</td>\n",
180
+ " <td>0553801945</td>\n",
181
+ " <td>0</td>\n",
182
+ " </tr>\n",
183
+ " </tbody>\n",
184
+ "</table>\n",
185
+ "</div>"
186
+ ],
187
+ "text/plain": [
188
+ " User-ID ISBN Book-Rating\n",
189
+ "15849 2442 8845252906 0\n",
190
+ "11349 712 3784419445 8\n",
191
+ "1732 277427 0553579274 0\n",
192
+ "18333 3363 0553213164 10\n",
193
+ "11806 882 0553801945 0"
194
+ ]
195
+ },
196
+ "execution_count": 7,
197
+ "metadata": {},
198
+ "output_type": "execute_result"
199
+ }
200
+ ],
201
+ "source": [
202
+ "train=data.iloc[N[0:(n*4)//5]]\n",
203
+ "test=data.iloc[N[(n*4)//5:]]\n",
204
+ "train.tail()"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "execution_count": 8,
210
+ "id": "f27ca18d",
211
+ "metadata": {},
212
+ "outputs": [
213
+ {
214
+ "name": "stdout",
215
+ "output_type": "stream",
216
+ "text": [
217
+ "[0, 9, 10, 2, 7, 5, 8, 6, 1, 4, 3]\n",
218
+ "1912\n",
219
+ "14033\n"
220
+ ]
221
+ }
222
+ ],
223
+ "source": [
224
+ "print(train['Book-Rating'].unique().tolist())\n",
225
+ "print(len(train['User-ID'].unique().tolist()))\n",
226
+ "print(len(train['ISBN'].unique().tolist()))"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": 9,
232
+ "id": "94ebe1ac",
233
+ "metadata": {},
234
+ "outputs": [
235
+ {
236
+ "name": "stdout",
237
+ "output_type": "stream",
238
+ "text": [
239
+ "<class 'surprise.trainset.Trainset'>\n"
240
+ ]
241
+ }
242
+ ],
243
+ "source": [
244
+ "reader = Reader(rating_scale=(1,10)) # rating scale range\n",
245
+ "trainset = Dataset.load_from_df(train[['User-ID','ISBN','Book-Rating']],reader).build_full_trainset()\n",
246
+ "print(type(trainset))"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 10,
252
+ "id": "25d0a6ff",
253
+ "metadata": {},
254
+ "outputs": [
255
+ {
256
+ "name": "stdout",
257
+ "output_type": "stream",
258
+ "text": [
259
+ "Computing the msd similarity matrix...\n",
260
+ "Done computing similarity matrix.\n"
261
+ ]
262
+ },
263
+ {
264
+ "data": {
265
+ "text/plain": [
266
+ "<surprise.prediction_algorithms.knns.KNNBasic at 0x11a39f0a3d0>"
267
+ ]
268
+ },
269
+ "execution_count": 10,
270
+ "metadata": {},
271
+ "output_type": "execute_result"
272
+ }
273
+ ],
274
+ "source": [
275
+ "# Use the KNNBasic algorithm to train the model\n",
276
+ "algo = KNNBasic()\n",
277
+ "#algo = SVD()\n",
278
+ "algo.fit(trainset)"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": 11,
284
+ "id": "a4155aff",
285
+ "metadata": {},
286
+ "outputs": [],
287
+ "source": [
288
+ "testset = Dataset.load_from_df(test[['User-ID','ISBN','Book-Rating']],reader).build_full_trainset().build_anti_testset()"
289
+ ]
290
+ },
291
+ {
292
+ "cell_type": "code",
293
+ "execution_count": 12,
294
+ "id": "376eb001",
295
+ "metadata": {},
296
+ "outputs": [],
297
+ "source": [
298
+ "items = trainset.build_anti_testset()"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": 13,
304
+ "id": "dc366b18",
305
+ "metadata": {},
306
+ "outputs": [],
307
+ "source": [
308
+ "books = df2\n",
309
+ "mapping_dict = books.set_index(\"ISBN\")[\"Book-Title\"].to_dict()"
310
+ ]
311
+ },
312
+ {
313
+ "cell_type": "code",
314
+ "execution_count": 14,
315
+ "id": "2fe30c88",
316
+ "metadata": {},
317
+ "outputs": [],
318
+ "source": [
319
+ "users=test['User-ID'].tolist()"
320
+ ]
321
+ },
322
+ {
323
+ "cell_type": "code",
324
+ "execution_count": 29,
325
+ "id": "afc45dd3",
326
+ "metadata": {},
327
+ "outputs": [
328
+ {
329
+ "data": {
330
+ "text/plain": [
331
+ "1928"
332
+ ]
333
+ },
334
+ "execution_count": 29,
335
+ "metadata": {},
336
+ "output_type": "execute_result"
337
+ }
338
+ ],
339
+ "source": [
340
+ "random.seed()\n",
341
+ "rd = random.randint(0,len(users))\n",
342
+ "users[rd]"
343
+ ]
344
+ },
345
+ {
346
+ "cell_type": "code",
347
+ "execution_count": 30,
348
+ "id": "1fff04c1",
349
+ "metadata": {},
350
+ "outputs": [],
351
+ "source": [
352
+ "user = users[rd]\n",
353
+ "user_items = list(filter(lambda x: x[0] == user, items))\n",
354
+ "recommendations = algo.test(user_items)\n",
355
+ "recommendations.sort(key=operator.itemgetter(3), reverse=True)"
356
+ ]
357
+ },
358
+ {
359
+ "cell_type": "code",
360
+ "execution_count": 31,
361
+ "id": "de2718ce",
362
+ "metadata": {},
363
+ "outputs": [
364
+ {
365
+ "name": "stdout",
366
+ "output_type": "stream",
367
+ "text": [
368
+ "User 1928 Recommendation Top 5:\n",
369
+ " [Item] Four Blind Mice, [Estimated Rating] 10\n",
370
+ " [Item] KJV Giant Print Reference Bible, Personal Size Bronze Edition, [Estimated Rating] 10\n",
371
+ " [Item] So You Want to Be a Wizard: The First Book in the Young Wizards Series, [Estimated Rating] 10\n",
372
+ " [Item] The Princess Diaries, [Estimated Rating] 10\n",
373
+ " [Item] Memoirs of a Geisha, [Estimated Rating] 10\n"
374
+ ]
375
+ }
376
+ ],
377
+ "source": [
378
+ "print(f\"User {user} Recommendation Top 5:\")\n",
379
+ "for r in recommendations[0:5]:\n",
380
+ " try: \n",
381
+ " print(f\" [Item] {mapping_dict[r[1]]}, [Estimated Rating] {round(r[3],3)}\")\n",
382
+ " except:\n",
383
+ " continue"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": null,
389
+ "id": "bf7c2fcb",
390
+ "metadata": {},
391
+ "outputs": [],
392
+ "source": []
393
+ }
394
+ ],
395
+ "metadata": {
396
+ "kernelspec": {
397
+ "display_name": "Python 3 (ipykernel)",
398
+ "language": "python",
399
+ "name": "python3"
400
+ },
401
+ "language_info": {
402
+ "codemirror_mode": {
403
+ "name": "ipython",
404
+ "version": 3
405
+ },
406
+ "file_extension": ".py",
407
+ "mimetype": "text/x-python",
408
+ "name": "python",
409
+ "nbconvert_exporter": "python",
410
+ "pygments_lexer": "ipython3",
411
+ "version": "3.9.12"
412
+ }
413
+ },
414
+ "nbformat": 4,
415
+ "nbformat_minor": 5
416
+ }