sujoy0011 committed on
Commit
7c953c6
·
verified ·
1 Parent(s): 5c82ee4

Upload 7 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tmdb_5000_credits.csv filter=lfs diff=lfs merge=lfs -text
Movie Recommendation System.png ADDED
Movie_Recommendation_System.ipynb ADDED
@@ -0,0 +1,861 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import numpy as np\n",
11
+ "import ast\n"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "credits = pd.read_csv('tmdb_5000_credits.csv')\n",
21
+ "movies = pd.read_csv('tmdb_5000_movies.csv')"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "text/html": [
32
+ "<div>\n",
33
+ "<style scoped>\n",
34
+ " .dataframe tbody tr th:only-of-type {\n",
35
+ " vertical-align: middle;\n",
36
+ " }\n",
37
+ "\n",
38
+ " .dataframe tbody tr th {\n",
39
+ " vertical-align: top;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe thead th {\n",
43
+ " text-align: right;\n",
44
+ " }\n",
45
+ "</style>\n",
46
+ "<table border=\"1\" class=\"dataframe\">\n",
47
+ " <thead>\n",
48
+ " <tr style=\"text-align: right;\">\n",
49
+ " <th></th>\n",
50
+ " <th>movie_id</th>\n",
51
+ " <th>title</th>\n",
52
+ " <th>cast</th>\n",
53
+ " <th>crew</th>\n",
54
+ " </tr>\n",
55
+ " </thead>\n",
56
+ " <tbody>\n",
57
+ " <tr>\n",
58
+ " <th>0</th>\n",
59
+ " <td>19995</td>\n",
60
+ " <td>Avatar</td>\n",
61
+ " <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
62
+ " <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
63
+ " </tr>\n",
64
+ " <tr>\n",
65
+ " <th>1</th>\n",
66
+ " <td>285</td>\n",
67
+ " <td>Pirates of the Caribbean: At World's End</td>\n",
68
+ " <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
69
+ " <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
70
+ " </tr>\n",
71
+ " <tr>\n",
72
+ " <th>2</th>\n",
73
+ " <td>206647</td>\n",
74
+ " <td>Spectre</td>\n",
75
+ " <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
76
+ " <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
77
+ " </tr>\n",
78
+ " <tr>\n",
79
+ " <th>3</th>\n",
80
+ " <td>49026</td>\n",
81
+ " <td>The Dark Knight Rises</td>\n",
82
+ " <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
83
+ " <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
84
+ " </tr>\n",
85
+ " <tr>\n",
86
+ " <th>4</th>\n",
87
+ " <td>49529</td>\n",
88
+ " <td>John Carter</td>\n",
89
+ " <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
90
+ " <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
91
+ " </tr>\n",
92
+ " </tbody>\n",
93
+ "</table>\n",
94
+ "</div>"
95
+ ],
96
+ "text/plain": [
97
+ " movie_id title \\\n",
98
+ "0 19995 Avatar \n",
99
+ "1 285 Pirates of the Caribbean: At World's End \n",
100
+ "2 206647 Spectre \n",
101
+ "3 49026 The Dark Knight Rises \n",
102
+ "4 49529 John Carter \n",
103
+ "\n",
104
+ " cast \\\n",
105
+ "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n",
106
+ "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n",
107
+ "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n",
108
+ "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n",
109
+ "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n",
110
+ "\n",
111
+ " crew \n",
112
+ "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n",
113
+ "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n",
114
+ "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n",
115
+ "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n",
116
+ "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... "
117
+ ]
118
+ },
119
+ "execution_count": 3,
120
+ "metadata": {},
121
+ "output_type": "execute_result"
122
+ }
123
+ ],
124
+ "source": [
125
+ "credits.head()"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": 4,
131
+ "metadata": {},
132
+ "outputs": [
133
+ {
134
+ "data": {
135
+ "text/html": [
136
+ "<div>\n",
137
+ "<style scoped>\n",
138
+ " .dataframe tbody tr th:only-of-type {\n",
139
+ " vertical-align: middle;\n",
140
+ " }\n",
141
+ "\n",
142
+ " .dataframe tbody tr th {\n",
143
+ " vertical-align: top;\n",
144
+ " }\n",
145
+ "\n",
146
+ " .dataframe thead th {\n",
147
+ " text-align: right;\n",
148
+ " }\n",
149
+ "</style>\n",
150
+ "<table border=\"1\" class=\"dataframe\">\n",
151
+ " <thead>\n",
152
+ " <tr style=\"text-align: right;\">\n",
153
+ " <th></th>\n",
154
+ " <th>budget</th>\n",
155
+ " <th>genres</th>\n",
156
+ " <th>homepage</th>\n",
157
+ " <th>id</th>\n",
158
+ " <th>keywords</th>\n",
159
+ " <th>original_language</th>\n",
160
+ " <th>original_title</th>\n",
161
+ " <th>overview</th>\n",
162
+ " <th>popularity</th>\n",
163
+ " <th>production_companies</th>\n",
164
+ " <th>production_countries</th>\n",
165
+ " <th>release_date</th>\n",
166
+ " <th>revenue</th>\n",
167
+ " <th>runtime</th>\n",
168
+ " <th>spoken_languages</th>\n",
169
+ " <th>status</th>\n",
170
+ " <th>tagline</th>\n",
171
+ " <th>title</th>\n",
172
+ " <th>vote_average</th>\n",
173
+ " <th>vote_count</th>\n",
174
+ " </tr>\n",
175
+ " </thead>\n",
176
+ " <tbody>\n",
177
+ " <tr>\n",
178
+ " <th>0</th>\n",
179
+ " <td>237000000</td>\n",
180
+ " <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
181
+ " <td>http://www.avatarmovie.com/</td>\n",
182
+ " <td>19995</td>\n",
183
+ " <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
184
+ " <td>en</td>\n",
185
+ " <td>Avatar</td>\n",
186
+ " <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
187
+ " <td>150.437577</td>\n",
188
+ " <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n",
189
+ " <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
190
+ " <td>2009-12-10</td>\n",
191
+ " <td>2787965087</td>\n",
192
+ " <td>162.0</td>\n",
193
+ " <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
194
+ " <td>Released</td>\n",
195
+ " <td>Enter the World of Pandora.</td>\n",
196
+ " <td>Avatar</td>\n",
197
+ " <td>7.2</td>\n",
198
+ " <td>11800</td>\n",
199
+ " </tr>\n",
200
+ " </tbody>\n",
201
+ "</table>\n",
202
+ "</div>"
203
+ ],
204
+ "text/plain": [
205
+ " budget genres \\\n",
206
+ "0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n",
207
+ "\n",
208
+ " homepage id \\\n",
209
+ "0 http://www.avatarmovie.com/ 19995 \n",
210
+ "\n",
211
+ " keywords original_language \\\n",
212
+ "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n",
213
+ "\n",
214
+ " original_title overview \\\n",
215
+ "0 Avatar In the 22nd century, a paraplegic Marine is di... \n",
216
+ "\n",
217
+ " popularity production_companies \\\n",
218
+ "0 150.437577 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... \n",
219
+ "\n",
220
+ " production_countries release_date revenue \\\n",
221
+ "0 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2009-12-10 2787965087 \n",
222
+ "\n",
223
+ " runtime spoken_languages status \\\n",
224
+ "0 162.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n",
225
+ "\n",
226
+ " tagline title vote_average vote_count \n",
227
+ "0 Enter the World of Pandora. Avatar 7.2 11800 "
228
+ ]
229
+ },
230
+ "execution_count": 4,
231
+ "metadata": {},
232
+ "output_type": "execute_result"
233
+ }
234
+ ],
235
+ "source": [
236
+ "movies.head(1)"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": 5,
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": [
245
+ "movies = movies.merge(credits, left_on='title', right_on='title')"
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "code",
250
+ "execution_count": 6,
251
+ "metadata": {},
252
+ "outputs": [
253
+ {
254
+ "data": {
255
+ "text/html": [
256
+ "<div>\n",
257
+ "<style scoped>\n",
258
+ " .dataframe tbody tr th:only-of-type {\n",
259
+ " vertical-align: middle;\n",
260
+ " }\n",
261
+ "\n",
262
+ " .dataframe tbody tr th {\n",
263
+ " vertical-align: top;\n",
264
+ " }\n",
265
+ "\n",
266
+ " .dataframe thead th {\n",
267
+ " text-align: right;\n",
268
+ " }\n",
269
+ "</style>\n",
270
+ "<table border=\"1\" class=\"dataframe\">\n",
271
+ " <thead>\n",
272
+ " <tr style=\"text-align: right;\">\n",
273
+ " <th></th>\n",
274
+ " <th>budget</th>\n",
275
+ " <th>genres</th>\n",
276
+ " <th>homepage</th>\n",
277
+ " <th>id</th>\n",
278
+ " <th>keywords</th>\n",
279
+ " <th>original_language</th>\n",
280
+ " <th>original_title</th>\n",
281
+ " <th>overview</th>\n",
282
+ " <th>popularity</th>\n",
283
+ " <th>production_companies</th>\n",
284
+ " <th>...</th>\n",
285
+ " <th>runtime</th>\n",
286
+ " <th>spoken_languages</th>\n",
287
+ " <th>status</th>\n",
288
+ " <th>tagline</th>\n",
289
+ " <th>title</th>\n",
290
+ " <th>vote_average</th>\n",
291
+ " <th>vote_count</th>\n",
292
+ " <th>movie_id</th>\n",
293
+ " <th>cast</th>\n",
294
+ " <th>crew</th>\n",
295
+ " </tr>\n",
296
+ " </thead>\n",
297
+ " <tbody>\n",
298
+ " <tr>\n",
299
+ " <th>0</th>\n",
300
+ " <td>237000000</td>\n",
301
+ " <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
302
+ " <td>http://www.avatarmovie.com/</td>\n",
303
+ " <td>19995</td>\n",
304
+ " <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
305
+ " <td>en</td>\n",
306
+ " <td>Avatar</td>\n",
307
+ " <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
308
+ " <td>150.437577</td>\n",
309
+ " <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n",
310
+ " <td>...</td>\n",
311
+ " <td>162.0</td>\n",
312
+ " <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
313
+ " <td>Released</td>\n",
314
+ " <td>Enter the World of Pandora.</td>\n",
315
+ " <td>Avatar</td>\n",
316
+ " <td>7.2</td>\n",
317
+ " <td>11800</td>\n",
318
+ " <td>19995</td>\n",
319
+ " <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
320
+ " <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
321
+ " </tr>\n",
322
+ " </tbody>\n",
323
+ "</table>\n",
324
+ "<p>1 rows × 23 columns</p>\n",
325
+ "</div>"
326
+ ],
327
+ "text/plain": [
328
+ " budget genres \\\n",
329
+ "0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n",
330
+ "\n",
331
+ " homepage id \\\n",
332
+ "0 http://www.avatarmovie.com/ 19995 \n",
333
+ "\n",
334
+ " keywords original_language \\\n",
335
+ "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n",
336
+ "\n",
337
+ " original_title overview \\\n",
338
+ "0 Avatar In the 22nd century, a paraplegic Marine is di... \n",
339
+ "\n",
340
+ " popularity production_companies ... runtime \\\n",
341
+ "0 150.437577 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... ... 162.0 \n",
342
+ "\n",
343
+ " spoken_languages status \\\n",
344
+ "0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n",
345
+ "\n",
346
+ " tagline title vote_average vote_count movie_id \\\n",
347
+ "0 Enter the World of Pandora. Avatar 7.2 11800 19995 \n",
348
+ "\n",
349
+ " cast \\\n",
350
+ "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n",
351
+ "\n",
352
+ " crew \n",
353
+ "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n",
354
+ "\n",
355
+ "[1 rows x 23 columns]"
356
+ ]
357
+ },
358
+ "execution_count": 6,
359
+ "metadata": {},
360
+ "output_type": "execute_result"
361
+ }
362
+ ],
363
+ "source": [
364
+ "movies.head(1)"
365
+ ]
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "execution_count": 7,
370
+ "metadata": {},
371
+ "outputs": [],
372
+ "source": [
373
+ "movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]\n"
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "code",
378
+ "execution_count": 8,
379
+ "metadata": {},
380
+ "outputs": [
381
+ {
382
+ "data": {
383
+ "text/html": [
384
+ "<div>\n",
385
+ "<style scoped>\n",
386
+ " .dataframe tbody tr th:only-of-type {\n",
387
+ " vertical-align: middle;\n",
388
+ " }\n",
389
+ "\n",
390
+ " .dataframe tbody tr th {\n",
391
+ " vertical-align: top;\n",
392
+ " }\n",
393
+ "\n",
394
+ " .dataframe thead th {\n",
395
+ " text-align: right;\n",
396
+ " }\n",
397
+ "</style>\n",
398
+ "<table border=\"1\" class=\"dataframe\">\n",
399
+ " <thead>\n",
400
+ " <tr style=\"text-align: right;\">\n",
401
+ " <th></th>\n",
402
+ " <th>movie_id</th>\n",
403
+ " <th>title</th>\n",
404
+ " <th>overview</th>\n",
405
+ " <th>genres</th>\n",
406
+ " <th>keywords</th>\n",
407
+ " <th>cast</th>\n",
408
+ " <th>crew</th>\n",
409
+ " </tr>\n",
410
+ " </thead>\n",
411
+ " <tbody>\n",
412
+ " <tr>\n",
413
+ " <th>0</th>\n",
414
+ " <td>19995</td>\n",
415
+ " <td>Avatar</td>\n",
416
+ " <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
417
+ " <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
418
+ " <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
419
+ " <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
420
+ " <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
421
+ " </tr>\n",
422
+ " </tbody>\n",
423
+ "</table>\n",
424
+ "</div>"
425
+ ],
426
+ "text/plain": [
427
+ " movie_id title overview \\\n",
428
+ "0 19995 Avatar In the 22nd century, a paraplegic Marine is di... \n",
429
+ "\n",
430
+ " genres \\\n",
431
+ "0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n",
432
+ "\n",
433
+ " keywords \\\n",
434
+ "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n",
435
+ "\n",
436
+ " cast \\\n",
437
+ "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n",
438
+ "\n",
439
+ " crew \n",
440
+ "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... "
441
+ ]
442
+ },
443
+ "execution_count": 8,
444
+ "metadata": {},
445
+ "output_type": "execute_result"
446
+ }
447
+ ],
448
+ "source": [
449
+ "movies.head(1)"
450
+ ]
451
+ },
452
+ {
453
+ "cell_type": "code",
454
+ "execution_count": 9,
455
+ "metadata": {},
456
+ "outputs": [],
457
+ "source": [
458
+ "def convert(obj):\n",
459
+ " L = []\n",
460
+ " for i in ast.literal_eval(obj):\n",
461
+ " L.append(i['name'])\n",
462
+ " return L"
463
+ ]
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "execution_count": 10,
468
+ "metadata": {},
469
+ "outputs": [],
470
+ "source": [
471
+ "movies['genres'] = movies['genres'].apply(convert)"
472
+ ]
473
+ },
474
+ {
475
+ "cell_type": "code",
476
+ "execution_count": 11,
477
+ "metadata": {},
478
+ "outputs": [
479
+ {
480
+ "data": {
481
+ "text/plain": [
482
+ "0 [Action, Adventure, Fantasy, Science Fiction]\n",
483
+ "1 [Adventure, Fantasy, Action]\n",
484
+ "2 [Action, Adventure, Crime]\n",
485
+ "3 [Action, Crime, Drama, Thriller]\n",
486
+ "4 [Action, Adventure, Science Fiction]\n",
487
+ " ... \n",
488
+ "4804 [Action, Crime, Thriller]\n",
489
+ "4805 [Comedy, Romance]\n",
490
+ "4806 [Comedy, Drama, Romance, TV Movie]\n",
491
+ "4807 []\n",
492
+ "4808 [Documentary]\n",
493
+ "Name: genres, Length: 4809, dtype: object"
494
+ ]
495
+ },
496
+ "execution_count": 11,
497
+ "metadata": {},
498
+ "output_type": "execute_result"
499
+ }
500
+ ],
501
+ "source": [
502
+ "movies['genres']"
503
+ ]
504
+ },
505
+ {
506
+ "cell_type": "code",
507
+ "execution_count": 12,
508
+ "metadata": {},
509
+ "outputs": [],
510
+ "source": [
511
+ "movies['keywords'] = movies['keywords'].apply(convert)"
512
+ ]
513
+ },
514
+ {
515
+ "cell_type": "code",
516
+ "execution_count": 13,
517
+ "metadata": {},
518
+ "outputs": [
519
+ {
520
+ "data": {
521
+ "text/plain": [
522
+ "0 [culture clash, future, space war, space colon...\n",
523
+ "1 [ocean, drug abuse, exotic island, east india ...\n",
524
+ "2 [spy, based on novel, secret agent, sequel, mi...\n",
525
+ "3 [dc comics, crime fighter, terrorist, secret i...\n",
526
+ "4 [based on novel, mars, medallion, space travel...\n",
527
+ " ... \n",
528
+ "4804 [united states–mexico barrier, legs, arms, pap...\n",
529
+ "4805 []\n",
530
+ "4806 [date, love at first sight, narration, investi...\n",
531
+ "4807 []\n",
532
+ "4808 [obsession, camcorder, crush, dream girl]\n",
533
+ "Name: keywords, Length: 4809, dtype: object"
534
+ ]
535
+ },
536
+ "execution_count": 13,
537
+ "metadata": {},
538
+ "output_type": "execute_result"
539
+ }
540
+ ],
541
+ "source": [
542
+ "movies['keywords']"
543
+ ]
544
+ },
545
+ {
546
+ "cell_type": "code",
547
+ "execution_count": 14,
548
+ "metadata": {},
549
+ "outputs": [],
550
+ "source": [
551
+ "movies['cast'] = movies['cast'].apply(lambda x: [i['name'] for i in ast.literal_eval(x)[:3]]) # Only top 3 actors"
552
+ ]
553
+ },
554
+ {
555
+ "cell_type": "code",
556
+ "execution_count": 15,
557
+ "metadata": {},
558
+ "outputs": [],
559
+ "source": [
560
+ "movies['crew'] = movies['crew'].apply(lambda x: [i['name'] for i in ast.literal_eval(x) if i['job'] == 'Director'])"
561
+ ]
562
+ },
563
+ {
564
+ "cell_type": "code",
565
+ "execution_count": 16,
566
+ "metadata": {},
567
+ "outputs": [],
568
+ "source": [
569
+ "movies['tags'] = movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']\n"
570
+ ]
571
+ },
572
+ {
573
+ "cell_type": "code",
574
+ "execution_count": 17,
575
+ "metadata": {},
576
+ "outputs": [
577
+ {
578
+ "data": {
579
+ "text/plain": [
580
+ "0 [Action, Adventure, Fantasy, Science Fiction, ...\n",
581
+ "1 [Adventure, Fantasy, Action, ocean, drug abuse...\n",
582
+ "2 [Action, Adventure, Crime, spy, based on novel...\n",
583
+ "3 [Action, Crime, Drama, Thriller, dc comics, cr...\n",
584
+ "4 [Action, Adventure, Science Fiction, based on ...\n",
585
+ " ... \n",
586
+ "4804 [Action, Crime, Thriller, united states–mexico...\n",
587
+ "4805 [Comedy, Romance, Edward Burns, Kerry Bishé, M...\n",
588
+ "4806 [Comedy, Drama, Romance, TV Movie, date, love ...\n",
589
+ "4807 [Daniel Henney, Eliza Coupe, Bill Paxton, Dani...\n",
590
+ "4808 [Documentary, obsession, camcorder, crush, dre...\n",
591
+ "Name: tags, Length: 4809, dtype: object"
592
+ ]
593
+ },
594
+ "execution_count": 17,
595
+ "metadata": {},
596
+ "output_type": "execute_result"
597
+ }
598
+ ],
599
+ "source": [
600
+ "movies['tags']"
601
+ ]
602
+ },
603
+ {
604
+ "cell_type": "code",
605
+ "execution_count": 18,
606
+ "metadata": {},
607
+ "outputs": [],
608
+ "source": [
609
+ "movies['tags'] = movies['tags'].apply(lambda x: \" \".join(x))\n"
610
+ ]
611
+ },
612
+ {
613
+ "cell_type": "code",
614
+ "execution_count": 19,
615
+ "metadata": {},
616
+ "outputs": [
617
+ {
618
+ "data": {
619
+ "text/plain": [
620
+ "0 Action Adventure Fantasy Science Fiction cultu...\n",
621
+ "1 Adventure Fantasy Action ocean drug abuse exot...\n",
622
+ "2 Action Adventure Crime spy based on novel secr...\n",
623
+ "3 Action Crime Drama Thriller dc comics crime fi...\n",
624
+ "4 Action Adventure Science Fiction based on nove...\n",
625
+ " ... \n",
626
+ "4804 Action Crime Thriller united states–mexico bar...\n",
627
+ "4805 Comedy Romance Edward Burns Kerry Bishé Marsha...\n",
628
+ "4806 Comedy Drama Romance TV Movie date love at fir...\n",
629
+ "4807 Daniel Henney Eliza Coupe Bill Paxton Daniel Hsia\n",
630
+ "4808 Documentary obsession camcorder crush dream gi...\n",
631
+ "Name: tags, Length: 4809, dtype: object"
632
+ ]
633
+ },
634
+ "execution_count": 19,
635
+ "metadata": {},
636
+ "output_type": "execute_result"
637
+ }
638
+ ],
639
+ "source": [
640
+ "movies['tags']"
641
+ ]
642
+ },
643
+ {
644
+ "cell_type": "code",
645
+ "execution_count": 20,
646
+ "metadata": {},
647
+ "outputs": [],
648
+ "source": [
649
+ "movies = movies[['movie_id', 'title', 'overview', 'tags']]"
650
+ ]
651
+ },
652
+ {
653
+ "cell_type": "code",
654
+ "execution_count": 21,
655
+ "metadata": {},
656
+ "outputs": [],
657
+ "source": [
658
+ "movies['tags'] = movies['tags'].apply(lambda x: x.lower())"
659
+ ]
660
+ },
661
+ {
662
+ "cell_type": "code",
663
+ "execution_count": 22,
664
+ "metadata": {},
665
+ "outputs": [
666
+ {
667
+ "data": {
668
+ "text/html": [
669
+ "<div>\n",
670
+ "<style scoped>\n",
671
+ " .dataframe tbody tr th:only-of-type {\n",
672
+ " vertical-align: middle;\n",
673
+ " }\n",
674
+ "\n",
675
+ " .dataframe tbody tr th {\n",
676
+ " vertical-align: top;\n",
677
+ " }\n",
678
+ "\n",
679
+ " .dataframe thead th {\n",
680
+ " text-align: right;\n",
681
+ " }\n",
682
+ "</style>\n",
683
+ "<table border=\"1\" class=\"dataframe\">\n",
684
+ " <thead>\n",
685
+ " <tr style=\"text-align: right;\">\n",
686
+ " <th></th>\n",
687
+ " <th>movie_id</th>\n",
688
+ " <th>title</th>\n",
689
+ " <th>overview</th>\n",
690
+ " <th>tags</th>\n",
691
+ " </tr>\n",
692
+ " </thead>\n",
693
+ " <tbody>\n",
694
+ " <tr>\n",
695
+ " <th>0</th>\n",
696
+ " <td>19995</td>\n",
697
+ " <td>Avatar</td>\n",
698
+ " <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
699
+ " <td>action adventure fantasy science fiction cultu...</td>\n",
700
+ " </tr>\n",
701
+ " <tr>\n",
702
+ " <th>1</th>\n",
703
+ " <td>285</td>\n",
704
+ " <td>Pirates of the Caribbean: At World's End</td>\n",
705
+ " <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
706
+ " <td>adventure fantasy action ocean drug abuse exot...</td>\n",
707
+ " </tr>\n",
708
+ " <tr>\n",
709
+ " <th>2</th>\n",
710
+ " <td>206647</td>\n",
711
+ " <td>Spectre</td>\n",
712
+ " <td>A cryptic message from Bond’s past sends him o...</td>\n",
713
+ " <td>action adventure crime spy based on novel secr...</td>\n",
714
+ " </tr>\n",
715
+ " <tr>\n",
716
+ " <th>3</th>\n",
717
+ " <td>49026</td>\n",
718
+ " <td>The Dark Knight Rises</td>\n",
719
+ " <td>Following the death of District Attorney Harve...</td>\n",
720
+ " <td>action crime drama thriller dc comics crime fi...</td>\n",
721
+ " </tr>\n",
722
+ " <tr>\n",
723
+ " <th>4</th>\n",
724
+ " <td>49529</td>\n",
725
+ " <td>John Carter</td>\n",
726
+ " <td>John Carter is a war-weary, former military ca...</td>\n",
727
+ " <td>action adventure science fiction based on nove...</td>\n",
728
+ " </tr>\n",
729
+ " </tbody>\n",
730
+ "</table>\n",
731
+ "</div>"
732
+ ],
733
+ "text/plain": [
734
+ " movie_id title \\\n",
735
+ "0 19995 Avatar \n",
736
+ "1 285 Pirates of the Caribbean: At World's End \n",
737
+ "2 206647 Spectre \n",
738
+ "3 49026 The Dark Knight Rises \n",
739
+ "4 49529 John Carter \n",
740
+ "\n",
741
+ " overview \\\n",
742
+ "0 In the 22nd century, a paraplegic Marine is di... \n",
743
+ "1 Captain Barbossa, long believed to be dead, ha... \n",
744
+ "2 A cryptic message from Bond’s past sends him o... \n",
745
+ "3 Following the death of District Attorney Harve... \n",
746
+ "4 John Carter is a war-weary, former military ca... \n",
747
+ "\n",
748
+ " tags \n",
749
+ "0 action adventure fantasy science fiction cultu... \n",
750
+ "1 adventure fantasy action ocean drug abuse exot... \n",
751
+ "2 action adventure crime spy based on novel secr... \n",
752
+ "3 action crime drama thriller dc comics crime fi... \n",
753
+ "4 action adventure science fiction based on nove... "
754
+ ]
755
+ },
756
+ "execution_count": 22,
757
+ "metadata": {},
758
+ "output_type": "execute_result"
759
+ }
760
+ ],
761
+ "source": [
762
+ "movies.head()"
763
+ ]
764
+ },
765
+ {
766
+ "cell_type": "code",
767
+ "execution_count": 23,
768
+ "metadata": {},
769
+ "outputs": [],
770
+ "source": [
771
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
772
+ "tfidf = TfidfVectorizer(stop_words='english')\n",
773
+ "tfidf_matrix = tfidf.fit_transform(movies['tags'])"
774
+ ]
775
+ },
776
+ {
777
+ "cell_type": "code",
778
+ "execution_count": 24,
779
+ "metadata": {},
780
+ "outputs": [],
781
+ "source": [
782
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
783
+ "cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)"
784
+ ]
785
+ },
786
+ {
787
+ "cell_type": "code",
788
+ "execution_count": 25,
789
+ "metadata": {},
790
+ "outputs": [],
791
+ "source": [
792
+ "def get_recommendations(title, cosine_sim=cosine_sim):\n",
793
+ " idx = movies[movies['title'] == title].index[0]\n",
794
+ " sim_scores = list(enumerate(cosine_sim[idx]))\n",
795
+ " sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)\n",
796
+ " sim_scores = sim_scores[1:11] # Get top 10 similar movies\n",
797
+ " movie_indices = [i[0] for i in sim_scores]\n",
798
+ " return movies['title'].iloc[movie_indices]"
799
+ ]
800
+ },
801
+ {
802
+ "cell_type": "code",
803
+ "execution_count": 26,
804
+ "metadata": {},
805
+ "outputs": [
806
+ {
807
+ "name": "stdout",
808
+ "output_type": "stream",
809
+ "text": [
810
+ "65 The Dark Knight\n",
811
+ "119 Batman Begins\n",
812
+ "1360 Batman\n",
813
+ "210 Batman & Robin\n",
814
+ "428 Batman Returns\n",
815
+ "1361 Batman\n",
816
+ "1197 The Prestige\n",
817
+ "303 Catwoman\n",
818
+ "4644 Amidst the Devil's Wings\n",
819
+ "72 Suicide Squad\n",
820
+ "Name: title, dtype: object\n"
821
+ ]
822
+ }
823
+ ],
824
+ "source": [
825
+ "print(get_recommendations('The Dark Knight Rises'))"
826
+ ]
827
+ },
828
+ {
829
+ "cell_type": "code",
830
+ "execution_count": 27,
831
+ "metadata": {},
832
+ "outputs": [],
833
+ "source": [
834
+ "import pickle\n",
835
+ "with open('movie_data.pkl', 'wb') as file:\n",
836
+ " pickle.dump((movies, cosine_sim), file)"
837
+ ]
838
+ }
839
+ ],
840
+ "metadata": {
841
+ "kernelspec": {
842
+ "display_name": "Python 3",
843
+ "language": "python",
844
+ "name": "python3"
845
+ },
846
+ "language_info": {
847
+ "codemirror_mode": {
848
+ "name": "ipython",
849
+ "version": 3
850
+ },
851
+ "file_extension": ".py",
852
+ "mimetype": "text/x-python",
853
+ "name": "python",
854
+ "nbconvert_exporter": "python",
855
+ "pygments_lexer": "ipython3",
856
+ "version": "3.12.4"
857
+ }
858
+ },
859
+ "nbformat": 4,
860
+ "nbformat_minor": 2
861
+ }
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import requests
4
+ import pickle
5
+
6
+ # Load the processed data and similarity matrix
7
@st.cache_resource
def _load_movie_data(path='movie_data.pkl'):
    """Load the pickled ``(movies, cosine_sim)`` pair from ``path``.

    Streamlit re-executes this script from the top on every widget
    interaction; ``st.cache_resource`` keeps the unpickled objects in memory
    so the file is read once per server process instead of once per rerun.
    The cached objects are shared between reruns and must be treated as
    read-only.

    Args:
        path: Location of the pickle file holding a two-item tuple.

    Returns:
        The object stored in the pickle (unpacked by the caller into
        ``movies`` and ``cosine_sim``).
    """
    # SECURITY: pickle executes arbitrary code on load -- only ever point
    # this at a file produced by a trusted source.
    with open(path, 'rb') as file:
        return pickle.load(file)


movies, cosine_sim = _load_movie_data()
9
+
10
+ # Function to get movie recommendations
11
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the ten movies most similar to ``title``.

    Args:
        title: Exact value to look up in the ``title`` column of the
            module-level ``movies`` frame. The first matching row is used.
        cosine_sim: Square similarity matrix whose row/column order matches
            the row order of ``movies``.

    Returns:
        A DataFrame with the ``title`` and ``movie_id`` columns of up to ten
        rows, ordered from most to least similar.

    Raises:
        IndexError: If no row of ``movies`` has the given title.
    """
    # Work with row positions rather than index labels, since both
    # ``cosine_sim`` and ``.iloc`` are positional.
    matches = (movies['title'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"No movie titled {title!r} in the dataset")
    idx = int(matches[0])

    # Drop the queried movie explicitly instead of assuming it sorts first:
    # with tied scores (duplicate rows, or an all-zero similarity row) the
    # stable sort may put another row at position 0.
    sim_scores = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [pos for pos, _ in sim_scores[:10]]  # top 10 similar movies
    return movies[['title', 'movie_id']].iloc[movie_indices]
18
+
19
+ # Fetch movie poster from TMDB API
20
def fetch_poster(movie_id):
    """Return the URL of the ``w500`` poster image for a TMDB movie.

    Requests ``https://api.themoviedb.org/3/movie/{movie_id}`` and builds the
    image URL from the ``poster_path`` field of the JSON response.

    Args:
        movie_id: TMDB movie identifier, interpolated into the request URL.

    Returns:
        ``https://image.tmdb.org/t/p/w500`` followed by the response's
        ``poster_path`` value.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-2xx HTTP status.
        KeyError: If the JSON response has no ``poster_path`` field.
    """
    import os  # local import: nothing else in this block needs it

    # SECURITY: read the key from the environment. The literal fallback is
    # kept only so existing deployments keep working -- it is committed to a
    # public repository and should be rotated and removed.
    api_key = os.environ.get('TMDB_API_KEY', '7b995d3c6fd91a2284b4ad8cb390c7b8')
    url = f'https://api.themoviedb.org/3/movie/{movie_id}'

    # Without a timeout requests waits indefinitely; passing the key through
    # ``params`` lets requests handle the query-string encoding.
    response = requests.get(url, params={'api_key': api_key}, timeout=10)
    # Surface HTTP errors directly rather than as a KeyError on the error body.
    response.raise_for_status()

    poster_path = response.json()['poster_path']
    # NOTE(review): if ``poster_path`` is null in the response, the URL below
    # ends in "None" -- callers may want to guard against that.
    return f"https://image.tmdb.org/t/p/w500{poster_path}"
28
+
29
+ # Streamlit UI
30
st.title("Movie Recommendation System")

selected_movie = st.selectbox("Select a movie:", movies['title'].values)

if st.button('Recommend'):
    recommendations = get_recommendations(selected_movie)
    st.write("Top 10 recommended movies:")

    # Create a 2x5 grid layout: two rows of five columns each.
    for i in range(0, 10, 5):
        cols = st.columns(5)
        for col, j in zip(cols, range(i, i + 5)):
            if j >= len(recommendations):
                break  # fewer than ten results: leave remaining columns empty
            movie = recommendations.iloc[j]

            # A failed poster lookup must not abort the whole grid; fall back
            # to a caption for that tile only.
            try:
                poster_url = fetch_poster(movie['movie_id'])
            except (requests.RequestException, KeyError, ValueError):
                poster_url = None

            with col:
                if poster_url:
                    st.image(poster_url, width=130)
                else:
                    st.caption("Poster unavailable")
                st.write(movie['title'])
movie_data.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee875ac979bc56a80e843eb9cb92960426d17640940fe962474d50e0c632095a
3
+ size 187413682
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pandas==2.2.2
2
+ Requests==2.32.3
3
+ streamlit==1.35.0
tmdb_5000_credits.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d0050599ff88d40366c4841204b1489862bca346bfa46c20b05a65d14508435
3
+ size 40044293
tmdb_5000_movies.csv ADDED
The diff for this file is too large to render. See raw diff