{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import ast\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "credits = pd.read_csv('tmdb_5000_credits.csv')\n", "movies = pd.read_csv('tmdb_5000_movies.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idtitlecastcrew
019995Avatar[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
1285Pirates of the Caribbean: At World's End[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...
2206647Spectre[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...
349026The Dark Knight Rises[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...
449529John Carter[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...
\n", "
" ], "text/plain": [ " movie_id title \\\n", "0 19995 Avatar \n", "1 285 Pirates of the Caribbean: At World's End \n", "2 206647 Spectre \n", "3 49026 The Dark Knight Rises \n", "4 49529 John Carter \n", "\n", " cast \\\n", "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n", "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n", "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n", "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n", "\n", " crew \n", "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n", "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n", "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n", "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n", "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "credits.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
budgetgenreshomepageidkeywordsoriginal_languageoriginal_titleoverviewpopularityproduction_companiesproduction_countriesrelease_daterevenueruntimespoken_languagesstatustaglinetitlevote_averagevote_count
0237000000[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...http://www.avatarmovie.com/19995[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...enAvatarIn the 22nd century, a paraplegic Marine is di...150.437577[{\"name\": \"Ingenious Film Partners\", \"id\": 289...[{\"iso_3166_1\": \"US\", \"name\": \"United States o...2009-12-102787965087162.0[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...ReleasedEnter the World of Pandora.Avatar7.211800
\n", "
" ], "text/plain": [ " budget genres \\\n", "0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", "\n", " homepage id \\\n", "0 http://www.avatarmovie.com/ 19995 \n", "\n", " keywords original_language \\\n", "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n", "\n", " original_title overview \\\n", "0 Avatar In the 22nd century, a paraplegic Marine is di... \n", "\n", " popularity production_companies \\\n", "0 150.437577 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... \n", "\n", " production_countries release_date revenue \\\n", "0 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2009-12-10 2787965087 \n", "\n", " runtime spoken_languages status \\\n", "0 162.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n", "\n", " tagline title vote_average vote_count \n", "0 Enter the World of Pandora. Avatar 7.2 11800 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head(1)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Join on the TMDB id rather than on the title: titles are not unique, so a\n", "# title join pairs unrelated films that share a name and adds spurious rows.\n", "# credits' own 'title' column is dropped so the result keeps a single 'title'.\n", "movies = movies.merge(credits.drop(columns='title'), left_on='id', right_on='movie_id')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
budgetgenreshomepageidkeywordsoriginal_languageoriginal_titleoverviewpopularityproduction_companies...runtimespoken_languagesstatustaglinetitlevote_averagevote_countmovie_idcastcrew
0237000000[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...http://www.avatarmovie.com/19995[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...enAvatarIn the 22nd century, a paraplegic Marine is di...150.437577[{\"name\": \"Ingenious Film Partners\", \"id\": 289......162.0[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...ReleasedEnter the World of Pandora.Avatar7.21180019995[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
\n", "

1 rows × 23 columns

\n", "
" ], "text/plain": [ " budget genres \\\n", "0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", "\n", " homepage id \\\n", "0 http://www.avatarmovie.com/ 19995 \n", "\n", " keywords original_language \\\n", "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n", "\n", " original_title overview \\\n", "0 Avatar In the 22nd century, a paraplegic Marine is di... \n", "\n", " popularity production_companies ... runtime \\\n", "0 150.437577 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... ... 162.0 \n", "\n", " spoken_languages status \\\n", "0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n", "\n", " tagline title vote_average vote_count movie_id \\\n", "0 Enter the World of Pandora. Avatar 7.2 11800 19995 \n", "\n", " cast \\\n", "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", "\n", " crew \n", "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n", "\n", "[1 rows x 23 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head(1)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idtitleoverviewgenreskeywordscastcrew
019995AvatarIn the 22nd century, a paraplegic Marine is di...[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
\n", "
" ], "text/plain": [ " movie_id title overview \\\n", "0 19995 Avatar In the 22nd century, a paraplegic Marine is di... \n", "\n", " genres \\\n", "0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", "\n", " keywords \\\n", "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n", "\n", " cast \\\n", "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", "\n", " crew \n", "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head(1)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def convert(obj):\n", "    \"\"\"Return the 'name' value of every dict in a stringified list of dicts.\n", "\n", "    `obj` is parsed with ast.literal_eval, so it must be a Python-literal string\n", "    whose elements each carry a 'name' key.\n", "    \"\"\"\n", "    return [entry['name'] for entry in ast.literal_eval(obj)]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "movies['genres'] = movies['genres'].apply(convert)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [Action, Adventure, Fantasy, Science Fiction]\n", "1 [Adventure, Fantasy, Action]\n", "2 [Action, Adventure, Crime]\n", "3 [Action, Crime, Drama, Thriller]\n", "4 [Action, Adventure, Science Fiction]\n", " ... 
\n", "4804 [Action, Crime, Thriller]\n", "4805 [Comedy, Romance]\n", "4806 [Comedy, Drama, Romance, TV Movie]\n", "4807 []\n", "4808 [Documentary]\n", "Name: genres, Length: 4809, dtype: object" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies['genres']" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "movies['keywords'] = movies['keywords'].apply(convert)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [culture clash, future, space war, space colon...\n", "1 [ocean, drug abuse, exotic island, east india ...\n", "2 [spy, based on novel, secret agent, sequel, mi...\n", "3 [dc comics, crime fighter, terrorist, secret i...\n", "4 [based on novel, mars, medallion, space travel...\n", " ... \n", "4804 [united states–mexico barrier, legs, arms, pap...\n", "4805 []\n", "4806 [date, love at first sight, narration, investi...\n", "4807 []\n", "4808 [obsession, camcorder, crush, dream girl]\n", "Name: keywords, Length: 4809, dtype: object" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies['keywords']" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "movies['cast'] = movies['cast'].apply(lambda x: [i['name'] for i in ast.literal_eval(x)[:3]]) # Only top 3 actors" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "movies['crew'] = movies['crew'].apply(lambda x: [i['name'] for i in ast.literal_eval(x) if i['job'] == 'Director'])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "movies['tags'] = movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [Action, Adventure, Fantasy, Science Fiction, ...\n", "1 
[Adventure, Fantasy, Action, ocean, drug abuse...\n", "2 [Action, Adventure, Crime, spy, based on novel...\n", "3 [Action, Crime, Drama, Thriller, dc comics, cr...\n", "4 [Action, Adventure, Science Fiction, based on ...\n", " ... \n", "4804 [Action, Crime, Thriller, united states–mexico...\n", "4805 [Comedy, Romance, Edward Burns, Kerry Bishé, M...\n", "4806 [Comedy, Drama, Romance, TV Movie, date, love ...\n", "4807 [Daniel Henney, Eliza Coupe, Bill Paxton, Dani...\n", "4808 [Documentary, obsession, camcorder, crush, dre...\n", "Name: tags, Length: 4809, dtype: object" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies['tags']" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "movies['tags'] = movies['tags'].apply(lambda x: \" \".join(x))\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 Action Adventure Fantasy Science Fiction cultu...\n", "1 Adventure Fantasy Action ocean drug abuse exot...\n", "2 Action Adventure Crime spy based on novel secr...\n", "3 Action Crime Drama Thriller dc comics crime fi...\n", "4 Action Adventure Science Fiction based on nove...\n", " ... 
\n", "4804 Action Crime Thriller united states–mexico bar...\n", "4805 Comedy Romance Edward Burns Kerry Bishé Marsha...\n", "4806 Comedy Drama Romance TV Movie date love at fir...\n", "4807 Daniel Henney Eliza Coupe Bill Paxton Daniel Hsia\n", "4808 Documentary obsession camcorder crush dream gi...\n", "Name: tags, Length: 4809, dtype: object" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies['tags']" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "movies = movies[['movie_id', 'title', 'overview', 'tags']]" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "movies['tags'] = movies['tags'].apply(lambda x: x.lower())" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idtitleoverviewtags
019995AvatarIn the 22nd century, a paraplegic Marine is di...action adventure fantasy science fiction cultu...
1285Pirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...adventure fantasy action ocean drug abuse exot...
2206647SpectreA cryptic message from Bond’s past sends him o...action adventure crime spy based on novel secr...
349026The Dark Knight RisesFollowing the death of District Attorney Harve...action crime drama thriller dc comics crime fi...
449529John CarterJohn Carter is a war-weary, former military ca...action adventure science fiction based on nove...
\n", "
" ], "text/plain": [ " movie_id title \\\n", "0 19995 Avatar \n", "1 285 Pirates of the Caribbean: At World's End \n", "2 206647 Spectre \n", "3 49026 The Dark Knight Rises \n", "4 49529 John Carter \n", "\n", " overview \\\n", "0 In the 22nd century, a paraplegic Marine is di... \n", "1 Captain Barbossa, long believed to be dead, ha... \n", "2 A cryptic message from Bond’s past sends him o... \n", "3 Following the death of District Attorney Harve... \n", "4 John Carter is a war-weary, former military ca... \n", "\n", " tags \n", "0 action adventure fantasy science fiction cultu... \n", "1 adventure fantasy action ocean drug abuse exot... \n", "2 action adventure crime spy based on novel secr... \n", "3 action crime drama thriller dc comics crime fi... \n", "4 action adventure science fiction based on nove... " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "from sklearn.feature_extraction.text import TfidfVectorizer\n", "tfidf = TfidfVectorizer(stop_words='english')\n", "tfidf_matrix = tfidf.fit_transform(movies['tags'])" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics.pairwise import cosine_similarity\n", "cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):\n", "    \"\"\"Return the titles of the `top_n` movies most similar to `title`.\n", "\n", "    title: exact value from movies['title']; the first matching row is used.\n", "    cosine_sim: square similarity matrix aligned with the row order of `movies`\n", "        (the default is captured when this cell is executed).\n", "    top_n: how many recommendations to return.\n", "\n", "    Raises IndexError if `title` is not in movies['title'].\n", "    \"\"\"\n", "    matches = np.flatnonzero((movies['title'] == title).to_numpy())\n", "    if matches.size == 0:\n", "        raise IndexError(f\"title not found in movies: {title!r}\")\n", "    # Row position, not index label: cosine_sim is addressed positionally.\n", "    idx = int(matches[0])\n", "    sim_scores = sorted(enumerate(cosine_sim[idx]), key=lambda x: x[1], reverse=True)\n", "    # Drop the query row explicitly; it is not guaranteed to sort first\n", "    # (tied scores, or an all-zero tag vector).\n", "    movie_indices = [i for i, _ in sim_scores if i != idx][:top_n]\n", "    return movies['title'].iloc[movie_indices]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ "65 The Dark Knight\n", "119 Batman Begins\n", "1360 Batman\n", "210 Batman & Robin\n", "428 Batman Returns\n", "1361 Batman\n", "1197 The Prestige\n", "303 Catwoman\n", "4644 Amidst the Devil's Wings\n", "72 Suicide Squad\n", "Name: title, dtype: object\n" ] } ], "source": [ "print(get_recommendations('The Dark Knight Rises'))" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "with open('movie_data.pkl', 'wb') as file:\n", " pickle.dump((movies, cosine_sim), file)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 2 }