{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "34c6730a-3af1-421e-b380-85b6659dfb1e",
   "metadata": {},
   "source": [
    "# Movie Recommendation prediction using ML\n",
    "\n",
    "A content-based movie recommender built from `imdb.csv`. Each movie is described by its certificate and genres, those descriptions are turned into token counts, and recommendations are the movies whose count vectors have the highest cosine similarity to the requested title.\n",
    "\n",
    "The notebook is meant to be run top to bottom (Restart Kernel, then Run All): every cell depends only on the cells above it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1a7e0b2-4d5f-4e88-9a10-2b6f3d7e8a01",
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.metrics.pairwise import cosine_similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2b8f1c3-5e6a-4f99-8b21-3c7a4e8f9b02",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Paths are relative to the notebook's working directory.\n",
    "DATA_PATH = './imdb.csv'\n",
    "MODEL_PATH = 'movie_recommender_model.pkl'\n",
    "\n",
    "# Columns of the CSV that the recommender keeps.\n",
    "FEATURE_COLUMNS = ['Rank', 'Movie_name', 'Rating_from_10', 'Certificate', 'Genre', 'Year', 'Runtime_in_min']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e3c9a2d4-6f7b-4a0a-9c32-4d8b5f9a0c03",
   "metadata": {},
   "source": [
    "## Load and explore the data\n",
    "\n",
    "The CSV is expected to contain the columns `Rank`, `Movie_name`, `Year`, `Certificate`, `Runtime_in_min`, `Genre`, `Metascore`, `Gross_in_$_M` and `Rating_from_10`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abd4daf2-55b8-405e-a116-29f82aac5667",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies_raw = pd.read_csv(DATA_PATH)\n",
    "\n",
    "# Quick sanity check: movies with 'Spider' in the title\n",
    "contains_spider = movies_raw['Movie_name'].str.contains('Spider', case=False, na=False)\n",
    "spider_movies = movies_raw[contains_spider]\n",
    "\n",
    "spider_movies"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f4d0b3e5-7a8c-4b1b-8d43-5e9c6a0b1d04",
   "metadata": {},
   "source": [
    "## Build the tag features and the similarity matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5e1c4f6-8b9d-4c2c-9e54-6f0d7b1c2e05",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies_clean = (\n",
    "    movies_raw[FEATURE_COLUMNS]\n",
    "    # Combine Certificate and Genre into one text field describing the movie\n",
    "    .assign(tags=lambda frame: frame['Certificate'] + ' ' + frame['Genre'])\n",
    "    .drop(columns=['Certificate', 'Genre'])\n",
    "    # Drop rows with missing values\n",
    "    .dropna()\n",
    "    # dropna() leaves gaps in the index labels; renumber so that label i is\n",
    "    # also row position i, matching the rows of the similarity matrix.\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "\n",
    "assert movies_clean['tags'].notna().all()\n",
    "movies_clean.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6f2d5a7-9c0e-4d3d-8f65-7a1e8c2d3f06",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vectorize the 'tags' column into token counts.\n",
    "# NOTE: CountVectorizer's default token pattern only keeps tokens of two or\n",
    "# more alphanumeric characters, so a one-letter certificate (e.g. 'R') would\n",
    "# not contribute a feature.\n",
    "cv = CountVectorizer(max_features=5000, stop_words='english')\n",
    "vectorized_data = cv.fit_transform(movies_clean['tags']).toarray()\n",
    "\n",
    "# Pairwise cosine similarity: similarity[i, j] compares rows i and j of movies_clean\n",
    "similarity = cosine_similarity(vectorized_data)\n",
    "\n",
    "assert similarity.shape == (len(movies_clean), len(movies_clean))\n",
    "similarity.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7a3e6b8-0d1f-4e4e-9a76-8b2f9d3e4a07",
   "metadata": {},
   "source": [
    "## Recommender"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8b4f7c9-1e2a-4f5f-8b87-9c3a0e4f5b08",
   "metadata": {},
   "outputs": [],
   "source": [
    "class MovieRecommender:\n",
    "    \"\"\"Content-based recommender over a precomputed similarity matrix.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    similarity : 2-D array-like\n",
    "        Square matrix of pairwise similarity scores. Row/column ``i`` must\n",
    "        describe the movie at row *position* ``i`` of ``movie_data``.\n",
    "    movie_data : pandas.DataFrame\n",
    "        Movie table with at least a ``'Movie_name'`` column.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``similarity`` and ``movie_data`` do not have the same number of rows.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, similarity, movie_data):\n",
    "        if len(similarity) != len(movie_data):\n",
    "            raise ValueError(\n",
    "                f\"similarity has {len(similarity)} rows but movie_data has \"\n",
    "                f\"{len(movie_data)}; they must describe the same movies.\"\n",
    "            )\n",
    "        self.similarity = similarity\n",
    "        self.movie_data = movie_data\n",
    "\n",
    "    def recommend(self, movie_title, top_n=5):\n",
    "        \"\"\"Return the titles most similar to ``movie_title``.\n",
    "\n",
    "        Parameters\n",
    "        ----------\n",
    "        movie_title : str\n",
    "            Exact title to look up in the ``'Movie_name'`` column.\n",
    "        top_n : int, default 5\n",
    "            Maximum number of recommendations to return.\n",
    "\n",
    "        Returns\n",
    "        -------\n",
    "        list of str\n",
    "            Up to ``top_n`` titles, most similar first, when ``movie_title``\n",
    "            matches a title exactly.\n",
    "        str\n",
    "            A message saying the movie was not found otherwise; it lists the\n",
    "            titles containing ``movie_title`` (case-insensitive) if any exist.\n",
    "        \"\"\"\n",
    "        titles = self.movie_data['Movie_name']\n",
    "\n",
    "        # Row positions (not index labels) of exact title matches: the\n",
    "        # similarity matrix is positional, while the DataFrame index may have\n",
    "        # gaps (e.g. after dropna()).\n",
    "        matches = np.flatnonzero((titles == movie_title).to_numpy())\n",
    "\n",
    "        if matches.size == 0:\n",
    "            # regex=False: treat the title as literal text, so names containing\n",
    "            # regex metacharacters such as '(' or '*' cannot raise or mis-match.\n",
    "            similar_titles = titles[titles.str.contains(movie_title, case=False, na=False, regex=False)]\n",
    "            if not similar_titles.empty:\n",
    "                suggestions = similar_titles.tolist()\n",
    "                return f\"Movie '{movie_title}' not found. Did you mean one of these?\\n\" + \"\\n\".join(suggestions)\n",
    "            return f\"Movie '{movie_title}' not found. Please recheck the movie name.\"\n",
    "\n",
    "        position = int(matches[0])\n",
    "\n",
    "        # Rank every movie by similarity to the query, most similar first. A\n",
    "        # stable sort keeps the original row order among ties.\n",
    "        scores = np.asarray(self.similarity[position])\n",
    "        ranked = np.argsort(-scores, kind='stable')\n",
    "\n",
    "        # Drop the query movie by position rather than assuming it is ranked\n",
    "        # first: another movie with identical tags also scores 1.0.\n",
    "        top_positions = [i for i in ranked if i != position][:top_n]\n",
    "        return [titles.iloc[i] for i in top_positions]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee5586e1-75a9-4331-a2b7-1589bf2443bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = MovieRecommender(similarity, movies_clean)\n",
    "\n",
    "# Save the model to a file.\n",
    "# NOTE: the pickle stores MovieRecommender by reference, so the class must be\n",
    "# defined (or importable) in whatever process loads the file.\n",
    "joblib.dump(model, MODEL_PATH)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e9c5a8d0-2f3b-4a6a-9c98-0d4b1f5a6c09",
   "metadata": {},
   "source": [
    "## Load the saved model and query it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "833b731e-999e-4578-92b8-f51c29ed4f30",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only load pickle files you created yourself: unpickling can execute arbitrary code.\n",
    "model = joblib.load(MODEL_PATH)\n",
    "recommendations = model.recommend(\"Iron Man\")\n",
    "print(recommendations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0d6b9e1-3a4c-4b7b-8da9-1e5c2a6b7d10",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A partial title is not an exact match, so this returns the list of suggested titles instead.\n",
    "print(model.recommend(\"Spider\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}