# Movie Recommendation prediction using ML

In [14]:
movies = pd.read_csv('./imdb.csv')
contains_spider = movies['Movie_name'].str.contains('Spider', case=False, na=False)

# Filter movies with 'Spider' in the title
spider_movies = movies[contains_spider]

print(spider_movies)

 Rank Movie_name Year Certificate Runtime_in_min \
66 67 Spider-Man: Into the Spider-Verse 2018 PG 117 
157 158 Spider-Man: No Way Home 2021 PG-13 148 

 Genre Metascore Gross_in_$_M Rating_from_10 
66 Animation, Action, Adventure 87.0 190.24 8.4 
157 Action, Adventure, Fantasy 71.0 804.75 8.2 


In [32]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib

# Load the data
movies = pd.read_csv('./imdb.csv')

# Select the required columns
movies = movies[['Rank', 'Movie_name', 'Rating_from_10', 'Certificate', 'Genre', 'Year', 'Runtime_in_min']]

# Combine Certificate and Genre to create tags
movies['tags'] = movies['Certificate'] + ' ' + movies['Genre']

# Remove 'Certificate' and 'Genre'
new_data = movies.drop(columns=['Certificate', 'Genre'])

# Drop missing values
new_cleaned = new_data.dropna()

# Vectorizing the 'tags' column
cv = CountVectorizer(max_features=5000, stop_words='english')
vectorized_data = cv.fit_transform(new_cleaned['tags']).toarray()

# Calculate cosine similarities
similarity = cosine_similarity(vectorized_data)

# Define the Recommender class
class MovieRecommender:
 def __init__(self, similarity, movie_data):
 self.similarity = similarity
 self.movie_data = movie_data

 def recommend(self, movie_title):
 # Check if the movie title exists in the dataset
 if movie_title not in self.movie_data['Movie_name'].values:
 # Find similar movie names containing the keyword
 similar_movies = self.movie_data[self.movie_data['Movie_name'].str.contains(movie_title, case=False, na=False)]
 if not similar_movies.empty:
 suggestions = similar_movies['Movie_name'].tolist()
 return f"Movie '{movie_title}' not found. Did you mean one of these?\n" + "\n".join(suggestions)
 else:
 return f"Movie '{movie_title}' not found. Please recheck the movie name."
 
 # Find the index of the movie in the DataFrame using 'Movie_name' column
 index = self.movie_data[self.movie_data['Movie_name'] == movie_title].index[0]
 
 # Calculate similarity scores, sort them, and store the titles of the top 5 movies
 distances = sorted(list(enumerate(self.similarity[index])), reverse=True, key=lambda vector: vector[1])
 recommendations = [self.movie_data.iloc[i[0]]['Movie_name'] for i in distances[1:6]] # Skip the movie itself
 
 return recommendations

model = MovieRecommender(similarity, new_cleaned)

# recommendations = model.recommend("Spider")
# print(recommendations)

# Save the model to a file
joblib.dump(model, 'movie_recommender_model.pkl')


['movie_recommender_model.pkl']

In [33]:
model = joblib.load('movie_recommender_model.pkl')
recommendations = model.recommend("Iron Man")
print(recommendations)

['The Princess Bride', 'Harry Potter and the Prisoner of Azkaban', "Harry Potter and the Sorcerer's Stone", 'Harry Potter and the Deathly Hallows: Part 2', 'Harry Potter and the Deathly Hallows: Part 1']
