import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
import gradio as gr
import zipfile
import random

# input_count: how many titles are offered in each dropdown.
# result_count: result-size setting shared by the recommenders (the content-based
# one returns result_count - 1 titles; the collaborative ones use it as default n).
input_count = 300
result_count = 21

# Read both MovieLens tables straight out of the zip archive
with zipfile.ZipFile('ml-latest-small.zip') as archive:
    with archive.open('ml-latest-small/movies.csv') as movies_csv:
        movies = pd.read_csv(movies_csv)
    with archive.open('ml-latest-small/ratings.csv') as ratings_csv:
        ratings = pd.read_csv(ratings_csv)

######################################
#
# Content-based Filtering
#
######################################

# Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN with an empty string so every row can be vectorized
movies['genres'] = movies['genres'].fillna('')

# Construct the TF-IDF matrix (one row per movie) by fitting and transforming the genres
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute the movie-by-movie similarity matrix from the TF-IDF rows
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse map: movie title -> row label in `movies`.
# Series.drop_duplicates() compares the *values* (row labels, which are already
# unique), so it never removed repeated titles. Filter on the index instead, keeping
# the first occurrence, so that indices[title] always yields a single scalar.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]

# Function that takes in movie title as input and outputs most similar movies
def get_cb_recommendations(title, cosine_sim=cosine_sim):
    """Return the movies whose genres are most similar to `title`.

    Args:
        title: Movie title; must be a key of the module-level `indices` map.
        cosine_sim: Square movie-by-movie similarity matrix, indexed by row position.

    Returns:
        A list of (title, score) tuples ordered by descending score, holding at most
        result_count - 1 entries and never containing the query movie itself.

    Raises:
        KeyError: If `title` is not present in `indices`.
    """
    # Get the index of the movie that matches the title
    idx = indices[title]
    # A title that occurs more than once yields a Series; use its first row
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pairwise similarity of every movie with the query movie
    scores = np.asarray(cosine_sim[idx])

    # Descending order; the stable sort keeps dataset order among equal scores
    order = np.argsort(-scores, kind='stable')

    # Remove the query movie by position. It is NOT guaranteed to sort first:
    # movies with identical genres tie with it at the top score.
    order = order[order != idx]

    # Keep the same number of results as before (result_count - 1, never negative)
    limit = max(result_count - 1, 0)
    order = order[:limit]

    return [(movies['title'].iloc[i], scores[i]) for i in order]

# Gradio interface
def recommend_movies_cb(movie):
    """Gradio handler: format content-based recommendations for `movie` as text.

    Args:
        movie: Title selected in the dropdown; may be empty or None.

    Returns:
        A plain-text table with one "score  title" line per recommendation, or a
        short message when nothing was selected or the title is unknown.
    """
    if not movie:
        return "No movie selected. Please select one from the dropdown."

    # Same guard the collaborative-filtering handlers use; without it an unknown
    # title surfaces as a KeyError from the title lookup.
    if movie not in movies['title'].values:
        return f"Movie '{movie}' not found in the dataset."

    recommendations = get_cb_recommendations(movie)
    format_string = "{:>5.2f}       {:<20}"
    return "Score     Title\n" + "\n".join([format_string.format(score, title) for title, score in recommendations])


######################################
#
# Collaborative Filtering (Item-based)
#
######################################
# Pivot the long ratings table into a movie x user grid (rows: movieId,
# columns: userId); a missing rating is stored as 0
movie_user_matrix = (
    ratings
    .pivot(index='movieId', columns='userId', values='rating')
    .fillna(0)
)

# Pairwise cosine similarity between the movie rows
movie_similarity = cosine_similarity(movie_user_matrix)

# Wrap the similarity array so rows and columns can be addressed by movieId
movie_similarity_df = pd.DataFrame(
    data=movie_similarity,
    index=movie_user_matrix.index,
    columns=movie_user_matrix.index,
)

# Function to get movie recommendations using item-based collaborative filtering
def get_cf_recommendations(movie_title, movie_similarity_df=movie_similarity_df, movies=movies, n=result_count):
    """Return the n movies most similar to `movie_title` by rating pattern.

    Args:
        movie_title: Title of the query movie.
        movie_similarity_df: Square similarity table indexed (rows and columns) by movieId.
        movies: Table with at least 'movieId' and 'title' columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of (title, score) tuples in descending score order. The list is empty
        when the title is unknown or the movie has no row in the similarity table.
    """
    # Resolve the title to a movieId (first match when a title is repeated)
    matches = movies.loc[movies['title'] == movie_title, 'movieId']
    if matches.empty:
        return []
    movie_id = matches.iloc[0]

    # Check if the movie is in our similarity matrix
    if movie_id not in movie_similarity_df.index:
        return []

    # Drop the query movie by label instead of assuming it sorts first: other
    # movies can tie with it at the top score. mergesort is stable, so the order
    # among tied scores is deterministic.
    similar_scores = (
        movie_similarity_df.loc[movie_id]
        .drop(labels=movie_id)
        .sort_values(ascending=False, kind='mergesort')
        .iloc[:max(n, 0)]
    )

    # Build the movieId -> title lookup once rather than scanning `movies` per result
    title_by_id = movies.drop_duplicates('movieId').set_index('movieId')['title']

    # Skip ids that have ratings but no entry in `movies`
    return [
        (title_by_id[other_id], score)
        for other_id, score in similar_scores.items()
        if other_id in title_by_id.index
    ]

# Function for Gradio interface
def recommend_movies_cf(movie_title):
    """Gradio handler: render item-based CF recommendations as a text table.

    Returns a short message instead when no title was given or the title is
    not present in the `movies` table.
    """
    if not movie_title:
        return "No movie selected. Please select one from the dropdown."

    known_titles = movies['title'].values
    if movie_title not in known_titles:
        return f"Movie '{movie_title}' not found in the dataset."

    header = "Score     Title\n"
    row_template = "{:>5.2f}       {:<20}"

    # One formatted row per (title, score) pair
    rows = []
    for title, score in get_cf_recommendations(movie_title):
        rows.append(row_template.format(score, title))

    return header + "\n".join(rows)


######################################
#
# Collaborative Filtering with Neural Network (Item-based)
#
######################################

# Normalize the ratings before feeding them to the network
scaler = MinMaxScaler()
movie_user_matrix_scaled = scaler.fit_transform(movie_user_matrix)

# Define the autoencoder model: one input per user column, narrowed to an
# encoding_dim-wide bottleneck and widened back to the input size
input_dim = movie_user_matrix.shape[1]
encoding_dim = 32

input_layer = Input(shape=(input_dim,))
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(encoding_dim, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(input_dim, activation='sigmoid')(decoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

# Train the autoencoder to reproduce its own input (input and target are the same matrix)
autoencoder.fit(movie_user_matrix_scaled, movie_user_matrix_scaled,
                epochs=50, batch_size=64, shuffle=True, validation_split=0.2,
                verbose=0)

# Use the trained autoencoder to predict the complete matrix, then undo the scaling
predicted_matrix_scaled = autoencoder.predict(movie_user_matrix_scaled)
predicted_matrix = scaler.inverse_transform(predicted_matrix_scaled)

# Create a DataFrame with the predicted matrix
predicted_matrix_df = pd.DataFrame(predicted_matrix, index=movie_user_matrix.index, columns=movie_user_matrix.columns)

# Compute the cosine similarity between movies using the predicted matrix
movie_similarity_cfnn = cosine_similarity(predicted_matrix)

# Create a DataFrame with movie similarities.
# This must wrap movie_similarity_cfnn: wrapping the plain movie_similarity array
# here made the neural-network recommender return exactly the same results as
# the plain collaborative-filtering one.
movie_similarity_cfnn_df = pd.DataFrame(movie_similarity_cfnn, index=movie_user_matrix.index, columns=movie_user_matrix.index)

# Function to get movie recommendations from the neural-network variant's similarity matrix (movie_similarity_cfnn_df)
def get_cfnn_recommendations(movie_title, movie_similarity_df=movie_similarity_cfnn_df, movies=movies, n=result_count):
    """Return the n movies most similar to `movie_title` in `movie_similarity_df`.

    Args:
        movie_title: Title of the query movie.
        movie_similarity_df: Square similarity table indexed (rows and columns) by
            movieId; defaults to movie_similarity_cfnn_df.
        movies: Table with at least 'movieId' and 'title' columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of (title, score) tuples in descending score order. The list is empty
        when the title is unknown or the movie has no row in the similarity table.
    """
    # Resolve the title to a movieId (first match when a title is repeated)
    matches = movies.loc[movies['title'] == movie_title, 'movieId']
    if matches.empty:
        return []
    movie_id = matches.iloc[0]

    # Check if the movie is in our similarity matrix
    if movie_id not in movie_similarity_df.index:
        return []

    # Remove the query movie by label rather than by position: it is not
    # guaranteed to be the first entry after sorting when scores tie.
    # mergesort is stable, so the order among tied scores is deterministic.
    ranked = movie_similarity_df.loc[movie_id].drop(labels=movie_id)
    ranked = ranked.sort_values(ascending=False, kind='mergesort')
    ranked = ranked.iloc[:max(n, 0)]

    # Build the movieId -> title lookup once rather than scanning `movies` per result
    title_by_id = movies.drop_duplicates('movieId').set_index('movieId')['title']

    # Skip ids that have ratings but no entry in `movies`
    recommendations = []
    for other_id, score in ranked.items():
        if other_id in title_by_id.index:
            recommendations.append((title_by_id[other_id], score))

    return recommendations

# Function for Gradio interface
def recommend_movies_cfnn(movie_title):
    """Gradio handler: render get_cfnn_recommendations output as a text table.

    Returns a short message instead when no title was given or the title is
    not present in the `movies` table.
    """
    if not movie_title:
        return "No movie selected. Please select one from the dropdown."

    title_is_known = movie_title in movies['title'].values
    if not title_is_known:
        return f"Movie '{movie_title}' not found in the dataset."

    line_format = "{:>5.2f}       {:<20}"
    # Each recommendation is a (title, score) pair; the score is printed first
    body = "\n".join(
        line_format.format(score, title)
        for title, score in get_cfnn_recommendations(movie_title)
    )
    return "Score     Title\n" + body


######################################
#
# Gradio interface
#
######################################

# Create the list of movie titles for the dropdowns: a random sample of distinct
# titles. The sample size is clamped because random.sample raises ValueError when
# asked for more items than the population holds.
unique_titles = movies['title'].drop_duplicates().tolist()
movie_list = random.sample(unique_titles, min(input_count, len(unique_titles)))
total_movies = len(movies)

# Shared dropdown label; reports how many titles are actually listed
dropdown_label = f"Select a Movie (Total movies: {total_movies}, randomly list {len(movie_list)} for demo purpose.)"

# One tab per recommender; each tab pairs an explanation with a dropdown -> textbox interface
with gr.Blocks() as iface:
    with gr.Tab("Content-Based Filtering"):
        gr.Markdown("""## Movie Recommender - Content-Based Filtering
        How it works:
        * Use the 'genres' feature of movies, and convert genres into numerical vectors.
        * For a given movie, find the most similar movies based on the genre similarity.
        * This approach uses genres of movies only, without considering user preferences or viewing history.
        * Simple to implement and computationally efficient.
        """)
        gr.Interface(fn=recommend_movies_cb,
                     inputs=gr.Dropdown(movie_list, label=dropdown_label),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     description="Select a movie to get recommendations based on content filtering.")

    with gr.Tab("Collaborative Filtering"):
        gr.Markdown("""## Movie Recommender - Item-Based Collaborative Filtering
        How it works:
        * Create a movie-user matrix where rows represent movies and columns represent users, each cell contains the rating a user gave to a movie, or 0 if no rating exists.
        * Calculate the cosine similarity between movies based on their rating patterns, results in a movie-movie similarity matrix.
        * For a given movie, find the most similar movies based on this similarity matrix, and recommend these movies.
        * Simple to implement and computationally efficient, but doesn't handle sparsity well (when many missing ratings).
        """)
        gr.Interface(fn=recommend_movies_cf,
                     inputs=gr.Dropdown(movie_list, label=dropdown_label),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     description="Select a movie to get recommendations based on collaborative filtering.")

    with gr.Tab("Collaborative Filtering with Neural Network"):
        gr.Markdown("""## Movie Recommender - Item-Based Collaborative Filtering with Neural Network
        How it works:
        * Use a Neural Network to predict the missing values in the movie-user matrix to improve the collaborative filtering recommendations.
        * The NN model learns to reconstruct the movie-user matrix, effectively predicting missing ratings. This results in a dense, predicted movie-user matrix.
        * Calculate movie-movie similarities using the predicted matrix. And use this similarity matrix to find and recommend similar movies.
        * This approach often provides more accurate recommendations especially with large sparse datasets. But more complex to implement and require more computational resources.
        """)
        gr.Interface(fn=recommend_movies_cfnn,
                     inputs=gr.Dropdown(movie_list, label=dropdown_label),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     description="Select a movie to get recommendations based on collaborative filtering.")

# Launch the app
iface.launch()