{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "SKRYfHwWyVaG"
      },
      "outputs": [],
      "source": [
        "# Importing Libraries\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import sklearn\n",
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns\n",
        "import warnings\n",
        "warnings.simplefilter(action='ignore', category=FutureWarning)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#loading rating dataset\n",
        "ratings = pd.read_csv(\"https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv\")\n",
        "print(ratings.head())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "v_ZFn93Wy1ho",
        "outputId": "97f98476-d909-4050-bc37-68369391d756"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "   userId  movieId  rating  timestamp\n",
            "0       1        1     4.0  964982703\n",
            "1       1        3     4.0  964981247\n",
            "2       1        6     4.0  964982224\n",
            "3       1       47     5.0  964983815\n",
            "4       1       50     5.0  964982931\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# loading movie dataset\n",
        "movies = pd.read_csv(\"https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv\")\n",
        "print(movies.head())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_RCPOQWfy269",
        "outputId": "4c3c68a1-dbbb-4795-d96a-4f9c11d3731b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "   movieId                               title  \\\n",
            "0        1                    Toy Story (1995)   \n",
            "1        2                      Jumanji (1995)   \n",
            "2        3             Grumpier Old Men (1995)   \n",
            "3        4            Waiting to Exhale (1995)   \n",
            "4        5  Father of the Bride Part II (1995)   \n",
            "\n",
            "                                        genres  \n",
            "0  Adventure|Animation|Children|Comedy|Fantasy  \n",
            "1                   Adventure|Children|Fantasy  \n",
            "2                               Comedy|Romance  \n",
            "3                         Comedy|Drama|Romance  \n",
            "4                                       Comedy  \n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "n_ratings = len(ratings)\n",
        "n_movies = len(ratings['movieId'].unique())\n",
        "n_users = len(ratings['userId'].unique())\n",
        "\n",
        "print(f\"Number of ratings: {n_ratings}\")\n",
        "print(f\"Number of unique movieId's: {n_movies}\")\n",
        "print(f\"Number of unique users: {n_users}\")\n",
        "print(f\"Average ratings per user: {round(n_ratings/n_users, 2)}\")\n",
        "print(f\"Average ratings per movie: {round(n_ratings/n_movies, 2)}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ypivRYgqy4kb",
        "outputId": "360eef9e-9186-4ed6-ed50-fe8e6a3fabf0"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Number of ratings: 100836\n",
            "Number of unique movieId's: 9724\n",
            "Number of unique users: 610\n",
            "Average ratings per user: 165.3\n",
            "Average ratings per movie: 10.37\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "user_freq = ratings[['userId', 'movieId']].groupby(\n",
        "    'userId').count().reset_index()\n",
        "user_freq.columns = ['userId', 'n_ratings']\n",
        "print(user_freq.head())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "PYZsye4-zAfi",
        "outputId": "fb38061d-d9bc-4552-de9b-de418780ec32"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "   userId  n_ratings\n",
            "0       1        232\n",
            "1       2         29\n",
            "2       3         39\n",
            "3       4        216\n",
            "4       5         44\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Find Lowest and Highest rated movies:\n",
        "mean_rating = ratings.groupby('movieId')[['rating']].mean()\n",
        "# Lowest rated movies\n",
        "lowest_rated = mean_rating['rating'].idxmin()\n",
        "movies.loc[movies['movieId'] == lowest_rated]\n",
        "# Highest rated movies\n",
        "highest_rated = mean_rating['rating'].idxmax()\n",
        "movies.loc[movies['movieId'] == highest_rated]\n",
        "# show number of people who rated movies rated movie highest\n",
        "ratings[ratings['movieId']==highest_rated]\n",
        "# show number of people who rated movies rated movie lowest\n",
        "ratings[ratings['movieId']==lowest_rated]\n",
        "\n",
        "## the above movies has very low dataset. We will use bayesian average\n",
        "movie_stats = ratings.groupby('movieId')[['rating']].agg(['count', 'mean'])\n",
        "movie_stats.columns = movie_stats.columns.droplevel()"
      ],
      "metadata": {
        "id": "H1s9d6QIzBzv"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Now, we create user-item matrix using scipy csr matrix\n",
        "from scipy.sparse import csr_matrix\n",
        "\n",
        "def create_matrix(df):\n",
        "\n",
        "    N = len(df['userId'].unique())\n",
        "    M = len(df['movieId'].unique())\n",
        "\n",
        "    # Map Ids to indices\n",
        "    user_mapper = dict(zip(np.unique(df[\"userId\"]), list(range(N))))\n",
        "    movie_mapper = dict(zip(np.unique(df[\"movieId\"]), list(range(M))))\n",
        "\n",
        "    # Map indices to IDs\n",
        "    user_inv_mapper = dict(zip(list(range(N)), np.unique(df[\"userId\"])))\n",
        "    movie_inv_mapper = dict(zip(list(range(M)), np.unique(df[\"movieId\"])))\n",
        "\n",
        "    user_index = [user_mapper[i] for i in df['userId']]\n",
        "    movie_index = [movie_mapper[i] for i in df['movieId']]\n",
        "\n",
        "    X = csr_matrix((df[\"rating\"], (movie_index, user_index)), shape=(M, N))\n",
        "\n",
        "    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper\n",
        "\n",
        "X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)"
      ],
      "metadata": {
        "id": "2tG23gzjzDLg"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\"\"\"\n",
        "Find similar movies using KNN\n",
        "\"\"\"\n",
        "from sklearn.neighbors import NearestNeighbors\n",
        "def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):\n",
        "\n",
        "    neighbour_ids = []\n",
        "\n",
        "    movie_ind = movie_mapper[movie_id]\n",
        "    movie_vec = X[movie_ind]\n",
        "    k+=1\n",
        "    kNN = NearestNeighbors(n_neighbors=k, algorithm=\"brute\", metric=metric)\n",
        "    kNN.fit(X)\n",
        "    movie_vec = movie_vec.reshape(1,-1)\n",
        "    neighbour = kNN.kneighbors(movie_vec, return_distance=show_distance)\n",
        "    for i in range(0,k):\n",
        "        n = neighbour.item(i)\n",
        "        neighbour_ids.append(movie_inv_mapper[n])\n",
        "    neighbour_ids.pop(0)\n",
        "    return neighbour_ids\n",
        "\n",
        "\n",
        "movie_titles = dict(zip(movies['movieId'], movies['title']))\n",
        "\n",
        "movie_id = 3\n",
        "\n",
        "similar_ids = find_similar_movies(movie_id, X, k=10)\n",
        "movie_title = movie_titles[movie_id]\n",
        "\n",
        "print(f\"Since you watched {movie_title}\")\n",
        "for i in similar_ids:\n",
        "    print(movie_titles[i])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "onBGmGk5zGAZ",
        "outputId": "c5a409ff-c16d-413f-9339-a2ff977eef69"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Since you watched Grumpier Old Men (1995)\n",
            "Grumpy Old Men (1993)\n",
            "Striptease (1996)\n",
            "Nutty Professor, The (1996)\n",
            "Twister (1996)\n",
            "Father of the Bride Part II (1995)\n",
            "Broken Arrow (1996)\n",
            "Bio-Dome (1996)\n",
            "Truth About Cats & Dogs, The (1996)\n",
            "Sabrina (1995)\n",
            "Birdcage, The (1996)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10):\n",
        "    df1 = ratings[ratings['userId'] == user_id]\n",
        "\n",
        "    if df1.empty:\n",
        "        print(f\"User with ID {user_id} does not exist.\")\n",
        "        return\n",
        "\n",
        "    movie_id = df1[df1['rating'] == max(df1['rating'])]['movieId'].iloc[0]\n",
        "\n",
        "    movie_titles = dict(zip(movies['movieId'], movies['title']))\n",
        "\n",
        "    similar_ids = find_similar_movies(movie_id, X, k)\n",
        "    movie_title = movie_titles.get(movie_id, \"Movie not found\")\n",
        "\n",
        "    if movie_title == \"Movie not found\":\n",
        "        print(f\"Movie with ID {movie_id} not found.\")\n",
        "        return\n",
        "\n",
        "    print(f\"Since you watched {movie_title}, you might also like:\")\n",
        "    for i in similar_ids:\n",
        "        print(movie_titles.get(i, \"Movie not found\"))"
      ],
      "metadata": {
        "id": "PrN_SjhMzHxy"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "user_id = 150  # Replace with the desired user ID\n",
        "recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "L13wNuTOzJry",
        "outputId": "5316c18d-8323-4fc8-8ed0-a503edf93f29"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Since you watched Twelve Monkeys (a.k.a. 12 Monkeys) (1995), you might also like:\n",
            "Pulp Fiction (1994)\n",
            "Terminator 2: Judgment Day (1991)\n",
            "Independence Day (a.k.a. ID4) (1996)\n",
            "Seven (a.k.a. Se7en) (1995)\n",
            "Fargo (1996)\n",
            "Fugitive, The (1993)\n",
            "Usual Suspects, The (1995)\n",
            "Jurassic Park (1993)\n",
            "Star Wars: Episode IV - A New Hope (1977)\n",
            "Heat (1995)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "user_id = 415  # Replace with the desired user ID\n",
        "recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pEZ5ISP8zLB1",
        "outputId": "e61b8c3f-db5d-4c49-b876-5bc19a490ce4"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Since you watched Pulp Fiction (1994), you might also like:\n",
            "Silence of the Lambs, The (1991)\n",
            "Shawshank Redemption, The (1994)\n",
            "Seven (a.k.a. Se7en) (1995)\n",
            "Forrest Gump (1994)\n",
            "Usual Suspects, The (1995)\n",
            "Braveheart (1995)\n",
            "Fight Club (1999)\n",
            "Fargo (1996)\n",
            "Terminator 2: Judgment Day (1991)\n",
            "Reservoir Dogs (1992)\n"
          ]
        }
      ]
    }
  ]
}