{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "DDADPl-phDUC" }, "source": [ "# **Music recommender**" ] }, { "cell_type": "markdown", "metadata": { "id": "E7Cu5Fmqct7J" }, "source": [ "# **Load Data**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 540 }, "id": "bI8bNavbajsv", "outputId": "7cba8b5d-4a63-433f-be3c-87ce794833ba" }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. Please rerun this cell to enable.\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Saving music_data.csv to music_data.csv\n", " title \\\n", "0 100 Club 1996 ''We Love You Beatles'' - Live \n", "1 Yo Quiero Contigo \n", "4 Emerald \n", "6 Karma \n", "7 Money Blues \n", "\n", " release artist_name duration \\\n", "0 Sex Pistols - The Interviews Sex Pistols 88.73751 \n", "1 Sentenciados - Platinum Edition Baby Rasta & Gringo 167.36608 \n", "4 Emerald Bedrock 501.86404 \n", "6 The Diary Of Alicia Keys Alicia Keys 255.99955 \n", "7 Slidetime Joanna Connor 243.66975 \n", "\n", " artist_familiarity artist_hotttnesss year listeners playcount \\\n", "0 0.731184 0.549204 0 172 210 \n", "1 0.610186 0.355320 0 9753 16911 \n", "4 0.654039 0.390625 2004 973 2247 \n", "6 0.933916 0.778674 2003 250304 1028356 \n", "7 0.479218 0.332857 0 429 1008 \n", "\n", " tags \n", "0 The Beatles, title is a full sentence \n", "1 Reggaeton, alexis y fido, Eliana, mis videos, ... \n", "4 dance \n", "6 rnb, soul, Alicia Keys, female vocalists, Karma \n", "7 guitar girl, blues \n" ] } ], "source": [ "import pandas as pd\n", "from google.colab import files\n", "\n", "# Upload the file\n", "uploaded = files.upload()\n", "\n", "# Assuming the file is named \"music_data.csv\"\n", "data_path = \"music_data.csv\"\n", "\n", "# Load the data\n", "df = pd.read_csv(data_path)\n", "df.dropna(inplace=True)\n", "\n", "# Display the first few rows of the dataset\n", "print(df.head())\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "9E3in0U3dK5I", "outputId": "c1d5362a-6a33-4543-ff4d-4e11cf8220ec" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"df\",\n \"rows\": 5063,\n \"fields\": [\n {\n \"column\": \"title\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4854,\n \"samples\": [\n \"I Wish I Had A Girl\",\n \"Jump [Jacques Lu Cont Edit]\",\n \"Mulin' Around\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"release\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4187,\n \"samples\": [\n \"Le Bordel Magnifique\",\n \"Charlotte's Web (OST)\",\n \"X.O. 
Experience\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"artist_name\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2461,\n \"samples\": [\n \"Lee Ritenour\",\n \"Pennywise\",\n \"Anneli Drecker\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"duration\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 107.73289375974717,\n \"min\": 1.04444,\n \"max\": 1815.2224,\n \"num_unique_values\": 3939,\n \"samples\": [\n 294.24281,\n 240.79628,\n 115.53914\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"artist_familiarity\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.14886096792686204,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 2474,\n \"samples\": [\n 0.787098355481,\n 0.481771820142,\n 0.374024633035\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"artist_hotttnesss\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1347303774485448,\n \"min\": 0.0,\n \"max\": 1.08250255673,\n \"num_unique_values\": 2398,\n \"samples\": [\n 0.376018761952,\n 0.355667956383,\n 0.289970666912\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 917,\n \"min\": 0,\n \"max\": 2010,\n \"num_unique_values\": 69,\n \"samples\": [\n 1979,\n 0,\n 1965\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"listeners\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150513,\n \"min\": 0,\n \"max\": 2451482,\n \"num_unique_values\": 3914,\n \"samples\": [\n 781546,\n 6216,\n 396579\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"playcount\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1115103,\n \"min\": 0,\n \"max\": 23182516,\n \"num_unique_values\": 4422,\n \"samples\": [\n 62736,\n 1305,\n 17033\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tags\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4583,\n \"samples\": [\n \"dance, 90s, trance, House, jungle\",\n \"country, favorite songs, classic country, linedance, Martina McBride\",\n \"90s, heavy metal, thrash metal, metal, punk\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "df" }, "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlereleaseartist_namedurationartist_familiarityartist_hotttnesssyearlistenersplaycounttags
0100 Club 1996 ''We Love You Beatles'' - LiveSex Pistols - The InterviewsSex Pistols88.737510.7311840.5492040172210The Beatles, title is a full sentence
1Yo Quiero ContigoSentenciados - Platinum EditionBaby Rasta & Gringo167.366080.6101860.3553200975316911Reggaeton, alexis y fido, Eliana, mis videos, ...
4EmeraldEmeraldBedrock501.864040.6540390.39062520049732247dance
6KarmaThe Diary Of Alicia KeysAlicia Keys255.999550.9339160.77867420032503041028356rnb, soul, Alicia Keys, female vocalists, Karma
7Money BluesSlidetimeJoanna Connor243.669750.4792180.33285704291008guitar girl, blues
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "text/plain": [ " title \\\n", "0 100 Club 1996 ''We Love You Beatles'' - Live \n", "1 Yo Quiero Contigo \n", "4 Emerald \n", "6 Karma \n", "7 Money Blues \n", "\n", " release artist_name duration \\\n", "0 Sex Pistols - The Interviews Sex Pistols 88.73751 \n", "1 Sentenciados - Platinum Edition Baby Rasta & Gringo 167.36608 \n", "4 Emerald Bedrock 501.86404 \n", "6 The Diary Of Alicia Keys Alicia Keys 255.99955 \n", "7 Slidetime Joanna Connor 243.66975 \n", "\n", " artist_familiarity artist_hotttnesss year listeners playcount \\\n", "0 0.731184 0.549204 0 172 210 \n", "1 0.610186 0.355320 0 9753 16911 \n", "4 0.654039 0.390625 2004 973 2247 \n", "6 0.933916 0.778674 2003 250304 1028356 \n", "7 0.479218 0.332857 0 429 1008 \n", "\n", " tags \n", "0 The Beatles, title is a full sentence \n", "1 Reggaeton, alexis y fido, Eliana, mis videos, ... \n", "4 dance \n", "6 rnb, soul, Alicia Keys, female vocalists, Karma \n", "7 guitar girl, blues " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "b_sSacbdHcn6", "outputId": "f745b028-fd97-4b19-b9f0-9e041621e5d3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 5063 entries, 0 to 9530\n", "Data columns (total 10 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 title 5063 non-null object \n", " 1 release 5063 non-null object \n", " 2 artist_name 5063 non-null object \n", " 3 duration 5063 non-null float64\n", " 4 artist_familiarity 5063 non-null float64\n", " 5 artist_hotttnesss 5063 non-null float64\n", " 6 year 5063 non-null int64 \n", " 7 listeners 5063 non-null int64 \n", " 8 playcount 5063 non-null int64 \n", " 9 tags 5063 non-null object \n", "dtypes: float64(3), int64(3), object(4)\n", "memory usage: 435.1+ KB\n", "None\n", " duration artist_familiarity artist_hotttnesss year \\\n", "count 5063.000000 5063.000000 5063.000000 5063.000000 \n", "mean 243.156073 0.626861 0.439664 1392.483705 \n", "std 107.732894 0.148861 0.134730 917.360336 \n", "min 1.044440 0.000000 0.000000 0.000000 \n", "25% 183.535870 0.527033 0.363132 0.000000 \n", "50% 229.145670 0.619531 0.417819 1993.000000 \n", "75% 280.920365 0.731184 0.510325 2004.000000 \n", "max 1815.222400 1.000000 1.082503 2010.000000 \n", "\n", " listeners playcount \n", "count 5.063000e+03 5.063000e+03 \n", "mean 4.526352e+04 2.622274e+05 \n", "std 1.505135e+05 1.115104e+06 \n", "min 0.000000e+00 0.000000e+00 \n", "25% 7.545000e+02 1.894500e+03 \n", "50% 3.387000e+03 9.439000e+03 \n", "75% 1.787350e+04 6.269500e+04 \n", "max 2.451482e+06 2.318252e+07 \n", "Unique values in 'title': 4854\n", "Unique values in 'artist_name': 2461\n", "Unique values in 'tags': 4583\n" ] } ], "source": [ "# Display basic information about the dataset\n", "print(df.info())\n", "\n", "# Display summary statistics for numerical columns\n", "print(df.describe())\n", "\n", "# Display unique values for categorical columns\n", "print(\"Unique values in 'title':\", df['title'].nunique())\n", "print(\"Unique values in 'artist_name':\", df['artist_name'].nunique())\n", "print(\"Unique values in 'tags':\", df['tags'].nunique())" ] }, { "cell_type": "markdown", "metadata": { "id": "wPVFDtk9g9ox" }, "source": [ "# **Preprocessing**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": 
"3fsU1IvylyZg", "outputId": "c2ba3adc-c077-454a-94de-ca9bb0ba4807" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Label encoders and scaler saved successfully.\n" ] } ], "source": [ "import pandas as pd\n", "from sklearn.preprocessing import LabelEncoder, MinMaxScaler\n", "import joblib\n", "import re\n", "\n", "# Function to clean tags and artist names\n", "def clean_text(text):\n", " # Convert to lowercase\n", " text = text.lower()\n", " # Remove special characters and digits\n", " text = re.sub(r'[^a-zA-Z\\s]', '', text)\n", " # Remove extra white spaces\n", " text = re.sub(r'\\s+', ' ', text).strip()\n", " return text\n", "\n", "# Clean 'tags' and 'artist_name' columns\n", "df['tags'] = df['tags'].apply(clean_text)\n", "df['artist_name'] = df['artist_name'].apply(clean_text)\n", "\n", "def label_encode_data(df):\n", " df = df.copy(deep=True)\n", " label_encoders = {}\n", " unknown_label = 'unknown' # Define an unknown label\n", "\n", " for column in ['tags', 'title', 'artist_name']:\n", " le = LabelEncoder()\n", " unique_categories = df[column].unique().tolist()\n", " unique_categories.append(unknown_label)\n", " le.fit(unique_categories)\n", " df[column] = le.transform(df[column].astype(str))\n", " label_encoders[column] = le\n", "\n", " return df, label_encoders\n", "\n", "# Normalize numerical features\n", "scaler = MinMaxScaler()\n", "df[['listeners', 'playcount']] = scaler.fit_transform(df[['listeners', 'playcount']])\n", "\n", "# Label encode categorical features\n", "df_scaled, label_encoders = label_encode_data(df)\n", "\n", "# Save the encoders and scaler\n", "joblib.dump(label_encoders, \"/content/new_label_encoders.joblib\")\n", "joblib.dump(scaler, \"/content/new_scaler.joblib\")\n", "\n", "print(\"Label encoders and scaler saved successfully.\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JBWZWp_8Jr82", "outputId": "73a312c1-3615-4a87-965b-c2fc41fc50e7" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Data split into training and testing sets.\n", "Maximum value in y_train: 4854\n", "Maximum value in y_test: 4850\n", "Number of unique titles: 4855\n", "Maximum value in y_train after clipping: 4854\n", "Maximum value in y_test after clipping: 4850\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "# Split data into features and target\n", "X = df_scaled[['tags', 'artist_name']]\n", "y = df_scaled['title']\n", "\n", "# Split the dataset into training and testing sets\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "print(\"Data split into training and testing sets.\")\n", "\n", "# Number of unique titles\n", "num_unique_titles = len(label_encoders['title'].classes_)\n", "\n", "# Check for out-of-bounds indices in y_train and y_test\n", "print(\"Maximum value in y_train:\", y_train.max())\n", "print(\"Maximum value in y_test:\", y_test.max())\n", "print(\"Number of unique titles:\", num_unique_titles)\n", "\n", "# If any out-of-bounds values are found, print them\n", "out_of_bounds_train = y_train[y_train >= num_unique_titles]\n", "out_of_bounds_test = y_test[y_test >= num_unique_titles]\n", "\n", "if not out_of_bounds_train.empty:\n", " print(\"Out-of-bounds values in y_train:\", out_of_bounds_train)\n", "if not out_of_bounds_test.empty:\n", " print(\"Out-of-bounds values in y_test:\", out_of_bounds_test)\n", "\n", "# Fix out-of-bounds values by setting them 
to a valid index\n", "y_train = y_train.clip(upper=num_unique_titles - 1)\n", "y_test = y_test.clip(upper=num_unique_titles - 1)\n", "\n", "# Print the maximum values after clipping\n", "print(\"Maximum value in y_train after clipping:\", y_train.max())\n", "print(\"Maximum value in y_test after clipping:\", y_test.max())\n" ] }, { "cell_type": "markdown", "metadata": { "id": "syYhdUbxgA-K" }, "source": [ "# **Training**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "aaR1IGymKQq2", "outputId": "9e5115a5-1a75-4672-a0b3-4fdd314e1a79" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1, Training Loss: 8.921830113728841, Validation Loss: 8.836441385979747\n", "Epoch 2, Training Loss: 8.331391870239635, Validation Loss: 9.148561271966672\n", "Epoch 3, Training Loss: 7.494005516429007, Validation Loss: 10.484928570541681\n", "Epoch 4, Training Loss: 6.704833826606657, Validation Loss: 11.745069999320835\n", "Early stopping triggered\n", "Improved model trained and saved successfully.\n" ] } ], "source": [ "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "from torch.utils.data import DataLoader\n", "import numpy as np\n", "\n", "# Define the neural network model with Dropout and Batch Normalization\n", "class ImprovedSongRecommender(nn.Module):\n", " def __init__(self, input_size, num_titles):\n", " super(ImprovedSongRecommender, self).__init__()\n", " self.fc1 = nn.Linear(input_size, 128)\n", " self.bn1 = nn.BatchNorm1d(128)\n", " self.fc2 = nn.Linear(128, 256)\n", " self.bn2 = nn.BatchNorm1d(256)\n", " self.fc3 = nn.Linear(256, 128)\n", " self.bn3 = nn.BatchNorm1d(128)\n", " self.output = nn.Linear(128, num_titles)\n", " self.dropout = nn.Dropout(0.5)\n", "\n", " def forward(self, x):\n", " x = torch.relu(self.bn1(self.fc1(x)))\n", " x = self.dropout(x)\n", " x = torch.relu(self.bn2(self.fc2(x)))\n", " x = self.dropout(x)\n", " x = torch.relu(self.bn3(self.fc3(x)))\n", " x = self.dropout(x)\n", " x = self.output(x)\n", " return x\n", "\n", "# Adjusting input size for the model\n", "input_size = X_train.shape[1] # Number of features in the input\n", "num_unique_titles = len(label_encoders['title'].classes_) # Number of unique titles including 'unknown'\n", "\n", "# Initialize the model with the correct input size and output size\n", "model = ImprovedSongRecommender(input_size, num_unique_titles)\n", "\n", "# Initialize the optimizer and loss function\n", "optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)\n", "criterion = nn.CrossEntropyLoss()\n", "\n", "# Use a learning rate scheduler\n", "scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)\n", "\n", "# Early stopping parameters\n", "patience = 3\n", "min_delta = 0.01\n", "best_val_loss = np.inf\n", "patience_counter = 0\n", "\n", "# Function to train the model\n", "def train_model(model, X_train, y_train, X_test, y_test):\n", " global best_val_loss, patience_counter\n", " train_loader = DataLoader(list(zip(X_train.values.astype(float), y_train)), batch_size=10, shuffle=True)\n", " test_loader = DataLoader(list(zip(X_test.values.astype(float), y_test)), batch_size=10, shuffle=False)\n", "\n", " model.train()\n", " for epoch in range(20): # Increase the number of epochs\n", " train_loss = 0\n", " for features, labels in train_loader:\n", " optimizer.zero_grad()\n", " outputs = model(features.float())\n", " loss = criterion(outputs, labels.long())\n", " 
loss.backward()\n", " optimizer.step()\n", " train_loss += loss.item()\n", "\n", " # Step the scheduler\n", " scheduler.step()\n", "\n", " # Validation phase\n", " model.eval()\n", " validation_loss = 0\n", " with torch.no_grad():\n", " for features, labels in test_loader:\n", " outputs = model(features.float())\n", " loss = criterion(outputs, labels.long())\n", " validation_loss += loss.item()\n", "\n", " avg_val_loss = validation_loss / len(test_loader)\n", " print(f'Epoch {epoch+1}, Training Loss: {train_loss / len(train_loader)}, Validation Loss: {avg_val_loss}')\n", "\n", " # Early stopping\n", " if avg_val_loss < best_val_loss - min_delta:\n", " best_val_loss = avg_val_loss\n", " patience_counter = 0\n", " else:\n", " patience_counter += 1\n", " if patience_counter >= patience:\n", " print(\"Early stopping triggered\")\n", " break\n", "\n", "# Train the model\n", "train_model(model, X_train, y_train, X_test, y_test)\n", "\n", "# Save the trained model\n", "model_path = '/content/improved_model.pth'\n", "torch.save(model.state_dict(), model_path)\n", "\n", "print(\"Improved model trained and saved successfully.\")\n" ] }, { "cell_type": "markdown", "metadata": { "id": "g4hJVlNXf5Vu" }, "source": [ "# **Testing**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KwqV-HnCOvtz", "outputId": "d412ce92-3ab8-4f3d-df83-22ef9e857203" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Recommendations: ['Betrayal Is A Symptom', 'The Earth Will Shake', 'Saturday', 'Firehouse Rock', 'Breathe Easy']\n" ] } ], "source": [ "import torch\n", "from joblib import load\n", "\n", "# Define the same neural network model\n", "class ImprovedSongRecommender(nn.Module):\n", " def __init__(self, input_size, num_titles):\n", " super(ImprovedSongRecommender, self).__init__()\n", " self.fc1 = nn.Linear(input_size, 128)\n", " self.bn1 = nn.BatchNorm1d(128)\n", " self.fc2 = nn.Linear(128, 256)\n", " self.bn2 = nn.BatchNorm1d(256)\n", " self.fc3 = nn.Linear(256, 128)\n", " self.bn3 = nn.BatchNorm1d(128)\n", " self.output = nn.Linear(128, num_titles)\n", " self.dropout = nn.Dropout(0.5)\n", "\n", " def forward(self, x):\n", " x = torch.relu(self.bn1(self.fc1(x)))\n", " x = self.dropout(x)\n", " x = torch.relu(self.bn2(self.fc2(x)))\n", " x = self.dropout(x)\n", " x = torch.relu(self.bn3(self.fc3(x)))\n", " x = self.dropout(x)\n", " x = self.output(x)\n", " return x\n", "\n", "# Load the trained model\n", "model_path = '/content/improved_model.pth'\n", "num_unique_titles = 4855 # Update this to match your dataset\n", "\n", "model = ImprovedSongRecommender(input_size=2, num_titles=num_unique_titles) # Adjust input size accordingly\n", "model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))\n", "model.eval()\n", "\n", "# Load the label encoders and scaler\n", "label_encoders_path = '/content/new_label_encoders.joblib'\n", "scaler_path = '/content/new_scaler.joblib'\n", "\n", "label_encoders = load(label_encoders_path)\n", "scaler = load(scaler_path)\n", "\n", "# Create a mapping from encoded indices to actual song titles\n", "index_to_song_title = {index: title for index, title in enumerate(label_encoders['title'].classes_)}\n", "\n", "def encode_input(tags, artist_name):\n", " tags = tags.strip().replace('\\n', '')\n", " artist_name = artist_name.strip().replace('\\n', '')\n", "\n", " try:\n", " encoded_tags = label_encoders['tags'].transform([tags])[0]\n", " except ValueError:\n", " encoded_tags = 
label_encoders['tags'].transform(['unknown'])[0]\n", "\n", " try:\n", " encoded_artist = label_encoders['artist_name'].transform([artist_name])[0]\n", " except ValueError:\n", " encoded_artist = label_encoders['artist_name'].transform(['unknown'])[0]\n", "\n", " return [encoded_tags, encoded_artist]\n", "\n", "def recommend_songs(tags, artist_name):\n", " encoded_input = encode_input(tags, artist_name)\n", " input_tensor = torch.tensor([encoded_input]).float()\n", "\n", " with torch.no_grad():\n", " output = model(input_tensor)\n", "\n", " recommendations_indices = torch.topk(output, 5).indices.squeeze().tolist()\n", " recommendations = [index_to_song_title.get(idx, \"Unknown song\") for idx in recommendations_indices]\n", "\n", " return recommendations\n", "\n", "# Test the recommendation function\n", "tags = \"rock\"\n", "artist_name = \"The Beatles\"\n", "\n", "recommendations = recommend_songs(tags, artist_name)\n", "print(\"Recommendations:\", recommendations)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3HzLKv5mPxOv", "outputId": "62b37d04-4857-44fb-b5c4-8ead55db9b1a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Recommendations: ['Betrayal Is A Symptom', 'Carnival (from \"Black Orpheus\")', 'Saturday', 'The Earth Will Shake', 'Start!']\n", "Recommendations: ['Old Friends', 'Betrayal Is A Symptom', 'Between Love & Hate', 'Carnival (from \"Black Orpheus\")', 'Satin Doll']\n" ] } ], "source": [ "import torch\n", "from joblib import load\n", "\n", "# Define the same neural network model\n", "class ImprovedSongRecommender(nn.Module):\n", " def __init__(self, input_size, num_titles):\n", " super(ImprovedSongRecommender, self).__init__()\n", " self.fc1 = nn.Linear(input_size, 128)\n", " self.bn1 = nn.BatchNorm1d(128)\n", " self.fc2 = nn.Linear(128, 256)\n", " self.bn2 = nn.BatchNorm1d(256)\n", " self.fc3 = nn.Linear(256, 128)\n", " self.bn3 = nn.BatchNorm1d(128)\n", " self.output = nn.Linear(128, num_titles)\n", " self.dropout = nn.Dropout(0.5)\n", "\n", " def forward(self, x):\n", " x = torch.relu(self.bn1(self.fc1(x)))\n", " x = self.dropout(x)\n", " x = torch.relu(self.bn2(self.fc2(x)))\n", " x = self.dropout(x)\n", " x = torch.relu(self.bn3(self.fc3(x)))\n", " x = self.dropout(x)\n", " x = self.output(x)\n", " return x\n", "\n", "# Load the trained model\n", "model_path = '/content/improved_model.pth'\n", "num_unique_titles = 4855 # Update this to match your dataset\n", "\n", "model = ImprovedSongRecommender(input_size=2, num_titles=num_unique_titles) # Adjust input size accordingly\n", "model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))\n", "model.eval()\n", "\n", "# Load the label encoders and scaler\n", "label_encoders_path = '/content/new_label_encoders.joblib'\n", "scaler_path = '/content/new_scaler.joblib'\n", "\n", "label_encoders = load(label_encoders_path)\n", "scaler = load(scaler_path)\n", "\n", "# Create a mapping from encoded indices to actual song titles\n", "index_to_song_title = {index: title for index, title in enumerate(label_encoders['title'].classes_)}\n", "\n", "def encode_input(tags, artist_name):\n", " tags = tags.strip().replace('\\n', '')\n", " artist_name = artist_name.strip().replace('\\n', '')\n", "\n", " try:\n", " encoded_tags = label_encoders['tags'].transform([tags])[0]\n", " except ValueError:\n", " encoded_tags = label_encoders['tags'].transform(['unknown'])[0]\n", "\n", " try:\n", " encoded_artist = 
label_encoders['artist_name'].transform([artist_name])[0]\n", " except ValueError:\n", " encoded_artist = label_encoders['artist_name'].transform(['unknown'])[0]\n", "\n", " return [encoded_tags, encoded_artist]\n", "\n", "def recommend_songs(tags, artist_name):\n", " encoded_input = encode_input(tags, artist_name)\n", " input_tensor = torch.tensor([encoded_input]).float()\n", "\n", " with torch.no_grad():\n", " output = model(input_tensor)\n", "\n", " recommendations_indices = torch.topk(output, 5).indices.squeeze().tolist()\n", " recommendations = [index_to_song_title.get(idx, \"Unknown song\") for idx in recommendations_indices]\n", "\n", " return recommendations\n", "\n", "# Test the recommendation function with new inputs\n", "tags = \"pop\"\n", "artist_name = \"Adele\"\n", "\n", "recommendations = recommend_songs(tags, artist_name)\n", "print(\"Recommendations:\", recommendations)\n", "\n", "# Test with another set of inputs\n", "tags = \"jazz\"\n", "artist_name = \"Miles Davis\"\n", "\n", "recommendations = recommend_songs(tags, artist_name)\n", "print(\"Recommendations:\", recommendations)\n" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python", "version": "3.8.1" } }, "nbformat": 4, "nbformat_minor": 0 }