File size: 46,253 Bytes
9dd7d9c
1
2
{"cells":[{"cell_type":"markdown","metadata":{"id":"5ByvVHnFr-s1"},"source":["Get million song subset data song list\n","Get metadata and join the data\n","\n","use artist similarity and artists to train the model on similarity\n","\n","use last.fm to get additional data on each song to augment this"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1048,"status":"ok","timestamp":1715387243880,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"dO4mavAdsELi","outputId":"d0064229-e1a9-4875-e8f1-ee2b19e36855"},"outputs":[{"ename":"ModuleNotFoundError","evalue":"No module named 'google.colab'","output_type":"error","traceback":["\u001b[1;31m---------------------------------------------------------------------------\u001b[0m","\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)","Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgoogle\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcolab\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m drive\n\u001b[0;32m      2\u001b[0m drive\u001b[38;5;241m.\u001b[39mmount(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/content/drive\u001b[39m\u001b[38;5;124m'\u001b[39m)\n","\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'google.colab'"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":7109,"status":"ok","timestamp":1715387250988,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"RktUo1FTsTm4","outputId":"ef21ad0f-4c34-4693-f5de-e6f5617465d7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Requirement already satisfied: python-dotenv in /usr/local/lib/python3.10/dist-packages (1.0.1)\n"]}],"source":["!pip install python-dotenv"]},{"cell_type":"code","execution_count":3,"metadata":{"executionInfo":{"elapsed":1591,"status":"ok","timestamp":1715387252577,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"AkHtP67Sr-s2"},"outputs":[],"source":["# imports\n","import pandas as pd\n","import h5py\n","import os\n","from sqlalchemy import create_engine\n","import requests\n","import time\n","from dotenv import load_dotenv"]},{"cell_type":"code","execution_count":4,"metadata":{"executionInfo":{"elapsed":3,"status":"ok","timestamp":1715387252577,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"vIJPMBNFr-s3"},"outputs":[],"source":["pd.set_option('display.max_rows', 100)"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"executionInfo":{"elapsed":2,"status":"ok","timestamp":1715387252577,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"kShsPDUoW0Tm","outputId":"5c3cb25e-e38f-4d9d-9102-1e18fd73618a"},"outputs":[{"data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'/content'"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["os.getcwd()"]},{"cell_type":"code","execution_count":6,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1715387252577,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"kmdF3bjKW1kz"},"outputs":[],"source":["os.chdir('/content/drive/MyDrive/CMPE-258: Team Neurobytes/Neurobytes/mlops/notebooks')"]},{"cell_type":"code","execution_count":7,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":318,"status":"ok","timestamp":1715387252893,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"JKeiMiYMWwpM","outputId":"1eee2de4-7144-45fb-9269-a7eed567bc2d"},"outputs":[{"name":"stdout","output_type":"stream","text":["label_encoder.joblib  model_training.ipynb  scaler.joblib\t    tracks_eda.ipynb\n","model.pth\t      README.md\t\t    test_spotify_api.ipynb  users_eda.ipynb\n"]}],"source":["! ls"]},{"cell_type":"markdown","metadata":{"id":"sk1jv62kr-s3"},"source":["# Loading Data"]},{"cell_type":"markdown","metadata":{"id":"rTCKoervr-s3"},"source":["## Loading million song subset data"]},{"cell_type":"code","execution_count":9,"metadata":{"executionInfo":{"elapsed":162,"status":"ok","timestamp":1715364589548,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"zdtzGTb4r-s3"},"outputs":[],"source":["# load the data (only loading song_id, metadata contains the rest)\n","def read_song_features(file_path):\n","    with h5py.File(file_path, 'r') as f:\n","        song_id = f['metadata']['songs']['song_id'][0].decode('utf-8')\n","        return {'song_id': song_id}\n","\n","\n","# process all files in a directory into a df\n","def process_all_files_to_dataframe(root_dir):\n","    data = []\n","    print(f\"Checking directory: {root_dir}\")\n","\n","    for subdir, dirs, files in os.walk(root_dir):\n","        print(f\"Currently scanning {subdir} with {len(files)} files\")\n","        for file in files:\n","            if file.endswith('.h5'):\n","                file_path = os.path.join(subdir, file)\n","                print(f\"Processing file: {file_path}\")\n","                song_data = read_song_features(file_path)\n","                data.append(song_data)\n","\n","    if not data:\n","        print(\"No data to process.\")\n","\n","    df = pd.DataFrame(data)\n","    return df"]},{"cell_type":"code","execution_count":11,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":163,"status":"ok","timestamp":1715364643278,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"4WPOjZXOr-s3","outputId":"8ebedd7f-7741-4d5f-d46b-8f8399ff1c54"},"outputs":[{"name":"stdout","output_type":"stream","text":["Checking directory: ../../../data/\n","No data to process.\n"]}],"source":["root_dir = 'data/MillionSongSubset'\n","df = process_all_files_to_dataframe(root_dir)"]},{"cell_type":"markdown","metadata":{"id":"cM5Cf9MEr-s3"},"source":["### Loading million song subset metadata from sqlite db"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JBos93r4r-s3"},"outputs":[],"source":["# load metadata from sqlite\n","def load_data_from_sqlite(db_path, table_name):\n","    engine = create_engine(f'sqlite:///{db_path}')\n","    query = f\"SELECT * FROM {table_name}\"\n","    df = pd.read_sql_query(query, engine)\n","    return df\n","\n","# load metadata and merge with song data\n","db_path3 = 'data/MillionSongSubsetMetadata/track_metadata.db'\n","df3 = load_data_from_sqlite(db_path3, 'songs')\n","df = df.merge(df3, on='song_id', how='left')\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"B2zIZ4T6r-s4"},"outputs":[],"source":["columns_to_drop = ['track_id', 'artist_id', 'song_id', 'artist_mbid', 'track_7digitalid', 'shs_perf', 'shs_work']\n","\n","for column in columns_to_drop:\n","    if column in df.columns:\n","        df.drop(columns=[column], inplace=True)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"NWP5gZpzr-s4"},"outputs":[],"source":["df.columns"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"TcgnhEffr-s4"},"outputs":[],"source":["df.head()"]},{"cell_type":"markdown","metadata":{"id":"9XEtFgNrr-s4"},"source":["## Loading last.fm data"]},{"cell_type":"code","execution_count":8,"metadata":{"executionInfo":{"elapsed":214,"status":"ok","timestamp":1715387284033,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"ce8MTL8dr-s4"},"outputs":[],"source":["def fetch_data(api_key, method, params):\n","    base_url = \"http://ws.audioscrobbler.com/2.0/\"\n","    params['api_key'] = api_key\n","    params['method'] = method\n","    params['format'] = 'json'\n","    response = requests.get(base_url, params=params)\n","    return response.json()\n","\n","\n","def get_artist_info(api_key, artist_name):\n","    params = {'artist': artist_name}\n","    return fetch_data(api_key, 'artist.getInfo', params)\n","\n","\n","def get_track_info(api_key, artist_name, track_name):\n","    params = {'artist': artist_name, 'track': track_name}\n","    return fetch_data(api_key, 'track.getInfo', params)\n","\n","\n","def batch_fetch_data(api_key, items, fetch_function, sleep_time=1):\n","    results = []\n","    for item in items:\n","        result = fetch_function(api_key, *item)\n","        results.append(result)\n","        # time.sleep(sleep_time)\n","    return results"]},{"cell_type":"code","execution_count":9,"metadata":{"executionInfo":{"elapsed":1,"status":"ok","timestamp":1715387284241,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"4NqoJJOYr-s4"},"outputs":[],"source":["# load LASTFM_API_KEY from .env\n","import requests\n","load_dotenv()\n","api_key = os.getenv('LASTFM_API_KEY')\n","\n","\n","def fetch_lastfm_data(api_key, artist_name, track_name):\n","    base_url = \"http://ws.audioscrobbler.com/2.0/\"\n","    params = {\n","        'method': 'track.getInfo',\n","        'api_key': api_key,\n","        'artist': artist_name,\n","        'track': track_name,\n","        'format': 'json'\n","    }\n","    response = requests.get(base_url, params=params)\n","    if response.status_code == 200 and response.text.strip():\n","        return response.json()\n","    else:\n","        return None\n","\n","\n","def parse_lastfm_data(data):\n","    if data and 'track' in data:\n","        track = data['track']\n","        return {\n","            'listeners': track.get('listeners', '0'),\n","            'playcount': track.get('playcount', '0'),\n","            'tags': ', '.join(tag['name'] for tag in track.get('toptags', {}).get('tag', [])),\n","        }\n","    return None"]},{"cell_type":"code","execution_count":10,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":216},"executionInfo":{"elapsed":357,"status":"error","timestamp":1715387298868,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"prQYTEpGr-s4","outputId":"7f04e536-cd64-41d8-861f-1ef035e99cab"},"outputs":[{"ename":"NameError","evalue":"name 'df' is not defined","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)","\u001b[0;32m<ipython-input-10-da14a3af7ef7>\u001b[0m in \u001b[0;36m<cell line: 6>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mload_dotenv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0mapi_key\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetenv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'LASTFM_API_KEY'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0msubset_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0mtracks_skipped\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"]}],"source":["from tqdm import tqdm\n","tqdm.pandas()\n","\n","load_dotenv()\n","api_key = os.getenv('LASTFM_API_KEY')\n","subset_df = df.head(1000)\n","\n","tracks_skipped = 0\n","\n","\n","def fetch_and_parse(row):\n","    global tracks_skipped\n","    data = fetch_lastfm_data(api_key, row['artist_name'], row['title'])\n","    if data is None:\n","        tracks_skipped += 1\n","        return None\n","    parsed_data = parse_lastfm_data(data)\n","    if parsed_data is None:\n","        tracks_skipped += 1\n","    return parsed_data\n","\n","\n","# Use progress_apply instead of apply\n","subset_df['lastfm_data'] = subset_df.progress_apply(fetch_and_parse, axis=1)\n","\n","# Remove rows where lastfm_data is None\n","subset_df = subset_df[subset_df['lastfm_data'].notna()]\n","\n","subset_df.reset_index(drop=True, inplace=True)\n","track_details_df = pd.json_normalize(subset_df['lastfm_data'])\n","mixed = pd.concat(\n","    [subset_df.drop(columns=['lastfm_data']), track_details_df], axis=1)\n","\n","print(f\"Tracks skipped: {tracks_skipped}\")\n","\n","mixed.to_csv('data/music_data_small.csv', index=False)"]},{"cell_type":"markdown","metadata":{"id":"0wxV_-P6r-s5"},"source":["## Data processing"]},{"cell_type":"code","execution_count":191,"metadata":{"executionInfo":{"elapsed":191,"status":"ok","timestamp":1715390440593,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"vMUDiJbjr-s5"},"outputs":[],"source":["import pandas as pd\n","\n","df = pd.read_csv(\"..\\..\\db\\data\\music_data.csv\")\n","df.dropna(inplace=True)"]},{"cell_type":"code","execution_count":192,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":258},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1715390440851,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"gHQ3NQr7rBN4","outputId":"6897914c-b362-4ea0-94e0-a5030f95fe88"},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>title</th>\n","      <th>release</th>\n","      <th>artist_name</th>\n","      <th>duration</th>\n","      <th>artist_familiarity</th>\n","      <th>artist_hotttnesss</th>\n","      <th>year</th>\n","      <th>listeners</th>\n","      <th>playcount</th>\n","      <th>tags</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>100 Club 1996 ''We Love You Beatles'' - Live</td>\n","      <td>Sex Pistols - The Interviews</td>\n","      <td>Sex Pistols</td>\n","      <td>88.73751</td>\n","      <td>0.731184</td>\n","      <td>0.549204</td>\n","      <td>0</td>\n","      <td>172</td>\n","      <td>210</td>\n","      <td>The Beatles, title is a full sentence</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Yo Quiero Contigo</td>\n","      <td>Sentenciados - Platinum Edition</td>\n","      <td>Baby Rasta &amp; Gringo</td>\n","      <td>167.36608</td>\n","      <td>0.610186</td>\n","      <td>0.355320</td>\n","      <td>0</td>\n","      <td>9753</td>\n","      <td>16911</td>\n","      <td>Reggaeton, alexis y fido, Eliana, mis videos, ...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Emerald</td>\n","      <td>Emerald</td>\n","      <td>Bedrock</td>\n","      <td>501.86404</td>\n","      <td>0.654039</td>\n","      <td>0.390625</td>\n","      <td>2004</td>\n","      <td>973</td>\n","      <td>2247</td>\n","      <td>dance</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>Karma</td>\n","      <td>The Diary Of Alicia Keys</td>\n","      <td>Alicia Keys</td>\n","      <td>255.99955</td>\n","      <td>0.933916</td>\n","      <td>0.778674</td>\n","      <td>2003</td>\n","      <td>250304</td>\n","      <td>1028356</td>\n","      <td>rnb, soul, Alicia Keys, female vocalists, Karma</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>Money Blues</td>\n","      <td>Slidetime</td>\n","      <td>Joanna Connor</td>\n","      <td>243.66975</td>\n","      <td>0.479218</td>\n","      <td>0.332857</td>\n","      <td>0</td>\n","      <td>429</td>\n","      <td>1008</td>\n","      <td>guitar girl, blues</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                          title  \\\n","0  100 Club 1996 ''We Love You Beatles'' - Live   \n","1                             Yo Quiero Contigo   \n","4                                       Emerald   \n","6                                         Karma   \n","7                                   Money Blues   \n","\n","                           release          artist_name   duration  \\\n","0     Sex Pistols - The Interviews          Sex Pistols   88.73751   \n","1  Sentenciados - Platinum Edition  Baby Rasta & Gringo  167.36608   \n","4                          Emerald              Bedrock  501.86404   \n","6         The Diary Of Alicia Keys          Alicia Keys  255.99955   \n","7                        Slidetime        Joanna Connor  243.66975   \n","\n","   artist_familiarity  artist_hotttnesss  year  listeners  playcount  \\\n","0            0.731184           0.549204     0        172        210   \n","1            0.610186           0.355320     0       9753      16911   \n","4            0.654039           0.390625  2004        973       2247   \n","6            0.933916           0.778674  2003     250304    1028356   \n","7            0.479218           0.332857     0        429       1008   \n","\n","                                                tags  \n","0              The Beatles, title is a full sentence  \n","1  Reggaeton, alexis y fido, Eliana, mis videos, ...  \n","4                                              dance  \n","6    rnb, soul, Alicia Keys, female vocalists, Karma  \n","7                                 guitar girl, blues  "]},"execution_count":192,"metadata":{},"output_type":"execute_result"}],"source":["df.head()"]},{"cell_type":"code","execution_count":193,"metadata":{"executionInfo":{"elapsed":142,"status":"ok","timestamp":1715390441226,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"ltW1gvc7r-s5"},"outputs":[],"source":["import pandas as pd\n","import torch\n","from torch.utils.data import DataLoader\n","import torch.nn as nn\n","import torch.nn.functional as F\n","from sklearn.preprocessing import LabelEncoder, MinMaxScaler\n","from sklearn.model_selection import train_test_split\n","import torch.optim as optim\n","\n","def label_encode_data(df):\n","  df = df.copy(deep=True)\n","  # Encode categorical data\n","  label_encoders = {}\n","  unknown_label = 'unknown'  # Define an unknown label\n","\n","  for column in ['tags', 'title']:\n","      le = LabelEncoder()\n","\n","      # Get unique categories plus an 'unknown' category\n","      unique_categories = df[column].unique().tolist()\n","      # Add 'unknown' to the list of categories\n","      unique_categories.append(unknown_label)\n","\n","      # Fit the LabelEncoder to these categories\n","      le.fit(unique_categories)\n","      df[column] = le.transform(df[column].astype(str))\n","\n","      # Store the encoder\n","      label_encoders[column] = le\n","\n","  return df, label_encoders\n","\n","\n","# Normalize numerical features\n","scaler = MinMaxScaler()\n","df[['listeners', 'playcount']] = scaler.fit_transform(\n","    df[['listeners', 'playcount']])\n","\n","# Label encode categorical features\n","df_scaled, label_encoder_training = label_encode_data(df)\n","\n","# Split data into features and target\n","X = df_scaled[['tags']]\n","y = df_scaled['title']\n","\n","# Split the dataset into training and testing sets\n","X_train, X_test, y_train, y_test = train_test_split(\n","    X, y, test_size=0.2, random_state=42)"]},{"cell_type":"code","execution_count":194,"metadata":{"executionInfo":{"elapsed":166,"status":"ok","timestamp":1715390465207,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"G3RFJN6Ur-s5"},"outputs":[],"source":["class SongRecommender(nn.Module):\n","    def __init__(self):\n","        super(SongRecommender, self).__init__()\n","        self.fc1 = nn.Linear(1, 128)  # Adjust input features if needed\n","        self.fc2 = nn.Linear(128, 256)\n","        self.fc3 = nn.Linear(256, 128)\n","        # Output size = number of unique titles including 'unknown'\n","        # Add 1 for the 'unknown' label\n","        self.output = nn.Linear(128, len(y.unique()) + 1)\n","\n","    def forward(self, x):\n","        x = F.relu(self.fc1(x))\n","        x = F.relu(self.fc2(x))\n","        x = F.relu(self.fc3(x))\n","        x = self.output(x)\n","        return x\n","\n","\n","model = SongRecommender()\n","optimizer = optim.Adam(model.parameters(), lr=0.001)\n","criterion = nn.CrossEntropyLoss()"]},{"cell_type":"code","execution_count":195,"metadata":{"executionInfo":{"elapsed":160,"status":"ok","timestamp":1715390466326,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"-HenNJLnr-s5"},"outputs":[],"source":["def train_model(model, X_train, y_train, X_test, y_test):\n","    train_loader = DataLoader(\n","        list(zip(X_train.values.astype(float), y_train)), batch_size=10, shuffle=True)\n","    test_loader = DataLoader(\n","        list(zip(X_test.values.astype(float), y_test)), batch_size=10, shuffle=False)\n","\n","    model.train()\n","    for epoch in range(10):  # Number of epochs\n","        train_loss = 0\n","        for features, labels in train_loader:\n","            optimizer.zero_grad()\n","            outputs = model(torch.tensor(features).float())\n","            # Ensure labels are long type\n","            loss = criterion(outputs, torch.tensor(labels).long())\n","            loss.backward()\n","            optimizer.step()\n","            train_loss += loss.item()\n","\n","        # Validation phase\n","        model.eval()\n","        validation_loss = 0\n","        for features, labels in test_loader:\n","            outputs = model(torch.tensor(features).float())\n","            loss = criterion(outputs, torch.tensor(labels).long())\n","            validation_loss += loss.item()\n","\n","        print(f'Epoch {epoch+1}, Training Loss: {train_loss / len(train_loader)}, Validation Loss: {validation_loss / len(test_loader)}')"]},{"cell_type":"code","execution_count":196,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":138831,"status":"ok","timestamp":1715390606602,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"gNpxg0ANr-s5","outputId":"c7e9ce0c-3653-4e9a-b3ee-2b9d88da4364"},"outputs":[{"name":"stderr","output_type":"stream","text":["C:\\Users\\Nickk\\AppData\\Local\\Temp\\ipykernel_13264\\1321601871.py:12: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n","  outputs = model(torch.tensor(features).float())\n","C:\\Users\\Nickk\\AppData\\Local\\Temp\\ipykernel_13264\\1321601871.py:14: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n","  loss = criterion(outputs, torch.tensor(labels).long())\n","C:\\Users\\Nickk\\AppData\\Local\\Temp\\ipykernel_13264\\1321601871.py:23: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n","  outputs = model(torch.tensor(features).float())\n","C:\\Users\\Nickk\\AppData\\Local\\Temp\\ipykernel_13264\\1321601871.py:24: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n","  loss = criterion(outputs, torch.tensor(labels).long())\n"]},{"name":"stdout","output_type":"stream","text":["Epoch 1, Training Loss: 14.161600421387472, Validation Loss: 8.646175272324506\n","Epoch 2, Training Loss: 8.468926938374837, Validation Loss: 8.906991397633272\n","Epoch 3, Training Loss: 8.42033219749545, Validation Loss: 9.14518429251278\n","Epoch 4, Training Loss: 8.428513119544512, Validation Loss: 9.366180943507775\n","Epoch 5, Training Loss: 8.350075204872791, Validation Loss: 9.573424189698462\n","Epoch 6, Training Loss: 8.334989405267033, Validation Loss: 9.770331466899199\n","Epoch 7, Training Loss: 8.404972361340935, Validation Loss: 9.958629150016636\n","Epoch 8, Training Loss: 8.490517691624017, Validation Loss: 10.354363404068293\n","Epoch 9, Training Loss: 8.405202573611412, Validation Loss: 10.315738350737329\n","Epoch 10, Training Loss: 8.300552919175889, Validation Loss: 10.487916422825233\n"]}],"source":["train_model(model, X_train, y_train, X_test, y_test)"]},{"cell_type":"code","execution_count":197,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["SongRecommender(\n","  (fc1): Linear(in_features=1, out_features=128, bias=True)\n","  (fc2): Linear(in_features=128, out_features=256, bias=True)\n","  (fc3): Linear(in_features=256, out_features=128, bias=True)\n","  (output): Linear(in_features=128, out_features=4855, bias=True)\n",")\n"]}],"source":["print(model)"]},{"cell_type":"code","execution_count":198,"metadata":{"executionInfo":{"elapsed":138,"status":"ok","timestamp":1715390703802,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"1i30qNdCr-s5"},"outputs":[],"source":["# save the model\n","torch.save(model.state_dict(), './model.pth')"]},{"cell_type":"code","execution_count":199,"metadata":{"executionInfo":{"elapsed":1,"status":"ok","timestamp":1715390703994,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"iCAMAEj5r-s5"},"outputs":[],"source":["# load the model\n","model = SongRecommender()"]},{"cell_type":"code","execution_count":200,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"elapsed":292,"status":"ok","timestamp":1715390704465,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"jjkMlHqDHS-Z","outputId":"469d9395-1aa0-4695-98e0-ee867cd31e6b"},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>artist_name</th>\n","      <th>title</th>\n","      <th>tags</th>\n","      <th>listeners</th>\n","      <th>playcount</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Sex Pistols</td>\n","      <td>100 Club 1996 ''We Love You Beatles'' - Live</td>\n","      <td>The Beatles, title is a full sentence</td>\n","      <td>0.000070</td>\n","      <td>0.000009</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Baby Rasta &amp; Gringo</td>\n","      <td>Yo Quiero Contigo</td>\n","      <td>Reggaeton, alexis y fido, Eliana, mis videos, ...</td>\n","      <td>0.003978</td>\n","      <td>0.000729</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Bedrock</td>\n","      <td>Emerald</td>\n","      <td>dance</td>\n","      <td>0.000397</td>\n","      <td>0.000097</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>Alicia Keys</td>\n","      <td>Karma</td>\n","      <td>rnb, soul, Alicia Keys, female vocalists, Karma</td>\n","      <td>0.102103</td>\n","      <td>0.044359</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>Joanna Connor</td>\n","      <td>Money Blues</td>\n","      <td>guitar girl, blues</td>\n","      <td>0.000175</td>\n","      <td>0.000043</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["           artist_name                                         title  \\\n","0          Sex Pistols  100 Club 1996 ''We Love You Beatles'' - Live   \n","1  Baby Rasta & Gringo                             Yo Quiero Contigo   \n","4              Bedrock                                       Emerald   \n","6          Alicia Keys                                         Karma   \n","7        Joanna Connor                                   Money Blues   \n","\n","                                                tags  listeners  playcount  \n","0              The Beatles, title is a full sentence   0.000070   0.000009  \n","1  Reggaeton, alexis y fido, Eliana, mis videos, ...   0.003978   0.000729  \n","4                                              dance   0.000397   0.000097  \n","6    rnb, soul, Alicia Keys, female vocalists, Karma   0.102103   0.044359  \n","7                                 guitar girl, blues   0.000175   0.000043  "]},"execution_count":200,"metadata":{},"output_type":"execute_result"}],"source":["df.loc[:, ['artist_name', 'title', 'tags', 'listeners', 'playcount']].head()"]},{"cell_type":"code","execution_count":201,"metadata":{"executionInfo":{"elapsed":186,"status":"ok","timestamp":1715390829249,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"-W3SwScgr-s5"},"outputs":[],"source":["def label_encode_data(df):\n","    df = df.copy(deep=True)\n","    # Encode categorical data\n","    label_encoders = {}\n","    unknown_label = 'unknown'  # Define an unknown label\n","\n","    for column in ['tags']:\n","        le = LabelEncoder()\n","\n","        # Get unique categories plus an 'unknown' category\n","        unique_categories = df[column].unique().tolist()\n","        # Add 'unknown' to the list of categories\n","        unique_categories.append(unknown_label)\n","\n","        # Fit the LabelEncoder to these categories\n","        le.fit(unique_categories)\n","        df[column] = le.transform(df[column].astype(str))\n","\n","        # Store the encoder\n","        label_encoders[column] = le\n","\n","    return df, label_encoders\n","\n","\n","def recommend_songs(model, user_data, full_data=df, train_encoder=label_encoder_training):\n","    model.eval()\n","    full_data = full_data.copy(deep=True)\n","    with torch.no_grad():\n","\n","        # Create a DataFrame with feature names\n","        text_features = user_data.loc[:, ['tags']]\n","\n","        # encoding using concatenated full dataset and evaluation set for inference\n","        df = full_data.loc[:, ['tags']]\n","        text_features_full = df.loc[:, ['tags']]\n","\n","        all_labels = pd.concat([text_features, text_features_full], axis=0)\n","        all_labels.reset_index(drop=True, inplace=True)\n","\n","        # Get the encoder based on all categorical features\n","        _, label_encoders = label_encode_data(all_labels)\n","\n","        # encode the user data\n","        label_encoded_data = text_features.copy(deep=True)\n","        for column in ['tags']:\n","            label_encoded_data[column] = label_encoders[column].transform(\n","                label_encoded_data[column].astype(str))\n","\n","        # converting label_encoded_data into a torch tensor as float dtype\n","        all_features = torch.tensor(\n","            label_encoded_data.to_numpy()).float().unsqueeze(0)\n","\n","        # Make predictions\n","        predictions = model(all_features)\n","\n","        predictions = predictions[0, :5, :]  # selecting top 5\n","        for row in predictions:\n","            top_5_values, top_5_indices = row.topk(5)\n","            recommended_song_ids = top_5_indices.squeeze().tolist()\n","\n","        try:\n","            recommended_titles = label_encoders['title'].inverse_transform(\n","                recommended_song_ids)\n","            recommended_tags = label_encoders['tags'].inverse_transform(\n","                recommended_song_ids)\n","        except:\n","            recommended_titles = train_encoder['title'].inverse_transform(\n","                recommended_song_ids)[:5]\n","            recommended_tags = train_encoder['tags'].inverse_transform(recommended_song_ids)[\n","                :5]\n","\n","        return list(zip(recommended_titles, recommended_tags))"]},{"cell_type":"code","execution_count":202,"metadata":{},"outputs":[],"source":["user_preferences = pd.read_csv(\"..\\\\..\\\\db\\\\data\\\\user_preferences.csv\")\n","user_preferences.drop('level_0', axis=1, inplace=True)"]},{"cell_type":"code","execution_count":203,"metadata":{"executionInfo":{"elapsed":180,"status":"ok","timestamp":1715388203675,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"o89SnI9-r-s5"},"outputs":[],"source":["import requests\n","\n","\n","def fetch_song_data(api_key, artist_name, track_name):\n","    url = \"http://ws.audioscrobbler.com/2.0/\"\n","    params = {\n","        'method': 'track.getInfo',\n","        'api_key': api_key,\n","        'artist': artist_name,\n","        'track': track_name,\n","        'format': 'json'\n","    }\n","    response = requests.get(url, params=params)\n","    print(response.content)\n","    return response.json() if response.status_code == 200 else {}\n","\n","\n","def parse_song_data(song_data):\n","    if song_data and 'track' in song_data:\n","        track = song_data['track']\n","        return {\n","            'artist_name': track['artist']['name'],\n","            'tags': ', '.join([tag['name'] for tag in track.get('toptags', {}).get('tag', [])]),\n","            'duration': float(track.get('duration', 0)),\n","            'listeners': int(track.get('listeners', 0)),\n","            'playcount': int(track.get('playcount', 0)),\n","            'album': track.get('album', {}).get('title', 'Unknown')\n","        }\n","    return {}"]},{"cell_type":"markdown","metadata":{"id":"xm89R7m8Xh-G"},"source":["# Importing the User Data and Making Recommendations\n","Let's make recommendations using the sample user's preferences."]},{"cell_type":"code","execution_count":204,"metadata":{"executionInfo":{"elapsed":171,"status":"ok","timestamp":1715388938035,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"ufzxTjO3YeXT"},"outputs":[],"source":["import numpy as np"]},{"cell_type":"code","execution_count":205,"metadata":{"executionInfo":{"elapsed":379,"status":"ok","timestamp":1715388938616,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"y3hXSZHnXhA7"},"outputs":[],"source":["user_preferences = pd.read_csv(\"..\\\\..\\\\db\\\\data\\\\user_preferences.csv\")\n","user_preferences.drop('level_0', axis=1, inplace=True)"]},{"cell_type":"code","execution_count":206,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":293},"executionInfo":{"elapsed":232,"status":"ok","timestamp":1715388938847,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"OU7aafogGu2t","outputId":"27703100-6719-44ca-8429-cec20e133bbe"},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>songID</th>\n","      <th>artist</th>\n","      <th>song</th>\n","      <th>link</th>\n","      <th>text</th>\n","      <th>userID</th>\n","      <th>listeners</th>\n","      <th>playcount</th>\n","      <th>tags</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>19632</td>\n","      <td>Toto</td>\n","      <td>You Are The Flower</td>\n","      <td>/t/toto/you+are+the+flower_20139737.html</td>\n","      <td>You never lose a minute, if in it there is lov...</td>\n","      <td>0</td>\n","      <td>25307</td>\n","      <td>87344</td>\n","      <td>AOR, rock, soft rock, 70s, pop rock</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>19632</td>\n","      <td>Toto</td>\n","      <td>You Are The Flower</td>\n","      <td>/t/toto/you+are+the+flower_20139737.html</td>\n","      <td>You never lose a minute, if in it there is lov...</td>\n","      <td>0</td>\n","      <td>25307</td>\n","      <td>87344</td>\n","      <td>AOR, rock, soft rock, 70s, pop rock</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>25284</td>\n","      <td>Billie Holiday</td>\n","      <td>I Only Have Eyes For You</td>\n","      <td>/b/billie+holiday/i+only+have+eyes+for+you_200...</td>\n","      <td>My love must be a kind of blind love,  \\r\\nI c...</td>\n","      <td>0</td>\n","      <td>60356</td>\n","      <td>178625</td>\n","      <td>jazz, female vocal, vocal jazz, blues, female ...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>43594</td>\n","      <td>Michael Bolton</td>\n","      <td>Only A Woman Like You</td>\n","      <td>/m/michael+bolton/only+a+woman+like+you_101792...</td>\n","      <td>It's beautiful, your honesty  \\r\\nYou cry when...</td>\n","      <td>0</td>\n","      <td>4595</td>\n","      <td>13266</td>\n","      <td>Ballad, romantic, soul, pop, cool</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>50200</td>\n","      <td>Rascal Flatts</td>\n","      <td>The Day Before You</td>\n","      <td>/r/rascal+flatts/the+day+before+you_10238985.html</td>\n","      <td>I had all but given up  \\r\\nOn finding the one...</td>\n","      <td>0</td>\n","      <td>22077</td>\n","      <td>86012</td>\n","      <td>country, rock, contemporary country, seen live...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   songID          artist                      song  \\\n","0   19632            Toto        You Are The Flower   \n","1   19632            Toto        You Are The Flower   \n","2   25284  Billie Holiday  I Only Have Eyes For You   \n","3   43594  Michael Bolton     Only A Woman Like You   \n","4   50200   Rascal Flatts        The Day Before You   \n","\n","                                                link  \\\n","0           /t/toto/you+are+the+flower_20139737.html   \n","1           /t/toto/you+are+the+flower_20139737.html   \n","2  /b/billie+holiday/i+only+have+eyes+for+you_200...   \n","3  /m/michael+bolton/only+a+woman+like+you_101792...   \n","4  /r/rascal+flatts/the+day+before+you_10238985.html   \n","\n","                                                text  userID  listeners  \\\n","0  You never lose a minute, if in it there is lov...       0      25307   \n","1  You never lose a minute, if in it there is lov...       0      25307   \n","2  My love must be a kind of blind love,  \\r\\nI c...       0      60356   \n","3  It's beautiful, your honesty  \\r\\nYou cry when...       0       4595   \n","4  I had all but given up  \\r\\nOn finding the one...       0      22077   \n","\n","   playcount                                               tags  \n","0      87344                AOR, rock, soft rock, 70s, pop rock  \n","1      87344                AOR, rock, soft rock, 70s, pop rock  \n","2     178625  jazz, female vocal, vocal jazz, blues, female ...  \n","3      13266                  Ballad, romantic, soul, pop, cool  \n","4      86012  country, rock, contemporary country, seen live...  "]},"execution_count":206,"metadata":{},"output_type":"execute_result"}],"source":["user_preferences.head()"]},{"cell_type":"code","execution_count":216,"metadata":{"executionInfo":{"elapsed":166,"status":"ok","timestamp":1715388941345,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"jVeX1VX9YXj9"},"outputs":[],"source":["sample_user = user_preferences.where(user_preferences['userID'] == np.random.randint(*(0, 9))).dropna()"]},{"cell_type":"markdown","metadata":{"id":"Yu9C90x4Y9lY"},"source":["Hopefully, the neural network makes recommendations of artists that fall into the top 5 for the user."]},{"cell_type":"code","execution_count":217,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":148,"status":"ok","timestamp":1715391053989,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"wnANm0R3YrVa","outputId":"9f890890-055b-41b9-c630-8986442dcaf9"},"outputs":[{"data":{"text/plain":["tags\n","romantic, Love, pop, easy listening, michael bolton                    5.0\n","pop                                                                    4.0\n","loneliness after dusk, Madonna, demo, never let you go, rebel heart    3.0\n","alternative rock, pop, alternative, pop rock, OneRepublic              3.0\n","pop, boybands, dance, backstreet boys, love at first listen            3.0\n","dtype: float64"]},"execution_count":217,"metadata":{},"output_type":"execute_result"}],"source":["top_5 = sample_user.groupby('tags').count().mean(axis=1).sort_values(ascending=False)[:5]\n","top_5"]},{"cell_type":"code","execution_count":218,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":397,"status":"ok","timestamp":1715390877851,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"m9k_0jVCr-s5","outputId":"9610f359-503f-4b06-d36a-f07d4a79a687"},"outputs":[{"name":"stdout","output_type":"stream","text":["#### RECOMMENDATIONS ###\n"]},{"data":{"text/plain":["[('Blueberry Hill', 'Hip-Hop, hip hop, rap, underground hip-hop, political'),\n"," ('Prognosis', 'mpb, pop, 80s, latin, California'),\n"," ('Money Blues', 'hip hop, rap, Hip-Hop, LL Cool J, Timbaland'),\n"," ('Facedown', 'blues, Old Blues, guitar, slide guitar, gospel'),\n"," ('CB4', 'Kanye West, rnb, 00s, janet jackson, pop')]"]},"execution_count":218,"metadata":{},"output_type":"execute_result"}],"source":["print(\"#### RECOMMENDATIONS ###\")\n","song_recs = recommend_songs(model, sample_user, df) # requires giving main song df for finding embeddings\n","song_recs"]},{"cell_type":"code","execution_count":210,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":276},"executionInfo":{"elapsed":219,"status":"ok","timestamp":1715390907498,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"Klx_gv2v4i6x","outputId":"c2b4dd3e-a48e-411b-9a2c-7963ef922075"},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>title</th>\n","      <th>release</th>\n","      <th>artist_name</th>\n","      <th>duration</th>\n","      <th>artist_familiarity</th>\n","      <th>artist_hotttnesss</th>\n","      <th>year</th>\n","      <th>listeners</th>\n","      <th>playcount</th>\n","      <th>tags</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["Empty DataFrame\n","Columns: [title, release, artist_name, duration, artist_familiarity, artist_hotttnesss, year, listeners, playcount, tags]\n","Index: []"]},"execution_count":210,"metadata":{},"output_type":"execute_result"}],"source":["# finding the song artist in the main dataset\n","df.loc[df['title'].isin(song_recs)]"]},{"cell_type":"code","execution_count":211,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"elapsed":428,"status":"ok","timestamp":1715391084128,"user":{"displayName":"Bryan Alexis Ambriz","userId":"16154433038435291108"},"user_tz":420},"id":"AmOx_KyQRALU","outputId":"2010ab1c-ea64-4a00-8dc6-186d584d4868"},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>songID</th>\n","      <th>artist</th>\n","      <th>song</th>\n","      <th>link</th>\n","      <th>text</th>\n","      <th>userID</th>\n","      <th>listeners</th>\n","      <th>playcount</th>\n","      <th>tags</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["Empty DataFrame\n","Columns: [songID, artist, song, link, text, userID, listeners, playcount, tags]\n","Index: []"]},"execution_count":211,"metadata":{},"output_type":"execute_result"}],"source":["# lets see how it compares to sample user\n","sample_user.where(sample_user['artist'].isin(top_5.index)).dropna()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"rqkW9oKPRe05"},"outputs":[],"source":[]}],"metadata":{"colab":{"collapsed_sections":["rTCKoervr-s3","9XEtFgNrr-s4"],"provenance":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}