Spaces:
Runtime error
Runtime error
!pip install python-dotenv | |
# imports | |
import pandas as pd | |
import h5py | |
import os | |
from sqlalchemy import create_engine | |
import requests | |
import time | |
from dotenv import load_dotenv | |
import pandas as pd | |
# Load the music dataset and keep only rows in which every column has a value.
df = pd.read_csv(
    '/content/drive/MyDrive/CMPE-258: Team Neurobytes/Neurobytes/db/data/music_data.csv'
).dropna()
import pandas as pd | |
import torch | |
from torch.utils.data import DataLoader | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from sklearn.preprocessing import LabelEncoder, MinMaxScaler | |
from sklearn.model_selection import train_test_split | |
import torch.optim as optim | |
# Encode categorical data.
# Each categorical column gets its own LabelEncoder, fitted on the values
# present in the data plus an extra 'unknown' category so that unseen values
# can be mapped to a valid index at inference time.
label_encoders = {}
unknown_label = 'unknown'  # Fallback category for values not seen during fitting
for column in ['artist_name', 'tags', 'title']:
    # Cast to str ONCE and use the same values for both fit and transform.
    # Fitting on the raw values while transforming the str-cast values makes
    # any non-string cell (e.g. a purely numeric title) fail or count as unseen.
    column_values = df[column].astype(str)
    le = LabelEncoder()
    le.fit(column_values.unique().tolist() + [unknown_label])
    df[column] = le.transform(column_values)
    # Store the encoder so inference code can encode inputs / decode outputs
    label_encoders[column] = le

# Normalize numerical features to the [0, 1] range.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split below, so test-set statistics influence the scaling.
numeric_columns = ['duration', 'listeners', 'playcount']
scaler = MinMaxScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Split data into features and target (the encoded song title)
X = df[['artist_name', 'tags', 'duration', 'listeners', 'playcount']]
y = df['title']

# Split the dataset into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
class SongRecommender(nn.Module):
    """Feed-forward classifier that scores every known song title.

    The network is three ReLU-activated hidden layers (128, 256, 128 units)
    followed by a linear output layer producing one raw score (logit) per
    title class.

    Args:
        num_features: Width of the input feature vector. Defaults to 5
            (artist_name, tags, duration, listeners, playcount).
        num_classes: Number of output classes. When omitted it is taken from
            the module-level title encoder (``label_encoders['title']``), so
            the output layer always matches the set of labels that can be
            decoded, including the 'unknown' category.
    """

    def __init__(self, num_features=5, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Size the output from the encoder rather than from
            # ``len(y.unique()) + 1``: the two differ when 'unknown' already
            # occurs as a title, which would leave an undecodable output unit.
            num_classes = len(label_encoders['title'].classes_)
        self.fc1 = nn.Linear(num_features, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 128)
        self.output = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the per-class logits for a batch of feature vectors."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # No softmax here: the file trains with nn.CrossEntropyLoss, which
        # takes raw logits.
        return self.output(x)
# Instantiate the network together with the loss and the optimizer used to
# train it (Adam, learning rate 0.001; cross-entropy over title classes).
model = SongRecommender()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
def train_model(model, X_train, y_train, X_test, y_test, epochs=50,
                batch_size=50, optimizer=None, criterion=None):
    """Train ``model`` and print the training/validation loss after each epoch.

    Args:
        model: The ``nn.Module`` to train (updated in place).
        X_train: Training features; must expose ``.values`` (e.g. a DataFrame).
        y_train: Iterable of integer class labels aligned with ``X_train``.
        X_test: Validation features, same layout as ``X_train``.
        y_test: Iterable of integer class labels aligned with ``X_test``.
        epochs: Number of passes over the training data (default 50).
        batch_size: Mini-batch size for both loaders (default 50).
        optimizer: Optimizer to step. Defaults to the module-level
            ``optimizer``; pass one explicitly when training a model other
            than the one that module-level optimizer was built for.
        criterion: Loss function. Defaults to the module-level ``criterion``.

    Returns:
        None. Progress is reported via ``print``.
    """
    # Fall back to the module-level objects so existing callers keep working.
    if optimizer is None:
        optimizer = globals()['optimizer']
    if criterion is None:
        criterion = globals()['criterion']

    train_loader = DataLoader(
        list(zip(X_train.values.astype(float), y_train)),
        batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(
        list(zip(X_test.values.astype(float), y_test)),
        batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        # Re-enter training mode every epoch: the validation phase below
        # switches the model to eval mode.
        model.train()
        train_loss = 0.0
        for features, labels in train_loader:
            optimizer.zero_grad()
            # The loader already yields tensors; as_tensor avoids re-copying.
            outputs = model(torch.as_tensor(features).float())
            # CrossEntropyLoss expects integer (long) class indices
            loss = criterion(outputs, torch.as_tensor(labels).long())
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation phase: no parameter updates, so skip gradient tracking.
        model.eval()
        validation_loss = 0.0
        with torch.no_grad():
            for features, labels in test_loader:
                outputs = model(torch.as_tensor(features).float())
                loss = criterion(outputs, torch.as_tensor(labels).long())
                validation_loss += loss.item()

        # max(..., 1) guards the averages against an empty loader.
        print(f'Epoch {epoch+1}, Training Loss: {train_loss / max(len(train_loader), 1)}, Validation Loss: {validation_loss / max(len(test_loader), 1)}')
train_model(model, X_train, y_train, X_test, y_test)
# save the trained weights
torch.save(model.state_dict(), 'model.pth')
# load the model: build a fresh network and restore the saved weights into it
# (without load_state_dict the new instance would hold random weights)
model = SongRecommender()
model.load_state_dict(torch.load('model.pth'))
model.eval()
def recommend_songs(model, input_features, top_k=5):
    """Return the titles the model scores highest for the given song features.

    Relies on the module-level ``label_encoders`` and ``scaler`` that were
    fitted on the training data.

    Args:
        model: Trained network mapping a feature vector to per-title logits.
        input_features: Mapping with the keys 'artist_name', 'tags',
            'duration', 'listeners' and 'playcount'.
        top_k: Maximum number of titles to return (default 5). Fewer are
            returned when fewer titles are known.

    Returns:
        Array of decoded titles, best match first.

    Raises:
        KeyError: If ``input_features`` lacks one of the required keys.
    """
    model.eval()
    print(input_features)

    def encode(column):
        # Categories not seen when fitting fall back to the 'unknown' class.
        encoder = label_encoders[column]
        try:
            return encoder.transform([input_features[column]])[0]
        except ValueError:
            return encoder.transform(['unknown'])[0]

    artist_index = encode('artist_name')
    tags_index = encode('tags')

    # Build a DataFrame with the column names the scaler was fitted on
    numeric = pd.DataFrame(
        [[input_features['duration'], input_features['listeners'],
          input_features['playcount']]],
        columns=['duration', 'listeners', 'playcount']
    )
    scaled = scaler.transform(numeric)[0]

    features = torch.tensor(
        [float(artist_index), float(tags_index), *(float(v) for v in scaled)],
        dtype=torch.float32,
    ).unsqueeze(0)

    with torch.no_grad():
        logits = model(features)

    title_encoder = label_encoders['title']
    # Ignore any output unit that has no corresponding encoded title, and
    # never ask topk for more entries than exist.
    logits = logits[:, :len(title_encoder.classes_)]
    k = max(0, min(top_k, logits.shape[1]))
    _, top_indices = logits.topk(k)
    # squeeze(0) drops only the batch axis, so a single result stays a list.
    return title_encoder.inverse_transform(top_indices.squeeze(0).tolist())
import requests | |
def fetch_song_data(api_key, artist_name, track_name, timeout=10):
    """Fetch track metadata from the Last.fm ``track.getInfo`` endpoint.

    Args:
        api_key: Last.fm API key.
        artist_name: Artist to look up.
        track_name: Track title to look up.
        timeout: Seconds to wait for the server before giving up (default 10).

    Returns:
        The decoded JSON response as a dict, or ``{}`` when the request
        fails, the status code is not 200, or the body is not valid JSON.
    """
    # HTTPS keeps the API key out of plaintext traffic.
    url = "https://ws.audioscrobbler.com/2.0/"
    params = {
        'method': 'track.getInfo',
        'api_key': api_key,
        'artist': artist_name,
        'track': track_name,
        'format': 'json'
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Last.fm request failed: {exc}")
        return {}
    if response.status_code != 200:
        return {}
    try:
        return response.json()
    except ValueError:
        # Body was not valid JSON; treat it like any other failed lookup.
        return {}
def _coerce_number(value, cast, default):
    """Return ``cast(value)``, or ``default`` when the value cannot be converted."""
    try:
        return cast(value)
    except (TypeError, ValueError):
        return default


def _tag_names(toptags):
    """Return the tag names found in a ``toptags`` payload as a list of str."""
    # Defensive: tolerate a missing/non-dict container, a single tag object
    # instead of a list, and entries without a usable name.
    tags = toptags.get('tag', []) if isinstance(toptags, dict) else []
    if isinstance(tags, dict):
        tags = [tags]
    elif not isinstance(tags, list):
        tags = []
    return [tag['name'] for tag in tags
            if isinstance(tag, dict) and tag.get('name')]


def parse_song_data(song_data):
    """Flatten a ``track.getInfo``-style response into model input features.

    Args:
        song_data: Decoded JSON response; expected to hold a 'track' dict.

    Returns:
        A dict with the keys 'artist_name', 'tags' (comma-separated names),
        'duration' (float), 'listeners' (int), 'playcount' (int) and 'album'.
        Missing or malformed fields fall back to '', 0 or 'Unknown'.
        Returns ``{}`` when ``song_data`` has no usable 'track' entry.
    """
    if not isinstance(song_data, dict):
        return {}
    track = song_data.get('track')
    if not isinstance(track, dict):
        return {}

    artist = track.get('artist')
    if isinstance(artist, dict):
        artist_name = artist.get('name', '')
    elif isinstance(artist, str):
        artist_name = artist
    else:
        artist_name = ''

    album = track.get('album')
    album_title = album.get('title', 'Unknown') if isinstance(album, dict) else 'Unknown'

    return {
        'artist_name': artist_name,
        'tags': ', '.join(_tag_names(track.get('toptags'))),
        'duration': _coerce_number(track.get('duration', 0), float, 0.0),
        'listeners': _coerce_number(track.get('listeners', 0), int, 0),
        'playcount': _coerce_number(track.get('playcount', 0), int, 0),
        'album': album_title,
    }
from dotenv import load_dotenv | |
import os | |
# Read the Last.fm API key from the environment (optionally via a .env file)
load_dotenv()
api_key = os.getenv('LASTFM_API_KEY')
artist_name = 'Lady Gaga'
track_name = 'Poker Face'

# Fetch and parse song data; skip the request when no API key is configured
if api_key:
    song_data = fetch_song_data(api_key, artist_name, track_name)
else:
    print("LASTFM_API_KEY is not set; skipping the Last.fm lookup.")
    song_data = {}
parsed_data = parse_song_data(song_data)
print(song_data)

# if the song is not found, or the tags column is empty, print a message
if not parsed_data or not parsed_data.get('tags'):
    print("Song not found or tags not available.")
else:
    # Show the result: inside an else-branch the return value would otherwise
    # be silently discarded.
    recommendations = recommend_songs(model, parsed_data)
    print(recommendations)