fixed errors, added logs to debug
bbaa90e
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.sparse import csr_matrix
from rapidfuzz import process, fuzz
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os
import logging
import psutil
# Configure logging
logging.basicConfig(level=logging.INFO)
logging.info("Application started")
def log_memory_usage():
    process = psutil.Process()
    memory_info = process.memory_info()
    logging.info(f"Memory Usage: {memory_info.rss / 1024 ** 2:.2f} MB")
# Spotify API setup
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
client_id=os.environ['sp_client_id'],
client_secret=os.environ['sp_client_secret']))
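# Both environment variables must be set before the app starts, otherwise the
# os.environ lookups above raise a KeyError. Shell example (illustrative values):
#
#     export sp_client_id="your-spotify-client-id"
#     export sp_client_secret="your-spotify-client-secret"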
# Define features for scaling and calculations
features = ['popularity', 'danceability', 'energy', 'loudness', 'speechiness',
'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
default_weights = [1/len(features)] * len(features)
# Read and preprocess the data
logging.info("Reading and preprocessing track data")
tracks_data = pd.read_csv('filtered_songs.csv')
tracks_data = tracks_data[(tracks_data['popularity'] > 40) & (tracks_data['instrumentalness'] <= 0.85)]
tracks_data = tracks_data.reset_index(drop=True)  # realign index labels with row positions used by the similarity lookup below
logging.info("Track data loaded and processed")
log_memory_usage()
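# Optional sanity check (illustrative sketch): warn early if the CSV lacks any
# column that the similarity computation and output rely on.
expected_columns = set(features) | {'name', 'artists'}
missing_columns = expected_columns - set(tracks_data.columns)
if missing_columns:
    logging.warning(f"filtered_songs.csv is missing expected columns: {sorted(missing_columns)}")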
# Function to fetch a song from Spotify
def get_song_from_spotify(song_name, artist_name=None):
    try:
        search_query = song_name if not artist_name else f"{song_name} artist:{artist_name}"
        logging.info(f"Searching Spotify for: {search_query}")
        results = sp.search(q=search_query, limit=1, type='track')
        if results['tracks']['items']:
            track = results['tracks']['items'][0]
            logging.info(f"Found track on Spotify: {track['name']} by {', '.join(artist['name'] for artist in track['artists'])}")
            audio_features = sp.audio_features(track['id'])[0]
            song_details = {
                'id': track['id'],
                'name': track['name'],
                'popularity': track['popularity'],
                'duration_ms': track['duration_ms'],
                'explicit': int(track['explicit']),
                'artists': ', '.join([artist['name'] for artist in track['artists']]),
                'danceability': audio_features['danceability'],
                'energy': audio_features['energy'],
                'key': audio_features['key'],
                'loudness': audio_features['loudness'],
                'mode': audio_features['mode'],
                'speechiness': audio_features['speechiness'],
                'acousticness': audio_features['acousticness'],
                'instrumentalness': audio_features['instrumentalness'],
                'liveness': audio_features['liveness'],
                'valence': audio_features['valence'],
                'tempo': audio_features['tempo'],
                'time_signature': audio_features['time_signature'],
            }
            return song_details
        else:
            logging.warning(f"No results found on Spotify for: {search_query}")
            return None
    except Exception as e:
        logging.error(f"Error fetching song from Spotify: {e}")
        return None
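# get_song_from_spotify is not used by the recommendation flow below; a possible
# wiring (sketch, assuming a fetched song should be appended to the local
# catalogue before computing similarities) would look like:
#
#     fetched = get_song_from_spotify(song_name, artist_name)
#     if fetched is not None:
#         tracks_data = pd.concat([tracks_data, pd.DataFrame([fetched])], ignore_index=True)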
# Enhanced Fuzzy Matching Function
def enhanced_fuzzy_matching(song_name, artist_name, df):
    logging.info(f"Performing fuzzy matching for: {song_name}, {artist_name}")
    combined_query = f"{song_name} {artist_name}".strip()
    df['combined'] = df['name'] + ' ' + df['artists']
    matches = process.extractOne(combined_query, df['combined'], scorer=fuzz.token_sort_ratio)
    return df.index[df['combined'] == matches[0]].tolist()[0] if matches else None
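# process.extractOne returns a (match, score, key) tuple (or None when there is
# nothing to match), so matches[0] above is the best-matching 'combined' string
# and the surrounding lookup recovers its row label by value. Illustrative call
# (song and score are made up):
#
#     process.extractOne("bohemian rhapsody queen", df['combined'], scorer=fuzz.token_sort_ratio)
#     # -> ("Bohemian Rhapsody Queen", 95.2, 1234)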
# Function to apply the selected scaler and calculate weighted cosine similarity
def calculate_weighted_cosine_similarity(input_song_index, weights, num_songs_to_output, tracks_data, scaler_choice):
    logging.info("Calculating weighted cosine similarity")
    # Apply the selected scaler
    if scaler_choice == 'Standard Scaler':
        scaler = StandardScaler()
    else:  # MinMaxScaler
        scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(tracks_data[features]) * weights
    tracks_sparse = csr_matrix(scaled_features)
    # Calculate cosine similarities
    cosine_similarities = cosine_similarity(tracks_sparse[input_song_index], tracks_sparse).flatten()
    similar_song_indices = np.argsort(-cosine_similarities)[1:num_songs_to_output+1]
    return similar_song_indices
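# The weighting above simply rescales each feature column before the standard
# cosine computation, so heavily weighted features dominate the angle. Toy
# illustration with two made-up feature vectors and weights [0.7, 0.3]:
#
#     a = np.array([[0.8, 0.2]]) * [0.7, 0.3]   # -> [[0.56, 0.06]]
#     b = np.array([[0.6, 0.9]]) * [0.7, 0.3]   # -> [[0.42, 0.27]]
#     cosine_similarity(a, b)                   # ~0.89, vs ~0.74 unweighted,
#                                               # because the vectors agree more on the first feature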
# Function to recommend songs
def recommend_songs_interface(song_name, artist_name, num_songs_to_output, scaler_choice, tracks_data, *input_weights):
    num_songs_to_output = int(num_songs_to_output)
    weights = np.array([float(weight) for weight in input_weights]) if input_weights else np.array(default_weights)
    weights /= np.sum(weights)  # Normalize weights
    song_index = enhanced_fuzzy_matching(song_name, artist_name, tracks_data)
    if song_index is not None:
        similar_indices = calculate_weighted_cosine_similarity(song_index, weights, num_songs_to_output, tracks_data, scaler_choice)
        similar_songs = tracks_data.iloc[similar_indices][['name', 'artists']]
        return similar_songs
    else:
        return pd.DataFrame(columns=['name', 'artists'])
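# Direct (non-UI) usage sketch, assuming the example song exists in filtered_songs.csv:
#
#     recs = recommend_songs_interface("Blinding Lights", "The Weeknd", 5,
#                                      "Standard Scaler", tracks_data, *default_weights)
#     # -> DataFrame with 'name' and 'artists' columns for the five closest tracks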
# Gradio interface setup
logging.info("Setting up Gradio interface")
description = "Enter a song name and artist name (optional) to get song recommendations. Adjust the feature weights using the sliders. The system will automatically normalize the weights."
inputs = [
gr.components.Textbox(label="Song Name", placeholder="Enter a song name..."),
gr.components.Textbox(label="Artist Name (optional)", placeholder="Enter artist name (if known)..."),
gr.components.Number(label="Number of Songs to Output", value=5),
gr.components.Dropdown(choices=["Standard Scaler", "MinMax Scaler"], label="Select Scaler", value="Standard Scaler")
]
# Add sliders for each feature weight
for feature in features:
    inputs.append(gr.components.Slider(minimum=0, maximum=1, value=1/len(features), label=f"Weight for {feature}"))
iface = gr.Interface(
fn=lambda song_name, artist_name, num_songs_to_output, scaler_choice, *input_weights: recommend_songs_interface(song_name, artist_name, num_songs_to_output, scaler_choice, tracks_data, *input_weights),
inputs=inputs,
outputs=gr.components.Dataframe(),
title="Song Recommender",
description=description
)
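# The output component could also pin its column headers explicitly, e.g.
# gr.components.Dataframe(headers=["name", "artists"]), matching the columns
# returned by recommend_songs_interface.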
# Run the Gradio app
if __name__ == "__main__":
    logging.info("Launching Gradio interface")
    iface.launch()
    logging.info("Application finished")