import gradio as gr
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.sparse import csr_matrix
from rapidfuzz import process, fuzz
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os
import logging
import psutil

# Configure logging
logging.basicConfig(level=logging.INFO)
logging.info("Application started")


def log_memory_usage():
    # Report the resident set size of the current process in megabytes
    proc = psutil.Process()
    memory_info = proc.memory_info()
    logging.info(f"Memory Usage: {memory_info.rss / 1024 ** 2:.2f} MB")


# Spotify API setup (credentials are read from environment variables)
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
    client_id=os.environ['sp_client_id'],
    client_secret=os.environ['sp_client_secret']))

# Audio features used for scaling and similarity calculations
features = ['popularity', 'danceability', 'energy', 'loudness', 'speechiness',
            'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
default_weights = [1 / len(features)] * len(features)

# Read and preprocess the track data: keep reasonably popular, non-instrumental songs
logging.info("Reading and preprocessing track data")
tracks_data = pd.read_csv('filtered_songs.csv')
tracks_data = tracks_data[(tracks_data['popularity'] > 40) &
                          (tracks_data['instrumentalness'] <= 0.85)]
# Reset the index so row labels line up with positions in the scaled feature matrix
tracks_data = tracks_data.reset_index(drop=True)
logging.info("Track data loaded and processed")
log_memory_usage()


# Fetch a song's metadata and audio features from Spotify
def get_song_from_spotify(song_name, artist_name=None):
    try:
        search_query = song_name if not artist_name else f"{song_name} artist:{artist_name}"
        logging.info(f"Searching Spotify for: {search_query}")
        results = sp.search(q=search_query, limit=1, type='track')
        if results['tracks']['items']:
            track = results['tracks']['items'][0]
            logging.info(f"Found track on Spotify: {track['name']} by "
                         f"{', '.join(artist['name'] for artist in track['artists'])}")
            audio_features = sp.audio_features(track['id'])[0]
            song_details = {
                'id': track['id'],
                'name': track['name'],
                'popularity': track['popularity'],
                'duration_ms': track['duration_ms'],
                'explicit': int(track['explicit']),
                'artists': ', '.join([artist['name'] for artist in track['artists']]),
                'danceability': audio_features['danceability'],
                'energy': audio_features['energy'],
                'key': audio_features['key'],
                'loudness': audio_features['loudness'],
                'mode': audio_features['mode'],
                'speechiness': audio_features['speechiness'],
                'acousticness': audio_features['acousticness'],
                'instrumentalness': audio_features['instrumentalness'],
                'liveness': audio_features['liveness'],
                'valence': audio_features['valence'],
                'tempo': audio_features['tempo'],
                'time_signature': audio_features['time_signature'],
            }
            return song_details
        else:
            logging.warning(f"No results found on Spotify for: {search_query}")
            return None
    except Exception as e:
        logging.error(f"Error fetching song from Spotify: {e}")
        return None


# Fuzzy-match a "song + artist" query against the local catalogue
def enhanced_fuzzy_matching(song_name, artist_name, df):
    logging.info(f"Performing fuzzy matching for: {song_name}, {artist_name}")
    combined_query = f"{song_name} {artist_name}".strip()
    df['combined'] = df['name'] + ' ' + df['artists']
    match = process.extractOne(combined_query, df['combined'], scorer=fuzz.token_sort_ratio)
    return df.index[df['combined'] == match[0]].tolist()[0] if match else None


# Apply the selected scaler and calculate weighted cosine similarity
def calculate_weighted_cosine_similarity(input_song_index, weights, num_songs_to_output,
                                         tracks_data, scaler_choice):
    logging.info("Calculating weighted cosine similarity")
    # Apply the selected scaler
    if scaler_choice == 'Standard Scaler':
        scaler = StandardScaler()
    else:  # MinMax Scaler
        scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(tracks_data[features]) * weights
    tracks_sparse = csr_matrix(scaled_features)
    # Cosine similarity between the input song and every track in the catalogue
    cosine_similarities = cosine_similarity(tracks_sparse[input_song_index], tracks_sparse).flatten()
    # Skip position 0, which is the input song itself
    similar_song_indices = np.argsort(-cosine_similarities)[1:num_songs_to_output + 1]
    return similar_song_indices


# Recommend songs for the Gradio interface
def recommend_songs_interface(song_name, artist_name, num_songs_to_output, scaler_choice,
                              tracks_data, *input_weights):
    num_songs_to_output = int(num_songs_to_output)
    weights = (np.array([float(weight) for weight in input_weights])
               if input_weights else np.array(default_weights))
    # Normalize weights; fall back to equal weights if every slider is set to zero
    total = np.sum(weights)
    weights = weights / total if total > 0 else np.array(default_weights)
    song_index = enhanced_fuzzy_matching(song_name, artist_name, tracks_data)
    if song_index is not None:
        similar_indices = calculate_weighted_cosine_similarity(
            song_index, weights, num_songs_to_output, tracks_data, scaler_choice)
        similar_songs = tracks_data.iloc[similar_indices][['name', 'artists']]
        return similar_songs
    else:
        return pd.DataFrame(columns=['name', 'artists'])


# Gradio interface setup
logging.info("Setting up Gradio interface")
description = ("Enter a song name and artist name (optional) to get song recommendations. "
               "Adjust the feature weights using the sliders. "
               "The system will automatically normalize the weights.")
inputs = [
    gr.components.Textbox(label="Song Name", placeholder="Enter a song name..."),
    gr.components.Textbox(label="Artist Name (optional)", placeholder="Enter artist name (if known)..."),
    gr.components.Number(label="Number of Songs to Output", value=5),
    gr.components.Dropdown(choices=["Standard Scaler", "MinMax Scaler"],
                           label="Select Scaler", value="Standard Scaler")
]

# Add a weight slider for each feature
for feature in features:
    inputs.append(gr.components.Slider(minimum=0, maximum=1, value=1 / len(features),
                                       label=f"Weight for {feature}"))

# Build the Gradio interface
iface = gr.Interface(
    fn=lambda song_name, artist_name, num_songs_to_output, scaler_choice, *input_weights:
        recommend_songs_interface(song_name, artist_name, num_songs_to_output,
                                  scaler_choice, tracks_data, *input_weights),
    inputs=inputs,
    outputs=gr.components.Dataframe(),
    title="Song Recommender",
    description=description
)

# Run the Gradio app
if __name__ == "__main__":
    logging.info("Launching Gradio interface")
    iface.launch()
    logging.info("Application finished")
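
# --- Usage sketch (illustrative, not part of the app) ---
# A minimal way to exercise the recommender without the Gradio UI, assuming
# filtered_songs.csv is present and the sp_client_id / sp_client_secret
# environment variables are set (the Spotify client is created at import time).
# The song and artist names below are placeholders, not values known to be in
# the dataset:
#
#     recs = recommend_songs_interface(
#         "Example Song", "Example Artist", 5, "Standard Scaler",
#         tracks_data, *default_weights,
#     )
#     print(recs)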