Spaces:

brendabor
/

SpotifyProject

Runtime error

App Files Files Community

brendabor committed on Dec 16, 2023

Commit

78e9b61

1 Parent(s): 28336f5

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -48

app.py CHANGED Viewed

@@ -20,54 +20,58 @@ tokenizer = joblib.load('tokenizer.pkl')
 # Load the dataset
 df = pd.read_csv('df1.csv')
-# Load the scaler for KNN
 scaler_knn = StandardScaler()
 # Function for hybrid recommendation
-def hybrid_recommendation(song_index):
-    # Get data for the query song
-    query_data = df.iloc[song_index]
-    # Process the lyrics for emotion prediction using LSTM
-    sequence = tokenizer.texts_to_sequences([query_data['lyrics']])
-    padded_sequence = pad_sequences(sequence, maxlen=50)
-    predicted_emotion = emotion_model.predict(padded_sequence).flatten()
-    # Preprocess for KNN
-    audio_features_knn = df[['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
-                              'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
-                              'duration_ms', 'time_signature']].values.reshape(1, -1)
-    mood_cats = df[['mood_cats']]
-    mood_cats_df = pd.DataFrame(mood_cats)
-    audio_features_scaled_knn = scaler_knn.fit_transform(audio_features_knn)
-    audio_features_df_knn = pd.DataFrame(audio_features_scaled_knn, columns=audio_features_knn.columns.tolist())
-    #audio_features_df = pd.DataFrame(audio_features_scaled_knn, columns=audio_features_knn.columns)
-    # Combine mood and audio features
-    combined_features = pd.concat([mood_cats_df, audio_features_df_knn], axis=1)
-    # Predict using the KNN model
-    knn_recommendations = knn_model.kneighbors(combined_features, n_neighbors=5, return_distance=False)[0]
-    # Mapping emotion predictions to encoded categories
-    emotion_mapping = {0: 'happy', 1: 'sad', 2: 'calm', 3: 'anger'}
-    encoded_emotion = pd.Series(predicted_emotion).idxmax()
-    emotion_category = emotion_mapping[encoded_emotion]
-    # Compute cosine similarity for content-based recommendation
-    features_for_similarity = df[['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
-                                  'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
-                                  'duration_ms', 'time_signature']].values
-    scaler_cb = StandardScaler()
-    audio_features_scaled_cb = scaler_cb.fit_transform(features_for_similarity)
-    # Combine mood and audio features
-    combined_features_cb = np.concatenate([np.array([emotion_category]), audio_features_scaled_cb])
-    cosine_similarities = cosine_similarity([combined_features_cb])
-    # Combine recommendations from both models
-    combined_indices = np.argsort(-np.concatenate([knn_recommendations, cosine_similarities]))
-    hybrid_recs_sorted = combined_indices[:5]  # Select top 5 recommendations
-    return hybrid_recs_sorted
 # Set up the title of the app
 st.title('Hybrid Recommender App')
@@ -75,10 +79,22 @@ st.title('Hybrid Recommender App')
 # Get song index from user input
 song_index_to_recommend = st.number_input('Enter song index:', min_value=0, max_value=len(df)-1, value=0)
 # Get hybrid recommendations
 hybrid_recs = hybrid_recommendation(song_index_to_recommend)
-# Display the recommendations
 st.write("Hybrid Recommendations:")
-for index in hybrid_recs:
-    st.write(f"Song Index: {index}, Title: {df.iloc[index]['title']}, Artist: {df.iloc[index]['artist']}")

 # Load the dataset
 df = pd.read_csv('df1.csv')
+# Preprocess for content-based
+# The audio columns below are the numeric features shared by both recommenders.
+audio_feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
+       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
+       'duration_ms', 'time_signature']
+audio_features = df[audio_feature_columns]
+mood_cats = df[['mood_cats']]
+mood_cats_df = pd.DataFrame(mood_cats)
+# Normalize audio features for content-based
+scaler_cb = StandardScaler()
+audio_features_scaled_cb = scaler_cb.fit_transform(audio_features)
+audio_features_df_cb = pd.DataFrame(audio_features_scaled_cb, columns=audio_feature_columns)
+combined_features_cb = pd.concat([mood_cats, audio_features_df_cb], axis=1)
+# Pairwise song-to-song similarity consumed by recommend_cont().
+# NOTE(review): no definition of `similarity_matrix` is visible in this diff,
+# so recommend_cont() would fail with NameError; if one is loaded earlier in
+# the file, drop these two lines. Assumes `mood_cats` is numerically encoded
+# (it is also fed to the KNN model) -- TODO confirm.
+# The matrix is len(df) x len(df); watch memory on large datasets.
+from sklearn.metrics.pairwise import cosine_similarity
+similarity_matrix = cosine_similarity(combined_features_cb)
+# Preprocessing for KNN
 scaler_knn = StandardScaler()
+audio_features_scaled_knn = scaler_knn.fit_transform(audio_features)
+audio_features_df_knn = pd.DataFrame(audio_features_scaled_knn, columns=audio_feature_columns)
+# Column order (mood first, then scaled audio features) is what gets passed to
+# knn_model.kneighbors(); presumably it matches the order used when the model
+# was fitted -- verify against the training code.
+combined_features_knn = pd.concat([mood_cats_df, audio_features_df_knn], axis=1)
+# Function for content-based recommendation
+def recommend_cont(song_index, num_recommendations=5):
+    """Return the songs ranked most similar to the song at ``song_index``.
+
+    Reads row ``song_index`` of the module-level ``similarity_matrix``, ranks
+    every entry by descending score and drops the top-ranked one (presumably
+    the query song itself -- TODO confirm the matrix has a maximal diagonal).
+
+    Returns a copy of the matching ``df`` rows with an added ``score`` column
+    holding each row's similarity value.
+    """
+    row_scores = similarity_matrix[song_index]
+    ranked = sorted(enumerate(row_scores), key=lambda pair: pair[1], reverse=True)
+    # Skip rank 0, then keep the next ``num_recommendations`` entries.
+    top_pairs = ranked[1:num_recommendations + 1]
+    positions = [position for position, _ in top_pairs]
+    scores = [score for _, score in top_pairs]
+    result = df.iloc[positions].copy()
+    result['score'] = scores
+    return result
+# Function for KNN-based recommendation
+def recommend_knn(query_index, n_recommendations=5):
+    """Return the nearest neighbours of one song according to ``knn_model``.
+
+    The feature row at position ``query_index`` of ``combined_features_knn``
+    is passed to ``knn_model.kneighbors`` as a single-sample 2-D array.
+
+    Returns a copy of the neighbouring ``df`` rows with a ``score`` column
+    equal to ``1 / (1 + distance)``, so a zero distance scores 1.
+    NOTE(review): if the model was fitted on these same rows, the query song
+    is presumably its own closest neighbour -- confirm whether it should be
+    excluded.
+    """
+    query_vector = combined_features_knn.iloc[query_index].values.reshape(1, -1)
+    neighbour_distances, neighbour_positions = knn_model.kneighbors(
+        query_vector, n_neighbors=n_recommendations
+    )
+    neighbours = df.iloc[neighbour_positions.flatten()].copy()
+    # Smaller distance gives a larger score.
+    neighbours['score'] = 1 / (1 + neighbour_distances.flatten())
+    return neighbours
 # Function for hybrid recommendation
+def hybrid_recommendation(song_index, top_n=10):
+    """Blend content-based and KNN recommendations for one song.
+
+    Requests ``top_n`` rows from ``recommend_cont`` and ``recommend_knn``,
+    stacks them, and averages the rows that share an index label, so a song
+    proposed by both recommenders gets the mean of its two scores. A song
+    proposed by only one recommender keeps that single score.
+
+    Returns a DataFrame indexed by song label, sorted by descending ``score``
+    and truncated to ``top_n`` rows. Only numeric columns survive the
+    averaging; look text fields such as title or artist up in ``df``.
+    """
+    # Get recommendations from both models
+    content_based_recs = recommend_cont(song_index, top_n)
+    knn_based_recs = recommend_knn(song_index, top_n)
+    # Combine recommendations
+    combined_recs = pd.concat([content_based_recs, knn_based_recs])
+    # numeric_only=True: the stacked frame still carries df's text columns,
+    # and averaging those raises TypeError on pandas >= 2.0.
+    hybrid_recs = (
+        combined_recs.groupby(combined_recs.index)
+        .mean(numeric_only=True)
+        .sort_values(by='score', ascending=False)
+        .head(top_n)
+    )
+    return hybrid_recs
 # Set up the title of the app
 st.title('Hybrid Recommender App')
 # Get song index from user input
+# The widget bounds keep the value inside the valid row positions of df.
 song_index_to_recommend = st.number_input('Enter song index:', min_value=0, max_value=len(df)-1, value=0)
+# Get lyrics for emotion prediction
+lyrics = df.iloc[song_index_to_recommend]['lyrics']
+# Process the lyrics
+# Tokenize the single lyric string and pad it to a fixed-length sequence.
+sequence = tokenizer.texts_to_sequences([lyrics])
+padded_sequence = pad_sequences(sequence, maxlen=50)  # Adjust the maxlen to match the expected input size
+# Flatten the model output for the one input sample into a 1-D vector.
+emotion = emotion_model.predict(padded_sequence).flatten()
+# Combine emotion and audio features for recommendation
+# NOTE(review): combined_features is not read anywhere in the visible code;
+# hybrid_recommendation() below receives only the song index.
+combined_features = np.concatenate([emotion, audio_features_scaled_knn[song_index_to_recommend]])
 # Get hybrid recommendations
 hybrid_recs = hybrid_recommendation(song_index_to_recommend)
+# Display the predicted emotion and recommendations
+# The emotion is shown as the raw flattened prediction vector, not as a label.
+st.write(f"Predicted Emotion: {emotion}")
 st.write("Hybrid Recommendations:")
+# hybrid_recs is indexed by song label; the label is used positionally on df,
+# which presumably has the default RangeIndex from read_csv -- TODO confirm.
+for index in hybrid_recs.index:
+    st.write(f"Song Index: {index}, Title: {df.iloc[index]['title']}, Artist: {df.iloc[index]['artist']}, Score: {hybrid_recs.loc[index, 'score']}")