brendabor committed on
Commit
78e9b61
1 Parent(s): 28336f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -48
app.py CHANGED
@@ -20,54 +20,58 @@ tokenizer = joblib.load('tokenizer.pkl')
20
  # Load the dataset
21
  df = pd.read_csv('df1.csv')
22
 
23
- # Load the scaler for KNN
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  scaler_knn = StandardScaler()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  # Function for hybrid recommendation
27
- def hybrid_recommendation(song_index):
# Pre-change implementation (every line below is on the deleted side of the diff).
# It takes the row position of a query song and returns the first five entries
# of an argsort over a KNN lookup concatenated with a cosine-similarity result.
28
- # Get data for the query song
29
- query_data = df.iloc[song_index]
30
-
31
- # Process the lyrics for emotion prediction using LSTM
32
- sequence = tokenizer.texts_to_sequences([query_data['lyrics']])
33
- padded_sequence = pad_sequences(sequence, maxlen=50)
34
- predicted_emotion = emotion_model.predict(padded_sequence).flatten()
35
-
36
- # Preprocess for KNN
37
- audio_features_knn = df[['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
38
- 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
39
- 'duration_ms', 'time_signature']].values.reshape(1, -1)
# NOTE(review): `.values` turns the selection into a NumPy array and
# `reshape(1, -1)` flattens every row of `df` into one single row. The
# `audio_features_knn.columns` lookup a few statements below is therefore made
# on an ndarray, which has no `columns` attribute.
40
- mood_cats = df[['mood_cats']]
41
- mood_cats_df = pd.DataFrame(mood_cats)
42
- audio_features_scaled_knn = scaler_knn.fit_transform(audio_features_knn)
43
- audio_features_df_knn = pd.DataFrame(audio_features_scaled_knn, columns=audio_features_knn.columns.tolist())
44
- #audio_features_df = pd.DataFrame(audio_features_scaled_knn, columns=audio_features_knn.columns)
45
- # Combine mood and audio features
46
- combined_features = pd.concat([mood_cats_df, audio_features_df_knn], axis=1)
47
-
48
- # Predict using the KNN model
49
- knn_recommendations = knn_model.kneighbors(combined_features, n_neighbors=5, return_distance=False)[0]
50
-
51
- # Mapping emotion predictions to encoded categories
52
- emotion_mapping = {0: 'happy', 1: 'sad', 2: 'calm', 3: 'anger'}
53
- encoded_emotion = pd.Series(predicted_emotion).idxmax()
54
- emotion_category = emotion_mapping[encoded_emotion]
55
-
56
- # Compute cosine similarity for content-based recommendation
57
- features_for_similarity = df[['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
58
- 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
59
- 'duration_ms', 'time_signature']].values
60
- scaler_cb = StandardScaler()
61
- audio_features_scaled_cb = scaler_cb.fit_transform(features_for_similarity)
62
- # Combine mood and audio features
# NOTE(review): `emotion_category` is a label string taken from
# `emotion_mapping`, while `audio_features_scaled_cb` is the 2-D scaled feature
# matrix; `np.concatenate` needs inputs with the same number of dimensions, so
# the next statement cannot join a 1-D label array with that matrix.
63
- combined_features_cb = np.concatenate([np.array([emotion_category]), audio_features_scaled_cb])
64
- cosine_similarities = cosine_similarity([combined_features_cb])
65
-
66
- # Combine recommendations from both models
# NOTE(review): the statement below concatenates neighbour row positions
# (`knn_recommendations`) with similarity values (`cosine_similarities`) and
# argsorts the mix, so the result holds positions inside that combined array
# rather than song rows -- confirm whether that was the intent.
67
- combined_indices = np.argsort(-np.concatenate([knn_recommendations, cosine_similarities]))
68
- hybrid_recs_sorted = combined_indices[:5] # Select top 5 recommendations
69
-
70
- return hybrid_recs_sorted
71
 
72
  # Set up the title of the app
73
  st.title('Hybrid Recommender App')
@@ -75,10 +79,22 @@ st.title('Hybrid Recommender App')
75
  # Get song index from user input
76
  song_index_to_recommend = st.number_input('Enter song index:', min_value=0, max_value=len(df)-1, value=0)
77
 
 
 
 
 
 
 
 
 
 
 
 
78
  # Get hybrid recommendations
79
  hybrid_recs = hybrid_recommendation(song_index_to_recommend)
80
 
81
- # Display the recommendations
 
82
  st.write("Hybrid Recommendations:")
83
- for index in hybrid_recs:
84
- st.write(f"Song Index: {index}, Title: {df.iloc[index]['title']}, Artist: {df.iloc[index]['artist']}")
 
# Load the dataset
df = pd.read_csv('df1.csv')

# Audio descriptor columns shared by the content-based and KNN pipelines.
audio_feature_columns = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_ms', 'time_signature',
]
audio_features = df[audio_feature_columns]

# Mood category kept as a one-column frame so it can be joined column-wise
# with the scaled audio features below.
mood_cats = df[['mood_cats']]
mood_cats_df = pd.DataFrame(mood_cats)

# Content-based pipeline: standardise the audio features, then place the mood
# column and the scaled features side by side.
scaler_cb = StandardScaler()
audio_features_scaled_cb = scaler_cb.fit_transform(audio_features)
audio_features_df_cb = pd.DataFrame(
    data=audio_features_scaled_cb,
    columns=audio_feature_columns,
)
combined_features_cb = pd.concat(
    objs=[mood_cats, audio_features_df_cb],
    axis='columns',
)

# KNN pipeline: same steps with its own scaler instance, producing the matrix
# that recommend_knn reads its query rows from.
scaler_knn = StandardScaler()
audio_features_scaled_knn = scaler_knn.fit_transform(audio_features)
audio_features_df_knn = pd.DataFrame(
    data=audio_features_scaled_knn,
    columns=audio_feature_columns,
)
combined_features_knn = pd.concat(
    objs=[mood_cats_df, audio_features_df_knn],
    axis='columns',
)
43
+
44
# Function for content-based recommendation
def recommend_cont(song_index, num_recommendations=5, *, similarity=None, songs=None):
    """Return the songs most similar to the query song, best match first.

    The query song is excluded by position. Simply discarding the first
    entry of the sorted scores is not enough: when another song ties with
    the query's self-similarity and sits at a lower position, the stable sort
    puts that song first and the query itself would be recommended.

    Args:
        song_index: Row position of the query song.
        num_recommendations: Maximum number of songs to return.
        similarity: Optional pairwise similarity table; ``similarity[i]`` must
            yield the scores of song ``i`` against every song. Defaults to the
            module-level ``similarity_matrix``.
        songs: Optional DataFrame whose row positions line up with
            ``similarity``. Defaults to the module-level ``df``.

    Returns:
        A copy of the selected rows of ``songs`` with an added ``score``
        column holding each song's similarity to the query.
    """
    # NOTE(review): `similarity_matrix` is not defined in the visible part of
    # this file -- confirm it exists before this function is called without
    # an explicit `similarity` argument.
    if similarity is None:
        similarity = similarity_matrix
    if songs is None:
        songs = df

    limit = max(num_recommendations, 0)
    candidates = [
        (idx, score)
        for idx, score in enumerate(similarity[song_index])
        if idx != song_index
    ]
    # sorted() is stable, so equally scored songs keep ascending position order.
    best = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:limit]

    recommended_songs = songs.iloc[[idx for idx, _ in best]].copy()
    recommended_songs['score'] = [score for _, score in best]
    return recommended_songs
53
+
54
# Function for KNN-based recommendation
def recommend_knn(query_index, n_recommendations=5):
    """Return the nearest neighbours of the query song, excluding the song itself.

    The query row is looked up in ``combined_features_knn`` and passed to
    ``knn_model.kneighbors``. One extra neighbour is requested and the query
    position is filtered out, so a song is never recommended to itself (this
    matches ``recommend_cont``, which also leaves the query out).

    Args:
        query_index: Row position of the query song.
        n_recommendations: Maximum number of songs to return.

    Returns:
        A copy of the selected rows of ``df`` with an added ``score`` column
        equal to ``1 / (1 + distance)``, so closer songs score higher.
    """
    limit = max(n_recommendations, 0)
    query_vector = combined_features_knn.iloc[query_index].values.reshape(1, -1)

    # Never ask for more neighbours than there are rows.
    # NOTE(review): assumes knn_model was fitted on the rows of
    # combined_features_knn, in the same order as df -- confirm.
    n_neighbors = min(limit + 1, len(combined_features_knn))
    distances, indices = knn_model.kneighbors(query_vector, n_neighbors=n_neighbors)
    distances = distances.flatten()
    indices = indices.flatten()

    # Drop the query song, then keep at most `limit` of the remaining neighbours.
    not_query = indices != query_index
    distances = distances[not_query][:limit]
    indices = indices[not_query][:limit]

    recommended_songs = df.iloc[indices].copy()
    # Convert distances to scores
    recommended_songs['score'] = 1 / (1 + distances)  # Inverse of distance
    return recommended_songs
61
 
62
  # Function for hybrid recommendation
63
def hybrid_recommendation(song_index, top_n=10):
    """Merge content-based and KNN recommendations into one ranked table.

    Both recommenders are asked for ``top_n`` songs. Their results are
    stacked, grouped by song row label, and the numeric columns (including
    ``score``) are averaged per song. A song returned by only one recommender
    keeps that single score.

    Args:
        song_index: Row position of the query song.
        top_n: Number of songs requested from each recommender and the
            maximum number of rows returned.

    Returns:
        A DataFrame indexed by song row label, sorted by ``score`` in
        descending order, with at most ``top_n`` rows.
    """
    # Get recommendations from both models
    content_based_recs = recommend_cont(song_index, top_n)
    knn_based_recs = recommend_knn(song_index, top_n)

    # Combine recommendations
    combined_recs = pd.concat([content_based_recs, knn_based_recs])

    # The query song must not be recommended to itself; a nearest-neighbour
    # lookup can hand it back as its own closest match.
    combined_recs = combined_recs.drop(index=song_index, errors='ignore')

    # Average per song over numeric columns only: the rows still carry text
    # columns (title, artist, lyrics), and a plain mean() over those raises a
    # TypeError on pandas 2.x.
    hybrid_recs = (
        combined_recs.groupby(level=0)
        .mean(numeric_only=True)
        .sort_values(by='score', ascending=False)
        .head(top_n)
    )

    return hybrid_recs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
# Set up the title of the app
st.title('Hybrid Recommender App')

# Get song index from user input (limited to valid row positions of df)
song_index_to_recommend = st.number_input(
    'Enter song index:',
    min_value=0,
    max_value=len(df) - 1,
    value=0,
)

# Lyrics of the selected song are the input for the emotion model
lyrics = df.iloc[song_index_to_recommend]['lyrics']

# Tokenise the lyrics, pad them and run the emotion model on the result
sequence = tokenizer.texts_to_sequences([lyrics])
padded_sequence = pad_sequences(sequence, maxlen=50)  # Adjust the maxlen to match the expected input size
emotion = emotion_model.predict(padded_sequence).flatten()

# Emotion output followed by the song's scaled audio features.
# NOTE(review): combined_features is not read again in the visible code.
combined_features = np.concatenate(
    [emotion, audio_features_scaled_knn[song_index_to_recommend]]
)

# Get hybrid recommendations
hybrid_recs = hybrid_recommendation(song_index_to_recommend)

# Show the raw emotion output, then one line per recommended song
st.write(f"Predicted Emotion: {emotion}")
st.write("Hybrid Recommendations:")
for index in hybrid_recs.index:
    st.write(f"Song Index: {index}, Title: {df.iloc[index]['title']}, Artist: {df.iloc[index]['artist']}, Score: {hybrid_recs.loc[index, 'score']}")