Update app.py
app.py CHANGED
@@ -20,54 +20,58 @@ tokenizer = joblib.load('tokenizer.pkl')
 # Load the dataset
 df = pd.read_csv('df1.csv')
 
-#
+# Preprocess for content-based
+audio_feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
+                         'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
+                         'duration_ms', 'time_signature']
+
+audio_features = df[audio_feature_columns]
+mood_cats = df[['mood_cats']]
+mood_cats_df = pd.DataFrame(mood_cats)
+
+# Normalize audio features for content-based
+scaler_cb = StandardScaler()
+audio_features_scaled_cb = scaler_cb.fit_transform(audio_features)
+audio_features_df_cb = pd.DataFrame(audio_features_scaled_cb, columns=audio_feature_columns)
+combined_features_cb = pd.concat([mood_cats, audio_features_df_cb], axis=1)
+
+# Preprocessing for KNN
 scaler_knn = StandardScaler()
+audio_features_scaled_knn = scaler_knn.fit_transform(audio_features)
+audio_features_df_knn = pd.DataFrame(audio_features_scaled_knn, columns=audio_feature_columns)
+combined_features_knn = pd.concat([mood_cats_df, audio_features_df_knn], axis=1)
+
+# Function for content-based recommendation
+def recommend_cont(song_index, num_recommendations=5):
+    song_similarity = similarity_matrix[song_index]
+    # Get indices and similarity scores of top similar songs
+    similar_songs = sorted(list(enumerate(song_similarity)), key=lambda x: x[1], reverse=True)[1:num_recommendations+1]
+    recommended_song_indices = [idx for idx, similarity in similar_songs]
+    recommended_songs = df.iloc[recommended_song_indices].copy()
+    recommended_songs['score'] = [similarity for idx, similarity in similar_songs]
+    return recommended_songs
+
+# Function for KNN-based recommendation
+def recommend_knn(query_index, n_recommendations=5):
+    distances, indices = knn_model.kneighbors(combined_features_knn.iloc[query_index].values.reshape(1, -1), n_neighbors=n_recommendations)
+    recommended_songs = df.iloc[indices.flatten()].copy()
+    # Convert distances to scores
+    recommended_songs['score'] = 1 / (1 + distances.flatten())  # Inverse of distance
+    return recommended_songs
 
 # Function for hybrid recommendation
-def hybrid_recommendation(song_index):
-    # Get audio features for the song
-    audio_features_knn = df.loc[song_index, ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
-                                             'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
-                                             'duration_ms', 'time_signature']].values.reshape(1, -1)
-    mood_cats = df[['mood_cats']]
-    mood_cats_df = pd.DataFrame(mood_cats)
-    audio_features_scaled_knn = scaler_knn.fit_transform(audio_features_knn)
-    audio_features_df_knn = pd.DataFrame(audio_features_scaled_knn, columns=audio_features_knn.columns.tolist())
-    #audio_features_df = pd.DataFrame(audio_features_scaled_knn, columns=audio_features_knn.columns)
-    # Combine mood and audio features
-    combined_features = pd.concat([mood_cats_df, audio_features_df_knn], axis=1)
-
-    # Predict using the KNN model
-    knn_recommendations = knn_model.kneighbors(combined_features, n_neighbors=5, return_distance=False)[0]
-
-    # Mapping emotion predictions to encoded categories
-    emotion_mapping = {0: 'happy', 1: 'sad', 2: 'calm', 3: 'anger'}
-    encoded_emotion = pd.Series(predicted_emotion).idxmax()
-    emotion_category = emotion_mapping[encoded_emotion]
-
-    # Compute cosine similarity for content-based recommendation
-    features_for_similarity = df[['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
-                                  'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
-                                  'duration_ms', 'time_signature']].values
-    scaler_cb = StandardScaler()
-    audio_features_scaled_cb = scaler_cb.fit_transform(features_for_similarity)
-    # Combine mood and audio features
-    combined_features_cb = np.concatenate([np.array([emotion_category]), audio_features_scaled_cb])
-    cosine_similarities = cosine_similarity([combined_features_cb])
-
-    # Combine recommendations from both models
-    combined_indices = np.argsort(-np.concatenate([knn_recommendations, cosine_similarities]))
-    hybrid_recs_sorted = combined_indices[:5]  # Select top 5 recommendations
-
-    return hybrid_recs_sorted
+def hybrid_recommendation(song_index, top_n=10):
+    # Get recommendations from both models
+    content_based_recs = recommend_cont(song_index, top_n)
+    knn_based_recs = recommend_knn(song_index, top_n)
+
+    # Combine recommendations
+    combined_recs = pd.concat([content_based_recs, knn_based_recs])
+
+    # Group by song index (or identifier) and average scores
+    hybrid_recs = combined_recs.groupby(combined_recs.index).mean().sort_values(by='score', ascending=False).head(top_n)
+
+    return hybrid_recs
 
 # Set up the title of the app
 st.title('Hybrid Recommender App')
@@ -75,10 +79,22 @@ st.title('Hybrid Recommender App')
 # Get song index from user input
 song_index_to_recommend = st.number_input('Enter song index:', min_value=0, max_value=len(df)-1, value=0)
 
+# Get lyrics for emotion prediction
+lyrics = df.iloc[song_index_to_recommend]['lyrics']
+
+# Process the lyrics
+sequence = tokenizer.texts_to_sequences([lyrics])
+padded_sequence = pad_sequences(sequence, maxlen=50)  # Adjust the maxlen to match the expected input size
+emotion = emotion_model.predict(padded_sequence).flatten()
+
+# Combine emotion and audio features for recommendation
+combined_features = np.concatenate([emotion, audio_features_scaled_knn[song_index_to_recommend]])
+
 # Get hybrid recommendations
 hybrid_recs = hybrid_recommendation(song_index_to_recommend)
 
-# Display the recommendations
+# Display the predicted emotion and recommendations
+st.write(f"Predicted Emotion: {emotion}")
 st.write("Hybrid Recommendations:")
-for index in hybrid_recs:
-    st.write(f"Song Index: {index}, Title: {df.iloc[index]['title']}, Artist: {df.iloc[index]['artist']}")
+for index in hybrid_recs.index:
+    st.write(f"Song Index: {index}, Title: {df.iloc[index]['title']}, Artist: {df.iloc[index]['artist']}, Score: {hybrid_recs.loc[index, 'score']}")
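As removed, hybrid_recommendation could not have run: it read predicted_emotion without ever computing it, appears to fit_transform a single already-extracted feature row (and then asks that array for .columns), and concatenates a string emotion label onto a 2-D scaled feature matrix before calling cosine_similarity. The rewrite sidesteps this by splitting the logic into recommend_cont and recommend_knn and averaging their scores; a few remaining rough edges are flagged in the notes below.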
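One such edge sits in the new preprocessing: combined_features_knn (and combined_features_cb) still carry the raw mood_cats strings, and scikit-learn's kneighbors accepts only numeric input, so recommend_knn will raise unless the category is encoded first. A minimal sketch of one fix, assuming one-hot encoding (pd.get_dummies here is illustrative; the Space may encode moods elsewhere):

```python
import pandas as pd

# Hypothetical fix: one-hot encode the categorical mood column so every
# column handed to the KNN model is numeric.
mood_encoded = pd.get_dummies(df['mood_cats'], prefix='mood')
combined_features_knn = pd.concat([mood_encoded, audio_features_df_knn], axis=1)
# knn_model would then need to have been fit on features with this same layout.
```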
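recommend_cont also indexes a similarity_matrix that never appears in this diff, so it is presumably built or loaded elsewhere in app.py. A plausible construction, sketched on the assumption that it is pairwise cosine similarity over the scaled content-based features (the same numeric-encoding caveat applies if mood is included):

```python
from sklearn.metrics.pairwise import cosine_similarity

# Assumed construction: similarity_matrix[i, j] is the cosine similarity
# between songs i and j, which is exactly how recommend_cont indexes it.
similarity_matrix = cosine_similarity(audio_features_df_cb)
```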
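Because each query row comes from the same data the KNN model was fit on, kneighbors will normally return the query song itself as the nearest neighbor (distance 0, hence score 1). recommend_cont already guards against self-matches with its [1:num_recommendations+1] slice; a matching guard for recommend_knn might look like:

```python
def recommend_knn(query_index, n_recommendations=5):
    # Ask for one extra neighbor, then drop the first: the query song itself.
    distances, indices = knn_model.kneighbors(
        combined_features_knn.iloc[query_index].values.reshape(1, -1),
        n_neighbors=n_recommendations + 1,
    )
    distances, indices = distances[:, 1:], indices[:, 1:]
    recommended_songs = df.iloc[indices.flatten()].copy()
    recommended_songs['score'] = 1 / (1 + distances.flatten())  # Inverse of distance
    return recommended_songs
```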
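In hybrid_recommendation, combined_recs still contains object columns such as title and artist, and groupby(...).mean() raises a TypeError on those under pandas 2.x (older versions silently dropped them). A sketch that averages only the score while keeping the shape the display loop expects:

```python
# Aggregate just the numeric score, then restore a one-column DataFrame
# so hybrid_recs.loc[index, 'score'] keeps working downstream.
hybrid_recs = (
    combined_recs.groupby(combined_recs.index)['score']
    .mean()
    .sort_values(ascending=False)
    .head(top_n)
    .to_frame()
)
```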
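Finally, two observations on the UI block: emotion is the model's raw probability vector, so the app prints an array rather than a label, and combined_features is assembled but never used (hybrid_recommendation takes only the song index). Reusing the mapping from the removed code would give a readable label; a sketch, assuming the model's four outputs follow that ordering:

```python
import numpy as np

# Mapping carried over from the removed code.
emotion_mapping = {0: 'happy', 1: 'sad', 2: 'calm', 3: 'anger'}
emotion_label = emotion_mapping[int(np.argmax(emotion))]
st.write(f"Predicted Emotion: {emotion_label}")
```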