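"""Streamlit app: a hybrid song recommender.

Combines content-based filtering (cosine similarity over scaled audio
features) with a pretrained KNN model, and uses a pretrained LSTM to
predict an emotion from the selected song's lyrics.
"""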
import streamlit as st
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import joblib
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the LSTM model for emotion prediction
emotion_model = load_model('lstm_model.h5')

# Load the KNN model
knn_model = joblib.load('knn_model.joblib')

# Load the tokenizer
tokenizer = joblib.load('tokenizer.pkl')

# Load the dataset
df = pd.read_csv('df1.csv')
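# The dataset is assumed to contain at least 'title', 'artist', 'lyrics',
# 'mood_cats', and the audio feature columns listed below.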

# Preprocess for content-based
audio_feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'duration_ms', 'time_signature']

audio_features = df[audio_feature_columns]
mood_cats = df[['mood_cats']]  # already a DataFrame; no extra wrapping needed

# Normalize audio features for content-based filtering
scaler_cb = StandardScaler()
audio_features_scaled_cb = scaler_cb.fit_transform(audio_features)
audio_features_df_cb = pd.DataFrame(audio_features_scaled_cb, columns=audio_feature_columns)
combined_features_cb = pd.concat([mood_cats, audio_features_df_cb], axis=1)

# Precompute the similarity matrix used by recommend_cont(). It is computed
# over the scaled numeric audio features only, since 'mood_cats' is assumed
# to be categorical and cosine similarity needs numeric input.
similarity_matrix = cosine_similarity(audio_features_df_cb)

# Preprocessing for KNN. Note: kneighbors() requires numeric input, so this
# assumes 'mood_cats' is numerically encoded, matching the features the
# saved KNN model was trained on.
scaler_knn = StandardScaler()
audio_features_scaled_knn = scaler_knn.fit_transform(audio_features)
audio_features_df_knn = pd.DataFrame(audio_features_scaled_knn, columns=audio_feature_columns)
combined_features_knn = pd.concat([mood_cats, audio_features_df_knn], axis=1)
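
# For reference, the saved model is assumed to have been fit on features with
# this same layout, along these lines (a sketch, not the actual training code):
#   knn = NearestNeighbors(n_neighbors=5).fit(combined_features_knn)
#   joblib.dump(knn, 'knn_model.joblib')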

# Function for content-based recommendation
def recommend_cont(song_index, num_recommendations=5):
    song_similarity = similarity_matrix[song_index]
    # Get indices and similarity scores of top similar songs
    similar_songs = sorted(list(enumerate(song_similarity)), key=lambda x: x[1], reverse=True)[1:num_recommendations+1]
    recommended_song_indices = [idx for idx, similarity in similar_songs]
    recommended_songs = df.iloc[recommended_song_indices].copy()
    recommended_songs['score'] = [similarity for idx, similarity in similar_songs]
    return recommended_songs
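
# Example usage: recommend_cont(0) returns the 5 songs most similar to the
# song at index 0; the [1:] slice above drops the song's self-match.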

# Function for KNN-based recommendation
def recommend_knn(query_index, n_recommendations=5):
    # Query with a one-row DataFrame so scikit-learn keeps the feature names.
    # Note: the query song itself is usually its own nearest neighbor
    # (distance 0), so it can appear in the results.
    query = combined_features_knn.iloc[[query_index]]
    distances, indices = knn_model.kneighbors(query, n_neighbors=n_recommendations)
    recommended_songs = df.iloc[indices.flatten()].copy()
    # Convert distances to scores: 1 / (1 + d) maps d = 0 to 1.0 and decays
    # toward 0 as distance grows
    recommended_songs['score'] = 1 / (1 + distances.flatten())
    return recommended_songs

# Function for hybrid recommendation
def hybrid_recommendation(song_index, top_n=10):
    # Get recommendations from both models
    content_based_recs = recommend_cont(song_index, top_n)
    knn_based_recs = recommend_knn(song_index, top_n)

    # Combine recommendations
    combined_recs = pd.concat([content_based_recs, knn_based_recs])

    # Group by song index and average the numeric columns (recent pandas
    # raises on .mean() over non-numeric columns such as title or lyrics,
    # hence numeric_only=True); songs found by both models get the mean of
    # their two scores
    hybrid_recs = combined_recs.groupby(combined_recs.index).mean(numeric_only=True).sort_values(by='score', ascending=False).head(top_n)

    return hybrid_recs
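
# Design note: the two score scales differ (cosine similarity vs. 1/(1+d)),
# so averaging them weights the models unevenly; rescaling both to [0, 1]
# before combining would be a reasonable refinement.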

# Set up the title of the app
st.title('Hybrid Recommender App')

# Get song index from user input
song_index_to_recommend = st.number_input('Enter song index:', min_value=0, max_value=len(df)-1, value=0)

# Get lyrics for emotion prediction
lyrics = df.iloc[song_index_to_recommend]['lyrics']

# Process the lyrics
sequence = tokenizer.texts_to_sequences([lyrics])
padded_sequence = pad_sequences(sequence, maxlen=50)  # maxlen must match the sequence length used when the LSTM was trained
emotion = emotion_model.predict(padded_sequence).flatten()

# Combine emotion probabilities and audio features. Note: this vector is not
# used by hybrid_recommendation() below; it is kept for inspection or future use.
combined_features = np.concatenate([emotion, audio_features_scaled_knn[song_index_to_recommend]])

# Get hybrid recommendations
hybrid_recs = hybrid_recommendation(song_index_to_recommend)

# Display the predicted emotion and recommendations. The LSTM outputs one
# probability per emotion class; show the most likely class index, since a
# class-to-label mapping is not loaded here.
predicted_class = int(np.argmax(emotion))
st.write(f"Predicted emotion class: {predicted_class} (probabilities: {emotion})")
st.write("Hybrid Recommendations:")
for index in hybrid_recs.index:
    st.write(f"Song Index: {index}, Title: {df.iloc[index]['title']}, Artist: {df.iloc[index]['artist']}, Score: {hybrid_recs.loc[index, 'score']:.3f}")