nlovoldegar committed on
Commit
bfaf419
·
1 Parent(s): 0cb8f83

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +141 -0
  2. preprocessed_data.py +78 -0
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[ ]:
5
+
6
+
7
import pandas as pd
import plotly.express as px
import streamlit as st
import streamlit.components.v1 as components
from sklearn.neighbors import NearestNeighbors

# Page configuration is applied before any other Streamlit output is produced.
st.set_page_config(page_title="Karaoke Playlist Generator", layout="wide")
# st.markdown is a function; the text has to be passed as a call argument.
st.markdown("Plan your performance")
14
+
15
@st.cache(allow_output_mutation=True)
def load_data():
    """Load the filtered track table with one row per (track, genre) pair.

    Reads ``data/filtered_track_df.csv``, turns every ``genres`` cell back
    into a list of names and explodes the frame on that column.

    Returns:
        The exploded DataFrame.
    """
    def split_genres(cell):
        # The cell is handled as text: drop its first and last character,
        # split on ", ", then drop the first and last character of each
        # piece (presumably the quotes of a stringified list -- confirm
        # against the CSV).
        inner = str(cell)[1:-1]
        return [piece[1:-1] for piece in inner.split(", ")]

    tracks = pd.read_csv("data/filtered_track_df.csv")
    tracks['genres'] = tracks['genres'].apply(split_genres)
    return tracks.explode("genres")
21
+
22
# Genre labels offered in the genre picker.
genre_names = [
    'Dance Pop',
    'Electronic',
    'Electropop',
    'Hip Hop',
    'Jazz',
    'K-pop',
    'Latin',
    'Pop',
    'Pop Rap',
    'R&B',
    'Rock',
]

# Track columns used as coordinates for the nearest-neighbour search.
audio_feats = [
    "acousticness",
    "danceability",
    "energy",
    "instrumentalness",
    "valence",
    "tempo",
]

# Track table exploded by genre, as returned by load_data().
exploded_track_df = load_data()
26
+
27
def n_neighbors_uri_audio(genre, start_year, end_year, test_feat):
    """Rank a genre's most popular tracks by closeness to an audio profile.

    Args:
        genre: Genre label; it is lower-cased before being compared with
            the ``genres`` column.
        start_year: First release year to include (inclusive).
        end_year: Last release year to include (inclusive).
        test_feat: Target values, one per entry of ``audio_feats`` and in
            the same order.

    Returns:
        A ``(uris, audios)`` pair: the list of track URIs in the order
        returned by the neighbour search, and the array of the matching
        ``audio_feats`` rows. Both are empty when no track matches the
        genre and year range.
    """
    genre = genre.lower()
    in_scope = (
        (exploded_track_df["genres"] == genre)
        & (exploded_track_df["release_year"] >= start_year)
        & (exploded_track_df["release_year"] <= end_year)
    )
    # Only the 500 most popular matches take part in the search.
    genre_data = exploded_track_df[in_scope].sort_values(by='popularity', ascending=False)[:500]

    if genre_data.empty:
        # Fitting on zero samples (and asking for zero neighbours) raises,
        # so an empty selection is reported as "no results" instead.
        return [], genre_data[audio_feats].to_numpy()

    neigh = NearestNeighbors()
    neigh.fit(genre_data[audio_feats].to_numpy())

    # Ask for every candidate so the whole selection comes back ranked.
    n_neighbors = neigh.kneighbors([test_feat], n_neighbors=len(genre_data), return_distance=False)[0]

    ranked = genre_data.iloc[n_neighbors]
    uris = ranked["uri"].tolist()
    audios = ranked[audio_feats].to_numpy()
    return uris, audios
40
+
41
+
42
# Page heading followed by a one-line introduction and a spacer.
title = "Karaoke Performance Generator"
st.title(title)

st.write("Customize your performance based on genre and several key audio features!")
st.markdown("##")
46
+ st.markdown("##")
47
+
48
with st.container():
    # Four columns are created; only the first (sliders) and the third
    # (genre picker) receive widgets.
    col1, col2, col3, col4 = st.columns((2, 0.5, 0.5, 0.5))

    with col3:
        st.markdown("***Choose your genre:***")
        genre = st.radio("", genre_names, index=genre_names.index("Pop"))

    with col1:
        st.markdown("***Choose features to customize:***")
        # A tuple default makes this a range slider returning (low, high).
        start_year, end_year = st.slider(
            'Select the year range', min_value=1990, max_value=2019, value=(2015, 2019)
        )
        acousticness = st.slider('Acousticness', min_value=0.0, max_value=1.0, value=0.5)
        danceability = st.slider('Danceability', min_value=0.0, max_value=1.0, value=0.5)
        energy = st.slider('Energy', min_value=0.0, max_value=1.0, value=0.5)
        instrumentalness = st.slider('Instrumentalness', min_value=0.0, max_value=1.0, value=0.0)
        valence = st.slider('Valence', min_value=0.0, max_value=1.0, value=0.45)
        tempo = st.slider('Tempo', min_value=0.0, max_value=244.0, value=118.0)
79
+
80
tracks_per_page = 6
test_feat = [acousticness, danceability, energy, instrumentalness, valence, tempo]
uris, audios = n_neighbors_uri_audio(genre, start_year, end_year, test_feat)

# One Spotify embed iframe per recommended track, in ranking order.
embed_template = """<iframe src="https://open.spotify.com/embed/track/{}" width="260" height="380" frameborder="0" allowtransparency="true" allow="encrypted-media"></iframe>"""
tracks = [embed_template.format(uri) for uri in uris]

# Snapshot of every input that influences the recommendation list.
current_inputs = [genre, start_year, end_year] + test_feat
if 'previous_inputs' not in st.session_state:
    st.session_state['previous_inputs'] = current_inputs

if current_inputs != st.session_state['previous_inputs']:
    # The inputs changed since the last run: go back to the first page.
    if 'start_track_i' in st.session_state:
        st.session_state['start_track_i'] = 0
    st.session_state['previous_inputs'] = current_inputs

if 'start_track_i' not in st.session_state:
    st.session_state['start_track_i'] = 0
100
+
101
with st.container():
    col1, col2, col3 = st.columns([2, 1, 2])
    if st.button("Recommend More Songs"):
        # Advance by one page unless the list is already exhausted.
        if st.session_state['start_track_i'] < len(tracks):
            st.session_state['start_track_i'] += tracks_per_page

    page_start = st.session_state['start_track_i']
    page_end = page_start + tracks_per_page
    current_tracks = tracks[page_start:page_end]
    current_audios = audios[page_start:page_end]

    if page_start < len(tracks):
        for i, (track, audio) in enumerate(zip(current_tracks, current_audios)):
            # Even positions are drawn in the left column, odd ones in the
            # right column; the middle column stays empty.
            column = col1 if i % 2 == 0 else col3
            with column:
                components.html(track, height=400)
                with st.expander("See more details"):
                    # Only the first five entries of audio_feats are plotted.
                    df = pd.DataFrame(dict(r=audio[:5], theta=audio_feats[:5]))
                    fig = px.line_polar(df, r='r', theta='theta', line_close=True)
                    fig.update_layout(height=400, width=340)
                    st.plotly_chart(fig)
    else:
        st.write("No songs left to recommend")
141
+
preprocessed_data.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """preprocessed_data
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1ssqIgQzcaBw12nyIoBSmaoWGZoaBPPHR
8
+ """
9
+
10
import os
import shutil
import subprocess
import sys

import pandas as pd

from google.colab import files

# Opens Colab's upload prompt; the steps below expect kaggle.json to be
# among the uploaded files.
files.upload()

# Plain-Python equivalents of the notebook's shell commands
# (mkdir ~/.kaggle/, cp kaggle.json ~/.kaggle/, chmod 600), so the file is
# valid Python outside IPython. An existing directory is not an error.
kaggle_dir = os.path.expanduser("~/.kaggle")
os.makedirs(kaggle_dir, exist_ok=True)
kaggle_token = shutil.copy("kaggle.json", kaggle_dir)
os.chmod(kaggle_token, 0o600)

# check=True stops the script when the download or the install fails,
# instead of carrying on without the data.
subprocess.run(
    ["kaggle", "datasets", "download", "-d", "saurabhshahane/spotgen-music-dataset"],
    check=True,
)

subprocess.run([sys.executable, "-m", "pip", "install", "patool"], check=True)

# Imported here because patool is only installed by the step above.
import patoolib

patoolib.extract_archive('spotgen-music-dataset.zip')
27
+
28
# Folder holding the three source tables.
data_dir = "SpotGenTrack/Data Sources/"
albums_data = pd.read_csv(f"{data_dir}spotify_albums.csv")
artists_data = pd.read_csv(f"{data_dir}spotify_artists.csv")
tracks_data = pd.read_csv(f"{data_dir}spotify_tracks.csv")

# Notebook-style inspection of each table: first rows, then column names.
for frame in (albums_data, artists_data, tracks_data):
    display(frame.head())
    frame.columns
41
+
42
def join_genre_and_date(artist_df, album_df, track_df):
    """Attach album release dates and artist genres to the track table.

    Columns that are not needed afterwards are dropped and only tracks
    released in 1990 or later are returned.

    Args:
        artist_df: Artist table with ``id``, ``name`` and ``genres`` columns.
        album_df: Album table with ``id`` and ``release_date`` columns.
        track_df: Track table with ``album_id`` and ``artists_id`` columns.

    Returns:
        A new DataFrame with ``release_date``, ``artists_name``, ``genres``
        and ``release_year`` added. The input frames are not modified.
    """
    album = album_df.rename(columns={'id': "album_id"}).set_index('album_id')
    artist = artist_df.rename(columns={'id': "artists_id", 'name': "artists_name"}).set_index('artists_id')

    track = track_df.set_index('album_id').join(album['release_date'], on='album_id')

    # Trim two characters from each end of the artists_id text so that it
    # can be matched against the artist ids.
    # NOTE(review): presumably the cell looks like "['<id>']"; a cell that
    # lists several artists would not reduce to a single id and would get
    # no artist match -- confirm against the data.
    track['artists_id'] = track['artists_id'].str[2:-2]
    track = track.set_index('artists_id').join(artist[['artists_name', 'genres']], on='artists_id')
    track = track.reset_index(drop=False)

    # Read the year from the first four characters instead of parsing the
    # whole date: values of differing precision ("2019", "2019-04",
    # "2019-04-26") then all work, and anything unreadable becomes NaN and
    # is removed by the year filter below.
    # NOTE(review): assumes release_date starts with a four-digit year.
    year_text = track['release_date'].astype(str).str[:4]
    track['release_year'] = pd.to_numeric(year_text, errors='coerce')

    # errors='ignore' keeps this working when one of these columns (for
    # example the 'Unnamed: 0' index artefact) is absent from the input.
    track = track.drop(
        columns=['Unnamed: 0', 'country', 'track_name_prev', 'track_number', 'type'],
        errors='ignore',
    )

    return track[track['release_year'] >= 1990]
56
+
57
def get_filtered_track_df(df, genres_to_include):
    """Keep the tracks that have at least one of the requested genres.

    Each ``genres`` cell is read as text, split into a list of names, and
    "korean pop" is relabelled "k-pop" before the filter is applied, so
    those tracks can match a requested "k-pop" genre.

    Args:
        df: Track table with a ``genres`` column. It is not modified.
        genres_to_include: Genre names to keep.

    Returns:
        A new DataFrame holding the matching rows, with ``genres`` as
        lists of names and a fresh 0..n-1 index.
    """
    wanted = set(genres_to_include)
    aliases = {"korean pop": "k-pop"}

    def parse_genres(cell):
        # Drop the first and last character of the text, split on ", ",
        # then drop the first and last character of every piece.
        names = [piece[1:-1] for piece in str(cell)[1:-1].split(", ")]
        # Relabel before filtering; doing it afterwards can never match,
        # because the unrelabelled name has already been filtered out.
        return [aliases.get(name, name) for name in names]

    # Work on a copy so the caller's frame keeps its original column.
    result = df.copy()
    result['genres'] = result['genres'].apply(parse_genres)

    # astype(bool) keeps the mask boolean even when the frame is empty.
    keep = result['genres'].apply(
        lambda names: any(name in wanted for name in names)
    ).astype(bool)
    return result[keep].reset_index(drop=True)
65
+
66
# Genres kept in the exported table.
genres = [
    'dance pop',
    'electronic',
    'electropop',
    'hip hop',
    'jazz',
    'k-pop',
    'latin',
    'pop',
    'pop rap',
    'r&b',
    'rock',
]
genres_to_include = genres

track_with_year_and_genre = join_genre_and_date(artists_data, albums_data, tracks_data)
filtered_track_df = get_filtered_track_df(track_with_year_and_genre, genres_to_include)

# Strip the "spotify:track:" text from the uri column, then drop two
# columns that are not exported.
filtered_track_df = filtered_track_df.assign(
    uri=filtered_track_df["uri"].str.replace("spotify:track:", "")
).drop(columns=['analysis_url', 'available_markets'])

display(filtered_track_df.head())
filtered_track_df.columns

filtered_track_df.to_csv("filtered_track_df.csv", index=False)

tracks_data.describe()