Spaces:
Runtime error
Runtime error
Commit
·
bfaf419
1
Parent(s):
0cb8f83
Upload 2 files
Browse files- app.py +141 -0
- preprocessed_data.py +78 -0
app.py
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
# In[ ]:
|
5 |
+
|
6 |
+
|
7 |
+
import streamlit as st
|
8 |
+
# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(page_title="Karaoke Playlist Generator", layout="wide")
# The original line was `st.markdown "Plan your performance"`, which is a
# SyntaxError; st.markdown has to be called like any other function.
st.markdown("Plan your performance")
|
10 |
+
import pandas as pd
|
11 |
+
from sklearn.neighbors import NearestNeighbors
|
12 |
+
import plotly.express as px
|
13 |
+
import streamlit.components.v1 as components
|
14 |
+
|
15 |
+
@st.cache(allow_output_mutation=True)
def load_data():
    """Read the filtered track CSV and return it with one row per (track, genre).

    The ``genres`` column is read as text that looks like a printed Python
    list (for example ``"['pop', 'rock']"``). Each cell is turned back into a
    list of genre names and the frame is then exploded on that column.

    Returns:
        pandas.DataFrame: the exploded track table.
    """

    def _parse_genres(raw):
        # Drop the surrounding brackets, split on ", ", then strip the quote
        # character at each end of every item.
        inner = str(raw)[1:-1]
        return [token[1:-1] for token in inner.split(", ")]

    tracks = pd.read_csv("data/filtered_track_df.csv")
    tracks['genres'] = tracks.genres.apply(_parse_genres)
    return tracks.explode("genres")
|
21 |
+
|
22 |
+
# Genre labels shown in the genre picker; n_neighbors_uri_audio lower-cases the
# chosen label before comparing it with the "genres" column.
genre_names = [
    'Dance Pop',
    'Electronic',
    'Electropop',
    'Hip Hop',
    'Jazz',
    'K-pop',
    'Latin',
    'Pop',
    'Pop Rap',
    'R&B',
    'Rock',
]

# Feature columns used for the nearest-neighbour search. The order must match
# the order of the values collected into `test_feat` further down.
audio_feats = [
    "acousticness",
    "danceability",
    "energy",
    "instrumentalness",
    "valence",
    "tempo",
]

# Loaded once at import time; load_data is wrapped in st.cache.
exploded_track_df = load_data()
|
26 |
+
|
27 |
+
def n_neighbors_uri_audio(genre, start_year, end_year, test_feat):
    """Rank tracks of one genre and year range by closeness to ``test_feat``.

    Args:
        genre: Genre label; it is lower-cased before being compared with the
            ``genres`` column of ``exploded_track_df``.
        start_year: First release year to include (inclusive).
        end_year: Last release year to include (inclusive).
        test_feat: Target feature values, in the same order as ``audio_feats``.

    Returns:
        tuple: ``(uris, audios)`` where ``uris`` is a list of the ``uri``
        values of the matching tracks, nearest first, and ``audios`` is a 2-D
        array holding the ``audio_feats`` columns of those tracks in the same
        order. When no track matches the filters, ``uris`` is an empty list
        and ``audios`` has zero rows.
    """
    genre = genre.lower()
    in_genre = exploded_track_df["genres"] == genre
    in_years = exploded_track_df["release_year"].between(start_year, end_year)

    # Only the 500 most popular matching tracks are considered.
    genre_data = exploded_track_df[in_genre & in_years]
    genre_data = genre_data.sort_values(by='popularity', ascending=False)[:500]

    # Fitting on zero samples (and asking for zero neighbours) raises in
    # scikit-learn, so an empty selection is answered directly.
    if genre_data.empty:
        return [], genre_data[audio_feats].to_numpy()

    # NOTE(review): the features are fitted unscaled; the tempo slider spans
    # 0-244 while the other sliders span 0-1, so tempo probably dominates the
    # distance. Confirm whether that is intended.
    neigh = NearestNeighbors()
    neigh.fit(genre_data[audio_feats].to_numpy())

    # Asking for every fitted sample yields a full ranking of genre_data.
    order = neigh.kneighbors(
        [test_feat], n_neighbors=len(genre_data), return_distance=False
    )[0]

    ranked = genre_data.iloc[order]
    uris = ranked["uri"].tolist()
    audios = ranked[audio_feats].to_numpy()
    return uris, audios
|
40 |
+
|
41 |
+
|
42 |
+
# Page heading. The original text read "Karaoke Performance Geneartp", a typo
# in the user-facing title.
title = "Karaoke Performance Generator"
st.title(title)

st.write("Customize your performance based on genre and several key audio features!")
# A bare "##" heading; presumably used as vertical spacing before the controls.
st.markdown("##")
|
47 |
+
|
48 |
+
# Input controls: the genre picker goes into the third column and the sliders
# into the first; the second and fourth columns are created but left empty.
with st.container():
    col1, col2, col3, col4 = st.columns((2, 0.5, 0.5, 0.5))

    with col3:
        st.markdown("***Choose your genre:***")
        genre = st.radio("", genre_names, index=genre_names.index("Pop"))

    with col1:
        st.markdown("***Choose features to customize:***")
        start_year, end_year = st.slider('Select the year range', 1990, 2019, (2015, 2019))

        # (label, minimum, maximum, initial value) for each feature slider,
        # listed in the order the sliders are created.
        slider_specs = (
            ('Acousticness', 0.0, 1.0, 0.5),
            ('Danceability', 0.0, 1.0, 0.5),
            ('Energy', 0.0, 1.0, 0.5),
            ('Instrumentalness', 0.0, 1.0, 0.0),
            ('Valence', 0.0, 1.0, 0.45),
            ('Tempo', 0.0, 244.0, 118.0),
        )
        acousticness, danceability, energy, instrumentalness, valence, tempo = (
            st.slider(label, low, high, initial)
            for label, low, high, initial in slider_specs
        )
|
79 |
+
|
80 |
+
# Number of tracks shown per page.
tracks_per_page = 6

# Slider values, in the same order as audio_feats.
test_feat = [acousticness, danceability, energy, instrumentalness, valence, tempo]
uris, audios = n_neighbors_uri_audio(genre, start_year, end_year, test_feat)

# One Spotify embed iframe per recommended track, in ranking order.
embed_template = """<iframe src="https://open.spotify.com/embed/track/{}" width="260" height="380" frameborder="0" allowtransparency="true" allow="encrypted-media"></iframe>"""
tracks = [embed_template.format(uri) for uri in uris]

# Remember the inputs of the previous run so a change can be detected.
current_inputs = [genre, start_year, end_year] + test_feat
if 'previous_inputs' not in st.session_state:
    st.session_state['previous_inputs'] = current_inputs

inputs_changed = current_inputs != st.session_state['previous_inputs']
if inputs_changed:
    st.session_state['previous_inputs'] = current_inputs

# Paging restarts at the first track on the first run and whenever any input
# differs from the previous run.
if inputs_changed or 'start_track_i' not in st.session_state:
    st.session_state['start_track_i'] = 0
|
100 |
+
|
101 |
+
# Result area: a paging button plus the current page of tracks, laid out
# alternately in the left and right columns (the middle column stays empty).
with st.container():
    col1, col2, col3 = st.columns([2, 1, 2])

    # Advance one page, unless the offset is already past the last track.
    if st.button("Recommend More Songs"):
        if st.session_state['start_track_i'] < len(tracks):
            st.session_state['start_track_i'] += tracks_per_page

    page_start = st.session_state['start_track_i']
    page_end = page_start + tracks_per_page
    current_tracks = tracks[page_start:page_end]
    current_audios = audios[page_start:page_end]

    if page_start >= len(tracks):
        st.write("No songs left to recommend")
    else:
        for i, (track, audio) in enumerate(zip(current_tracks, current_audios)):
            # Even positions go to the left column, odd ones to the right.
            target_col = col3 if i % 2 else col1
            with target_col:
                components.html(
                    track,
                    height=400,
                )
                with st.expander("See more details"):
                    # Polar chart of the first five features; the sixth
                    # entry of audio_feats (tempo) is left out.
                    df = pd.DataFrame(dict(
                        r=audio[:5],
                        theta=audio_feats[:5]))
                    fig = px.line_polar(df, r='r', theta='theta', line_close=True)
                    fig.update_layout(height=400, width=340)
                    st.plotly_chart(fig)
|
141 |
+
|
preprocessed_data.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""preprocessed_data
|
3 |
+
|
4 |
+
Automatically generated by Colaboratory.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1ssqIgQzcaBw12nyIoBSmaoWGZoaBPPHR
|
8 |
+
"""
|
9 |
+
|
10 |
+
import pandas as pd
|
11 |
+
|
12 |
+
# Kaggle credential setup and dataset download.
#
# The notebook export contained IPython shell magics (`!mkdir`, `!cp`,
# `!chmod`, `!kaggle`, `!pip`), which are a SyntaxError in a plain .py file.
# They are replaced with standard-library equivalents that do the same work.
import shutil
import subprocess
import sys
from pathlib import Path

from google.colab import files

# Opens Colab's upload prompt; kaggle.json is expected to be among the
# uploaded files because it is copied from the working directory below.
files.upload()

kaggle_dir = Path.home() / ".kaggle"
# exist_ok keeps a re-run from failing when the directory is already there.
kaggle_dir.mkdir(exist_ok=True)
shutil.copy("kaggle.json", kaggle_dir)
(kaggle_dir / "kaggle.json").chmod(0o600)

# check=True stops the script here if a command fails, instead of failing
# later on a missing archive or module.
subprocess.run(
    ["kaggle", "datasets", "download", "-d", "saurabhshahane/spotgen-music-dataset"],
    check=True,
)

subprocess.run([sys.executable, "-m", "pip", "install", "patool"], check=True)

import patoolib

patoolib.extract_archive('spotgen-music-dataset.zip')
|
27 |
+
|
28 |
+
# Load the three source tables from the extracted archive.
data_dir = "SpotGenTrack/Data Sources/"
albums_data, artists_data, tracks_data = (
    pd.read_csv(data_dir + csv_name)
    for csv_name in ("spotify_albums.csv", "spotify_artists.csv", "spotify_tracks.csv")
)

# Notebook-style preview of each table. `display` is supplied by the notebook
# environment; the bare `.columns` expression only shows output when it is the
# last expression of a notebook cell.
for frame in (albums_data, artists_data, tracks_data):
    display(frame.head())
    frame.columns
|
41 |
+
|
42 |
+
def join_genre_and_date(artist_df, album_df, track_df):
    """Attach album release dates and artist genres to the track table.

    Steps:
      * join each track with its album's ``release_date`` via ``album_id``;
      * strip the first and last two characters of ``artists_id`` and join
        the artist's ``artists_name`` and ``genres`` on the result;
      * derive ``release_year`` from ``release_date``;
      * drop the columns ``Unnamed: 0``, ``country``, ``track_name_prev``,
        ``track_number`` and ``type``;
      * keep only tracks released in 1990 or later.

    Args:
        artist_df: Artist table with ``id``, ``name`` and ``genres`` columns.
        album_df: Album table with ``id`` and ``release_date`` columns.
        track_df: Track table with ``album_id``, ``artists_id`` and the
            columns listed above as dropped.

    Returns:
        pandas.DataFrame: the joined track rows with ``release_year >= 1990``.
    """
    albums_by_id = album_df.rename(columns={'id': "album_id"}).set_index('album_id')
    artists_by_id = artist_df.rename(
        columns={'id': "artists_id", 'name': "artists_name"}
    ).set_index('artists_id')

    merged = track_df.set_index('album_id').join(albums_by_id['release_date'], on='album_id')

    # presumably artists_id is stored like "['<id>']"; the slice removes the
    # two characters at each end -- verify against the source data.
    merged['artists_id'] = merged.artists_id.apply(lambda raw: raw[2:-2])

    merged = merged.set_index('artists_id').join(
        artists_by_id[['artists_name', 'genres']], on='artists_id'
    )
    merged = merged.reset_index(drop=False)
    merged['release_year'] = pd.to_datetime(merged.release_date).dt.year
    merged = merged.drop(
        columns=['Unnamed: 0', 'country', 'track_name_prev', 'track_number', 'type']
    )

    released_since_1990 = merged.release_year >= 1990
    return merged[released_since_1990]
|
56 |
+
|
57 |
+
def get_filtered_track_df(df, genres_to_include):
    """Keep the tracks that have at least one genre in ``genres_to_include``.

    The ``genres`` column is expected to hold text that looks like a printed
    Python list (for example ``"['pop', 'rock']"``). In the returned frame
    that column holds real lists of genre names.

    Args:
        df: Track table with a ``genres`` column. It is not modified.
        genres_to_include: Genre names to keep.

    Returns:
        pandas.DataFrame: the matching rows of ``df``, one row per track,
        with a fresh 0..n-1 index.
    """
    # Work on a copy so the caller's frame keeps its original genres column.
    parsed = df.copy()
    parsed['genres'] = parsed.genres.apply(
        lambda raw: [token[1:-1] for token in str(raw)[1:-1].split(", ")]
    )

    # Explode once, and only to find which rows carry a wanted genre.
    exploded = parsed.explode("genres")
    wanted_index = exploded.index[exploded["genres"].isin(genres_to_include)].unique()

    # NOTE(review): the original also renamed "korean pop" to "k-pop", but
    # only on the temporary exploded frame, so it never reached the returned
    # data. If that mapping is wanted it has to be applied to the genre lists.
    kept = parsed[parsed.index.isin(wanted_index)]
    return kept.reset_index(drop=True)
|
65 |
+
|
66 |
+
# Build the filtered track table and write it to disk.
track_with_year_and_genre = join_genre_and_date(artists_data, albums_data, tracks_data)
# `genres` is kept as a second name for the same list, as in the original.
genres_to_include = genres = [
    'dance pop',
    'electronic',
    'electropop',
    'hip hop',
    'jazz',
    'k-pop',
    'latin',
    'pop',
    'pop rap',
    'r&b',
    'rock',
]
filtered_track_df = get_filtered_track_df(track_with_year_and_genre, genres_to_include)

# Strip the "spotify:track:" prefix so only the track id remains. regex=False
# makes this a literal replacement regardless of the pandas version's default.
filtered_track_df["uri"] = filtered_track_df["uri"].str.replace(
    "spotify:track:", "", regex=False
)
filtered_track_df = filtered_track_df.drop(columns=['analysis_url', 'available_markets'])

# Notebook-style preview; `display` is supplied by the notebook environment.
display(filtered_track_df.head())
filtered_track_df.columns

# NOTE(review): app.py reads "data/filtered_track_df.csv", so this file has to
# be moved into a data/ directory before the app can load it.
filtered_track_df.to_csv("filtered_track_df.csv", index=False)

tracks_data.describe()
|