Spaces:
Sleeping
Sleeping
Ezhil
committed on
Commit
·
97fec97
0
Parent(s):
Moved the data, app.py, requirements.txt file out
Browse files- REQUIREMENTS.txt +3 -0
- app.py +118 -0
- data/music_data.csv +0 -0
REQUIREMENTS.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
plotly
|
app.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import plotly.express as px
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
from datetime import datetime
|
6 |
+
import os
|
7 |
+
|
8 |
+
# Page setup: browser-tab title and wide layout, followed by the visible heading.
st.set_page_config(
    layout="wide",
    page_title="Music Popularity Trends",
)
st.title("Music Popularity Trends Over Time")
|
13 |
+
|
14 |
+
# Load the data from the 'data' folder
|
15 |
+
@st.cache_data
def load_data():
    """Load the music dataset and derive ``Year`` and ``Decade`` columns.

    The CSV is looked up at ``data/music_data.csv`` under the current working
    directory first and, if it is not there, next to this script, so the app
    also works when Streamlit is launched from a different directory.

    Returns:
        pandas.DataFrame: the CSV contents with ``Album Release Date`` parsed
        to datetimes (unparseable values become ``NaT``), plus a ``Year``
        column and a ``Decade`` column (the year rounded down to a multiple
        of 10). Rows without a parseable date get missing values in both.

    Raises:
        FileNotFoundError: if the CSV exists in neither location.
    """
    relative_path = os.path.join('data', 'music_data.csv')
    search_roots = (
        os.getcwd(),
        os.path.dirname(os.path.abspath(__file__)),
    )
    candidates = [os.path.join(root, relative_path) for root in search_roots]
    # Default to the working-directory path so that a missing file is still
    # reported by read_csv as FileNotFoundError, which the caller handles.
    data_path = next(
        (path for path in candidates if os.path.isfile(path)),
        candidates[0],
    )

    data = pd.read_csv(data_path)

    # errors='coerce' turns malformed dates into NaT instead of raising.
    data['Album Release Date'] = pd.to_datetime(
        data['Album Release Date'], errors='coerce'
    )
    data['Year'] = data['Album Release Date'].dt.year
    data['Decade'] = (data['Year'] // 10) * 10
    return data
|
27 |
+
|
28 |
+
# Load data; without the CSV there is nothing to display, so halt the script.
try:
    df = load_data()
except FileNotFoundError:
    st.error("Error: 'music_data.csv' not found in the 'data' folder. Please ensure the file exists.")
    st.stop()

# Release dates are parsed with errors='coerce', so 'Year' can be missing.
# int() of a NaN minimum/maximum would raise, so bail out with a clear message
# when not a single row has a usable year.
valid_years = df['Year'].dropna()
if valid_years.empty:
    st.error("Error: no valid 'Album Release Date' values found in 'music_data.csv'.")
    st.stop()

# Sidebar for filtering
st.sidebar.header("Filter Options")
min_year = int(valid_years.min())
max_year = int(valid_years.max())
year_range = st.sidebar.slider(
    "Select Year Range",
    min_year,
    max_year,
    (min_year, max_year),
)

# Keep rows whose year lies inside the selected range (both ends inclusive);
# rows with a missing year never match and are dropped.
filtered_df = df[df['Year'].between(year_range[0], year_range[1])]
|
51 |
+
|
52 |
+
# 1. Line chart: mean popularity of the filtered songs, one point per decade.
st.header("Average Popularity by Decade")
popularity_by_decade = filtered_df.groupby('Decade')['Popularity']
decade_avg = popularity_by_decade.mean().reset_index()

line_axis_labels = {'Popularity': 'Average Popularity', 'Decade': 'Decade'}
fig_line = px.line(
    decade_avg,
    x='Decade',
    y='Popularity',
    title='Average Song Popularity by Decade',
    labels=line_axis_labels,
    template='plotly_white',
)

# One tick per decade on x; pin y to the fixed 0-100 range.
fig_line.update_xaxes(tickmode='linear', dtick=10)
fig_line.update_yaxes(range=[0, 100])

st.plotly_chart(fig_line, use_container_width=True)
|
71 |
+
|
72 |
+
# 2. Scatter plot: one marker per song, release date against popularity.
st.header("Individual Song Popularity Over Time")

scatter_axis_labels = {
    'Album Release Date': 'Release Date',
    'Popularity': 'Popularity',
}
fig_scatter = px.scatter(
    filtered_df,
    x='Album Release Date',
    y='Popularity',
    hover_data=['Track Name', 'Artist Name(s)'],
    title='Song Popularity by Release Date',
    labels=scatter_axis_labels,
    template='plotly_white',
)

# Semi-transparent markers keep overlapping songs distinguishable.
marker_style = dict(size=8, opacity=0.6)
fig_scatter.update_traces(marker=marker_style, selector=dict(mode='markers'))

fig_scatter.update_yaxes(range=[0, 100])
fig_scatter.update_layout(showlegend=False)

st.plotly_chart(fig_scatter, use_container_width=True)
|
95 |
+
|
96 |
+
# Additional Insights: best decade by mean popularity and the single top song.
st.header("Key Insights")
col1, col2 = st.columns(2)

# idxmax() raises on an empty or all-NaN series, which happens when the
# selected year range contains no songs (or none with a popularity score).
has_scores = (
    decade_avg['Popularity'].notna().any()
    and filtered_df['Popularity'].notna().any()
)

if not has_scores:
    st.info("No songs with a popularity score in the selected year range.")
else:
    with col1:
        st.subheader("Most Popular Decade")
        most_popular_decade = decade_avg.loc[decade_avg['Popularity'].idxmax()]
        st.write(f"Decade: {int(most_popular_decade['Decade'])}s")
        st.write(f"Average Popularity: {most_popular_decade['Popularity']:.1f}")

    with col2:
        st.subheader("Most Popular Song")
        most_popular_song = filtered_df.loc[filtered_df['Popularity'].idxmax()]
        st.write(f"Track: {most_popular_song['Track Name']}")
        st.write(f"Artist: {most_popular_song['Artist Name(s)']}")
        st.write(f"Popularity: {most_popular_song['Popularity']}")
        st.write(f"Release Year: {int(most_popular_song['Year'])}")
|
113 |
+
|
114 |
+
# Notes: short footer explaining the popularity scale.
notes_markdown = "\n**Notes:**\n- Popularity scores range from 0 to 100\n"
st.markdown(notes_markdown)
|
data/music_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|