Ezhil committed on
Commit
10d82a8
·
1 Parent(s): 97fec97

Initial commit-folder structure

Browse files
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: DataVisualization Spotify
3
+ emoji: 🚀
4
+ colorFrom: purple
5
+ colorTo: purple
6
+ sdk: streamlit
7
+ sdk_version: 1.42.2
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
REQUIREMENTS.txt CHANGED
@@ -1,3 +1,6 @@
1
- streamlit
2
- pandas
3
- plotly
 
 
 
 
1
+ streamlit==1.31.1
2
+ pandas==2.2.1
3
+ plotly==5.20.0
4
+ seaborn==0.13.2
5
+ matplotlib==3.8.3
6
+ networkx==3.2.1
app.py CHANGED
@@ -1,118 +1,79 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import plotly.express as px
4
- import plotly.graph_objects as go
5
- from datetime import datetime
6
- import os
7
-
8
- # Set page configuration
9
- st.set_page_config(page_title="Music Popularity Trends", layout="wide")
10
-
11
- # Title
12
- st.title("Music Popularity Trends Over Time")
13
-
14
- # Load the data from the 'data' folder
15
- @st.cache_data
16
- def load_data():
17
- # Define the path to the data folder
18
- data_path = os.path.join(os.getcwd(), 'data', 'music_data.csv')
19
- # Load the CSV file
20
- data = pd.read_csv(data_path)
21
- # Convert Album Release Date to datetime
22
- data['Album Release Date'] = pd.to_datetime(data['Album Release Date'], errors='coerce')
23
- # Extract year and decade
24
- data['Year'] = data['Album Release Date'].dt.year
25
- data['Decade'] = (data['Year'] // 10) * 10
26
- return data
27
-
28
- # Load data
29
- try:
30
- df = load_data()
31
- except FileNotFoundError:
32
- st.error("Error: 'music_data.csv' not found in the 'data' folder. Please ensure the file exists.")
33
- st.stop()
34
-
35
- # Sidebar for filtering
36
- st.sidebar.header("Filter Options")
37
- min_year = int(df['Year'].min())
38
- max_year = int(df['Year'].max())
39
- year_range = st.sidebar.slider(
40
- "Select Year Range",
41
- min_year,
42
- max_year,
43
- (min_year, max_year)
44
- )
45
-
46
- # Filter data based on year range
47
- filtered_df = df[
48
- (df['Year'] >= year_range[0]) &
49
- (df['Year'] <= year_range[1])
50
- ]
51
-
52
- # 1. Line Chart - Average Popularity by Decade
53
- st.header("Average Popularity by Decade")
54
- decade_avg = filtered_df.groupby('Decade')['Popularity'].mean().reset_index()
55
-
56
- fig_line = px.line(
57
- decade_avg,
58
- x='Decade',
59
- y='Popularity',
60
- title='Average Song Popularity by Decade',
61
- labels={'Popularity': 'Average Popularity', 'Decade': 'Decade'},
62
- template='plotly_white'
63
- )
64
-
65
- fig_line.update_layout(
66
- xaxis=dict(tickmode='linear', dtick=10),
67
- yaxis=dict(range=[0, 100])
68
  )
69
 
70
- st.plotly_chart(fig_line, use_container_width=True)
71
-
72
- # 2. Scatter Plot - Individual Song Popularity Over Time
73
- st.header("Individual Song Popularity Over Time")
74
- fig_scatter = px.scatter(
75
- filtered_df,
76
- x='Album Release Date',
77
- y='Popularity',
78
- hover_data=['Track Name', 'Artist Name(s)'],
79
- title='Song Popularity by Release Date',
80
- labels={'Album Release Date': 'Release Date', 'Popularity': 'Popularity'},
81
- template='plotly_white'
82
- )
83
-
84
- fig_scatter.update_traces(
85
- marker=dict(size=8, opacity=0.6),
86
- selector=dict(mode='markers')
87
- )
88
-
89
- fig_scatter.update_layout(
90
- yaxis=dict(range=[0, 100]),
91
- showlegend=False
92
- )
93
-
94
- st.plotly_chart(fig_scatter, use_container_width=True)
95
-
96
- # Additional Insights
97
- st.header("Key Insights")
98
- col1, col2 = st.columns(2)
99
-
100
- with col1:
101
- st.subheader("Most Popular Decade")
102
- most_popular_decade = decade_avg.loc[decade_avg['Popularity'].idxmax()]
103
- st.write(f"Decade: {int(most_popular_decade['Decade'])}s")
104
- st.write(f"Average Popularity: {most_popular_decade['Popularity']:.1f}")
105
-
106
- with col2:
107
- st.subheader("Most Popular Song")
108
- most_popular_song = filtered_df.loc[filtered_df['Popularity'].idxmax()]
109
- st.write(f"Track: {most_popular_song['Track Name']}")
110
- st.write(f"Artist: {most_popular_song['Artist Name(s)']}")
111
- st.write(f"Popularity: {most_popular_song['Popularity']}")
112
- st.write(f"Release Year: {int(most_popular_song['Year'])}")
113
-
114
- # Notes
115
- st.markdown("""
116
- **Notes:**
117
- - Popularity scores range from 0 to 100
118
- """)
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import base64
4
+ from models.data_processor import load_data
5
+ from functions.visualizations import generate_popularity_trends, generate_audio_features, generate_genre_analysis, \
6
+ generate_explicit_trends, generate_album_insights, generate_tempo_mood, generate_top_artists_songs, \
7
+ generate_album_release_trends, generate_duration_analysis, generate_streaming_insights, \
8
+ generate_feature_comparisons, generate_network_analysis
9
+
10
# Load the dataset. load_data() already renders the raw-data sample and reports
# read failures itself, so the sample is not written a second time here.
df = load_data()
if df.empty:
    st.error("Failed to load raw data. Check the 'data/music_data.csv' file.")

# Single source of truth mapping each sidebar label to the function that
# renders it; the selectbox options and the dispatch below both derive from it.
ANALYSES = {
    "Popularity Trends Over Time": generate_popularity_trends,
    "Audio Features Analysis": generate_audio_features,
    "Genre & Artist Analysis": generate_genre_analysis,
    "Explicit Content Trends": generate_explicit_trends,
    "Album & Label Insights": generate_album_insights,
    "Tempo & Mood Analysis": generate_tempo_mood,
    "Top Artists and Songs": generate_top_artists_songs,
    "Album Release Trends": generate_album_release_trends,
    "Track Duration Analysis": generate_duration_analysis,
    "Streaming and Engagement Insights": generate_streaming_insights,
    "Feature Comparisons Across Decades": generate_feature_comparisons,
    "Network Analysis": generate_network_analysis,
}

# Sidebar
st.sidebar.title("Music Data Analysis")
# st.sidebar.markdown("[View Raw Data]('data/music_data.csv')", unsafe_allow_html=True)  # Replace with your Google Drive ID
analysis_option = st.sidebar.selectbox("Choose Analysis", list(ANALYSES))

st.sidebar.subheader("Filters")
if not df.empty and 'Decade' in df.columns:
    decade_options = sorted(df['Decade'].unique())
    decades = st.sidebar.multiselect(
        "Select Decades", decade_options, default=decade_options)
    # An empty selection means "no filter" rather than "no rows".
    filtered_df = df[df['Decade'].isin(decades)] if decades else df
else:
    st.sidebar.warning(
        "No data loaded or 'Decade' column missing. Check the 'data' folder.")
    filtered_df = pd.DataFrame()

# Main content
# st.image("assets/spotify-logo.png", width=100)  # Spotify logo
st.title("Music Data Analysis Dashboard")
st.markdown("Explore trends and insights from a diverse music dataset.")

# selectbox returns one of the labels it was given, so the lookup cannot miss.
ANALYSES[analysis_option](filtered_df)
77
+
78
+ # Footer
79
+ # st.sidebar.markdown("Built with Streamlit by Grok 3 (xAI)")
 
 
 
 
 
 
 
assests/spotify-logo.png ADDED
functions/__pycache__/visualizations.cpython-310.pyc ADDED
Binary file (17.1 kB). View file
 
functions/visualizations.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ import networkx as nx
7
+ import plotly.graph_objects as go
8
+ from itertools import combinations
9
+
10
+
11
def generate_popularity_trends(df):
    """Render the "Popularity Trends Over Time" page.

    Draws two tabs: a line chart of mean ``Popularity`` per ``Decade`` and a
    scatter of ``Popularity`` against ``Year`` for every row. A tab shows an
    error message instead of a chart when its ``Decade`` / ``Year`` column is
    absent from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    chart_layout = dict(template='plotly_white', width=800, height=400)

    st.header("Popularity Trends Over Time")
    average_tab, songs_tab = st.tabs(["Average Popularity", "Individual Songs"])

    with average_tab:
        st.markdown(
            "<span style='color:blue'>**Average Popularity by Decade**</span>: "
            "Tracks how song popularity has <span style='color:red'>changed over time</span>. "
            "This <span style='color:green'>blue</span> line chart highlights peaks.",
            unsafe_allow_html=True,
        )
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        else:
            decade_means = df.groupby('Decade')['Popularity'].mean().reset_index()
            line_fig = px.line(
                decade_means,
                x='Decade',
                y='Popularity',
                title='Average Popularity by Decade',
                color_discrete_sequence=['blue'],
            )
            line_fig.update_layout(**chart_layout)
            st.plotly_chart(line_fig)

    with songs_tab:
        st.markdown(
            "<span style='color:blue'>**Song Popularity Over Time**</span>: "
            "Highlights individual trends with <span style='color:red'>red</span> points, "
            "showing <span style='color:green'>green</span> details on hover.",
            unsafe_allow_html=True,
        )
        if 'Year' not in df.columns:
            st.error("Cannot plot: 'Year' column missing.")
        else:
            scatter_fig = px.scatter(
                df,
                x='Year',
                y='Popularity',
                title='Song Popularity Over Time',
                hover_data=['Track Name', 'Artist Name(s)'],
                color_discrete_sequence=['red'],
            )
            scatter_fig.update_layout(**chart_layout)
            st.plotly_chart(scatter_fig)
34
+
35
+
36
def generate_audio_features(df):
    """Render the "Audio Features Analysis" page.

    Lets the user pick one audio feature, then draws three tabs: a histogram
    of that feature, a per-decade box plot of it, and a seaborn pair plot of
    Energy / Danceability / Valence / Tempo. Each tab shows an error message
    instead of raising when a column it needs is missing from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    st.header("Audio Features Analysis")
    feature = st.selectbox(
        "Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness'])
    tab1, tab2, tab3 = st.tabs(["Distribution", "By Decade", "Correlations"])

    with tab1:
        st.markdown(
            f"<span style='color:blue'>**Distribution of {feature}**</span>: Shows variation in <span style='color:red'>{feature.lower()}</span> with <span style='color:green'>green</span> bars.", unsafe_allow_html=True)
        if feature in df.columns:
            fig3 = px.histogram(
                df, x=feature, title=f'Distribution of {feature}', color_discrete_sequence=['green'])
            fig3.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig3)
        else:
            st.error(f"Cannot plot: '{feature}' column missing.")

    with tab2:
        st.markdown(
            f"<span style='color:blue'>**{feature} by Decade**</span>: Compares <span style='color:red'>{feature.lower()}</span> across decades with <span style='color:green'>green</span> boxes.", unsafe_allow_html=True)
        if 'Decade' in df.columns and feature in df.columns:
            fig4 = px.box(df, x='Decade', y=feature,
                          title=f'{feature} Distribution by Decade', color_discrete_sequence=['green'])
            fig4.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig4)
        else:
            st.error(f"Cannot plot: 'Decade' or '{feature}' column missing.")

    with tab3:
        st.markdown("<span style='color:blue'>**Feature Correlations**</span>: Explores relationships with <span style='color:red'>multi-colored</span> scatter points.", unsafe_allow_html=True)
        pair_columns = ['Energy', 'Danceability', 'Valence', 'Tempo']
        missing = [col for col in pair_columns if col not in df.columns]
        if missing:
            st.error(f"Cannot plot: column(s) missing: {', '.join(missing)}.")
        else:
            # pairplot builds its own figure (a PairGrid); that figure must be
            # handed to Streamlit, not a separately created plt.subplots() one.
            grid = sns.pairplot(df[pair_columns])
            st.pyplot(grid.figure)
            # Release the figure so reruns do not accumulate open figures.
            plt.close(grid.figure)
63
+
64
+
65
def generate_genre_analysis(df):
    """Render the "Genre & Artist Analysis" page.

    Draws three tabs: the five most frequent genres per decade (bar chart),
    the overall genre distribution (pie chart), and a heatmap of mean
    popularity per artist. Each tab shows an error message instead of raising
    when a column it needs is missing from ``df``.

    Args:
        df: DataFrame of tracks to plot. ``Genres`` is exploded, so it is
            expected to hold list-like values.
    """
    st.header("Genre & Artist Analysis")
    tab1, tab2, tab3 = st.tabs(
        ["Top Genres", "Genre Distribution", "Artist Popularity"])

    with tab1:
        st.markdown("<span style='color:blue'>**Top Genres by Decade**</span>: Shows frequent genres with <span style='color:red'>red</span> bars, <span style='color:green'>green</span> highlights.", unsafe_allow_html=True)
        if 'Decade' in df.columns and 'Genres' in df.columns:
            genre_decade = df.explode('Genres').groupby(
                ['Decade', 'Genres']).size().reset_index(name='Count')
            # Keep only the five largest genre counts within each decade.
            top_genres = genre_decade.groupby('Decade').apply(
                lambda x: x.nlargest(5, 'Count')).reset_index(drop=True)
            fig5 = px.bar(top_genres, x='Decade', y='Count', color='Genres',
                          title='Top Genres by Decade', color_discrete_sequence=px.colors.qualitative.Set1)
            fig5.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig5)
        else:
            st.error("Cannot plot: 'Decade' or 'Genres' column missing.")

    with tab2:
        st.markdown("<span style='color:blue'>**Genre Distribution**</span>: Breaks down genres with <span style='color:red'>multi-colored</span> pie slices.", unsafe_allow_html=True)
        if 'Genres' in df.columns:
            # Name both output columns explicitly so the chart does not depend
            # on the pandas-version-specific default naming of value_counts().
            genre_counts = (
                df.explode('Genres')['Genres']
                .value_counts()
                .rename_axis('Genres')
                .reset_index(name='count')
            )
            fig6 = px.pie(genre_counts, values='count', names='Genres',
                          title='Genre Distribution', color_discrete_sequence=px.colors.qualitative.Set2)
            fig6.update_layout(width=800, height=400)
            st.plotly_chart(fig6)
        else:
            st.error("Cannot plot: 'Genres' column missing.")

    with tab3:
        st.markdown("<span style='color:blue'>**Artist Popularity Heatmap**</span>: Visualizes popularity with <span style='color:red'>red</span> intensity.", unsafe_allow_html=True)
        if 'Artist Name(s)' in df.columns:
            artist_popularity = pd.pivot_table(
                df, values='Popularity', index='Artist Name(s)', aggfunc='mean').fillna(0)
            fig7 = px.imshow(artist_popularity, title='Artist Popularity Heatmap',
                             color_continuous_scale='Reds')
            fig7.update_layout(width=800, height=400)
            st.plotly_chart(fig7)
        else:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")
101
+
102
+
103
def generate_explicit_trends(df):
    """Render the "Explicit Content Trends" page.

    Counts rows per (``Decade``, ``Explicit``) pair and draws them as a
    stacked bar chart. Shows an error message instead when either column is
    absent from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    st.header("Explicit Content Trends")
    st.markdown(
        "<span style='color:blue'>**Explicit vs Non-Explicit Songs**</span>: "
        "Compares content with <span style='color:red'>stacked bars</span> "
        "in <span style='color:green'>green</span> "
        "and <span style='color:purple'>purple</span>.",
        unsafe_allow_html=True,
    )

    # Guard clause: both grouping columns are required.
    if not ('Decade' in df.columns and 'Explicit' in df.columns):
        st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
        return

    pair_sizes = df.groupby(['Decade', 'Explicit']).size()
    counts_wide = pair_sizes.unstack().fillna(0)
    stacked_fig = px.bar(
        counts_wide,
        barmode='stack',
        title='Explicit vs Non-Explicit Songs by Decade',
        color_discrete_sequence=['green', 'purple'],
    )
    stacked_fig.update_layout(template='plotly_white', width=800, height=400)
    st.plotly_chart(stacked_fig)
115
+
116
+
117
def generate_album_insights(df):
    """Render the "Album & Label Insights" page.

    Draws two tabs: a bar chart of the ten most frequent ``Label`` values and
    a bubble scatter of per-album song count against mean ``Popularity``. A
    tab shows an error message instead of a chart when a column it checks for
    is absent from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    chart_layout = dict(template='plotly_white', width=800, height=400)

    st.header("Album & Label Insights")
    labels_tab, albums_tab = st.tabs(["Top Labels", "Album Popularity"])

    with labels_tab:
        st.markdown(
            "<span style='color:blue'>**Top Record Labels**</span>: "
            "Identifies labels with <span style='color:red'>blue</span> bars.",
            unsafe_allow_html=True,
        )
        if 'Label' not in df.columns:
            st.error("Cannot plot: 'Label' column missing.")
        else:
            label_counts = df['Label'].value_counts()
            busiest_labels = label_counts.nlargest(10).reset_index()
            labels_fig = px.bar(
                busiest_labels,
                x='Label',
                y='count',
                title='Top Record Labels by Song Count',
                color_discrete_sequence=['blue'],
            )
            labels_fig.update_layout(**chart_layout)
            st.plotly_chart(labels_fig)

    with albums_tab:
        st.markdown(
            "<span style='color:blue'>**Album Popularity**</span>: "
            "Shows albums with <span style='color:red'>red</span> bubbles.",
            unsafe_allow_html=True,
        )
        if not ('Album Name' in df.columns and 'Popularity' in df.columns):
            st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
        else:
            per_album = df.groupby('Album Name')['Popularity']
            album_stats = per_album.agg(['mean', 'count']).reset_index()
            albums_fig = px.scatter(
                album_stats,
                x='count',
                y='mean',
                size='mean',
                hover_data=['Album Name'],
                title='Albums: Song Count vs Average Popularity',
                color_discrete_sequence=['red'],
            )
            albums_fig.update_layout(**chart_layout)
            st.plotly_chart(albums_fig)
141
+
142
+
143
def generate_tempo_mood(df):
    """Render the "Tempo & Mood Analysis" page.

    Draws two tabs: a line chart of mean ``Tempo`` per ``Year`` and a scatter
    of ``Valence`` against ``Energy``. A tab shows an error message instead of
    a chart when a column it checks for is absent from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    chart_layout = dict(template='plotly_white', width=800, height=400)

    st.header("Tempo & Mood Analysis")
    tempo_tab, mood_tab = st.tabs(["Tempo Trends", "Mood Scatter"])

    with tempo_tab:
        st.markdown(
            "<span style='color:blue'>**Tempo Trends**</span>: "
            "Tracks changes with <span style='color:red'>orange</span> line.",
            unsafe_allow_html=True,
        )
        can_plot_tempo = 'Year' in df.columns and 'Tempo' in df.columns
        if not can_plot_tempo:
            st.error("Cannot plot: 'Year' or 'Tempo' column missing.")
        else:
            yearly_tempo = df.groupby('Year')['Tempo'].mean().reset_index()
            tempo_fig = px.line(
                yearly_tempo,
                x='Year',
                y='Tempo',
                title='Average Tempo Over Time',
                color_discrete_sequence=['orange'],
            )
            tempo_fig.update_layout(**chart_layout)
            st.plotly_chart(tempo_fig)

    with mood_tab:
        st.markdown(
            "<span style='color:blue'>**Valence vs Energy**</span>: "
            "Groups mood with <span style='color:red'>purple</span> points.",
            unsafe_allow_html=True,
        )
        can_plot_mood = 'Valence' in df.columns and 'Energy' in df.columns
        if not can_plot_mood:
            st.error("Cannot plot: 'Valence' or 'Energy' column missing.")
        else:
            mood_fig = px.scatter(
                df,
                x='Valence',
                y='Energy',
                title='Valence vs Energy',
                hover_data=['Track Name'],
                color_discrete_sequence=['purple'],
            )
            mood_fig.update_layout(**chart_layout)
            st.plotly_chart(mood_fig)
165
+
166
+
167
def generate_top_artists_songs(df):
    """Render the "Top Artists and Songs" page.

    Draws two tabs: a bar chart of the ten most frequent ``Artist Name(s)``
    values and a horizontal bar chart of the ten rows with the highest
    ``Popularity``. A tab shows an error message instead of a chart when a
    column it checks for is absent from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    chart_layout = dict(template='plotly_white', width=800, height=400)

    st.header("Top Artists and Songs")
    artists_tab, songs_tab = st.tabs(["Top Artists", "Top Songs"])

    with artists_tab:
        st.markdown(
            "<span style='color:blue'>**Most Featured Artists**</span>: "
            "Shows artists with <span style='color:red'>green</span> bars.",
            unsafe_allow_html=True,
        )
        if 'Artist Name(s)' not in df.columns:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")
        else:
            appearances = df['Artist Name(s)'].value_counts()
            most_featured = appearances.nlargest(10).reset_index()
            artists_fig = px.bar(
                most_featured,
                x='Artist Name(s)',
                y='count',
                title='Most Featured Artists',
                color_discrete_sequence=['green'],
            )
            artists_fig.update_layout(**chart_layout)
            st.plotly_chart(artists_fig)

    with songs_tab:
        st.markdown(
            "<span style='color:blue'>**Top 10 Songs**</span>: "
            "Lists songs with <span style='color:red'>blue</span> bars.",
            unsafe_allow_html=True,
        )
        if not ('Track Name' in df.columns and 'Popularity' in df.columns):
            st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
        else:
            ranked = df.nlargest(10, 'Popularity')
            best_songs = ranked[['Track Name', 'Popularity']]
            songs_fig = px.bar(
                best_songs,
                y='Track Name',
                x='Popularity',
                orientation='h',
                title='Top 10 Songs by Popularity',
                color_discrete_sequence=['blue'],
            )
            songs_fig.update_layout(**chart_layout)
            st.plotly_chart(songs_fig)
193
+
194
+
195
def generate_album_release_trends(df):
    """Render the "Album Release Trends" page.

    Draws two tabs: a line chart of the number of rows per ``Year`` and a
    heatmap of row counts per (``Artist Name(s)``, ``Year``) pair. A tab shows
    an error message instead of a chart when a column it checks for is absent
    from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    st.header("Album Release Trends")
    yearly_tab, heatmap_tab = st.tabs(["Albums per Year", "Artist-Year Heatmap"])

    with yearly_tab:
        st.markdown(
            "<span style='color:blue'>**Albums per Year**</span>: "
            "Tracks releases with <span style='color:red'>purple</span> line.",
            unsafe_allow_html=True,
        )
        if 'Year' not in df.columns:
            st.error("Cannot plot: 'Year' column missing.")
        else:
            year_counts = df['Year'].value_counts()
            chronological = year_counts.sort_index().reset_index()
            yearly_fig = px.line(
                chronological,
                x='Year',
                y='count',
                title='Number of Albums Released per Year',
                color_discrete_sequence=['purple'],
            )
            yearly_fig.update_layout(
                template='plotly_white', width=800, height=400)
            st.plotly_chart(yearly_fig)

    with heatmap_tab:
        st.markdown(
            "<span style='color:blue'>**Songs by Artists and Years**</span>: "
            "Visualizes with <span style='color:red'>heatmap colors</span>.",
            unsafe_allow_html=True,
        )
        if not ('Artist Name(s)' in df.columns and 'Year' in df.columns):
            st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
        else:
            pair_sizes = df.groupby(['Artist Name(s)', 'Year']).size()
            # One row per artist, one column per year; absent pairs become 0.
            artist_by_year = pair_sizes.unstack().fillna(0)
            heatmap_fig = px.imshow(
                artist_by_year,
                title='Songs Released by Artists Across Years',
                color_continuous_scale='Viridis',
            )
            heatmap_fig.update_layout(width=800, height=400)
            st.plotly_chart(heatmap_fig)
220
+
221
+
222
def generate_duration_analysis(df):
    """Render the "Track Duration Analysis" page.

    Draws two tabs: a histogram of ``Track Duration (ms)`` and a per-decade
    box plot of the same column. A tab shows an error message instead of a
    chart when a column it checks for is absent from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    duration_col = 'Track Duration (ms)'
    chart_layout = dict(template='plotly_white', width=800, height=400)
    has_duration = duration_col in df.columns

    st.header("Track Duration Analysis")
    dist_tab, decade_tab = st.tabs(["Distribution", "By Decade"])

    with dist_tab:
        st.markdown(
            "<span style='color:blue'>**Track Duration Distribution**</span>: "
            "Shows lengths with <span style='color:red'>orange</span> bars.",
            unsafe_allow_html=True,
        )
        if not has_duration:
            st.error("Cannot plot: 'Track Duration (ms)' column missing.")
        else:
            hist_fig = px.histogram(
                df,
                x=duration_col,
                title='Distribution of Track Durations',
                color_discrete_sequence=['orange'],
            )
            hist_fig.update_layout(**chart_layout)
            st.plotly_chart(hist_fig)

    with decade_tab:
        st.markdown(
            "<span style='color:blue'>**Duration by Decade**</span>: "
            "Compares with <span style='color:red'>green</span> boxes.",
            unsafe_allow_html=True,
        )
        if not ('Decade' in df.columns and has_duration):
            st.error(
                "Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
        else:
            box_fig = px.box(
                df,
                x='Decade',
                y=duration_col,
                title='Track Duration by Decade',
                color_discrete_sequence=['green'],
            )
            box_fig.update_layout(**chart_layout)
            st.plotly_chart(box_fig)
244
+
245
+
246
def generate_streaming_insights(df):
    """Render the "Streaming and Engagement Insights" page.

    Draws two tabs: a scatter of ``Popularity`` against
    ``Track Duration (ms)`` and a bar chart of mean ``Popularity`` per
    ``Time Signature``. A tab shows an error message instead of a chart when
    a column it checks for is absent from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    chart_layout = dict(template='plotly_white', width=800, height=400)
    has_popularity = 'Popularity' in df.columns

    st.header("Streaming and Engagement Insights")
    duration_tab, signature_tab = st.tabs(
        ["Popularity vs Duration", "Time Signature"])

    with duration_tab:
        st.markdown(
            "<span style='color:blue'>**Popularity vs Duration**</span>: "
            "Explores trends with <span style='color:red'>blue</span> scatter.",
            unsafe_allow_html=True,
        )
        if not ('Track Duration (ms)' in df.columns and has_popularity):
            st.error(
                "Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")
        else:
            scatter_fig = px.scatter(
                df,
                x='Track Duration (ms)',
                y='Popularity',
                title='Popularity vs Track Duration',
                color_discrete_sequence=['blue'],
            )
            scatter_fig.update_layout(**chart_layout)
            st.plotly_chart(scatter_fig)

    with signature_tab:
        st.markdown(
            "<span style='color:blue'>**Popularity by Time Signature**</span>: "
            "Compares with <span style='color:red'>purple</span> bars.",
            unsafe_allow_html=True,
        )
        if not ('Time Signature' in df.columns and has_popularity):
            st.error(
                "Cannot plot: 'Time Signature' or 'Popularity' column missing.")
        else:
            per_signature = df.groupby('Time Signature')['Popularity']
            signature_means = per_signature.mean().reset_index()
            bars_fig = px.bar(
                signature_means,
                x='Time Signature',
                y='Popularity',
                title='Average Popularity by Time Signature',
                color_discrete_sequence=['purple'],
            )
            bars_fig.update_layout(**chart_layout)
            st.plotly_chart(bars_fig)
271
+
272
+
273
def generate_feature_comparisons(df):
    """Render the "Feature Comparisons Across Decades" page.

    Draws two tabs: grouped bars of mean Danceability / Energy / Valence per
    ``Decade`` and a line chart of mean ``Loudness`` per ``Year``. A tab shows
    an error message instead of a chart when a column it checks for is absent
    from ``df``.

    Args:
        df: DataFrame of tracks to plot.
    """
    chart_layout = dict(template='plotly_white', width=800, height=400)

    st.header("Feature Comparisons Across Decades")
    compare_tab, loudness_tab = st.tabs(["Feature Comparison", "Loudness Trends"])

    with compare_tab:
        st.markdown(
            "<span style='color:blue'>**Feature Comparison**</span>: "
            "Compares features with <span style='color:red'>multi-colored</span> bars.",
            unsafe_allow_html=True,
        )
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        else:
            compared = ['Danceability', 'Energy', 'Valence']
            decade_means = df.groupby('Decade')[compared].mean().reset_index()
            # Long format: one row per (Decade, feature) with its mean value.
            long_means = decade_means.melt(id_vars='Decade')
            grouped_fig = px.bar(
                long_means,
                x='Decade',
                y='value',
                color='variable',
                barmode='group',
                title='Feature Comparison by Decade',
                color_discrete_sequence=px.colors.qualitative.Pastel,
            )
            grouped_fig.update_layout(**chart_layout)
            st.plotly_chart(grouped_fig)

    with loudness_tab:
        st.markdown(
            "<span style='color:blue'>**Loudness Over Time**</span>: "
            "Tracks with <span style='color:red'>green</span> line.",
            unsafe_allow_html=True,
        )
        if not ('Year' in df.columns and 'Loudness' in df.columns):
            st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
        else:
            yearly_loudness = df.groupby('Year')['Loudness'].mean().reset_index()
            loudness_fig = px.line(
                yearly_loudness,
                x='Year',
                y='Loudness',
                title='Average Loudness Over Time',
                color_discrete_sequence=['green'],
            )
            loudness_fig.update_layout(**chart_layout)
            st.plotly_chart(loudness_fig)
298
+
299
+
300
def generate_network_analysis(df):
    """Render the "Network Analysis" page.

    The first tab builds an undirected graph in which every pair of artists
    named together in one comma-separated ``Artist Name(s)`` value is joined
    by an edge, and draws it with Plotly. Artists that never share a value
    with another artist are not added to the graph. The second tab is a
    placeholder that only prints example code.

    Args:
        df: DataFrame of tracks to plot.
    """
    st.header("Network Analysis")
    tab1, tab2 = st.tabs(["Artist Collaborations", "Genre Crossover"])

    with tab1:
        st.markdown("<span style='color:blue'>**Artist Collaborations**</span>: Visualizes connections with <span style='color:red'>interactive red nodes</span>. Hover for details.", unsafe_allow_html=True)
        if 'Artist Name(s)' in df.columns:
            # Drop missing values and coerce the rest to str before splitting.
            valid_artists = df['Artist Name(s)'].dropna().astype(str)
            G = nx.Graph()
            for artists in valid_artists:
                artists_list = [a.strip() for a in artists.split(',') if a.strip()]
                # combinations() yields nothing for fewer than two names, so
                # single-artist rows contribute no edges.
                G.add_edges_from(combinations(artists_list, 2))

            if G.number_of_nodes() > 0:
                # Fixed seed: without it the layout is re-randomised on every
                # Streamlit rerun and the graph jumps around between
                # interactions.
                pos = nx.spring_layout(G, seed=42)

                # Plotly draws all edges as one trace; None breaks the line
                # between consecutive segments.
                edge_x = []
                edge_y = []
                for source, target in G.edges():
                    x0, y0 = pos[source]
                    x1, y1 = pos[target]
                    edge_x.extend([x0, x1, None])
                    edge_y.extend([y0, y1, None])

                edge_trace = go.Scatter(
                    x=edge_x, y=edge_y,
                    line=dict(width=0.5, color='#888'),
                    hoverinfo='none',
                    mode='lines')

                nodes = list(G.nodes())
                node_trace = go.Scatter(
                    x=[pos[node][0] for node in nodes],
                    y=[pos[node][1] for node in nodes],
                    mode='markers+text',
                    hoverinfo='text',
                    marker=dict(size=10, color='red'),
                    text=nodes,
                    textposition="top center")

                fig = go.Figure(data=[edge_trace, node_trace],
                                layout=go.Layout(
                                    title='Artist Collaborations',
                                    showlegend=False,
                                    hovermode='closest',
                                    margin=dict(b=0, l=0, r=0, t=40),
                                    width=800, height=600))
                st.plotly_chart(fig)
            else:
                st.warning("No artist collaborations to display.")
        else:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")

    with tab2:
        st.markdown("<span style='color:blue'>**Genre Crossover**</span>: Placeholder with <span style='color:red'>future multi-color</span> potential.", unsafe_allow_html=True)
        st.write("To implement, install `holoviews` and use the following code:")
        st.code(
            "\n"
            "import holoviews as hv\n"
            "hv.extension('bokeh')\n"
            "genre_pairs = df.explode('Genres')[['Genres']].merge(df.explode('Genres')[['Genres']], how='cross')\n"
            "chord_data = genre_pairs.groupby(['Genres_x', 'Genres_y']).size().reset_index(name='value')\n"
            "chord = hv.Chord(chord_data).opts(title=\"Genre Crossover\")\n"
            "st.write(hv.render(chord, backend='bokeh'))\n"
        )
models/__pycache__/data_processor.cpython-310.pyc ADDED
Binary file (1.66 kB). View file
 
models/data_processor.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+
4
def load_data():
    """Load ``data/music_data.csv`` and add the derived analysis columns.

    Adds ``Year`` and ``Decade`` (from ``Album Release Date``), ``Genres``
    (list of stripped names split from ``Artist Genres``) and coerces
    ``Popularity`` to numeric. Raw and processed samples are rendered through
    Streamlit, and problems are reported with ``st.error`` / ``st.warning``
    rather than raised.

    Returns:
        pandas.DataFrame: the processed data; an empty DataFrame when the
        file cannot be read; the unprocessed frame when it is empty or has no
        ``Album Release Date`` column.
    """
    try:
        df = pd.read_csv('data/music_data.csv', on_bad_lines='skip')
        st.write("**Raw Data Sample:**", df.head())  # Display raw data
    except FileNotFoundError:
        st.error("Error: 'data/music_data.csv' not found. Please ensure the file exists.")
        return pd.DataFrame()
    except Exception as e:
        st.error(f"Error loading raw data: {e}")
        return pd.DataFrame()

    if df.empty:
        st.warning("Warning: Loaded DataFrame is empty. Check the CSV content.")
        return df

    if 'Album Release Date' not in df.columns:
        st.error("'Album Release Date' column missing from CSV")
        return df

    # NOTE: dates that fail to parse are coerced to NaT and then filled with
    # 0, so those rows end up with Year 0 and Decade 0.
    df['Year'] = pd.to_datetime(df['Album Release Date'], errors='coerce').dt.year
    df['Year'] = df['Year'].fillna(0).astype(int)
    df['Decade'] = (df['Year'] // 10 * 10).astype(int)

    if 'Artist Genres' in df.columns:
        df['Genres'] = (
            df['Artist Genres']
            .fillna('Unknown')
            .str.split(',')
            .apply(lambda names: [g.strip() for g in names])
        )
    else:
        # Same placeholder the fillna above gives rows without genres.
        st.warning("'Artist Genres' column missing from CSV; genres set to 'Unknown'.")
        df['Genres'] = [['Unknown'] for _ in range(len(df))]

    if 'Popularity' in df.columns:
        df['Popularity'] = pd.to_numeric(df['Popularity'], errors='coerce').fillna(0)
    else:
        st.error("'Popularity' column missing from CSV")

    # Preview only the columns that are actually present so a missing
    # optional column cannot raise here.
    preview_columns = [
        col for col in ('Track Name', 'Year', 'Decade', 'Popularity')
        if col in df.columns
    ]
    st.write("**Processed Data Sample:**", df[preview_columns].head())

    return df