Spaces:

Ezhil24
/

DataVisualizatioin_spotify

Sleeping

App Files Files Community

Ezhil committed on Mar 4

Commit

10d82a8

1 Parent(s): 97fec97

Initial commit-folder structure

Browse files

Files changed (8) hide show

README.md +10 -0
REQUIREMENTS.txt +6 -3
app.py +75 -114
assests/spotify-logo.png +0 -0
functions/__pycache__/visualizations.cpython-310.pyc +0 -0
functions/visualizations.py +365 -0
models/__pycache__/data_processor.cpython-310.pyc +0 -0
models/data_processor.py +35 -0

README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+---
+title: DataVisualizatioin Spotify
+emoji: 🚀
+colorFrom: purple
+colorTo: purple
+sdk: streamlit
+sdk_version: 1.42.2
+app_file: app.py
+pinned: false
+---

REQUIREMENTS.txt CHANGED Viewed

@@ -1,3 +1,6 @@
-streamlit
-pandas
-plotly

+streamlit==1.31.1
+pandas==2.2.1
+plotly==5.20.0
+seaborn==0.13.2
+matplotlib==3.8.3
+networkx==3.2.1

app.py CHANGED Viewed

@@ -1,118 +1,79 @@
 import streamlit as st
 import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-from datetime import datetime
-import os
-# Set page configuration
-st.set_page_config(page_title="Music Popularity Trends", layout="wide")
-# Title
-st.title("Music Popularity Trends Over Time")
-# Load the data from the 'data' folder
-@st.cache_data
-def load_data():
-    # Define the path to the data folder
-    data_path = os.path.join(os.getcwd(), 'data', 'music_data.csv')
-    # Load the CSV file
-    data = pd.read_csv(data_path)
-    # Convert Album Release Date to datetime
-    data['Album Release Date'] = pd.to_datetime(data['Album Release Date'], errors='coerce')
-    # Extract year and decade
-    data['Year'] = data['Album Release Date'].dt.year
-    data['Decade'] = (data['Year'] // 10) * 10
-    return data
-# Load data
-try:
-    df = load_data()
-except FileNotFoundError:
-    st.error("Error: 'music_data.csv' not found in the 'data' folder. Please ensure the file exists.")
-    st.stop()
-# Sidebar for filtering
-st.sidebar.header("Filter Options")
-min_year = int(df['Year'].min())
-max_year = int(df['Year'].max())
-year_range = st.sidebar.slider(
-    "Select Year Range",
-    min_year,
-    max_year,
-    (min_year, max_year)
-)
-# Filter data based on year range
-filtered_df = df[
-    (df['Year'] >= year_range[0]) &
-    (df['Year'] <= year_range[1])
-]
-# 1. Line Chart - Average Popularity by Decade
-st.header("Average Popularity by Decade")
-decade_avg = filtered_df.groupby('Decade')['Popularity'].mean().reset_index()
-fig_line = px.line(
-    decade_avg,
-    x='Decade',
-    y='Popularity',
-    title='Average Song Popularity by Decade',
-    labels={'Popularity': 'Average Popularity', 'Decade': 'Decade'},
-    template='plotly_white'
-)
-fig_line.update_layout(
-    xaxis=dict(tickmode='linear', dtick=10),
-    yaxis=dict(range=[0, 100])
 )
-st.plotly_chart(fig_line, use_container_width=True)
-# 2. Scatter Plot - Individual Song Popularity Over Time
-st.header("Individual Song Popularity Over Time")
-fig_scatter = px.scatter(
-    filtered_df,
-    x='Album Release Date',
-    y='Popularity',
-    hover_data=['Track Name', 'Artist Name(s)'],
-    title='Song Popularity by Release Date',
-    labels={'Album Release Date': 'Release Date', 'Popularity': 'Popularity'},
-    template='plotly_white'
-)
-fig_scatter.update_traces(
-    marker=dict(size=8, opacity=0.6),
-    selector=dict(mode='markers')
-)
-fig_scatter.update_layout(
-    yaxis=dict(range=[0, 100]),
-    showlegend=False
-)
-st.plotly_chart(fig_scatter, use_container_width=True)
-# Additional Insights
-st.header("Key Insights")
-col1, col2 = st.columns(2)
-with col1:
-    st.subheader("Most Popular Decade")
-    most_popular_decade = decade_avg.loc[decade_avg['Popularity'].idxmax()]
-    st.write(f"Decade: {int(most_popular_decade['Decade'])}s")
-    st.write(f"Average Popularity: {most_popular_decade['Popularity']:.1f}")
-with col2:
-    st.subheader("Most Popular Song")
-    most_popular_song = filtered_df.loc[filtered_df['Popularity'].idxmax()]
-    st.write(f"Track: {most_popular_song['Track Name']}")
-    st.write(f"Artist: {most_popular_song['Artist Name(s)']}")
-    st.write(f"Popularity: {most_popular_song['Popularity']}")
-    st.write(f"Release Year: {int(most_popular_song['Year'])}")
-# Notes
-st.markdown("""
-**Notes:**
-- Popularity scores range from 0 to 100
-""")

 import streamlit as st
 import pandas as pd
+import base64
+from models.data_processor import load_data
+from functions.visualizations import generate_popularity_trends, generate_audio_features, generate_genre_analysis, \
+    generate_explicit_trends, generate_album_insights, generate_tempo_mood, generate_top_artists_songs, \
+    generate_album_release_trends, generate_duration_analysis, generate_streaming_insights, \
+    generate_feature_comparisons, generate_network_analysis
# Load the dataset. load_data() already renders the raw-data preview and its
# own error messages, so the preview is not rendered a second time here.
df = load_data()
if df.empty:
    st.error("Failed to load raw data. Check the 'data/music_data.csv' file.")

# Sidebar label -> function that renders that analysis page. The dict order is
# the order in which the options appear in the selectbox.
ANALYSES = {
    "Popularity Trends Over Time": generate_popularity_trends,
    "Audio Features Analysis": generate_audio_features,
    "Genre & Artist Analysis": generate_genre_analysis,
    "Explicit Content Trends": generate_explicit_trends,
    "Album & Label Insights": generate_album_insights,
    "Tempo & Mood Analysis": generate_tempo_mood,
    "Top Artists and Songs": generate_top_artists_songs,
    "Album Release Trends": generate_album_release_trends,
    "Track Duration Analysis": generate_duration_analysis,
    "Streaming and Engagement Insights": generate_streaming_insights,
    "Feature Comparisons Across Decades": generate_feature_comparisons,
    "Network Analysis": generate_network_analysis,
}

# Sidebar
st.sidebar.title("Music Data Analysis")
analysis_option = st.sidebar.selectbox("Choose Analysis", list(ANALYSES))

st.sidebar.subheader("Filters")
if not df.empty and 'Decade' in df.columns:
    decade_options = sorted(df['Decade'].unique())
    decades = st.sidebar.multiselect(
        "Select Decades", decade_options, default=decade_options)
    # An empty selection means "no filter", not "no rows".
    filtered_df = df[df['Decade'].isin(decades)] if decades else df
else:
    st.sidebar.warning(
        "No data loaded or 'Decade' column missing. Check the 'data' folder.")
    filtered_df = pd.DataFrame()

# Main content
# TODO: show the Spotify logo here; note the committed asset lives under
# 'assests/' (sic), not 'assets/'.
st.title("Music Data Analysis Dashboard")
st.markdown("Explore trends and insights from a diverse music dataset.")

# Several generators index feature columns without checking for them, so an
# empty frame would raise inside the plotting call. Halt the script run here
# instead; the error/warning above already explains what went wrong.
if filtered_df.empty:
    st.stop()

ANALYSES[analysis_option](filtered_df)

assests/spotify-logo.png ADDED Viewed

functions/__pycache__/visualizations.cpython-310.pyc ADDED Viewed

Binary file (17.1 kB). View file

functions/visualizations.py ADDED Viewed

	@@ -0,0 +1,365 @@

+import streamlit as st
+import pandas as pd
+import plotly.express as px
+import seaborn as sns
+import matplotlib.pyplot as plt
+import networkx as nx
+import plotly.graph_objects as go
+from itertools import combinations
def generate_popularity_trends(df):
    """Render the "Popularity Trends Over Time" page.

    Draws two tabs: a line chart of the mean ``Popularity`` per ``Decade`` and
    a scatter plot of each row's ``Popularity`` against its ``Year``. A tab
    shows an error message instead of a chart when its x-axis column is absent.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    layout = dict(template='plotly_white', width=800, height=400)

    st.header("Popularity Trends Over Time")
    average_tab, songs_tab = st.tabs(["Average Popularity", "Individual Songs"])

    with average_tab:
        st.markdown("<span style='color:blue'>**Average Popularity by Decade**</span>: Tracks how song popularity has <span style='color:red'>changed over time</span>. This <span style='color:green'>blue</span> line chart highlights peaks.", unsafe_allow_html=True)
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        else:
            decade_means = df.groupby('Decade')['Popularity'].mean().reset_index()
            line_chart = px.line(
                decade_means,
                x='Decade',
                y='Popularity',
                title='Average Popularity by Decade',
                color_discrete_sequence=['blue'],
            )
            line_chart.update_layout(**layout)
            st.plotly_chart(line_chart)

    with songs_tab:
        st.markdown("<span style='color:blue'>**Song Popularity Over Time**</span>: Highlights individual trends with <span style='color:red'>red</span> points, showing <span style='color:green'>green</span> details on hover.", unsafe_allow_html=True)
        if 'Year' not in df.columns:
            st.error("Cannot plot: 'Year' column missing.")
        else:
            song_chart = px.scatter(
                df,
                x='Year',
                y='Popularity',
                title='Song Popularity Over Time',
                hover_data=['Track Name', 'Artist Name(s)'],
                color_discrete_sequence=['red'],
            )
            song_chart.update_layout(**layout)
            st.plotly_chart(song_chart)
def generate_audio_features(df):
    """Render the "Audio Features Analysis" page.

    The user picks one feature (Danceability, Energy, Tempo or Loudness); the
    page then shows its histogram, its per-decade box plot, and a seaborn pair
    plot of Energy/Danceability/Valence/Tempo. Each tab reports a missing
    column with ``st.error`` instead of raising from the plotting call.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    layout = dict(template='plotly_white', width=800, height=400)

    st.header("Audio Features Analysis")
    feature = st.selectbox(
        "Select Feature", ['Danceability', 'Energy', 'Tempo', 'Loudness'])
    tab1, tab2, tab3 = st.tabs(["Distribution", "By Decade", "Correlations"])

    with tab1:
        st.markdown(
            f"<span style='color:blue'>**Distribution of {feature}**</span>: Shows variation in <span style='color:red'>{feature.lower()}</span> with <span style='color:green'>green</span> bars.", unsafe_allow_html=True)
        if feature in df.columns:
            fig3 = px.histogram(
                df, x=feature, title=f'Distribution of {feature}',
                color_discrete_sequence=['green'])
            fig3.update_layout(**layout)
            st.plotly_chart(fig3)
        else:
            st.error(f"Cannot plot: '{feature}' column missing.")

    with tab2:
        st.markdown(
            f"<span style='color:blue'>**{feature} by Decade**</span>: Compares <span style='color:red'>{feature.lower()}</span> across decades with <span style='color:green'>green</span> boxes.", unsafe_allow_html=True)
        missing = [col for col in ('Decade', feature) if col not in df.columns]
        if missing:
            st.error(f"Cannot plot: '{missing[0]}' column missing.")
        else:
            fig4 = px.box(
                df, x='Decade', y=feature,
                title=f'{feature} Distribution by Decade',
                color_discrete_sequence=['green'])
            fig4.update_layout(**layout)
            st.plotly_chart(fig4)

    with tab3:
        st.markdown("<span style='color:blue'>**Feature Correlations**</span>: Explores relationships with <span style='color:red'>multi-colored</span> scatter points.", unsafe_allow_html=True)
        pair_columns = ['Energy', 'Danceability', 'Valence', 'Tempo']
        missing = [col for col in pair_columns if col not in df.columns]
        if missing:
            names = ', '.join(f"'{col}'" for col in missing)
            st.error(f"Cannot plot: {names} column(s) missing.")
        else:
            # pairplot creates and owns its own figure (a PairGrid). The
            # original passed an unrelated, empty plt.subplots() figure to
            # st.pyplot, so the tab rendered blank axes.
            grid = sns.pairplot(df[pair_columns])
            st.pyplot(grid.figure)
            # Close it explicitly so figures do not pile up across reruns.
            plt.close(grid.figure)
def generate_genre_analysis(df):
    """Render the "Genre & Artist Analysis" page.

    Tabs: the five most frequent genres per decade (stacked bars), the overall
    genre share (pie), and a one-column heatmap of mean popularity per artist
    credit. ``Genres`` is exploded, so it is expected to hold one list of genre
    names per row. A tab reports a missing column with ``st.error`` instead of
    raising.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    st.header("Genre & Artist Analysis")
    tab1, tab2, tab3 = st.tabs(
        ["Top Genres", "Genre Distribution", "Artist Popularity"])

    with tab1:
        st.markdown("<span style='color:blue'>**Top Genres by Decade**</span>: Shows frequent genres with <span style='color:red'>red</span> bars, <span style='color:green'>green</span> highlights.", unsafe_allow_html=True)
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        elif 'Genres' not in df.columns:
            st.error("Cannot plot: 'Genres' column missing.")
        else:
            genre_decade = df.explode('Genres').groupby(
                ['Decade', 'Genres']).size().reset_index(name='Count')
            # Sort + head(5) keeps the five largest counts per decade, like the
            # former groupby.apply(nlargest), without applying a function over
            # the grouping column (deprecated in pandas 2.2).
            top_genres = (
                genre_decade
                .sort_values(['Decade', 'Count'], ascending=[True, False])
                .groupby('Decade')
                .head(5)
                .reset_index(drop=True)
            )
            fig5 = px.bar(top_genres, x='Decade', y='Count', color='Genres',
                          title='Top Genres by Decade',
                          color_discrete_sequence=px.colors.qualitative.Set1)
            fig5.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig5)

    with tab2:
        st.markdown("<span style='color:blue'>**Genre Distribution**</span>: Breaks down genres with <span style='color:red'>multi-colored</span> pie slices.", unsafe_allow_html=True)
        if 'Genres' not in df.columns:
            st.error("Cannot plot: 'Genres' column missing.")
        else:
            # Name both output columns explicitly: the default names produced
            # by value_counts().reset_index() differ between pandas 1.x and 2.x.
            genre_counts = (
                df.explode('Genres')['Genres']
                .value_counts()
                .rename_axis('Genres')
                .reset_index(name='count')
            )
            fig6 = px.pie(genre_counts, values='count', names='Genres',
                          title='Genre Distribution',
                          color_discrete_sequence=px.colors.qualitative.Set2)
            fig6.update_layout(width=800, height=400)
            st.plotly_chart(fig6)

    with tab3:
        st.markdown("<span style='color:blue'>**Artist Popularity Heatmap**</span>: Visualizes popularity with <span style='color:red'>red</span> intensity.", unsafe_allow_html=True)
        if 'Artist Name(s)' in df.columns:
            # One row per artist credit, single 'Popularity' column of means.
            artist_popularity = pd.pivot_table(
                df, values='Popularity', index='Artist Name(s)',
                aggfunc='mean').fillna(0)
            fig7 = px.imshow(artist_popularity,
                             title='Artist Popularity Heatmap',
                             color_continuous_scale='Reds')
            fig7.update_layout(width=800, height=400)
            st.plotly_chart(fig7)
        else:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")
def generate_explicit_trends(df):
    """Render the "Explicit Content Trends" page.

    Counts rows per (``Decade``, ``Explicit``) pair and draws the counts as a
    stacked bar chart, one bar per decade. Shows an error message instead when
    either column is absent.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    st.header("Explicit Content Trends")
    st.markdown("<span style='color:blue'>**Explicit vs Non-Explicit Songs**</span>: Compares content with <span style='color:red'>stacked bars</span> in <span style='color:green'>green</span> and <span style='color:purple'>purple</span>.", unsafe_allow_html=True)

    if 'Decade' not in df.columns or 'Explicit' not in df.columns:
        st.error("Cannot plot: 'Decade' or 'Explicit' column missing.")
        return

    # Rows are decades, columns are the distinct Explicit values.
    counts = df.groupby(['Decade', 'Explicit']).size()
    counts_by_decade = counts.unstack().fillna(0)
    stacked = px.bar(
        counts_by_decade,
        barmode='stack',
        title='Explicit vs Non-Explicit Songs by Decade',
        color_discrete_sequence=['green', 'purple'],
    )
    stacked.update_layout(template='plotly_white', width=800, height=400)
    st.plotly_chart(stacked)
def generate_album_insights(df):
    """Render the "Album & Label Insights" page.

    Tabs: a bar chart of the ten labels with the most rows, and a bubble chart
    of each album's row count against its mean popularity. A tab shows an error
    message instead of a chart when a column it checks for is absent.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    layout = dict(template='plotly_white', width=800, height=400)

    st.header("Album & Label Insights")
    labels_tab, albums_tab = st.tabs(["Top Labels", "Album Popularity"])

    with labels_tab:
        st.markdown("<span style='color:blue'>**Top Record Labels**</span>: Identifies labels with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
        if 'Label' not in df.columns:
            st.error("Cannot plot: 'Label' column missing.")
        else:
            label_counts = df['Label'].value_counts()
            busiest_labels = label_counts.nlargest(10).reset_index()
            labels_chart = px.bar(
                busiest_labels,
                x='Label',
                y='count',
                title='Top Record Labels by Song Count',
                color_discrete_sequence=['blue'],
            )
            labels_chart.update_layout(**layout)
            st.plotly_chart(labels_chart)

    with albums_tab:
        st.markdown("<span style='color:blue'>**Album Popularity**</span>: Shows albums with <span style='color:red'>red</span> bubbles.", unsafe_allow_html=True)
        if not {'Album Name', 'Popularity'}.issubset(df.columns):
            st.error("Cannot plot: 'Album Name' or 'Popularity' column missing.")
        else:
            per_album = df.groupby('Album Name')['Popularity']
            album_stats = per_album.agg(['mean', 'count']).reset_index()
            albums_chart = px.scatter(
                album_stats,
                x='count',
                y='mean',
                size='mean',
                hover_data=['Album Name'],
                title='Albums: Song Count vs Average Popularity',
                color_discrete_sequence=['red'],
            )
            albums_chart.update_layout(**layout)
            st.plotly_chart(albums_chart)
def generate_tempo_mood(df):
    """Render the "Tempo & Mood Analysis" page.

    Tabs: a line chart of the mean ``Tempo`` per ``Year``, and a scatter plot
    of ``Valence`` against ``Energy`` with the track name on hover. A tab shows
    an error message instead of a chart when a column it checks for is absent.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    layout = dict(template='plotly_white', width=800, height=400)

    st.header("Tempo & Mood Analysis")
    tempo_tab, mood_tab = st.tabs(["Tempo Trends", "Mood Scatter"])

    with tempo_tab:
        st.markdown("<span style='color:blue'>**Tempo Trends**</span>: Tracks changes with <span style='color:red'>orange</span> line.", unsafe_allow_html=True)
        if not {'Year', 'Tempo'}.issubset(df.columns):
            st.error("Cannot plot: 'Year' or 'Tempo' column missing.")
        else:
            yearly_tempo = df.groupby('Year')['Tempo'].mean().reset_index()
            tempo_chart = px.line(
                yearly_tempo,
                x='Year',
                y='Tempo',
                title='Average Tempo Over Time',
                color_discrete_sequence=['orange'],
            )
            tempo_chart.update_layout(**layout)
            st.plotly_chart(tempo_chart)

    with mood_tab:
        st.markdown("<span style='color:blue'>**Valence vs Energy**</span>: Groups mood with <span style='color:red'>purple</span> points.", unsafe_allow_html=True)
        if not {'Valence', 'Energy'}.issubset(df.columns):
            st.error("Cannot plot: 'Valence' or 'Energy' column missing.")
        else:
            mood_chart = px.scatter(
                df,
                x='Valence',
                y='Energy',
                title='Valence vs Energy',
                hover_data=['Track Name'],
                color_discrete_sequence=['purple'],
            )
            mood_chart.update_layout(**layout)
            st.plotly_chart(mood_chart)
def generate_top_artists_songs(df):
    """Render the "Top Artists and Songs" page.

    Tabs: a bar chart of the ten most frequent ``Artist Name(s)`` values, and a
    horizontal bar chart of the ten rows with the highest ``Popularity``. A tab
    shows an error message instead of a chart when a column it checks for is
    absent.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    layout = dict(template='plotly_white', width=800, height=400)

    st.header("Top Artists and Songs")
    artists_tab, songs_tab = st.tabs(["Top Artists", "Top Songs"])

    with artists_tab:
        st.markdown("<span style='color:blue'>**Most Featured Artists**</span>: Shows artists with <span style='color:red'>green</span> bars.", unsafe_allow_html=True)
        if 'Artist Name(s)' not in df.columns:
            st.error("Cannot plot: 'Artist Name(s)' column missing.")
        else:
            credit_counts = df['Artist Name(s)'].value_counts()
            leading_artists = credit_counts.nlargest(10).reset_index()
            artists_chart = px.bar(
                leading_artists,
                x='Artist Name(s)',
                y='count',
                title='Most Featured Artists',
                color_discrete_sequence=['green'],
            )
            artists_chart.update_layout(**layout)
            st.plotly_chart(artists_chart)

    with songs_tab:
        st.markdown(
            "<span style='color:blue'>**Top 10 Songs**</span>: Lists songs with <span style='color:red'>blue</span> bars.", unsafe_allow_html=True)
        if not {'Track Name', 'Popularity'}.issubset(df.columns):
            st.error("Cannot plot: 'Track Name' or 'Popularity' column missing.")
        else:
            most_popular = df.nlargest(10, 'Popularity')
            leading_songs = most_popular[['Track Name', 'Popularity']]
            songs_chart = px.bar(
                leading_songs,
                y='Track Name',
                x='Popularity',
                orientation='h',
                title='Top 10 Songs by Popularity',
                color_discrete_sequence=['blue'],
            )
            songs_chart.update_layout(**layout)
            st.plotly_chart(songs_chart)
def generate_album_release_trends(df):
    """Render the "Album Release Trends" page.

    Tabs: a line chart of releases per ``Year``, and a heatmap of row counts
    per (``Artist Name(s)``, ``Year``) pair. A tab shows an error message
    instead of a chart when a column it checks for is absent.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    st.header("Album Release Trends")
    tab1, tab2 = st.tabs(["Albums per Year", "Artist-Year Heatmap"])

    with tab1:
        st.markdown("<span style='color:blue'>**Albums per Year**</span>: Tracks releases with <span style='color:red'>purple</span> line.", unsafe_allow_html=True)
        if 'Year' not in df.columns:
            st.error("Cannot plot: 'Year' column missing.")
        else:
            if 'Album Name' in df.columns:
                # Count each album once per year. Counting rows, as before,
                # yields the number of tracks and inflates every year by the
                # album's track count, contradicting the chart title.
                albums_per_year = (
                    df.groupby('Year')['Album Name']
                    .nunique()
                    .reset_index(name='count')
                )
            else:
                # Without album names only rows can be counted (previous
                # behaviour); the output columns are named explicitly.
                albums_per_year = (
                    df['Year'].value_counts()
                    .sort_index()
                    .rename_axis('Year')
                    .reset_index(name='count')
                )
            fig15 = px.line(albums_per_year, x='Year', y='count',
                            title='Number of Albums Released per Year',
                            color_discrete_sequence=['purple'])
            fig15.update_layout(template='plotly_white', width=800, height=400)
            st.plotly_chart(fig15)

    with tab2:
        st.markdown("<span style='color:blue'>**Songs by Artists and Years**</span>: Visualizes with <span style='color:red'>heatmap colors</span>.", unsafe_allow_html=True)
        if 'Artist Name(s)' in df.columns and 'Year' in df.columns:
            # Rows are artist credits, columns are years, cells are row counts.
            artist_year = df.groupby(
                ['Artist Name(s)', 'Year']).size().unstack().fillna(0)
            fig16 = px.imshow(
                artist_year, title='Songs Released by Artists Across Years',
                color_continuous_scale='Viridis')
            fig16.update_layout(width=800, height=400)
            st.plotly_chart(fig16)
        else:
            st.error("Cannot plot: 'Artist Name(s)' or 'Year' column missing.")
def generate_duration_analysis(df):
    """Render the "Track Duration Analysis" page.

    Tabs: a histogram of ``Track Duration (ms)``, and a box plot of the same
    column grouped by ``Decade``. A tab shows an error message instead of a
    chart when a column it needs is absent.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    duration = 'Track Duration (ms)'
    layout = dict(template='plotly_white', width=800, height=400)

    st.header("Track Duration Analysis")
    histogram_tab, decade_tab = st.tabs(["Distribution", "By Decade"])

    with histogram_tab:
        st.markdown("<span style='color:blue'>**Track Duration Distribution**</span>: Shows lengths with <span style='color:red'>orange</span> bars.", unsafe_allow_html=True)
        if duration not in df.columns:
            st.error("Cannot plot: 'Track Duration (ms)' column missing.")
        else:
            histogram = px.histogram(
                df,
                x=duration,
                title='Distribution of Track Durations',
                color_discrete_sequence=['orange'],
            )
            histogram.update_layout(**layout)
            st.plotly_chart(histogram)

    with decade_tab:
        st.markdown("<span style='color:blue'>**Duration by Decade**</span>: Compares with <span style='color:red'>green</span> boxes.", unsafe_allow_html=True)
        if not {'Decade', duration}.issubset(df.columns):
            st.error(
                "Cannot plot: 'Decade' or 'Track Duration (ms)' column missing.")
        else:
            boxes = px.box(
                df,
                x='Decade',
                y=duration,
                title='Track Duration by Decade',
                color_discrete_sequence=['green'],
            )
            boxes.update_layout(**layout)
            st.plotly_chart(boxes)
def generate_streaming_insights(df):
    """Render the "Streaming and Engagement Insights" page.

    Tabs: a scatter plot of ``Popularity`` against ``Track Duration (ms)``, and
    a bar chart of the mean ``Popularity`` per ``Time Signature``. A tab shows
    an error message instead of a chart when a column it needs is absent.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    layout = dict(template='plotly_white', width=800, height=400)

    st.header("Streaming and Engagement Insights")
    duration_tab, signature_tab = st.tabs(
        ["Popularity vs Duration", "Time Signature"])

    with duration_tab:
        st.markdown("<span style='color:blue'>**Popularity vs Duration**</span>: Explores trends with <span style='color:red'>blue</span> scatter.", unsafe_allow_html=True)
        if not {'Track Duration (ms)', 'Popularity'}.issubset(df.columns):
            st.error(
                "Cannot plot: 'Track Duration (ms)' or 'Popularity' column missing.")
        else:
            duration_chart = px.scatter(
                df,
                x='Track Duration (ms)',
                y='Popularity',
                title='Popularity vs Track Duration',
                color_discrete_sequence=['blue'],
            )
            duration_chart.update_layout(**layout)
            st.plotly_chart(duration_chart)

    with signature_tab:
        st.markdown("<span style='color:blue'>**Popularity by Time Signature**</span>: Compares with <span style='color:red'>purple</span> bars.", unsafe_allow_html=True)
        if not {'Time Signature', 'Popularity'}.issubset(df.columns):
            st.error(
                "Cannot plot: 'Time Signature' or 'Popularity' column missing.")
        else:
            per_signature = df.groupby('Time Signature')['Popularity']
            signature_means = per_signature.mean().reset_index()
            signature_chart = px.bar(
                signature_means,
                x='Time Signature',
                y='Popularity',
                title='Average Popularity by Time Signature',
                color_discrete_sequence=['purple'],
            )
            signature_chart.update_layout(**layout)
            st.plotly_chart(signature_chart)
def generate_feature_comparisons(df):
    """Render the "Feature Comparisons Across Decades" page.

    Tabs: grouped bars of the mean Danceability, Energy and Valence per
    ``Decade``, and a line chart of the mean ``Loudness`` per ``Year``. A tab
    shows an error message instead of a chart when a column it checks for is
    absent; the first tab checks only for ``Decade``.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    layout = dict(template='plotly_white', width=800, height=400)

    st.header("Feature Comparisons Across Decades")
    features_tab, loudness_tab = st.tabs(
        ["Feature Comparison", "Loudness Trends"])

    with features_tab:
        st.markdown("<span style='color:blue'>**Feature Comparison**</span>: Compares features with <span style='color:red'>multi-colored</span> bars.", unsafe_allow_html=True)
        if 'Decade' not in df.columns:
            st.error("Cannot plot: 'Decade' column missing.")
        else:
            compared = ['Danceability', 'Energy', 'Valence']
            decade_means = df.groupby('Decade')[compared].mean().reset_index()
            # Long form: one row per (decade, feature) with columns
            # 'variable' and 'value'.
            long_form = decade_means.melt(id_vars='Decade')
            features_chart = px.bar(
                long_form,
                x='Decade',
                y='value',
                color='variable',
                barmode='group',
                title='Feature Comparison by Decade',
                color_discrete_sequence=px.colors.qualitative.Pastel,
            )
            features_chart.update_layout(**layout)
            st.plotly_chart(features_chart)

    with loudness_tab:
        st.markdown("<span style='color:blue'>**Loudness Over Time**</span>: Tracks with <span style='color:red'>green</span> line.", unsafe_allow_html=True)
        if not {'Year', 'Loudness'}.issubset(df.columns):
            st.error("Cannot plot: 'Year' or 'Loudness' column missing.")
        else:
            yearly_loudness = df.groupby('Year')['Loudness'].mean().reset_index()
            loudness_chart = px.line(
                yearly_loudness,
                x='Year',
                y='Loudness',
                title='Average Loudness Over Time',
                color_discrete_sequence=['green'],
            )
            loudness_chart.update_layout(**layout)
            st.plotly_chart(loudness_chart)
def _collaboration_graph(artist_credits):
    """Build an undirected graph linking artists that share a track credit."""
    graph = nx.Graph()
    for credit in artist_credits:
        names = [name.strip() for name in credit.split(',') if name.strip()]
        # combinations() yields nothing for fewer than two names, so solo
        # credits add neither nodes nor edges.
        graph.add_edges_from(combinations(names, 2))
    return graph


def _collaboration_figure(graph, seed=42):
    """Lay out *graph* and return it as a Plotly figure of lines and markers."""
    # A fixed seed keeps the layout identical across Streamlit reruns; without
    # it the nodes are reshuffled every time any widget changes.
    positions = nx.spring_layout(graph, seed=seed)

    edge_x, edge_y = [], []
    for source, target in graph.edges():
        x0, y0 = positions[source]
        x1, y1 = positions[target]
        # A None entry breaks the line so edges are drawn as separate segments.
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines')

    nodes = list(graph.nodes())
    node_trace = go.Scatter(
        x=[positions[node][0] for node in nodes],
        y=[positions[node][1] for node in nodes],
        mode='markers+text',
        hoverinfo='text',
        marker=dict(size=10, color='red'),
        text=nodes,
        textposition="top center")

    return go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title='Artist Collaborations',
            showlegend=False,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=40),
            width=800, height=600))


def _render_artist_collaborations(df):
    """Draw the collaboration network, or explain why it cannot be drawn."""
    if 'Artist Name(s)' not in df.columns:
        st.error("Cannot plot: 'Artist Name(s)' column missing.")
        return
    # Drop missing credits and coerce the rest to str before splitting.
    credits = df['Artist Name(s)'].dropna().astype(str)
    graph = _collaboration_graph(credits)
    if graph.number_of_nodes() == 0:
        st.warning("No artist collaborations to display.")
        return
    st.plotly_chart(_collaboration_figure(graph))


def generate_network_analysis(df):
    """Render the "Network Analysis" page.

    The first tab draws a graph whose nodes are artists and whose edges join
    artists named together in one comma-separated ``Artist Name(s)`` value.
    The second tab is a placeholder that only displays a code snippet.

    Args:
        df: DataFrame holding the tracks to plot.
    """
    st.header("Network Analysis")
    tab1, tab2 = st.tabs(["Artist Collaborations", "Genre Crossover"])
    with tab1:
        st.markdown("<span style='color:blue'>**Artist Collaborations**</span>: Visualizes connections with <span style='color:red'>interactive red nodes</span>. Hover for details.", unsafe_allow_html=True)
        _render_artist_collaborations(df)
    with tab2:
        st.markdown("<span style='color:blue'>**Genre Crossover**</span>: Placeholder with <span style='color:red'>future multi-color</span> potential.", unsafe_allow_html=True)
        st.write("To implement, install `holoviews` and use the following code:")
        st.code("""
        import holoviews as hv
        hv.extension('bokeh')
        genre_pairs = df.explode('Genres')[['Genres']].merge(df.explode('Genres')[['Genres']], how='cross')
        chord_data = genre_pairs.groupby(['Genres_x', 'Genres_y']).size().reset_index(name='value')
        chord = hv.Chord(chord_data).opts(title="Genre Crossover")
        st.write(hv.render(chord, backend='bokeh'))
        """)

models/__pycache__/data_processor.cpython-310.pyc ADDED Viewed

Binary file (1.66 kB). View file

models/data_processor.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import pandas as pd
+import streamlit as st
def load_data():
    """Load ``data/music_data.csv`` and add the columns the dashboard plots.

    Besides returning the data, this renders Streamlit output: a preview of
    the raw rows, a preview of the processed rows, and an error or warning for
    every problem it detects.

    Derived columns:
        Year: release year taken from 'Album Release Date'; 0 when no year
            can be determined.
        Decade: ``Year`` rounded down to a multiple of ten.
        Genres: list of genre names split from the comma-separated
            'Artist Genres' value; ``['Unknown']`` when that value is missing.
        Popularity: coerced to a number, with unparseable values set to 0.

    Returns:
        pandas.DataFrame: the processed data. An empty frame when the file is
        missing or unreadable; the unprocessed frame when it has no rows or no
        'Album Release Date' column.
    """
    try:
        df = pd.read_csv('data/music_data.csv', on_bad_lines='skip')
    except FileNotFoundError:
        st.error("Error: 'data/music_data.csv' not found. Please ensure the file exists.")
        return pd.DataFrame()
    except Exception as e:
        # Boundary handler: report any other read/parse failure in the UI
        # rather than crashing the page.
        st.error(f"Error loading raw data: {e}")
        return pd.DataFrame()

    st.write("**Raw Data Sample:**", df.head())  # Display raw data

    if df.empty:
        st.warning("Warning: Loaded DataFrame is empty. Check the CSV content.")
        return df
    if 'Album Release Date' not in df.columns:
        st.error("'Album Release Date' column missing from CSV")
        return df

    raw_dates = df['Album Release Date']
    years = pd.to_datetime(raw_dates, errors='coerce').dt.year
    # Values that fail date parsing but start with a four-digit year (e.g.
    # '1979' or '1979-10') still carry a usable year; recover it instead of
    # letting the row fall into the year-0 bucket below.
    leading_year = pd.to_numeric(
        raw_dates.astype(str).str.extract(r'^\s*(\d{4})(?:\D|$)', expand=False),
        errors='coerce')
    years = years.fillna(leading_year)
    # NOTE(review): rows with no recoverable year keep the sentinel 0, so they
    # appear as Year 0 / Decade 0 in filters and charts.
    df['Year'] = years.fillna(0).astype(int)
    df['Decade'] = (df['Year'] // 10 * 10).astype(int)

    if 'Artist Genres' in df.columns:
        genre_lists = df['Artist Genres'].fillna('Unknown').astype(str).str.split(',')
        df['Genres'] = genre_lists.apply(lambda parts: [g.strip() for g in parts])
    else:
        # Previously a KeyError; fall back to the same placeholder that is
        # used for individual missing values.
        st.warning("'Artist Genres' column missing from CSV; genres set to 'Unknown'.")
        df['Genres'] = [['Unknown'] for _ in range(len(df))]

    if 'Popularity' in df.columns:
        df['Popularity'] = pd.to_numeric(df['Popularity'], errors='coerce').fillna(0)
    else:
        # Previously a KeyError; 0 matches the fill value for bad entries.
        st.warning("'Popularity' column missing from CSV; popularity set to 0.")
        df['Popularity'] = 0

    # Preview only the columns that exist, so a CSV without 'Track Name' does
    # not fail here.
    preview_columns = [
        col for col in ('Track Name', 'Year', 'Decade', 'Popularity')
        if col in df.columns
    ]
    st.write("**Processed Data Sample:**", df[preview_columns].head())
    return df