Spaces:

EDS-lab
/

Transparency_Plus

Running

App Files Files Community

mmmapms committed on Oct 18, 2024

Commit

b18c422

verified ·

1 Parent(s): ca0370b

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -47

app.py CHANGED Viewed

@@ -15,6 +15,7 @@ from datetime import datetime
 import folium
 import seaborn as sns
 from streamlit_folium import st_folium
 def get_current_time():
@@ -156,15 +157,6 @@ if selected_country != 'Overall':
     st.sidebar.subheader("Section")
     st.sidebar.caption("Select the type of information you want to explore.")
     section = st.sidebar.radio('', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1)
-    date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:",
-                                   value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))))
-    if len(date_range) == 2:
-        start_date = pd.Timestamp(date_range[0])
-        end_date = pd.Timestamp(date_range[1])
-    else:
-        st.error("Please select a valid date range.")
-        st.stop()
 else:
     section = None  # No section is shown when "Overall" is selected
@@ -218,8 +210,12 @@ if section == 'Data Quality':
     st.header('Data Quality')
-    st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
-    data_quality=data.iloc[:-28]
     # Report % of missing values
     missing_values = data_quality[forecast_columns].isna().mean() * 100
@@ -315,7 +311,7 @@ elif section == 'Forecasts Quality':
     # Scatter plots for error distribution
     st.subheader('Error Distribution')
-    st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
     selected_variable = st.selectbox("Select Variable for Error Distribution", list(variable_options.keys()))
     # Get the corresponding columns for the selected variable
@@ -329,7 +325,7 @@ elif section == 'Forecasts Quality':
         # Calculate error and plot
         error = pred - obs
-        fig = px.scatter(x=obs, y=error, labels={'x': 'Observed [MW]', 'y': 'Error of Forecast ENTSO-E [MW]'})
         fig.update_layout(title=f'Error Distribution for {selected_variable}')
         st.plotly_chart(fig)
@@ -338,7 +334,21 @@ elif section == 'Forecasts Quality':
     st.subheader('Accuracy Metrics (Sorted by rMAE):')
-    output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
     st.write(output_text)
     data = data.loc[start_date:end_date]
@@ -424,7 +434,7 @@ elif section == 'Forecasts Quality':
         st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False}, className="small-chart")
     st.subheader('ACF plots of Errors')
-    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
     # Dropdown to select the variable
     selected_variable = st.selectbox("Select Variable for ACF of Errors", list(variable_options.keys()))
@@ -449,20 +459,28 @@ elif section == 'Forecasts Quality':
 # Section 3: Insights
 elif section == 'Insights':
     st.header("Insights")
-    st.write("""
-    This section provides insights derived from the data and forecasts.
-    You can visualize trends, anomalies, and other important findings.
-    """)
-    # Scatter plots for correlation between wind, solar, and load
-    st.subheader('Correlation between Wind, Solar, Load and Weather Features')
-    st.write('The below scatter plots are made for checking whether there exists a correlation between the data fields obtained: Solar, Wind, Load and Weather Features.')
-    selected_columns=['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
-    selected_df=data[selected_columns]
     selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
     selected_df = selected_df.dropna()
-    print(selected_df)
     sns.set_theme(style="ticks")
     pairplot_fig = sns.pairplot(selected_df)
@@ -470,19 +488,6 @@ elif section == 'Insights':
     st.pyplot(pairplot_fig)
 elif selected_country == 'Overall':
-    st.markdown(
-        """
-        <style>
-        .main-container {
-            padding-top: 0px;  /* Remove extra padding at the top */
-        }
-        .chart-spacing {
-            margin-top: -40px;  /* Adjust this value to control spacing between map and radar plot */
-        }
-        </style>
-        """,
-        unsafe_allow_html=True
-    )
     st.subheader("Net Load Error Map")
     st.write("""
@@ -544,7 +549,7 @@ elif selected_country == 'Overall':
             fill_opacity=0.7,
             line_opacity=0.5,
             line_color="black",  # Neutral border color
-            legend_name="Net Load Error"
         ).add_to(m)
         # Add a GeoJson layer with custom tooltip for country, error, and date
@@ -553,7 +558,7 @@ elif selected_country == 'Overall':
             style_function=lambda x: {'fillOpacity': 0, 'color': 'black', 'weight': 0},
             tooltip=folium.GeoJsonTooltip(
                 fields=["name", "net_load_error", "date"],
-                aliases=["Country:", "Net Load Error:", "Date:"],
                 localize=True
             )
         ).add_to(m)
@@ -568,15 +573,10 @@ elif selected_country == 'Overall':
         'Germany': Data_DE,
         'Netherlands': Data_NL
     }
-    # Call the function to plot the map
     plot_net_load_error_map(data_dict)
-    # CSS to adjust layout and remove extra spacing
     st.subheader("rMAE of Forecasts published on ENTSO-E TP")
-    st.write("""
-        The radar chart below compares the forecast accuracy across Load, Onshore Wind, Offshore Wind, and Solar for each country.
-    """)
     def calculate_mae(actual, forecast):
         return np.mean(np.abs(actual - forecast))
@@ -611,10 +611,11 @@ elif selected_country == 'Overall':
         angles = ['Load', 'Wind_onshore', 'Wind_offshore', 'Solar']
         for _, row in rmae_df.iterrows():
             fig.add_trace(go.Scatterpolar(r=[row[angle] for angle in angles], theta=angles, fill='toself', name=row['Country']))
-        fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 2])), showlegend=True, title="rMAE Radar Chart by Country")
         st.plotly_chart(fig)
     # Main execution to create and display radar plot
     rmae_df = create_rmae_dataframe(data_dict)
     plot_rmae_radar_chart(rmae_df)

 import folium
 import seaborn as sns
 from streamlit_folium import st_folium
+import datetime
 def get_current_time():
     st.sidebar.subheader("Section")
     st.sidebar.caption("Select the type of information you want to explore.")
     section = st.sidebar.radio('', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1)
 else:
     section = None  # No section is shown when "Overall" is selected
     st.header('Data Quality')
+    st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
+    yesterday_midnight = pd.Timestamp(datetime.datetime.now().date() - pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59)
+    # Filter data until the end of yesterday (midnight)
+    data_quality = data[data.index <= yesterday_midnight]
     # Report % of missing values
     missing_values = data_quality[forecast_columns].isna().mean() * 100
     # Scatter plots for error distribution
     st.subheader('Error Distribution')
+    st.write('The below scatter plots show the error distribution of all four fields: Solar, Wind Onshore, Wind Offshore and Load.')
     selected_variable = st.selectbox("Select Variable for Error Distribution", list(variable_options.keys()))
     # Get the corresponding columns for the selected variable
         # Calculate error and plot
         error = pred - obs
+        fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Forecast ENTSO-E [MW]'})
         fig.update_layout(title=f'Error Distribution for {selected_variable}')
         st.plotly_chart(fig)
     st.subheader('Accuracy Metrics (Sorted by rMAE):')
+    date_range = st.date_input(
+        "Select Date Range for Metrics Calculation:",
+        value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
+    )
+    if len(date_range) == 2:
+        start_date = pd.Timestamp(date_range[0])
+        end_date = pd.Timestamp(date_range[1])
+    else:
+        st.error("Please select a valid date range.")
+        st.stop()
+    output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. "
     st.write(output_text)
     data = data.loc[start_date:end_date]
         st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False}, className="small-chart")
     st.subheader('ACF plots of Errors')
+    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind Onshore, Wind Offshore and Load.')
     # Dropdown to select the variable
     selected_variable = st.selectbox("Select Variable for ACF of Errors", list(variable_options.keys()))
 # Section 3: Insights
 elif section == 'Insights':
     st.header("Insights")
+    st.write('The scatter plots below are created to explore possible correlations between the data fields: Solar, Wind Onshore, Wind Offshore, Load, and Weather Features.')
+    # Add a selection box for the data resolution (daily or hourly)
+    data_2024 = data[data.index.year == 2024]
+    resolution = st.selectbox('Select data resolution:', ['Daily', 'Hourly'])
+        # Resample data based on the selected resolution
+    if resolution == 'Hourly':
+        resampled_data = data_2024
+    elif resolution == 'Daily':
+        resampled_data = data_2024.resample('D').mean()  # Resample to daily mean
+    # Select the necessary columns for the scatter plot
+    selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
+    selected_df = resampled_data[selected_columns]
     selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
+    # Drop missing values
     selected_df = selected_df.dropna()
+    # Create the scatter plots using seaborn's pairplot
     sns.set_theme(style="ticks")
     pairplot_fig = sns.pairplot(selected_df)
     st.pyplot(pairplot_fig)
 elif selected_country == 'Overall':
     st.subheader("Net Load Error Map")
     st.write("""
             fill_opacity=0.7,
             line_opacity=0.5,
             line_color="black",  # Neutral border color
+            legend_name="Net Load Error [MW]"
         ).add_to(m)
         # Add a GeoJson layer with custom tooltip for country, error, and date
             style_function=lambda x: {'fillOpacity': 0, 'color': 'black', 'weight': 0},
             tooltip=folium.GeoJsonTooltip(
                 fields=["name", "net_load_error", "date"],
+                aliases=["Country:", "Net Load Error [MW]:", "Date:"],
                 localize=True
             )
         ).add_to(m)
         'Germany': Data_DE,
         'Netherlands': Data_NL
     }
     plot_net_load_error_map(data_dict)
     st.subheader("rMAE of Forecasts published on ENTSO-E TP")
+    st.write("""The rMAE of Forecasts chart compares the forecast accuracy of the predictions published by ENTSO-E Transparency Platform for Belgium, Germany, France, and the Netherlands. It shows the rMAE for onshore wind, offshore wind, solar, and load demand, highlighting how well forecasts perform relative to a basic persistence model across these countries and energy sectors.""")
     def calculate_mae(actual, forecast):
         return np.mean(np.abs(actual - forecast))
         angles = ['Load', 'Wind_onshore', 'Wind_offshore', 'Solar']
         for _, row in rmae_df.iterrows():
             fig.add_trace(go.Scatterpolar(r=[row[angle] for angle in angles], theta=angles, fill='toself', name=row['Country']))
+        fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 1.2])), showlegend=True, title="rMAE Radar Chart by Country")
         st.plotly_chart(fig)
     # Main execution to create and display radar plot
     rmae_df = create_rmae_dataframe(data_dict)
     plot_rmae_radar_chart(rmae_df)