Spaces:
Running
Running
import requests | |
import pandas as pd | |
from io import StringIO | |
import streamlit as st | |
import os | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import plotly.colors as pc | |
import numpy as np | |
from sklearn.metrics import mean_squared_error | |
from statsmodels.tsa.stattools import acf | |
from statsmodels.graphics.tsaplots import plot_acf | |
import matplotlib.pyplot as plt | |
from datetime import datetime | |
def get_current_time():
    """Return the current local hour and a flag for "minute 10 or later".

    Returns:
        tuple: ``(hour, past_tenth_minute)`` where ``hour`` is
        ``datetime.now().hour`` and ``past_tenth_minute`` is ``True`` when the
        current minute is greater than or equal to 10.
    """
    timestamp = datetime.now()
    past_tenth_minute = timestamp.minute >= 10
    return timestamp.hour, past_tenth_minute
##GET ALL FILES FROM GITHUB | |
def load_GitHub(github_token, file_name, hour, after_10_min, timeout=30):
    """Download one CSV file from the Transparency_Data repository.

    Args:
        github_token: Personal access token sent in the ``Authorization`` header.
        file_name: Path of the CSV file relative to the repository's ``main`` branch.
        hour: Accepted for interface compatibility; not used by this function.
        after_10_min: Accepted for interface compatibility; not used by this function.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        pandas.DataFrame indexed by the parsed ``Date`` column when that column
        exists (otherwise with the default index), or ``None`` when the request
        fails or the server does not answer with status 200.
    """
    url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
    headers = {'Authorization': f'token {github_token}'}
    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport errors like a non-200 answer: report and return None.
        print(f"Failed to download {file_name}. Request error: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to download {file_name}. Status code: {response.status_code}")
        return None
    df = pd.read_csv(StringIO(response.text))
    if 'Date' in df.columns:
        # Use the parsed timestamps as the index so callers can slice by date.
        df['Date'] = pd.to_datetime(df['Date'])
        df.set_index('Date', inplace=True)
    return df
def load_forecast(github_token, hour, after_10_min):
    """Fetch the 24 hourly prediction files and keep those that downloaded.

    Args:
        github_token: Token forwarded to ``load_GitHub``.
        hour: Not used for the download; each file's own hour (0-23) is
            forwarded to ``load_GitHub`` instead.
        after_10_min: Forwarded unchanged to ``load_GitHub``.

    Returns:
        dict mapping ``'Predictions_<h>h.csv'`` to the downloaded DataFrame for
        every hour whose download did not return ``None``.
    """
    predictions_dict = {}
    for forecast_hour in range(24):
        file_name = f'Predictions_{forecast_hour}h.csv'
        frame = load_GitHub(github_token, file_name, forecast_hour, after_10_min)
        if frame is None:
            continue
        predictions_dict[file_name] = frame
    return predictions_dict
def convert_European_time(data, time_zone):
    """Rewrite ``data``'s index as timezone-naive local time, in place.

    The index is parsed as UTC, converted to ``time_zone`` and then stripped of
    its timezone information.

    Args:
        data: Object whose ``index`` holds timestamps; it is modified in place.
        time_zone: Target timezone name, e.g. ``'Europe/Brussels'``.

    Returns:
        The same ``data`` object that was passed in.
    """
    utc_index = pd.to_datetime(data.index, utc=True)
    local_index = utc_index.tz_convert(time_zone)
    data.index = local_index.tz_localize(None)
    return data
def simplify_model_names(df):
    """Shorten model identifiers embedded in the column labels of ``df``.

    The column labels are rewritten in place and the same object is returned.
    """
    # Applied in order: the longer "...Forecast_elia" pattern has to run before
    # the shorter pattern that is a prefix of it.
    rules = (
        (r'\.LightGBMModel\.\dD\.TimeCov\.Temp\.Forecast_elia', '.LightGBM_with_Forecast_elia'),
        (r'\.LightGBMModel\.\dD\.TimeCov\.Temp', '.LightGBM'),
        (r'\.Naive\.\dD', '.Naive'),
    )
    labels = df.columns
    for pattern, short_name in rules:
        labels = labels.str.replace(pattern, short_name, regex=True)
    df.columns = labels
    return df
def simplify_model_names_in_index(df):
    """Shorten model identifiers embedded in the index labels of ``df``.

    The index labels are rewritten in place and the same object is returned.
    """
    # Applied in order: the longer "...Forecast_elia" pattern has to run before
    # the shorter pattern that is a prefix of it.
    rules = (
        (r'\.LightGBMModel\.\dD\.TimeCov\.Temp\.Forecast_elia', '.LightGBM_with_Forecast_elia'),
        (r'\.LightGBMModel\.\dD\.TimeCov\.Temp', '.LightGBM'),
        (r'\.Naive\.\dD', '.Naive'),
    )
    labels = df.index
    for pattern, short_name in rules:
        labels = labels.str.replace(pattern, short_name, regex=True)
    df.index = labels
    return df
# Read the GitHub personal access token from the environment instead of
# embedding it in the source: a token committed to a file is readable by anyone
# with access to the code and has to be treated as compromised.
github_token = os.environ.get('GITHUB_TOKEN')
if not github_token:
    # Nothing below can work without the token, so halt the script here rather
    # than failing later on undefined datasets.
    st.error("GitHub Personal Access Token not found. Set the GITHUB_TOKEN environment variable to proceed.")
    st.stop()

hour, after_10_min = get_current_time()
forecast_dict = load_forecast(github_token, hour, after_10_min)
historical_forecast = load_GitHub(github_token, 'Historical_forecast.csv', hour, after_10_min)
Data_BE = load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv', hour, after_10_min)
Data_FR = load_GitHub(github_token, 'FR_Entsoe_UTC.csv', hour, after_10_min)
Data_NL = load_GitHub(github_token, 'NL_Entsoe_UTC.csv', hour, after_10_min)
Data_DE = load_GitHub(github_token, 'DE_Entsoe_UTC.csv', hour, after_10_min)

# load_GitHub returns None when a download fails; converting None would raise,
# so report the missing files and stop instead.
failed_files = [
    name
    for name, frame in (
        ('BE_Elia_Entsoe_UTC.csv', Data_BE),
        ('FR_Entsoe_UTC.csv', Data_FR),
        ('NL_Entsoe_UTC.csv', Data_NL),
        ('DE_Entsoe_UTC.csv', Data_DE),
    )
    if frame is None
]
if failed_files:
    st.error(f"Could not download: {', '.join(failed_files)}")
    st.stop()

# The files are named "*_UTC"; re-express each index in the country's local time.
Data_BE = convert_European_time(Data_BE, 'Europe/Brussels')
Data_FR = convert_European_time(Data_FR, 'Europe/Paris')
Data_NL = convert_European_time(Data_NL, 'Europe/Amsterdam')
Data_DE = convert_European_time(Data_DE, 'Europe/Berlin')
# Page header: title on the left, institutional logos on the right.
col1, col2 = st.columns([5, 2])  # 5:2 width ratio between title and logo area
with col1:
    st.title("Transparency++")
with col2:
    # Two placeholders are created; only the second one is kept and filled below.
    col2.empty()
    upper_space = col2.empty()
    # Two sub-columns so the logos sit side by side.
    col2_1, col2_2 = st.columns(2)
    for logo_column, logo_path in (
        (col2_1, "KU_Leuven_logo.png"),
        (col2_2, "energyville_logo.png"),
    ):
        with logo_column:
            st.image(logo_path, width=100)
    upper_space.markdown("""


""", unsafe_allow_html=True)
# Display name -> country code used to pick the dataset.
countries = dict(
    Netherlands='NL',
    Germany='DE',
    France='FR',
    Belgium='BE',
)

sidebar = st.sidebar
sidebar.header('Filters')

# --- Country ---
sidebar.subheader("Select Country")
sidebar.caption("Choose the country for which you want to display data or forecasts.")
selected_country = sidebar.selectbox('Select Country', list(countries.keys()))

# --- Date range used for the accuracy metrics ---
sidebar.subheader("Select Date Range ")
sidebar.caption("Define the time period over which the accuracy metrics will be calculated.")
st.write()
default_range = (pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
date_range = sidebar.date_input("Select Date Range for Metrics Calculation:",
                                value=default_range)
# The rest of the script needs both a start and an end date.
if len(date_range) != 2:
    st.error("Please select a valid date range.")
    st.stop()
start_date = pd.Timestamp(date_range[0])
end_date = pd.Timestamp(date_range[1])

# --- Section selector (the second entry, 'Forecasts', is preselected) ---
sidebar.subheader("Section")
sidebar.caption("Select the type of information you want to explore.")
section = sidebar.radio('', ['Data', 'Forecasts', 'Insights'], index=1)

country_code = countries[selected_country]
# Pick the dataset of the selected country and expose its weather inputs under
# display-friendly column names (the new columns are added to the dataset itself).
if country_code == 'BE':
    data = Data_BE
    weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
    weather_sources = (
        ('Temperature', 'temperature_2m_8'),
        ('Wind Speed Offshore', 'wind_speed_100m_4'),
        ('Wind Speed Onshore', 'wind_speed_100m_8'),
    )
elif country_code in ('DE', 'NL', 'FR'):
    # These three countries share the same weather column layout.
    data = {'DE': Data_DE, 'NL': Data_NL, 'FR': Data_FR}[country_code]
    weather_columns = ['Temperature', 'Wind Speed']
    weather_sources = (
        ('Temperature', 'temperature_2m'),
        ('Wind Speed', 'wind_speed_100m'),
    )
else:
    weather_sources = ()

for display_name, source_name in weather_sources:
    data[display_name] = data[source_name]
def add_feature(df2, df_main):
    """Combine ``df_main`` with ``df2``.

    When ``df2`` starts exactly one hour after the last timestamp of
    ``df_main``, the rows of ``df2`` that lie after that timestamp are appended
    to ``df_main``. Otherwise the result is ``df_main.combine_first(df2)``.

    Args:
        df2: Frame providing the additional rows/values.
        df_main: Frame whose values take precedence.

    Returns:
        The combined DataFrame.
    """
    merged = df_main.combine_first(df2)
    main_end = df_main.index.max()
    if df2.index.min() != main_end + pd.Timedelta(hours=1):
        return merged
    return pd.concat([df_main, df2[df2.index > main_end]], axis=0)
#data.index = data.index.tz_localize('UTC') | |
# Observed/forecast column pairs, ordered as (actual, forecast) per variable.
forecast_columns = [
    f'{variable}{suffix}'
    for variable in ('Load', 'Wind_onshore', 'Wind_offshore', 'Solar')
    for suffix in ('_entsoe', '_forecast_entsoe')
]
# Section 1: Data — data-quality table for the selected country.
if section == 'Data':
    st.header("Data")
    st.write("""
This section allows you to explore and upload your datasets.
You can visualize raw data, clean it, and prepare it for analysis.
""")
    st.header('Data Quality')
    st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')

    # The last 28 rows are excluded from the quality statistics.
    # NOTE(review): presumably these are timestamps without observations yet — confirm.
    data_quality = data.iloc[:-28]

    # Only evaluate columns that exist in this dataset; the other sections
    # guard on column presence as well, and selecting a missing column raises.
    available_columns = [col for col in forecast_columns if col in data_quality.columns]

    # Report % of missing values
    missing_values = (data_quality[available_columns].isna().mean() * 100).round(2)

    installed_capacities = {
        'FR': {'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
        'DE': {'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
        'BE': {'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
        'NL': {'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
    }
    if country_code not in installed_capacities:
        st.error(f"Installed capacities not defined for country code '{country_code}'.")
        st.stop()

    # Report % of extreme, impossible values for the selected country.
    # Each column maps to the capacity that bounds it from above; None means
    # there is no upper bound (load), so only negative values are flagged.
    capacities = installed_capacities[country_code]
    capacity_key_by_column = {
        'Solar_entsoe': 'Solar',
        'Solar_forecast_entsoe': 'Solar',
        'Wind_onshore_entsoe': 'Wind Onshore',
        'Wind_onshore_forecast_entsoe': 'Wind Onshore',
        'Wind_offshore_entsoe': 'Wind Offshore',
        'Wind_offshore_forecast_entsoe': 'Wind Offshore',
        'Load_entsoe': None,
        'Load_forecast_entsoe': None,
    }
    extreme_values = {}
    for col in available_columns:
        if col not in capacity_key_by_column:
            continue
        series = data_quality[col]
        implausible = series < 0
        capacity_key = capacity_key_by_column[col]
        if capacity_key is not None:
            implausible = implausible | (series > capacities[capacity_key])
        extreme_values[col] = implausible.mean() * 100
    # dtype=float keeps rounding well-defined even when no column is available.
    extreme_values = pd.Series(extreme_values, dtype=float).round(2)

    # Combine all metrics into one DataFrame
    metrics_df = pd.DataFrame({
        'Missing Values (%)': missing_values,
        'Extreme/Nonsensical Values (%)': extreme_values,
    })
    st.markdown(
        """
<style>
.dataframe {font-size: 45px !important;}
</style>
""",
        unsafe_allow_html=True
    )
    st.dataframe(metrics_df)
    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
# Section 2: Forecasts | |
elif section == 'Forecasts': | |
st.header('Forecast Quality') | |
# Time series for last 1 week | |
st.subheader('Time Series: Last 1 Week') | |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))] | |
st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.') | |
forecast_columns = [ | |
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe'] | |
num_per_var=2 | |
forecast_columns_line=forecast_columns | |
for i in range(0, len(forecast_columns_line), num_per_var): | |
actual_col = forecast_columns_line[i] | |
forecast_col = forecast_columns_line[i + 1] | |
if forecast_col in data.columns: | |
fig = go.Figure() | |
fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual')) | |
fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E')) | |
fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]') | |
st.plotly_chart(fig) | |
def plot_category(df_dict, category_prefix, title):
    """Plot every hourly forecast run of one variable together with observations.

    Args:
        df_dict: Mapping of file name (``'Predictions_<hour>h.csv'``) to the
            prediction DataFrame of that hour.
        category_prefix: Column-name prefix selecting the variable to plot; the
            observed series is read from column ``'<category_prefix>_entsoe'``.
        title: Figure title.

    Returns:
        plotly ``go.Figure`` with one trace per matching prediction column and,
        when observations exist, one black trace with the observed values.

    Note:
        Observed values are always read from the module-level ``Data_BE``.
        NOTE(review): this ignores the country selected in the sidebar — confirm
        that is intended.
    """
    fig = go.Figure()

    # Base color per model family.
    model_colors = {
        'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4',  # Blue
        'LightGBMModel.TimeCov.Temp': '#2ca02c',  # Green
        'Naive': '#ff7f0e',  # Orange
    }
    # Tracks which model family already owns a legend entry.
    legend_added = dict.fromkeys(model_colors, False)

    last_df = None
    for file_name, df in df_dict.items():
        last_df = df
        # File names look like "Predictions_<hour>h.csv".
        hour = int(file_name.split('_')[1].replace('h.csv', ''))

        for column in df.columns:
            if not column.startswith(category_prefix):
                continue

            # Identify the model family of this column.
            if 'LightGBMModel' in column:
                if 'Forecast_elia' in column:
                    model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
                elif 'TimeCov' in column:
                    model_key = 'LightGBMModel.TimeCov.Temp'
                else:
                    # Unknown LightGBM variant: skip it instead of reusing the
                    # model key left over from a previous column.
                    continue
            elif 'Naive' in column:
                model_key = 'Naive'
            else:
                continue

            # Everything after the variable prefix is the model name.
            model_name = '.'.join(column.split('.')[1:])

            base_color = model_colors[model_key]
            # Scale the base color with the forecast hour so runs can be told apart.
            scale_factor = 0.3 + (hour / 40)
            red, green, blue = (int(c * scale_factor) for c in pc.hex_to_rgb(base_color))
            line_color = f'rgba({red}, {green}, {blue}, 0.1)'  # Nearly transparent

            # Only the first trace of a family gets a legend entry, drawn in
            # the opaque base color.
            show_legend = not legend_added[model_key]
            fig.add_trace(go.Scatter(
                x=df.index,
                y=df[column],
                mode='lines',
                name=model_name if show_legend else None,
                line=dict(color=base_color if show_legend else line_color),
                showlegend=show_legend,
                legendgroup=model_key,
            ))
            if show_legend:
                legend_added[model_key] = True

    # Overlay observed values for the time span of the last prediction file.
    # Skipped entirely when no prediction file was supplied.
    if last_df is not None:
        actual_column = f'{category_prefix}_entsoe'
        # Restrict to timestamps present in both frames; a label lookup with
        # timestamps unknown to Data_BE would raise KeyError.
        shared_index = Data_BE.index.intersection(last_df.index)
        observed = Data_BE.loc[shared_index]
        if observed[actual_column].notna().any():
            fig.add_trace(go.Scatter(
                x=observed.index,
                y=observed[actual_column],
                mode='lines',
                name=f'Actual {category_prefix}',
                line=dict(color='black', width=2),
                showlegend=True,
            ))

    fig.update_layout(
        title=dict(
            text=title,
            x=0,  # Left-align the title
            y=1.00,
            xanchor='left',
            yanchor='top',
        ),
        xaxis_title='Date',
        yaxis_title='Value',
        legend=dict(
            orientation="h",  # Horizontal legend
            yanchor="bottom",
            y=1,  # Place the legend above the plot area
            xanchor="center",
            x=0.5,
        ),
    )
    return fig
def calculate_mae(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``."""
    absolute_errors = np.abs(y_true - y_pred)
    return np.mean(absolute_errors)
def plot_mae_comparison(df_dict, category_prefix, title, real_values_df):
    """Plot, per forecast hour, each model's MAE relative to the ENTSO-E forecast.

    Args:
        df_dict: Mapping of ``'Predictions_<hour>h.csv'`` to prediction DataFrame.
        category_prefix: Variable prefix; observations are read from
            ``'<prefix>_entsoe'`` and the reference forecast from
            ``'<prefix>_forecast_entsoe'`` in ``real_values_df``.
        title: Accepted for interface compatibility; the figure title is built
            from ``category_prefix``.
        real_values_df: DataFrame holding observations and the ENTSO-E forecast.

    Returns:
        plotly ``go.Figure`` with one line per model (ratio model MAE / ENTSO-E MAE).
    """
    hours = list(range(24))
    # Model column names embed "7D" for Load and "1D" for every other variable.
    horizon = '7D' if category_prefix == 'Load' else '1D'
    model_colors = {
        f'LightGBMModel.{horizon}.TimeCov.Temp.Forecast_elia': '#1F77B4',  # Blue
        f'LightGBMModel.{horizon}.TimeCov.Temp': '#2CA02C',  # Green
        'Naive': '#FF7F0E',  # Orange
    }
    actual_col = f'{category_prefix}_entsoe'
    entsoe_col = f'{category_prefix}_forecast_entsoe'

    fig = go.Figure()
    for model_key, base_color in model_colors.items():
        hours_with_data = []
        mae_ratios = []
        for hour in hours:
            df = df_dict.get(f'Predictions_{hour}h.csv')
            if df is None:
                continue
            if isinstance(df.index, pd.DatetimeIndex):
                # Drop the first and the last calendar day of the file.
                first_day = df.index.min().normalize()
                last_day = df.index.max().normalize()
                days = df.index.normalize()
                df = df[(days != first_day) & (days != last_day)]

            candidates = [col for col in df.columns
                          if col.startswith(actual_col) and model_key in col]
            if not candidates:
                continue
            # A model key can be a substring of a longer model name
            # ("...Temp" vs "...Temp.Forecast_elia"); the shortest match is
            # the column that belongs to this key.
            model_predictions = df[min(candidates, key=len)]

            actual_values = real_values_df[actual_col].dropna()
            common_indices = model_predictions.index.intersection(actual_values.index)
            if common_indices.empty:
                # No overlapping timestamps: no error can be computed.
                continue
            aligned_actual_values = actual_values.loc[common_indices]

            model_mae = calculate_mae(aligned_actual_values, model_predictions.loc[common_indices])
            entsoe_mae = calculate_mae(aligned_actual_values, real_values_df[entsoe_col].loc[common_indices])
            # A zero or undefined baseline would yield an infinite/NaN ratio.
            if not np.isfinite(entsoe_mae) or entsoe_mae == 0:
                continue
            mae_ratio = model_mae / entsoe_mae
            if not np.isfinite(mae_ratio):
                continue
            mae_ratios.append(mae_ratio)
            hours_with_data.append(hour)

        # Only add a trace when at least one hour produced a ratio.
        if mae_ratios:
            fig.add_trace(go.Scatter(
                x=hours_with_data,
                y=mae_ratios,
                mode='markers+lines',
                name=model_key,
                line=dict(color=base_color),
                marker=dict(color=base_color, size=8),
            ))

    fig.update_layout(
        title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by hour of Forecasting.',
        xaxis_title='Hour of Forecast',
        yaxis_title='MAE Ratio (Model / entsoe)',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
        ),
    )
    return fig
def plot_mae_comparison_clock(df_dict, category_prefix, title, real_values_df):
    """Polar ("clock") plot of each model's MAE relative to the ENTSO-E forecast.

    Every forecast hour is placed at ``hour * 15`` degrees.

    Args:
        df_dict: Mapping of ``'Predictions_<hour>h.csv'`` to prediction DataFrame.
        category_prefix: Variable prefix; observations are read from
            ``'<prefix>_entsoe'`` and the reference forecast from
            ``'<prefix>_forecast_entsoe'`` in ``real_values_df``.
        title: Accepted for interface compatibility; the figure title is built
            from ``category_prefix``.
        real_values_df: DataFrame holding observations and the ENTSO-E forecast.

    Returns:
        plotly ``go.Figure`` with one closed polar trace per model.
    """
    hours = list(range(24))
    model_colors = {
        'LightGBM_with_Forecast_elia': '#1F77B4',  # Blue
        'LightGBM': '#2CA02C',  # Green
        'Naive': '#FF7F0E',  # Orange
    }
    actual_col = f'{category_prefix}_entsoe'
    entsoe_col = f'{category_prefix}_forecast_entsoe'

    fig = go.Figure()
    all_ratios = []  # Ratios of every model, used to size the radial axis.
    for model_key, base_color in model_colors.items():
        hours_with_data = []
        mae_ratios = []
        for hour in hours:
            df = df_dict.get(f'Predictions_{hour}h.csv')
            if df is None:
                continue
            if isinstance(df.index, pd.DatetimeIndex):
                # Drop the first and the last calendar day of the file.
                first_day = df.index.min().normalize()
                last_day = df.index.max().normalize()
                days = df.index.normalize()
                df = df[(days != first_day) & (days != last_day)]

            candidates = [col for col in df.columns
                          if col.startswith(actual_col) and model_key in col]
            if not candidates:
                print(f"No matching columns for {model_key} at hour {hour}. Skipping...")
                continue
            # 'LightGBM' is also a substring of 'LightGBM_with_Forecast_elia';
            # the shortest match is the column that belongs to this key.
            model_predictions = df[min(candidates, key=len)]

            actual_values = real_values_df[actual_col].dropna()
            common_indices = model_predictions.index.intersection(actual_values.index)
            if common_indices.empty:
                # No overlapping timestamps: no error can be computed.
                continue
            aligned_actual_values = actual_values.loc[common_indices]

            model_mae = calculate_mae(aligned_actual_values, model_predictions.loc[common_indices])
            entsoe_mae = calculate_mae(aligned_actual_values, real_values_df[entsoe_col].loc[common_indices])
            # A zero or undefined baseline would yield an infinite/NaN ratio.
            if not np.isfinite(entsoe_mae) or entsoe_mae == 0:
                continue
            mae_ratio = model_mae / entsoe_mae
            if not np.isfinite(mae_ratio):
                continue
            mae_ratios.append(mae_ratio)
            hours_with_data.append(hour)

        if mae_ratios:
            all_ratios.extend(mae_ratios)
            # Close the curve by repeating the first point; the closing angle
            # must belong to the same hour as the closing radius.
            fig.add_trace(go.Scatterpolar(
                r=mae_ratios + [mae_ratios[0]],
                theta=[h * 15 for h in hours_with_data] + [hours_with_data[0] * 15],
                mode='lines+markers',
                name=model_key,
                line=dict(color=base_color),
                marker=dict(color=base_color, size=8),
            ))
        else:
            print(f"No data to plot for {model_key}.")  # Debugging print

    # Size the radial axis from every plotted model, not only the last one.
    radial_range = [0, max(max(all_ratios), 1.0) * 1.1] if all_ratios else [0, 1.0]
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=radial_range),
            angularaxis=dict(tickmode='array', tickvals=[h * 15 for h in hours], ticktext=hours),
        ),
        title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by Hour of Forecasting',
        showlegend=True,
    )
    return fig
# Forecasts section: observed-vs-predicted scatter plots, one per variable.
st.subheader('Error Distribution')
st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
# Only rows whose timestamp year is after 2023 are plotted.
data_2024 = data[data.index.year > 2023]
for actual_col, forecast_col in zip(forecast_columns[0::2], forecast_columns[1::2]):
    if forecast_col not in data_2024.columns:
        continue
    obs = data_2024[actual_col]
    pred = data_2024[forecast_col]
    error = pred - obs
    fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
    fig.update_layout(title=f'Error Distribution for {forecast_col}')
    st.plotly_chart(fig)
# Forecasts section: MAE / rMAE table for the date range chosen in the sidebar.
st.subheader('Accuracy Metrics (Sorted by rMAE):')
output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
st.write(output_text)

# NOTE(review): end_date is a Timestamp built from the picked date, so this
# slice presumably stops at 00:00 of the last selected day — confirm.
data = data.loc[start_date:end_date]

accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
for actual_col, forecast_col in zip(forecast_columns[0::2], forecast_columns[1::2]):
    if forecast_col not in data.columns:
        continue
    obs = data[actual_col]
    pred = data[forecast_col]
    error = pred - obs
    mae = round(np.mean(np.abs(error)), 2)

    # Persistence baseline: the value 168 rows earlier for load, 24 rows
    # earlier for everything else.
    lag_rows = 168 if 'Load' in actual_col else 24
    persistence = obs.shift(lag_rows)
    rmae = round(mae / np.mean(np.abs(obs - persistence)), 2)

    if 'Load' in actual_col:
        row_label = 'Load'
    elif 'Solar' in actual_col:
        row_label = 'Solar'
    elif 'Wind_offshore' in actual_col:
        row_label = 'Wind Offshore'
    else:
        row_label = 'Wind Onshore'
    accuracy_metrics.loc[row_label] = [mae, rmae]

# Drop variables without any metric, then order by rMAE (second column).
accuracy_metrics = (
    accuracy_metrics
    .dropna(how='all')
    .sort_values(by=accuracy_metrics.columns[1], ascending=True)
    .round(4)
)

col1, col2 = st.columns([3, 2])
with col1:
    st.dataframe(accuracy_metrics)
with col2:
    st.markdown("""
<style>
.big-font {
font-size: 20px;
font-weight: 500;
}
</style>
<div class="big-font">
Equations
</div>
""", unsafe_allow_html=True)
    st.markdown(r"""
$\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
""")
# Forecasts section: autocorrelation of the ENTSO-E forecast errors.
st.subheader('ACF plots of Errors')
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
for actual_col, forecast_col in zip(forecast_columns[0::2], forecast_columns[1::2]):
    if forecast_col not in data.columns:
        continue
    obs = data[actual_col]
    pred = data[forecast_col]
    error = pred - obs
    clean_error = error.dropna()
    if clean_error.empty:
        # Nothing to correlate (e.g. the selected range has no observations).
        continue
    st.write(f"**ACF of Errors for {actual_col}**")
    fig, ax = plt.subplots(figsize=(10, 5))
    plot_acf(clean_error, ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this the
    # figures pile up each time the script reruns.
    plt.close(fig)
    # Kept from the original script; the value is not used by the visible code.
    acf_values = acf(clean_error, nlags=240)
# Section 3: Insights | |
elif section == 'Insights': | |
st.header("Insights") | |
st.write(""" | |
This section provides insights derived from the data and forecasts. | |
You can visualize trends, anomalies, and other important findings. | |
""") | |
# Scatter plots for correlation between wind, solar, and load | |
st.subheader('Correlation between Wind, Solar, and Load') | |
st.write('The below scatter plots are made for checking whether there exists a correlation between all three data fields obtained from ENTSO-E: Solar, Wind and Load.') | |
combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')] | |
for x_col, y_col in combinations: | |
if x_col in data.columns and y_col in data.columns: | |
# For solar combinations, filter out zero values | |
if 'Solar_entsoe' in x_col: | |
filtered_data = data[data['Solar_entsoe'] > 0] | |
x_values = filtered_data[x_col] | |
y_values = filtered_data[y_col] | |
else: | |
x_values = data[x_col] | |
y_values = data[y_col] | |
corr_coef = x_values.corr(y_values) | |
fig = px.scatter( | |
x=x_values, | |
y=y_values, | |
labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'}, | |
title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey']) | |
st.plotly_chart(fig) | |
st.subheader('Weather vs. Generation/Demand') | |
st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and the generation/demand data from ENTSO-E.') | |
for weather_col in weather_columns: | |
for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']: | |
if weather_col in data.columns and actual_col in data.columns: | |
clean_label = actual_col.replace('_entsoe', '') | |
if weather_col == 'Temperature': | |
fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange']) | |
else: | |
fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label}) | |
fig.update_layout(title=f'{weather_col} vs {actual_col}') | |
st.plotly_chart(fig) | |