Spaces:
Running
Running
import requests | |
import pandas as pd | |
from io import StringIO | |
import streamlit as st | |
import os | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import plotly.colors as pc | |
import numpy as np | |
from sklearn.metrics import mean_squared_error | |
from statsmodels.tsa.stattools import acf | |
from statsmodels.graphics.tsaplots import plot_acf | |
import matplotlib.pyplot as plt | |
##GET ALL FILES FROM GITHUB | |
def load_GitHub(github_token, file_name, timeout=30):
    """Download one CSV file from the Transparency_Data repository.

    Args:
        github_token: GitHub token sent in the ``Authorization`` header.
        file_name: Name of the CSV file on the ``main`` branch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            whole Streamlit script run.

    Returns:
        The parsed DataFrame, indexed by the parsed ``Date`` column when the
        file has one, or ``None`` when the download fails.
    """
    url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
    headers = {'Authorization': f'token {github_token}'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems and timeouts follow the same "return None"
        # contract as a non-200 answer instead of crashing the app.
        print(f"Failed to download {file_name}. Error: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to download {file_name}. Status code: {response.status_code}")
        return None
    df = pd.read_csv(StringIO(response.text))
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
        df.set_index('Date', inplace=True)  # Set 'Date' column as the index
    return df
def load_forecast(github_token):
    """Fetch the 24 hourly prediction files and return them keyed by file name.

    Files are requested in order ``Predictions_0h.csv`` .. ``Predictions_23h.csv``
    through ``load_GitHub``; files for which it returns ``None`` are left out.
    """
    hourly_files = (f'Predictions_{hour}h.csv' for hour in range(24))
    downloads = ((name, load_GitHub(github_token, name)) for name in hourly_files)
    return {name: frame for name, frame in downloads if frame is not None}
def convert_European_time(data, time_zone):
    """Re-express the index of ``data`` as naive wall-clock time in ``time_zone``.

    The index is parsed as UTC, converted to ``time_zone`` and then stripped of
    its timezone information. ``data`` is modified in place and also returned.
    """
    utc_index = pd.to_datetime(data.index, utc=True)
    data.index = utc_index.tz_convert(time_zone).tz_localize(None)
    return data
# The GitHub Personal Access Token is read from the environment (for example a
# deployment secret) instead of being hard-coded: a token committed to source
# is readable by anyone who can see the file.
# NOTE(review): the token that used to be embedded here must be treated as
# compromised and revoked on GitHub.
github_token = os.environ.get('GITHUB_TOKEN')
if github_token:
    forecast_dict = load_forecast(github_token)
    historical_forecast = load_GitHub(github_token, 'Historical_forecast.csv')
    Data_BE = load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv')
    Data_FR = load_GitHub(github_token, 'FR_Entsoe_UTC.csv')
    Data_NL = load_GitHub(github_token, 'NL_Entsoe_UTC.csv')
    Data_DE = load_GitHub(github_token, 'DE_Entsoe_UTC.csv')
    # The index of each country file is parsed as UTC and shifted to that
    # country's local wall-clock time.
    Data_BE = convert_European_time(Data_BE, 'Europe/Brussels')
    Data_FR = convert_European_time(Data_FR, 'Europe/Paris')
    Data_NL = convert_European_time(Data_NL, 'Europe/Amsterdam')
    Data_DE = convert_European_time(Data_DE, 'Europe/Berlin')
else:
    print("Please enter your GitHub Personal Access Token to proceed.")
    # Nothing below can work without the data frames, so report the problem
    # in the UI and end this script run instead of failing later on an
    # undefined name.
    st.error("Please enter your GitHub Personal Access Token to proceed.")
    st.stop()
def conformal_predictions(data, target, my_forecast):
    """Add rolling, hour-of-day residual-quantile intervals around a forecast.

    For every calendar day that starts at least 30 days after the first
    timestamp (and whose midnight timestamp is present in the index), the 80%
    quantile of the residuals ``forecast - target`` over the preceding 30 days
    is computed separately for each hour of the day and used as the interval
    half-width for that day.

    Args:
        data: DataFrame with a DatetimeIndex containing the ``target`` and
            ``my_forecast`` columns. It is modified in place.
        target: Name of the column holding the observed values.
        my_forecast: Name of the column holding the forecast.

    Returns:
        ``data`` with the columns ``Residuals``, ``Hour``, ``Quantile_80``,
        ``Lower_Interval`` and ``Upper_Interval`` added or overwritten. Rows
        without enough history keep NaN in the last three columns.
    """
    # Use the `target` argument; the previous version read a global
    # `actual_col` and silently ignored this parameter.
    data['Residuals'] = data[my_forecast] - data[target]
    data['Hour'] = data.index.hour
    # Always (re)create the output columns so that they exist with short
    # histories and values from an earlier call on the same frame do not leak.
    for column in ('Quantile_80', 'Lower_Interval', 'Upper_Interval'):
        data[column] = np.nan

    day_index = data.index.normalize()
    first_eligible_day = data.index.min() + pd.DateOffset(days=30)
    for date in day_index.unique():
        if date < first_eligible_day or date not in data.index:
            continue
        # Calibration window: the 30 days before `date`, excluding `date`.
        window = data.loc[date - pd.DateOffset(days=30):date - pd.DateOffset(hours=1)]
        quantiles = window.groupby('Hour')['Residuals'].quantile(0.8)

        day_mask = day_index == date
        # Plain arrays avoid index alignment, which fails when the index
        # contains duplicate timestamps.
        hour_quantiles = data.loc[day_mask, 'Hour'].map(quantiles).to_numpy()
        forecast_values = data.loc[day_mask, my_forecast].to_numpy()
        data.loc[day_mask, 'Quantile_80'] = hour_quantiles
        data.loc[day_mask, 'Lower_Interval'] = forecast_values - hour_quantiles
        data.loc[day_mask, 'Upper_Interval'] = forecast_values + hour_quantiles
    return data
st.title("Transparency++")

# Display name shown in the sidebar -> country code used to pick the dataset.
countries = dict(Belgium='BE', Netherlands='NL', Germany='DE', France='FR')

st.sidebar.header('Filters')
selected_country = st.sidebar.selectbox('Select Country', list(countries))
st.write()

default_range = (pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:", value=default_range)

# Anything other than exactly two dates is rejected and ends the script run.
if len(date_range) != 2:
    st.error("Please select a valid date range.")
    st.stop()
start_date, end_date = (pd.Timestamp(day) for day in date_range)

# Sidebar with radio buttons for different sections
section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'])
country_code = countries[selected_country]
# Per-country setup: (source frame, (display column, raw column) pairs, columns
# offered for the weather plots). The pairs are applied in the listed order,
# which determines the order in which the derived columns are appended.
_single_site_weather = (('Temperature', 'temperature_2m'), ('Wind Speed', 'wind_speed_100m'))
_country_setup = {
    'BE': (
        Data_BE,
        (
            ('Temperature', 'temperature_2m_8'),
            ('Wind Speed Offshore', 'wind_speed_100m_4'),
            ('Wind Speed Onshore', 'wind_speed_100m_8'),
        ),
        ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore'],
    ),
    'DE': (Data_DE, _single_site_weather, ['Temperature', 'Wind Speed']),
    'NL': (Data_NL, _single_site_weather, ['Temperature', 'Wind Speed']),
    'FR': (Data_FR, _single_site_weather, ['Temperature', 'Wind Speed']),
}
if country_code in _country_setup:
    data, _weather_aliases, weather_columns = _country_setup[country_code]
    for _display_name, _raw_name in _weather_aliases:
        # Copy the raw weather series under its display name on the frame.
        data[_display_name] = data[_raw_name]
def add_feature(df2, df_main):
    """Merge ``df2`` into ``df_main``, giving precedence to ``df_main``.

    By default the result is ``df_main.combine_first(df2)``. When ``df2``
    starts exactly one hour after the last timestamp of ``df_main``, the rows
    of ``df2`` that lie after that timestamp are appended to ``df_main``
    instead.
    """
    merged = df_main.combine_first(df2)
    main_end = df_main.index.max()
    extra_start = df2.index.min()
    follows_directly = extra_start == main_end + pd.Timedelta(hours=1)
    if not follows_directly:
        return merged
    newer_rows = df2.loc[df2.index > main_end]
    return pd.concat([df_main, newer_rows], axis=0)
# Restrict the data to the selected date range, end date included.
# `end_date` is a midnight timestamp, so slicing with `.loc[start_date:end_date]`
# kept only the 00:00 row of the last selected day; compare against the start
# of the following day instead.
_range_end_exclusive = end_date + pd.Timedelta(days=1)
data = data.loc[(data.index >= start_date) & (data.index < _range_end_exclusive)]

# Observation / ENTSO-E forecast column pairs, in (actual, forecast) order.
forecast_columns = [
    'Load_entsoe', 'Load_forecast_entsoe',
    'Wind_onshore_entsoe', 'Wind_onshore_forecast_entsoe',
    'Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe',
    'Solar_entsoe', 'Solar_forecast_entsoe',
]
if section == 'Data':
    st.header("Data")
    st.write("""
This section allows you to explore and upload your datasets.
You can visualize raw data, clean it, and prepare it for analysis.
""")
    st.header('Data Quality')
    output_text = f"The below percentages are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
    st.write(output_text)

    # Columns missing from a country's file are added as all-NaN columns, so
    # they are reported as 100% missing instead of raising a KeyError.
    quality_data = data.reindex(columns=forecast_columns)

    # Report % of missing values
    missing_values = (quality_data.isna().mean() * 100).round(2)

    installed_capacities = {
        'FR': {'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
        'DE': {'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
        'BE': {'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
        'NL': {'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
    }
    if country_code not in installed_capacities:
        st.error(f"Installed capacities not defined for country code '{country_code}'.")
        st.stop()

    # Report % of extreme, impossible values for the selected country:
    # negative values for every column, plus values above the installed
    # capacity for the generation columns (load has no upper bound here).
    capacities = installed_capacities[country_code]
    capacity_key_by_prefix = (
        ('Solar', 'Solar'),
        ('Wind_onshore', 'Wind Onshore'),
        ('Wind_offshore', 'Wind Offshore'),
    )
    extreme_values = {}
    for col in forecast_columns:
        implausible = quality_data[col] < 0
        for prefix, capacity_key in capacity_key_by_prefix:
            if col.startswith(prefix):
                implausible = implausible | (quality_data[col] > capacities[capacity_key])
        extreme_values[col] = implausible.mean() * 100
    extreme_values = pd.Series(extreme_values).round(2)

    # Combine all metrics into one DataFrame
    metrics_df = pd.DataFrame({
        'Missing Values (%)': missing_values,
        'Extreme/Nonsensical Values (%)': extreme_values,
    })
    st.markdown(
        """
<style>
.dataframe {font-size: 45px !important;}
</style>
""",
        unsafe_allow_html=True
    )
    st.dataframe(metrics_df)
    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
# Section 2: Forecasts | |
elif section == 'Forecasts': | |
st.header('Forecast Quality') | |
# Time series for last 1 week | |
st.subheader('Time Series: Last 1 Week') | |
last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))] | |
st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.') | |
forecast_columns_operational = [ | |
'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia'] | |
forecast_columns = [ | |
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe'] | |
operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1) | |
operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)') | |
operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns] | |
operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns] | |
Historical_and_Load=add_feature(operation_forecast_load, historical_forecast) | |
Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load) | |
#print(Historical_and_operational.filter(like='Forecast_elia', axis=1)) | |
best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1) | |
df_combined = Historical_and_operational.join(Data_BE, how='inner') | |
last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))] | |
for i in range(0, len(forecast_columns_operational), 3): | |
actual_col = forecast_columns_operational[i] | |
forecast_col = forecast_columns_operational[i + 1] | |
my_forecast = forecast_columns_operational[i + 2] | |
if forecast_col in data.columns: | |
fig = go.Figure() | |
fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual')) | |
fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E')) | |
if country_code=='BE': | |
conformal=conformal_predictions(df_combined, actual_col, my_forecast) | |
last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))] | |
if actual_col =='Load_entsoe': | |
last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))] | |
fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS')) | |
fig.add_trace(go.Scatter( | |
x=last_week_conformal.index, | |
y=last_week_conformal['Lower_Interval'], | |
mode='lines', | |
line=dict(width=0), | |
showlegend=False | |
)) | |
# Add the upper interval trace and fill to the lower interval | |
fig.add_trace(go.Scatter( | |
x=last_week_conformal.index, | |
y=last_week_conformal['Upper_Interval'], | |
mode='lines', | |
line=dict(width=0), | |
fill='tonexty', # Fill between this trace and the previous one | |
fillcolor='rgba(68, 68, 68, 0.3)', | |
name='P10/P90 prediction intervals' | |
)) | |
fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]') | |
st.plotly_chart(fig) | |
def plot_category(df_dict, category_prefix, title): | |
fig = go.Figure() | |
# Define base colors for each model | |
model_colors = { | |
'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4', # Blue | |
'LightGBMModel.TimeCov.Temp': '#2ca02c', # Green | |
'Naive': '#ff7f0e' # Orange | |
} | |
# To keep track of which model has been added to the legend | |
legend_added = {'LightGBMModel.TimeCov.Temp.Forecast_elia': False, 'LightGBMModel.TimeCov.Temp': False, 'Naive': False} | |
for file_name, df in df_dict.items(): | |
# Extract the hour from the filename, assuming the format is "Predictions_Xh.csv" | |
hour = int(file_name.split('_')[1].replace('h.csv', '')) | |
filtered_columns = [col for col in df.columns if col.startswith(category_prefix)] | |
for column in filtered_columns: | |
# Identify the model type with more precise logic | |
if 'LightGBMModel' in column: | |
if 'Forecast_elia' in column: | |
model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia' | |
elif 'TimeCov' in column: | |
model_key = 'LightGBMModel.TimeCov.Temp' | |
elif 'Naive' in column: | |
model_key = 'Naive' | |
else: | |
continue # Skip if it doesn't match any model type | |
# Extract the relevant part of the model name | |
parts = column.split('.') | |
model_name_parts = parts[1:] # Skip the variable prefix | |
model_name = '.'.join(model_name_parts) # Rejoin the parts to form the model name | |
# Get the base color for the model | |
base_color = model_colors[model_key] | |
# Calculate the color shade based on the hour | |
color_scale = pc.hex_to_rgb(base_color) | |
scale_factor = 0.3 + (hour / 40) # Adjust scale to ensure the gradient is visible | |
adjusted_color = tuple(int(c * scale_factor) for c in color_scale) | |
# Convert to RGBA with transparency for plot lines | |
line_color = f'rgba({adjusted_color[0]}, {adjusted_color[1]}, {adjusted_color[2]}, 0.1)' # Transparent color for lines | |
# Combine the hour and the model name for the legend, but only add the legend entry once | |
show_legend = not legend_added[model_key] | |
fig.add_trace(go.Scatter( | |
x=df.index, # Assuming 'Date' is the index, use 'df.index' for x-axis | |
y=df[column], | |
mode='lines', | |
name=model_name if show_legend else None, # Use the model name for the legend, but only once | |
line=dict(color=base_color if show_legend else line_color), # Use opaque color for legend, transparent for lines | |
showlegend=show_legend, # Show legend only once per model | |
legendgroup=model_key # Grouping for consistent legend color | |
)) | |
# Mark that this model has been added to the legend | |
if show_legend: | |
legend_added[model_key] = True | |
# Add real values as a separate trace, if provided | |
filtered_Data_BE_df = Data_BE.loc[df.index] | |
if filtered_Data_BE_df[f'{category_prefix}_entsoe'].notna().any(): | |
fig.add_trace(go.Scatter( | |
x=filtered_Data_BE_df.index, | |
y=filtered_Data_BE_df[f'{category_prefix}_entsoe'], | |
mode='lines', | |
name=f'Actual {category_prefix}', | |
line=dict(color='black', width=2), # Black line for real values | |
showlegend=True # Always show this in the legend | |
)) | |
# Update layout to position the legend at the top, side by side | |
fig.update_layout( | |
title=dict( | |
text=title, | |
x=0, # Center the title horizontally | |
y=1.00, # Slightly lower the title to create more space | |
xanchor='left', | |
yanchor='top' | |
), | |
xaxis_title='Date', | |
yaxis_title='Value', | |
legend=dict( | |
orientation="h", # Horizontal legend | |
yanchor="bottom", # Align to the bottom of the legend box | |
y=1, # Increase y position to avoid overlap with the title | |
xanchor="center", # Center the legend horizontally | |
x=0.5 # Position at the center of the plot | |
) | |
) | |
return fig | |
if country_code == "BE": | |
st.header('EDS Forecasts by Hour') | |
solar_fig = plot_category(forecast_dict, 'Solar', 'Solar Predictions') | |
st.plotly_chart(solar_fig) | |
wind_offshore_fig = plot_category(forecast_dict, 'Wind_offshore', 'Wind Offshore Predictions') | |
st.plotly_chart(wind_offshore_fig) | |
wind_onshore_fig = plot_category(forecast_dict, 'Wind_onshore', 'Wind Onshore Predictions') | |
st.plotly_chart(wind_onshore_fig) | |
load_fig = plot_category(forecast_dict, 'Load', 'Load Predictions') | |
st.plotly_chart(load_fig) | |
# Scatter plots for error distribution | |
st.subheader('Error Distribution') | |
st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range') | |
for i in range(0, len(forecast_columns), 2): | |
actual_col = forecast_columns[i] | |
forecast_col = forecast_columns[i + 1] | |
if forecast_col in data.columns: | |
obs = last_week[actual_col] | |
pred = last_week[forecast_col] | |
error = pred - obs | |
fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'}) | |
fig.update_layout(title=f'Error Distribution for {forecast_col}') | |
st.plotly_chart(fig) | |
st.subheader('Accuracy Metrics (Sorted by rMAE):') | |
if country_code == "BE": | |
# Combine the two DataFrames on their index | |
df_combined = Historical_and_operational.join(Data_BE, how='inner') | |
# List of model columns from historical_forecast | |
model_columns = historical_forecast.columns | |
# Initialize dictionaries to store MAE and RMSE results for each variable | |
results_wind_onshore = {} | |
results_wind_offshore = {} | |
results_load = {} | |
results_solar = {} | |
# Mapping of variables to their corresponding naive models | |
naive_models = { | |
'Wind_onshore': 'Wind_onshore_DailyNaiveSeasonal', | |
'Wind_offshore': 'Wind_offshore_DailyNaiveSeasonal', | |
'Load': 'Load_WeeklyNaiveSeasonal', | |
'Solar': 'Solar_DailyNaiveSeasonal' | |
} | |
# Step 1: Calculate MAE, RMSE, and rMAE for each model | |
for col in model_columns: | |
# Extract the variable name by taking everything before the first underscore | |
base_variable = col.split('_')[0] | |
# Handle cases where variable names might be combined with multiple parts (e.g., "Load_LightGBMModel...") | |
if base_variable in ['Wind', 'Load', 'Solar']: | |
if 'onshore' in col: | |
variable_name = 'Wind_onshore' | |
results_dict = results_wind_onshore | |
elif 'offshore' in col: | |
variable_name = 'Wind_offshore' | |
results_dict = results_wind_offshore | |
else: | |
variable_name = base_variable | |
results_dict = results_load if base_variable == 'Load' else results_solar | |
else: | |
variable_name = base_variable | |
# Construct the corresponding `variable_entsoe` column name | |
entsoe_column = f'{variable_name}_entsoe' | |
naive_model_col = naive_models.get(variable_name, None) | |
# Drop NaNs for the specific pair of columns before calculating MAE and RMSE | |
if entsoe_column in df_combined.columns and naive_model_col in df_combined.columns: | |
valid_data = df_combined[[col, entsoe_column]].dropna() | |
valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna() | |
# Calculate MAE and RMSE for the model against the `variable_entsoe` | |
mae = np.mean(abs(valid_data[col] - valid_data[entsoe_column])) | |
rmse = np.sqrt(mean_squared_error(valid_data[col], valid_data[entsoe_column])) | |
# Calculate MAE for the Naive model | |
mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col])) | |
# Calculate rMAE for the model | |
rMAE = mae / mae_naive if mae_naive != 0 else np.inf | |
# Store the results in the corresponding dictionary | |
results_dict[f'{col}'] = {'MAE': mae, 'RMSE': rmse, 'rMAE': rMAE} | |
# Step 2: Calculate MAE, RMSE, and rMAE for ENTSO-E forecasts specifically | |
for variable_name in naive_models.keys(): | |
entsoe_column = f'{variable_name}_entsoe' | |
forecast_entsoe_column = f'{variable_name}_forecast_entsoe' | |
naive_model_col = naive_models[variable_name] | |
# Ensure that the ENTSO-E forecast is included in the results | |
if forecast_entsoe_column in df_combined.columns: | |
valid_data = df_combined[[forecast_entsoe_column, entsoe_column]].dropna() | |
valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna() | |
# Calculate MAE and RMSE for the ENTSO-E forecast against the actuals | |
mae_entsoe = np.mean(abs(valid_data[forecast_entsoe_column] - valid_data[entsoe_column])) | |
rmse_entsoe = np.sqrt(mean_squared_error(valid_data[forecast_entsoe_column], valid_data[entsoe_column])) | |
# Calculate rMAE for the ENTSO-E forecast | |
mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col])) | |
rMAE_entsoe = mae_entsoe / mae_naive if mae_naive != 0 else np.inf | |
# Add the ENTSO-E results to the corresponding dictionary | |
if variable_name == 'Wind_onshore': | |
results_wind_onshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe} | |
elif variable_name == 'Wind_offshore': | |
results_wind_offshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe} | |
elif variable_name == 'Load': | |
results_load[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe} | |
elif variable_name == 'Solar': | |
results_solar[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe} | |
# Convert the dictionaries to DataFrames and sort by rMAE | |
df_wind_onshore = pd.DataFrame.from_dict(results_wind_onshore, orient='index').sort_values(by='rMAE') | |
df_wind_offshore = pd.DataFrame.from_dict(results_wind_offshore, orient='index').sort_values(by='rMAE') | |
df_load = pd.DataFrame.from_dict(results_load, orient='index').sort_values(by='rMAE') | |
df_solar = pd.DataFrame.from_dict(results_solar, orient='index').sort_values(by='rMAE') | |
st.write("##### Wind Onshore:") | |
st.dataframe(df_wind_onshore) | |
st.write("##### Wind Offshore:") | |
st.dataframe(df_wind_offshore) | |
st.write("##### Load:") | |
st.dataframe(df_load) | |
st.write("##### Solar:") | |
st.dataframe(df_solar) | |
else: | |
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore']) | |
for i in range(0, len(forecast_columns), 2): | |
actual_col = forecast_columns[i] | |
forecast_col = forecast_columns[i + 1] | |
if forecast_col in data.columns: | |
obs = data[actual_col] | |
pred = data[forecast_col] | |
error = pred - obs | |
mae = round(np.mean(np.abs(error)),2) | |
if 'Load' in actual_col: | |
persistence = obs.shift(168) # Weekly persistence | |
else: | |
persistence = obs.shift(24) # Daily persistence | |
# Using the whole year's data for rMAE calculations | |
rmae = round(mae / np.mean(np.abs(obs - persistence)),2) | |
row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore' | |
accuracy_metrics.loc[row_label] = [mae, rmae] | |
accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column) | |
accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True) | |
accuracy_metrics = accuracy_metrics.round(4) | |
col1, col2 = st.columns([3, 2]) | |
with col1: | |
st.dataframe(accuracy_metrics) | |
with col2: | |
st.markdown(""" | |
<style> | |
.big-font { | |
font-size: 20px; | |
font-weight: 500; | |
} | |
</style> | |
<div class="big-font"> | |
Equations | |
</div> | |
""", unsafe_allow_html=True) | |
st.markdown(r""" | |
$\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$ | |
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$ | |
""") | |
st.subheader('ACF plots of Errors') | |
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three fields: Solar, Wind and Load.') | |
for i in range(0, len(forecast_columns), 2): | |
actual_col = forecast_columns[i] | |
forecast_col = forecast_columns[i + 1] | |
if forecast_col in data.columns: | |
obs = data[actual_col] | |
pred = data[forecast_col] | |
error = pred - obs | |
st.write(f"**ACF of Errors for {actual_col}**") | |
fig, ax = plt.subplots(figsize=(10, 5)) | |
plot_acf(error.dropna(), ax=ax) | |
st.pyplot(fig) | |
acf_values = acf(error.dropna(), nlags=240) | |
# Section 3: Insights | |
elif section == 'Insights': | |
st.header("Insights") | |
st.write(""" | |
This section provides insights derived from the data and forecasts. | |
You can visualize trends, anomalies, and other important findings. | |
""") | |
# Scatter plots for correlation between wind, solar, and load | |
st.subheader('Correlation between Wind, Solar, and Load') | |
st.write('The below scatter plots for correlation between all three fields: Solar, Wind and Load.') | |
combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')] | |
for x_col, y_col in combinations: | |
if x_col in data.columns and y_col in data.columns: | |
# For solar combinations, filter out zero values | |
if 'Solar_entsoe' in x_col: | |
filtered_data = data[data['Solar_entsoe'] > 0] | |
x_values = filtered_data[x_col] | |
y_values = filtered_data[y_col] | |
else: | |
x_values = data[x_col] | |
y_values = data[y_col] | |
corr_coef = x_values.corr(y_values) | |
fig = px.scatter( | |
x=x_values, | |
y=y_values, | |
labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'}, | |
title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey']) | |
st.plotly_chart(fig) | |
st.subheader('Weather vs. Generation/Demand') | |
st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and generation/demand.') | |
for weather_col in weather_columns: | |
for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']: | |
if weather_col in data.columns and actual_col in data.columns: | |
clean_label = actual_col.replace('_entsoe', '') | |
if weather_col == 'Temperature': | |
fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange']) | |
else: | |
fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label}) | |
fig.update_layout(title=f'{weather_col} vs {actual_col}') | |
st.plotly_chart(fig) | |