# Source: Transparency_Plus / app_margarida_V3.py
# Uploaded by mmmapms ("Upload app_margarida_V3.py", commit b26e24a, verified), 31.4 kB.
# (Hosting-page navigation residue "raw" / "history blame" is not part of the program.)
import requests
import pandas as pd
from io import StringIO
import streamlit as st
import os
import plotly.express as px
import plotly.graph_objects as go
import plotly.colors as pc
import numpy as np
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.stattools import acf
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt
## GET ALL FILES FROM GITHUB
def load_GitHub(github_token, file_name):
    """Download a CSV file from the Transparency_Data repository.

    Args:
        github_token: Token sent in the ``Authorization`` request header.
        file_name: Name of the CSV file on the repository's ``main`` branch.

    Returns:
        The parsed DataFrame, indexed by its ``Date`` column (converted to
        datetime) when the file has one, or ``None`` when the request fails
        or the server does not answer with status 200.
    """
    url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
    headers = {'Authorization': f'token {github_token}'}
    try:
        # Without a timeout a stalled connection would hang the app forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to download {file_name}. Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download {file_name}. Status code: {response.status_code}")
        return None

    df = pd.read_csv(StringIO(response.text))
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
        df.set_index('Date', inplace=True)  # Set 'Date' column as the index
    return df
def load_forecast(github_token):
    """Fetch the 24 hourly prediction files and return those that loaded.

    Files are requested in order ``Predictions_0h.csv`` ... ``Predictions_23h.csv``.

    Returns:
        Dict mapping each file name to its DataFrame; files whose download
        returned ``None`` are left out.
    """
    file_names = (f'Predictions_{hour}h.csv' for hour in range(24))
    downloads = ((name, load_GitHub(github_token, name)) for name in file_names)
    return {name: frame for name, frame in downloads if frame is not None}
def convert_European_time(data, time_zone):
    """Re-express the index of ``data`` as naive wall-clock time in ``time_zone``.

    The index is parsed as UTC, converted to ``time_zone``, and then stripped
    of its timezone information. ``data`` is modified in place and returned.
    """
    # Step 1: interpret the existing index as UTC timestamps.
    data.index = pd.to_datetime(data.index, utc=True)
    # Step 2: shift to local time, then drop the tz marker to get naive stamps.
    data.index = data.index.tz_convert(time_zone).tz_localize(None)
    return data
# SECURITY: the access token must never be committed to the source file. It is
# read from the GITHUB_TOKEN environment variable instead.
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be revoked.
github_token = os.environ.get('GITHUB_TOKEN')

if github_token:
    forecast_dict = load_forecast(github_token)
    historical_forecast = load_GitHub(github_token, 'Historical_forecast.csv')
    Data_BE = load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv')
    Data_FR = load_GitHub(github_token, 'FR_Entsoe_UTC.csv')
    Data_NL = load_GitHub(github_token, 'NL_Entsoe_UTC.csv')
    Data_DE = load_GitHub(github_token, 'DE_Entsoe_UTC.csv')

    # load_GitHub returns None on failure; stop with a clear message instead
    # of crashing later on an attribute access on None.
    loaded_frames = {
        'Historical_forecast.csv': historical_forecast,
        'BE_Elia_Entsoe_UTC.csv': Data_BE,
        'FR_Entsoe_UTC.csv': Data_FR,
        'NL_Entsoe_UTC.csv': Data_NL,
        'DE_Entsoe_UTC.csv': Data_DE,
    }
    failed_files = [name for name, frame in loaded_frames.items() if frame is None]
    if failed_files:
        st.error(f"Failed to download: {', '.join(failed_files)}")
        st.stop()

    Data_BE = convert_European_time(Data_BE, 'Europe/Brussels')
    Data_FR = convert_European_time(Data_FR, 'Europe/Paris')
    Data_NL = convert_European_time(Data_NL, 'Europe/Amsterdam')
    Data_DE = convert_European_time(Data_DE, 'Europe/Berlin')
else:
    # Nothing below can run without the data, so halt the script here.
    st.error("Please set the GITHUB_TOKEN environment variable to your GitHub Personal Access Token to proceed.")
    st.stop()
def conformal_predictions(data, target, my_forecast):
    """Add rolling per-hour residual-quantile bands around a forecast column.

    For every calendar day that lies at least 30 days after the first
    timestamp, the 80th percentile of the signed residuals
    (``my_forecast - target``) over the preceding 30 days is computed for each
    hour of the day, then subtracted from / added to the forecast of that day.

    Args:
        data: DataFrame with a DatetimeIndex; modified in place.
        target: Name of the column holding the observed values.
        my_forecast: Name of the column holding the forecast values.

    Returns:
        ``data`` with the columns ``Residuals``, ``Hour``, ``Quantile_80``,
        ``Lower_Interval`` and ``Upper_Interval`` (NaN where no band could be
        computed).
    """
    # Use the `target` argument rather than a global column name.
    data['Residuals'] = data[my_forecast] - data[target]
    data['Hour'] = data.index.hour

    # The same frame may be passed in repeatedly for different targets; reset
    # the outputs so bands from an earlier call never leak into this one.
    for column in ('Quantile_80', 'Lower_Interval', 'Upper_Interval'):
        data[column] = np.nan

    window = pd.DateOffset(days=30)
    first_eligible_day = data.index.min() + window
    day_of_row = data.index.normalize()

    for date in day_of_row.unique():
        # Skip days without a full look-back window, and days whose midnight
        # timestamp is absent from the index.
        if date < first_eligible_day or date not in data.index:
            continue

        calibration = data[date - window:date - pd.DateOffset(hours=1)]
        quantiles = calibration.groupby('Hour')['Residuals'].quantile(0.8)

        day_mask = day_of_row == date
        # Plain arrays avoid index alignment, which fails on repeated timestamps.
        day_quantiles = data.loc[day_mask, 'Hour'].map(quantiles).to_numpy()
        day_forecast = data.loc[day_mask, my_forecast].to_numpy()

        data.loc[day_mask, 'Quantile_80'] = day_quantiles
        data.loc[day_mask, 'Lower_Interval'] = day_forecast - day_quantiles
        data.loc[day_mask, 'Upper_Interval'] = day_forecast + day_quantiles
    return data
st.title("Transparency++")

# Display name shown in the sidebar -> two-letter code used to pick the dataset.
countries = {
    'Belgium': 'BE',
    'Netherlands': 'NL',
    'Germany': 'DE',
    'France': 'FR',
}

st.sidebar.header('Filters')
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
st.write()
date_range = st.sidebar.date_input(
    "Select Date Range for Metrics Calculation:",
    value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))),
)

# Both ends of the range are required before anything else can be computed.
if len(date_range) != 2:
    st.error("Please select a valid date range.")
    st.stop()
start_date = pd.Timestamp(date_range[0])
end_date = pd.Timestamp(date_range[1])

# Sidebar with radio buttons for different sections
section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'])

country_code = countries[selected_country]

# Per country: the source frame, the weather columns offered in the app, and
# the (display column, raw column) pairs copied onto the frame in that order.
generic_weather_sources = (
    ('Temperature', 'temperature_2m'),
    ('Wind Speed', 'wind_speed_100m'),
)
country_setup = {
    'BE': (
        Data_BE,
        ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore'],
        (
            ('Temperature', 'temperature_2m_8'),
            ('Wind Speed Offshore', 'wind_speed_100m_4'),
            ('Wind Speed Onshore', 'wind_speed_100m_8'),
        ),
    ),
    'DE': (Data_DE, ['Temperature', 'Wind Speed'], generic_weather_sources),
    'NL': (Data_NL, ['Temperature', 'Wind Speed'], generic_weather_sources),
    'FR': (Data_FR, ['Temperature', 'Wind Speed'], generic_weather_sources),
}
if country_code in country_setup:
    data, weather_columns, weather_sources = country_setup[country_code]
    for display_column, raw_column in weather_sources:
        data[display_column] = data[raw_column]
def add_feature(df2, df_main):
    """Merge ``df2`` into ``df_main``, giving ``df_main`` priority.

    By default the frames are merged with ``combine_first`` (values of
    ``df_main`` win, gaps are filled from ``df2``). When ``df2`` starts exactly
    one hour after ``df_main`` ends, the rows of ``df2`` are appended below
    ``df_main`` instead.
    """
    merged = df_main.combine_first(df2)
    main_end = df_main.index.max()
    extra_start = df2.index.min()
    starts_right_after = extra_start == main_end + pd.Timedelta(hours=1)
    if starts_right_after:
        newer_rows = df2.loc[df2.index > main_end]
        merged = pd.concat([df_main, newer_rows], axis=0)
    return merged
# Restrict the selected country's data to the date range chosen in the sidebar.
# NOTE(review): end_date presumably falls on midnight, so rows later on the
# final day look excluded by this slice; confirm that is intended.
data = data.loc[start_date:end_date]

# (actual, ENTSO-E forecast) column pairs, one pair per variable.
forecast_columns = [
    'Load_entsoe',
    'Load_forecast_entsoe',
    'Wind_onshore_entsoe',
    'Wind_onshore_forecast_entsoe',
    'Wind_offshore_entsoe',
    'Wind_offshore_forecast_entsoe',
    'Solar_entsoe',
    'Solar_forecast_entsoe',
]
if section == 'Data':
    st.header("Data")
    st.write("""
This section allows you to explore and upload your datasets.
You can visualize raw data, clean it, and prepare it for analysis.
""")

    st.header('Data Quality')
    output_text = f"The below percentages are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
    st.write(output_text)

    # Only report on columns the selected country actually provides, like the
    # other sections do; indexing a missing column would raise a KeyError.
    available_columns = [col for col in forecast_columns if col in data.columns]

    # Report % of missing values
    missing_values = data[available_columns].isna().mean() * 100
    missing_values = missing_values.round(2)

    installed_capacities = {
        'FR': {'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
        'DE': {'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
        'BE': {'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
        'NL': {'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
    }
    if country_code not in installed_capacities:
        st.error(f"Installed capacities not defined for country code '{country_code}'.")
        st.stop()

    # Report % of extreme, impossible values for the selected country.
    # Generation columns are bounded by the installed capacity; load columns
    # have no capacity entry and are only checked for negative values.
    capacities = installed_capacities[country_code]
    capacity_key_by_variable = {
        'Solar': 'Solar',
        'Wind_onshore': 'Wind Onshore',
        'Wind_offshore': 'Wind Offshore',
    }
    extreme_values = {}
    for col in available_columns:
        # 'Solar_forecast_entsoe' and 'Solar_entsoe' both reduce to 'Solar'.
        variable = col.replace('_forecast_entsoe', '').replace('_entsoe', '')
        out_of_range = data[col] < 0
        capacity_key = capacity_key_by_variable.get(variable)
        if capacity_key is not None:
            out_of_range = out_of_range | (data[col] > capacities[capacity_key])
        extreme_values[col] = out_of_range.mean() * 100
    extreme_values = pd.Series(extreme_values, dtype=float).round(2)

    # Combine all metrics into one DataFrame
    metrics_df = pd.DataFrame({
        'Missing Values (%)': missing_values,
        'Extreme/Nonsensical Values (%)': extreme_values,
    })

    st.markdown(
        """
<style>
.dataframe {font-size: 45px !important;}
</style>
""",
        unsafe_allow_html=True
    )
    st.dataframe(metrics_df)
    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
# Section 2: Forecasts
elif section == 'Forecasts':
st.header('Forecast Quality')
# Time series for last 1 week
st.subheader('Time Series: Last 1 Week')
last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))]
st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
forecast_columns_operational = [
'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
forecast_columns = [
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
#print(Historical_and_operational.filter(like='Forecast_elia', axis=1))
best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
df_combined = Historical_and_operational.join(Data_BE, how='inner')
last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))]
for i in range(0, len(forecast_columns_operational), 3):
actual_col = forecast_columns_operational[i]
forecast_col = forecast_columns_operational[i + 1]
my_forecast = forecast_columns_operational[i + 2]
if forecast_col in data.columns:
fig = go.Figure()
fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
if country_code=='BE':
conformal=conformal_predictions(df_combined, actual_col, my_forecast)
last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))]
if actual_col =='Load_entsoe':
last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
fig.add_trace(go.Scatter(
x=last_week_conformal.index,
y=last_week_conformal['Lower_Interval'],
mode='lines',
line=dict(width=0),
showlegend=False
))
# Add the upper interval trace and fill to the lower interval
fig.add_trace(go.Scatter(
x=last_week_conformal.index,
y=last_week_conformal['Upper_Interval'],
mode='lines',
line=dict(width=0),
fill='tonexty', # Fill between this trace and the previous one
fillcolor='rgba(68, 68, 68, 0.3)',
name='P10/P90 prediction intervals'
))
fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
st.plotly_chart(fig)
def plot_category(df_dict, category_prefix, title):
fig = go.Figure()
# Define base colors for each model
model_colors = {
'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4', # Blue
'LightGBMModel.TimeCov.Temp': '#2ca02c', # Green
'Naive': '#ff7f0e' # Orange
}
# To keep track of which model has been added to the legend
legend_added = {'LightGBMModel.TimeCov.Temp.Forecast_elia': False, 'LightGBMModel.TimeCov.Temp': False, 'Naive': False}
for file_name, df in df_dict.items():
# Extract the hour from the filename, assuming the format is "Predictions_Xh.csv"
hour = int(file_name.split('_')[1].replace('h.csv', ''))
filtered_columns = [col for col in df.columns if col.startswith(category_prefix)]
for column in filtered_columns:
# Identify the model type with more precise logic
if 'LightGBMModel' in column:
if 'Forecast_elia' in column:
model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
elif 'TimeCov' in column:
model_key = 'LightGBMModel.TimeCov.Temp'
elif 'Naive' in column:
model_key = 'Naive'
else:
continue # Skip if it doesn't match any model type
# Extract the relevant part of the model name
parts = column.split('.')
model_name_parts = parts[1:] # Skip the variable prefix
model_name = '.'.join(model_name_parts) # Rejoin the parts to form the model name
# Get the base color for the model
base_color = model_colors[model_key]
# Calculate the color shade based on the hour
color_scale = pc.hex_to_rgb(base_color)
scale_factor = 0.3 + (hour / 40) # Adjust scale to ensure the gradient is visible
adjusted_color = tuple(int(c * scale_factor) for c in color_scale)
# Convert to RGBA with transparency for plot lines
line_color = f'rgba({adjusted_color[0]}, {adjusted_color[1]}, {adjusted_color[2]}, 0.1)' # Transparent color for lines
# Combine the hour and the model name for the legend, but only add the legend entry once
show_legend = not legend_added[model_key]
fig.add_trace(go.Scatter(
x=df.index, # Assuming 'Date' is the index, use 'df.index' for x-axis
y=df[column],
mode='lines',
name=model_name if show_legend else None, # Use the model name for the legend, but only once
line=dict(color=base_color if show_legend else line_color), # Use opaque color for legend, transparent for lines
showlegend=show_legend, # Show legend only once per model
legendgroup=model_key # Grouping for consistent legend color
))
# Mark that this model has been added to the legend
if show_legend:
legend_added[model_key] = True
# Add real values as a separate trace, if provided
filtered_Data_BE_df = Data_BE.loc[df.index]
if filtered_Data_BE_df[f'{category_prefix}_entsoe'].notna().any():
fig.add_trace(go.Scatter(
x=filtered_Data_BE_df.index,
y=filtered_Data_BE_df[f'{category_prefix}_entsoe'],
mode='lines',
name=f'Actual {category_prefix}',
line=dict(color='black', width=2), # Black line for real values
showlegend=True # Always show this in the legend
))
# Update layout to position the legend at the top, side by side
fig.update_layout(
title=dict(
text=title,
x=0, # Center the title horizontally
y=1.00, # Slightly lower the title to create more space
xanchor='left',
yanchor='top'
),
xaxis_title='Date',
yaxis_title='Value',
legend=dict(
orientation="h", # Horizontal legend
yanchor="bottom", # Align to the bottom of the legend box
y=1, # Increase y position to avoid overlap with the title
xanchor="center", # Center the legend horizontally
x=0.5 # Position at the center of the plot
)
)
return fig
if country_code == "BE":
st.header('EDS Forecasts by Hour')
solar_fig = plot_category(forecast_dict, 'Solar', 'Solar Predictions')
st.plotly_chart(solar_fig)
wind_offshore_fig = plot_category(forecast_dict, 'Wind_offshore', 'Wind Offshore Predictions')
st.plotly_chart(wind_offshore_fig)
wind_onshore_fig = plot_category(forecast_dict, 'Wind_onshore', 'Wind Onshore Predictions')
st.plotly_chart(wind_onshore_fig)
load_fig = plot_category(forecast_dict, 'Load', 'Load Predictions')
st.plotly_chart(load_fig)
# Scatter plots for error distribution
st.subheader('Error Distribution')
st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
for i in range(0, len(forecast_columns), 2):
actual_col = forecast_columns[i]
forecast_col = forecast_columns[i + 1]
if forecast_col in data.columns:
obs = last_week[actual_col]
pred = last_week[forecast_col]
error = pred - obs
fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
fig.update_layout(title=f'Error Distribution for {forecast_col}')
st.plotly_chart(fig)
st.subheader('Accuracy Metrics (Sorted by rMAE):')
if country_code == "BE":
# Combine the two DataFrames on their index
df_combined = Historical_and_operational.join(Data_BE, how='inner')
# List of model columns from historical_forecast
model_columns = historical_forecast.columns
# Initialize dictionaries to store MAE and RMSE results for each variable
results_wind_onshore = {}
results_wind_offshore = {}
results_load = {}
results_solar = {}
# Mapping of variables to their corresponding naive models
naive_models = {
'Wind_onshore': 'Wind_onshore_DailyNaiveSeasonal',
'Wind_offshore': 'Wind_offshore_DailyNaiveSeasonal',
'Load': 'Load_WeeklyNaiveSeasonal',
'Solar': 'Solar_DailyNaiveSeasonal'
}
# Step 1: Calculate MAE, RMSE, and rMAE for each model
for col in model_columns:
# Extract the variable name by taking everything before the first underscore
base_variable = col.split('_')[0]
# Handle cases where variable names might be combined with multiple parts (e.g., "Load_LightGBMModel...")
if base_variable in ['Wind', 'Load', 'Solar']:
if 'onshore' in col:
variable_name = 'Wind_onshore'
results_dict = results_wind_onshore
elif 'offshore' in col:
variable_name = 'Wind_offshore'
results_dict = results_wind_offshore
else:
variable_name = base_variable
results_dict = results_load if base_variable == 'Load' else results_solar
else:
variable_name = base_variable
# Construct the corresponding `variable_entsoe` column name
entsoe_column = f'{variable_name}_entsoe'
naive_model_col = naive_models.get(variable_name, None)
# Drop NaNs for the specific pair of columns before calculating MAE and RMSE
if entsoe_column in df_combined.columns and naive_model_col in df_combined.columns:
valid_data = df_combined[[col, entsoe_column]].dropna()
valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
# Calculate MAE and RMSE for the model against the `variable_entsoe`
mae = np.mean(abs(valid_data[col] - valid_data[entsoe_column]))
rmse = np.sqrt(mean_squared_error(valid_data[col], valid_data[entsoe_column]))
# Calculate MAE for the Naive model
mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
# Calculate rMAE for the model
rMAE = mae / mae_naive if mae_naive != 0 else np.inf
# Store the results in the corresponding dictionary
results_dict[f'{col}'] = {'MAE': mae, 'RMSE': rmse, 'rMAE': rMAE}
# Step 2: Calculate MAE, RMSE, and rMAE for ENTSO-E forecasts specifically
for variable_name in naive_models.keys():
entsoe_column = f'{variable_name}_entsoe'
forecast_entsoe_column = f'{variable_name}_forecast_entsoe'
naive_model_col = naive_models[variable_name]
# Ensure that the ENTSO-E forecast is included in the results
if forecast_entsoe_column in df_combined.columns:
valid_data = df_combined[[forecast_entsoe_column, entsoe_column]].dropna()
valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
# Calculate MAE and RMSE for the ENTSO-E forecast against the actuals
mae_entsoe = np.mean(abs(valid_data[forecast_entsoe_column] - valid_data[entsoe_column]))
rmse_entsoe = np.sqrt(mean_squared_error(valid_data[forecast_entsoe_column], valid_data[entsoe_column]))
# Calculate rMAE for the ENTSO-E forecast
mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
rMAE_entsoe = mae_entsoe / mae_naive if mae_naive != 0 else np.inf
# Add the ENTSO-E results to the corresponding dictionary
if variable_name == 'Wind_onshore':
results_wind_onshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
elif variable_name == 'Wind_offshore':
results_wind_offshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
elif variable_name == 'Load':
results_load[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
elif variable_name == 'Solar':
results_solar[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
# Convert the dictionaries to DataFrames and sort by rMAE
df_wind_onshore = pd.DataFrame.from_dict(results_wind_onshore, orient='index').sort_values(by='rMAE')
df_wind_offshore = pd.DataFrame.from_dict(results_wind_offshore, orient='index').sort_values(by='rMAE')
df_load = pd.DataFrame.from_dict(results_load, orient='index').sort_values(by='rMAE')
df_solar = pd.DataFrame.from_dict(results_solar, orient='index').sort_values(by='rMAE')
st.write("##### Wind Onshore:")
st.dataframe(df_wind_onshore)
st.write("##### Wind Offshore:")
st.dataframe(df_wind_offshore)
st.write("##### Load:")
st.dataframe(df_load)
st.write("##### Solar:")
st.dataframe(df_solar)
else:
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
for i in range(0, len(forecast_columns), 2):
actual_col = forecast_columns[i]
forecast_col = forecast_columns[i + 1]
if forecast_col in data.columns:
obs = data[actual_col]
pred = data[forecast_col]
error = pred - obs
mae = round(np.mean(np.abs(error)),2)
if 'Load' in actual_col:
persistence = obs.shift(168) # Weekly persistence
else:
persistence = obs.shift(24) # Daily persistence
# Using the whole year's data for rMAE calculations
rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
accuracy_metrics.loc[row_label] = [mae, rmae]
accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
accuracy_metrics = accuracy_metrics.round(4)
col1, col2 = st.columns([3, 2])
with col1:
st.dataframe(accuracy_metrics)
with col2:
st.markdown("""
<style>
.big-font {
font-size: 20px;
font-weight: 500;
}
</style>
<div class="big-font">
Equations
</div>
""", unsafe_allow_html=True)
st.markdown(r"""
$\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
""")
st.subheader('ACF plots of Errors')
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three fields: Solar, Wind and Load.')
for i in range(0, len(forecast_columns), 2):
actual_col = forecast_columns[i]
forecast_col = forecast_columns[i + 1]
if forecast_col in data.columns:
obs = data[actual_col]
pred = data[forecast_col]
error = pred - obs
st.write(f"**ACF of Errors for {actual_col}**")
fig, ax = plt.subplots(figsize=(10, 5))
plot_acf(error.dropna(), ax=ax)
st.pyplot(fig)
acf_values = acf(error.dropna(), nlags=240)
# Section 3: Insights
elif section == 'Insights':
st.header("Insights")
st.write("""
This section provides insights derived from the data and forecasts.
You can visualize trends, anomalies, and other important findings.
""")
# Scatter plots for correlation between wind, solar, and load
st.subheader('Correlation between Wind, Solar, and Load')
st.write('The below scatter plots for correlation between all three fields: Solar, Wind and Load.')
combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]
for x_col, y_col in combinations:
if x_col in data.columns and y_col in data.columns:
# For solar combinations, filter out zero values
if 'Solar_entsoe' in x_col:
filtered_data = data[data['Solar_entsoe'] > 0]
x_values = filtered_data[x_col]
y_values = filtered_data[y_col]
else:
x_values = data[x_col]
y_values = data[y_col]
corr_coef = x_values.corr(y_values)
fig = px.scatter(
x=x_values,
y=y_values,
labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'},
title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey'])
st.plotly_chart(fig)
st.subheader('Weather vs. Generation/Demand')
st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and generation/demand.')
for weather_col in weather_columns:
for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
if weather_col in data.columns and actual_col in data.columns:
clean_label = actual_col.replace('_entsoe', '')
if weather_col == 'Temperature':
fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
else:
fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label})
fig.update_layout(title=f'{weather_col} vs {actual_col}')
st.plotly_chart(fig)