Spaces:
Running
Running
File size: 7,078 Bytes
e67fcfa c58f85c e67fcfa c58f85c e67fcfa 3b1c0d9 e67fcfa 3b1c0d9 e67fcfa 3b1c0d9 e67fcfa 3b1c0d9 e67fcfa 3b1c0d9 e67fcfa 3b1c0d9 e67fcfa 3b1c0d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from io import BytesIO
# Serialise a DataFrame into CSV bytes suitable for a download payload
def convert_df_to_csv(df):
    """Return *df* rendered as CSV text (index column omitted), UTF-8 encoded."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
# ELIA input data, wrapped in Streamlit's data cache
@st.cache_data
def load_data_elia():
    """Read 'DATA_ELIA.csv' and parse its 'Date' column as day-first datetimes.

    Returns:
        The loaded DataFrame with 'Date' converted by ``pd.to_datetime``.
    """
    elia = pd.read_csv('DATA_ELIA.csv')
    parsed_dates = pd.to_datetime(elia['Date'], dayfirst=True)
    elia['Date'] = parsed_dates
    return elia
# Model predictions, wrapped in Streamlit's data cache
@st.cache_data
def load_data_predictions():
    """Read 'Predictions.csv' and parse its 'Date' column as day-first datetimes.

    Returns:
        A pair ``(all_rows, rows_with_price)``: the full DataFrame, and the
        subset of rows whose 'Price' value is not missing.
    """
    predictions = pd.read_csv('Predictions.csv')
    predictions['Date'] = pd.to_datetime(predictions['Date'], dayfirst=True)
    has_price = predictions['Price'].notna()
    return predictions, predictions[has_price]
# Materialise both datasets through their cached loaders
df_input = load_data_elia()
df, df_filtered = load_data_predictions()

# Calendar bounds of the ELIA input data
elia_dates = df_input['Date']
min_date_allowed = elia_dates.min().date()
max_date_allowed = elia_dates.max().date()

# Calendar bounds of the prediction rows that have a 'Price' value
priced_dates = df_filtered['Date']
min_date_allowed_pred = priced_dates.min().date()
max_date_allowed_pred = priced_dates.max().date()

# Plot window: the seven days leading up to the last date in the full
# predictions table (rows without a 'Price' included)
end_date = df['Date'].max().date()
one_week = pd.Timedelta(days=7)
start_date = end_date - one_week

st.title("Belgium: Electricity Price Forecasting")
# Sidebar: all user-controlled inputs.
# Defines `selected_variables`, `start_date_pred`, `end_date_pred` and
# `model_selection` for the sections rendered below.
with st.sidebar:
    st.write("### Variables Selection for Graph")
    st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
    selected_variables = st.multiselect("Select variables to display:", options=['Price', 'DNN', 'LEAR', 'Persis'], default=['Price', 'DNN', 'LEAR', 'Persis'])

    st.write("### Date Range for Metrics Calculation")
    st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
    # In range mode st.date_input returns a tuple that holds only one date
    # while the user has clicked the start but not yet the end of the range.
    # Unpacking it straight into two names raises ValueError in that state,
    # so the length is checked first.
    selected_range = st.date_input("Select Date Range for Metrics Calculation:", [min_date_allowed_pred, max_date_allowed_pred])
    if len(selected_range) == 2:
        start_date_pred, end_date_pred = selected_range
    else:
        # Incomplete range: leave both bounds unset so the metrics section,
        # which checks that both are truthy, is skipped for this rerun.
        start_date_pred = end_date_pred = None
        st.info("Select both a start and an end date to calculate the metrics.")

    st.write("### Model Selection for Scatter Plot")
    model_selection = st.selectbox("Select which model's predictions to display:", options=['DNN', 'LEAR', 'Persistence'], index=0)  # Default to 'DNN'
# Main content: line chart of the selected series over the recent window
if selected_variables:
    st.write("## Belgian Day-Ahead Electricity Prices")

    # Keep every row from `start_date` onwards; no upper bound is applied
    is_recent = df['Date'] >= pd.Timestamp(start_date)
    temp_df = df[is_recent]

    # Legend text shown for each selectable column
    variable_labels = {
        'Price': 'Real Price',
        'DNN': 'DNN Forecast',
        'LEAR': 'LEAR Forecast',
        'Persis': 'Persistence Forecast'
    }

    fig = go.Figure()
    for variable in selected_variables:
        series_trace = go.Scatter(
            x=temp_df['Date'],
            y=temp_df[variable],
            mode='lines',
            name=variable_labels[variable],
        )
        fig.add_trace(series_trace)
    fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
    st.plotly_chart(fig, use_container_width=True)
    st.write("The graph presented here illustrates the day-ahead electricity price forecasts for Belgium, covering the period from one week ago up to tomorrow. It incorporates predictions from three distinct models: DNN (Deep Neural Networks), LEAR (Lasso Estimated AutoRegressive), and Persistence, alongside the actual electricity prices up until today.")
else:
    st.warning("Please select at least one variable to display.")
# Scatter plot of real prices against the predictions of the model chosen in
# the sidebar, with a least-squares line of best fit.
# The section is still shown only when at least one variable is selected, but
# the "select at least one variable" warning is not repeated here: the line
# chart section above already displays it.
if selected_variables:
    st.write("## Scatter Plot: Real Price vs Model Predictions")

    # The sidebar offers 'Persistence' while the DataFrame column is 'Persis'.
    model_column = 'Persis' if model_selection == 'Persistence' else model_selection

    # Use the whole span for which real prices exist (not the sidebar range).
    # The upper bound is the start of the following day, so rows carrying a
    # time of day on the last date are kept; for date-only values this is
    # identical to "<= last date".
    # NOTE(review): assumes 'Date' may hold intraday timestamps - confirm.
    range_start = pd.Timestamp(min_date_allowed_pred)
    range_end = pd.Timestamp(max_date_allowed_pred) + pd.Timedelta(days=1)
    in_range = (df['Date'] >= range_start) & (df['Date'] < range_end)

    # `df` is the unfiltered table, so 'Price' or the model column can be
    # missing on some rows. np.polyfit cannot fit through NaN values, so only
    # complete pairs are kept.
    plot_df = df.loc[in_range].dropna(subset=['Price', model_column])

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=plot_df['Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))

    # A straight-line fit needs at least two points; otherwise only the
    # markers are drawn.
    if len(plot_df) >= 2:
        m, b = np.polyfit(plot_df['Price'], plot_df[model_column], 1)
        # The fitted equation doubles as the legend entry of the line.
        equation = f"y = {m:.2f}x + {b:.2f}"
        # Two end points are enough to draw a straight line.
        x_ends = np.array([plot_df['Price'].min(), plot_df['Price'].max()])
        fig.add_trace(go.Scatter(x=x_ends, y=m * x_ends + b, mode='lines', name=equation, line=dict(color='black')))

    fig.update_layout(xaxis_title="Real Price [EUR/MWh]", yaxis_title=f"{model_selection} Predictions [EUR/MWh]", title=f"Scatter Plot of Real Price vs {model_selection} Predictions")
    st.plotly_chart(fig, use_container_width=True)
# Accuracy metrics (MAE, SMAPE, RMSE) of each model over the sidebar date range
def _forecast_errors(actual, predicted):
    """Return ``(mae, smape, rmse)`` of *predicted* measured against *actual*.

    Args:
        actual: Real prices (pandas Series or array-like supporting arithmetic).
        predicted: Forecast values aligned with *actual*.

    Returns:
        Mean absolute error, symmetric mean absolute percentage error
        (expressed in percent) and root mean square error.
    """
    error = actual - predicted
    abs_error = np.abs(error)
    mae = np.mean(abs_error)
    smape = 100 * np.mean(abs_error / ((np.abs(actual) + np.abs(predicted)) / 2))
    rmse = np.sqrt(np.mean(error ** 2))
    return mae, smape, rmse


if start_date_pred and end_date_pred:
    st.header("Accuracy Metrics")
    st.write("Evaluate the forecasting accuracy of our models with key performance indicators. The table summarizes the Mean Absolute Error (MAE), Symmetric Mean Absolute Percentage Error (SMAPE), and Root Mean Square Error (RMSE) for the Persistence, DNN and LEAR models over your selected date range. Lower values indicate higher precision and reliability of the forecasts.")

    # The selected end date is meant to be inclusive. Comparing against
    # Timestamp(end_date) alone stops at midnight and would drop rows later
    # on that day, so the bound is the start of the following day instead;
    # for date-only values the selection is unchanged.
    # NOTE(review): assumes 'Date' may hold intraday timestamps - confirm.
    range_start = pd.Timestamp(start_date_pred)
    range_end = pd.Timestamp(end_date_pred) + pd.Timedelta(days=1)
    filtered_df = df_filtered[(df_filtered['Date'] >= range_start) & (df_filtered['Date'] < range_end)]

    if filtered_df.empty:
        # Without rows every metric is NaN; say so instead of showing "nan".
        st.warning("No predictions are available for the selected date range.")
    else:
        p_real = filtered_df['Price']
        # Table column label -> DataFrame column, in display order.
        model_columns = {'Persistence': 'Persis', 'DNN': 'DNN', 'LEAR': 'LEAR'}
        metrics_table = {'Metric': ['MAE', 'SMAPE', 'RMSE']}
        for label, column in model_columns.items():
            mae, smape, rmse = _forecast_errors(p_real, filtered_df[column])
            metrics_table[label] = [f"{mae:.2f}", f"{smape:.2f}%", f"{rmse:.2f}"]
        new_metrics_df = pd.DataFrame(metrics_table)
        st.dataframe(new_metrics_df, hide_index=True)
# Closing section: how to obtain the predictions (text only, no download widget)
access_heading = "## Access Predictions"
access_note = "If you are interested in accessing the predictions made by the models, please contact Margarida Mascarenhas (KU Leuven PhD Student) at [email protected]"
st.write(access_heading)
st.write(access_note)
|