Spaces:

EDS-lab
/

Transparency_Plus

Running

File size: 31,352 Bytes

b26e24a

import requests
import pandas as pd
from io import StringIO
import streamlit as st
import os
import plotly.express as px
import plotly.graph_objects as go
import plotly.colors as pc
import numpy as np
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.stattools import acf
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt


##GET ALL FILES FROM GITHUB
def load_GitHub(github_token, file_name, timeout=30):
    """Download one CSV file from the Transparency_Data repository.

    Args:
        github_token: GitHub token sent in the ``Authorization`` header.
        file_name: Name of the file on the repository's ``main`` branch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the app indefinitely.

    Returns:
        The parsed DataFrame, indexed by its ``Date`` column (converted to
        datetime) when that column exists, or ``None`` when the download
        fails for any reason (network error or non-200 status).
    """
    url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
    headers = {'Authorization': f'token {github_token}'}

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Keep the "None on failure" contract for network-level errors too.
        print(f"Failed to download {file_name}. Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download {file_name}. Status code: {response.status_code}")
        return None

    df = pd.read_csv(StringIO(response.text))
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
        df.set_index('Date', inplace=True)  # Set 'Date' column as the index
    return df

def load_forecast(github_token):
    """Fetch the 24 hourly prediction files and return them keyed by file name.

    Files named ``Predictions_0h.csv`` through ``Predictions_23h.csv`` are
    requested in order; any file whose download returns ``None`` is left out
    of the resulting dictionary.
    """
    file_names = (f'Predictions_{hour}h.csv' for hour in range(24))
    downloads = ((name, load_GitHub(github_token, name)) for name in file_names)
    return {name: frame for name, frame in downloads if frame is not None}

def convert_European_time(data, time_zone):
    """Re-express the index of ``data`` as naive local time in ``time_zone``.

    The index is parsed as UTC, converted to ``time_zone`` and then stripped
    of its timezone information. ``data`` is modified in place and returned.
    """
    utc_index = pd.to_datetime(data.index, utc=True)
    data.index = utc_index.tz_convert(time_zone).tz_localize(None)
    return data

# The access token must never be committed to source control: read it from
# the environment (e.g. a deployment secret exposed as GITHUB_TOKEN).
github_token = os.environ.get('GITHUB_TOKEN')

if github_token:
    forecast_dict = load_forecast(github_token)

    historical_forecast = load_GitHub(github_token, 'Historical_forecast.csv')

    Data_BE = load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv')
    Data_FR = load_GitHub(github_token, 'FR_Entsoe_UTC.csv')
    Data_NL = load_GitHub(github_token, 'NL_Entsoe_UTC.csv')
    Data_DE = load_GitHub(github_token, 'DE_Entsoe_UTC.csv')

    # load_GitHub returns None on failure; stop with a readable message
    # instead of failing later with an AttributeError on None.
    failed_downloads = [
        code
        for code, frame in (('BE', Data_BE), ('FR', Data_FR), ('NL', Data_NL), ('DE', Data_DE))
        if frame is None
    ]
    if failed_downloads:
        st.error(f"Failed to download country data for: {', '.join(failed_downloads)}.")
        st.stop()

    # The files are stored in UTC; convert each index to naive local time.
    Data_BE = convert_European_time(Data_BE, 'Europe/Brussels')
    Data_FR = convert_European_time(Data_FR, 'Europe/Paris')
    Data_NL = convert_European_time(Data_NL, 'Europe/Amsterdam')
    Data_DE = convert_European_time(Data_DE, 'Europe/Berlin')

else:
    # Nothing below can run without the data, so halt the script here.
    st.error("Please enter your GitHub Personal Access Token to proceed (set the GITHUB_TOKEN environment variable).")
    st.stop()

def conformal_predictions(data, target, my_forecast):
    """Add residual-quantile prediction intervals around a forecast column.

    For every calendar day that lies at least 30 days after the first
    timestamp, the 0.8 quantile of the signed residuals
    (``my_forecast - target``) is computed per hour of day over the preceding
    30 days, and then subtracted from / added to that day's forecast.

    Args:
        data: DataFrame with a DatetimeIndex containing both columns. It is
            modified in place.
        target: Name of the column holding the observed values.
        my_forecast: Name of the column holding the forecast values.

    Returns:
        ``data`` with the columns ``Residuals``, ``Hour``, ``Quantile_80``,
        ``Lower_Interval`` and ``Upper_Interval`` added or overwritten. Rows
        without enough history (or without a quantile for their hour) hold
        NaN in the last three columns.
    """
    data['Residuals'] = data[my_forecast] - data[target]
    data['Hour'] = data.index.hour

    # Start from a clean slate so that values written by an earlier call for
    # a different target cannot survive in rows this call does not set.
    for column in ('Quantile_80', 'Lower_Interval', 'Upper_Interval'):
        data[column] = np.nan

    if data.index.empty:
        return data

    day_of_row = data.index.normalize()
    first_eligible_day = data.index.min() + pd.DateOffset(days=30)

    for date in day_of_row.unique():
        if date < first_eligible_day:
            continue

        # Calibration window: the 30 days that end one hour before this day.
        window = data.loc[date - pd.DateOffset(days=30):date - pd.DateOffset(hours=1)]
        quantiles = window.groupby('Hour')['Residuals'].quantile(0.8)

        day_mask = day_of_row == date
        # Plain arrays are assigned positionally, which keeps the assignment
        # valid even if the index holds repeated timestamps.
        day_quantiles = data.loc[day_mask, 'Hour'].map(quantiles).to_numpy()
        day_forecast = data.loc[day_mask, my_forecast].to_numpy()

        data.loc[day_mask, 'Quantile_80'] = day_quantiles
        data.loc[day_mask, 'Lower_Interval'] = day_forecast - day_quantiles
        data.loc[day_mask, 'Upper_Interval'] = day_forecast + day_quantiles

    return data


# ---- Page title and sidebar controls ----
st.title("Transparency++")

# Display name -> two-letter code used to pick the country's data frame.
countries = {
    'Belgium': 'BE',
    'Netherlands': 'NL',
    'Germany': 'DE',
    'France': 'FR',
}

st.sidebar.header('Filters')
selected_country = st.sidebar.selectbox('Select Country', list(countries))

st.write()
date_range = st.sidebar.date_input(
    "Select Date Range for Metrics Calculation:",
    value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))),
)

# The rest of the script needs both ends of the range; halt until the
# widget holds two dates.
if len(date_range) != 2:
    st.error("Please select a valid date range.")
    st.stop()
start_date, end_date = (pd.Timestamp(picked_day) for picked_day in date_range)

# Radio buttons that choose which section of the page is rendered.
section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'])


country_code = countries[selected_country]
frames_by_country = {'BE': Data_BE, 'DE': Data_DE, 'NL': Data_NL, 'FR': Data_FR}

# Display name -> raw weather column it is copied from. Belgium uses
# location-suffixed series and separates onshore from offshore wind speed.
if country_code == 'BE':
    weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
    weather_sources = {
        'Temperature': 'temperature_2m_8',
        'Wind Speed Offshore': 'wind_speed_100m_4',
        'Wind Speed Onshore': 'wind_speed_100m_8',
    }
elif country_code in frames_by_country:
    weather_columns = ['Temperature', 'Wind Speed']
    weather_sources = {
        'Temperature': 'temperature_2m',
        'Wind Speed': 'wind_speed_100m',
    }

if country_code in frames_by_country:
    data = frames_by_country[country_code]
    # The aliases are added to the country's frame itself (no copy is made).
    for display_name, raw_name in weather_sources.items():
        data[display_name] = data[raw_name]

def add_feature(df2, df_main):
    """Merge ``df2`` into ``df_main`` along the time index.

    By default ``df_main`` is combined with ``df2`` via ``combine_first``, so
    values from ``df_main`` win wherever both frames have one. When ``df2``
    starts exactly one hour after ``df_main`` ends, the rows of ``df2`` that
    lie after the end of ``df_main`` are appended to ``df_main`` instead.
    """
    merged = df_main.combine_first(df2)
    main_end = df_main.index.max()
    continues_main = df2.index.min() == main_end + pd.Timedelta(hours=1)
    if continues_main:
        later_rows = df2[df2.index > main_end]
        merged = pd.concat([df_main, later_rows], axis=0)
    return merged
# Keep only the rows inside the date range picked in the sidebar.
# NOTE(review): end_date is a midnight timestamp, so the slice stops at
# 00:00 of the last selected day - confirm that this is intended.
data = data.loc[start_date:end_date]

# Ordered as (observation, ENTSO-E forecast) pairs; later code walks the
# list two entries at a time.
forecast_columns = [
    'Load_entsoe',
    'Load_forecast_entsoe',
    'Wind_onshore_entsoe',
    'Wind_onshore_forecast_entsoe',
    'Wind_offshore_entsoe',
    'Wind_offshore_forecast_entsoe',
    'Solar_entsoe',
    'Solar_forecast_entsoe',
]

if section == 'Data':
    st.header("Data")
    st.write("""

    This section allows you to explore and upload your datasets.

    You can visualize raw data, clean it, and prepare it for analysis.

    """)

    st.header('Data Quality')

    output_text = f"The below percentages are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
    st.write(output_text)

    # Report only the series this country's frame actually contains; the
    # other sections guard the same way, and indexing a missing column would
    # raise a KeyError.
    available_columns = [col for col in forecast_columns if col in data.columns]

    # Report % of missing values
    missing_values = (data[available_columns].isna().mean() * 100).round(2)

    # Installed capacity per technology, used as the upper plausibility bound.
    installed_capacities = {
        'FR': {'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
        'DE': {'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
        'BE': {'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
        'NL': {'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
    }

    if country_code not in installed_capacities:
        st.error(f"Installed capacities not defined for country code '{country_code}'.")
        st.stop()

    # Report % of extreme, impossible values for the selected country
    capacities = installed_capacities[country_code]
    # Column prefix -> key in `capacities`. Load has no entry, so it is only
    # checked for negative values.
    capacity_key_by_variable = {
        'Solar': 'Solar',
        'Wind_onshore': 'Wind Onshore',
        'Wind_offshore': 'Wind Offshore',
    }
    extreme_values = {}

    for col in available_columns:
        # 'X_entsoe' and 'X_forecast_entsoe' both belong to variable 'X'.
        variable = col.replace('_forecast_entsoe', '').replace('_entsoe', '')
        implausible = data[col] < 0
        if variable in capacity_key_by_variable:
            capacity = capacities[capacity_key_by_variable[variable]]
            implausible = implausible | (data[col] > capacity)
        extreme_values[col] = implausible.mean() * 100

    extreme_values = pd.Series(extreme_values, dtype=float).round(2)

    # Combine all metrics into one DataFrame
    metrics_df = pd.DataFrame({
    'Missing Values (%)': missing_values,
    'Extreme/Nonsensical Values (%)': extreme_values,
    })

    st.markdown(
    """

    <style>

    .dataframe {font-size: 45px !important;}

    </style>

    """,
    unsafe_allow_html=True
    )

    st.dataframe(metrics_df)

    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)

# Section 2: Forecasts
elif section == 'Forecasts':
   
    st.header('Forecast Quality')
    
    # Time series for last 1 week
    st.subheader('Time Series: Last 1 Week')
    last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))]
    st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
    forecast_columns_operational = [
    'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
    forecast_columns = [
    'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']

    operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
    operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
    operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
    operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
    Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
    Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
    #print(Historical_and_operational.filter(like='Forecast_elia', axis=1))
    best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
    df_combined = Historical_and_operational.join(Data_BE, how='inner')
    last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))]
    

    for i in range(0, len(forecast_columns_operational), 3):
        actual_col = forecast_columns_operational[i]
        forecast_col = forecast_columns_operational[i + 1]
        my_forecast = forecast_columns_operational[i + 2]


        if forecast_col in data.columns:
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
            fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))

            if country_code=='BE':
                conformal=conformal_predictions(df_combined, actual_col, my_forecast)
                last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))]
                if actual_col =='Load_entsoe':
                    last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
                fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))

                fig.add_trace(go.Scatter(
                    x=last_week_conformal.index,
                    y=last_week_conformal['Lower_Interval'],
                    mode='lines',
                    line=dict(width=0),
                    showlegend=False
                ))

                # Add the upper interval trace and fill to the lower interval
                fig.add_trace(go.Scatter(
                    x=last_week_conformal.index,
                    y=last_week_conformal['Upper_Interval'],
                    mode='lines',
                    line=dict(width=0),
                    fill='tonexty',  # Fill between this trace and the previous one
                    fillcolor='rgba(68, 68, 68, 0.3)',
                    name='P10/P90 prediction intervals'
                ))


            fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
        
            st.plotly_chart(fig)


    def plot_category(df_dict, category_prefix, title):
        """Plot every hourly prediction run for one variable in a single figure.

        Args:
            df_dict: Mapping of file name (``Predictions_<hour>h.csv``) to the
                DataFrame of predictions read from that file.
            category_prefix: Column prefix selecting the variable, e.g.
                ``'Solar'`` or ``'Wind_offshore'``.
            title: Figure title.

        Returns:
            A plotly Figure with one line per matching prediction column and,
            when observations are available in ``Data_BE`` for the timestamps
            of the last processed file, a black line with the actual values.
        """
        fig = go.Figure()

        # Define base colors for each model
        model_colors = {
            'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4',  # Blue
            'LightGBMModel.TimeCov.Temp': '#2ca02c',  # Green
            'Naive': '#ff7f0e'  # Orange
        }

        # To keep track of which model has been added to the legend
        legend_added = dict.fromkeys(model_colors, False)

        # Index of the last processed frame; it selects the observations
        # drawn after the loop. Stays None when df_dict is empty.
        last_index = None

        for file_name, df in df_dict.items():
            # Extract the hour from the filename, assuming the format is "Predictions_Xh.csv"
            hour = int(file_name.split('_')[1].replace('h.csv', ''))
            last_index = df.index

            for column in df.columns:
                if not column.startswith(category_prefix):
                    continue

                # Identify the model type. Columns matching none of the known
                # models are skipped, so a key from a previous column can
                # never be reused by accident.
                if 'LightGBMModel' in column:
                    if 'Forecast_elia' in column:
                        model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
                    elif 'TimeCov' in column:
                        model_key = 'LightGBMModel.TimeCov.Temp'
                    else:
                        continue
                elif 'Naive' in column:
                    model_key = 'Naive'
                else:
                    continue

                # Model name = everything after the first dot-separated part
                model_name = '.'.join(column.split('.')[1:])

                # Get the base color for the model
                base_color = model_colors[model_key]

                # Scale the base color by the hour of the run so that runs
                # from different hours get different shades.
                color_scale = pc.hex_to_rgb(base_color)
                scale_factor = 0.3 + (hour / 40)
                adjusted_color = tuple(int(c * scale_factor) for c in color_scale)
                # Nearly transparent color for the non-legend lines
                line_color = f'rgba({adjusted_color[0]}, {adjusted_color[1]}, {adjusted_color[2]}, 0.1)'

                # Only the first trace of each model gets a legend entry
                show_legend = not legend_added[model_key]

                fig.add_trace(go.Scatter(
                    x=df.index,
                    y=df[column],
                    mode='lines',
                    name=model_name if show_legend else None,
                    line=dict(color=base_color if show_legend else line_color),  # Opaque color for the legend trace
                    showlegend=show_legend,
                    legendgroup=model_key  # Grouping for consistent legend color
                ))

                # Mark that this model has been added to the legend
                if show_legend:
                    legend_added[model_key] = True

        # Add real values as a separate trace, if available. Selecting with
        # isin() tolerates forecast timestamps that have no observation yet.
        observed_column = f'{category_prefix}_entsoe'
        if last_index is not None and observed_column in Data_BE.columns:
            observed = Data_BE.loc[Data_BE.index.isin(last_index), observed_column]
            if observed.notna().any():
                fig.add_trace(go.Scatter(
                    x=observed.index,
                    y=observed,
                    mode='lines',
                    name=f'Actual {category_prefix}',
                    line=dict(color='black', width=2),  # Black line for real values
                    showlegend=True  # Always show this in the legend
                ))

        # Title anchored top-left; horizontal legend centered above the plot
        fig.update_layout(
            title=dict(
                text=title,
                x=0,  # Left edge of the figure
                y=1.00,  # Top of the figure
                xanchor='left',
                yanchor='top'
            ),
            xaxis_title='Date',
            yaxis_title='Value',
            legend=dict(
                orientation="h",  # Horizontal legend
                yanchor="bottom",  # Bottom of the legend box sits at y
                y=1,  # Just above the plotting area
                xanchor="center",  # Center the legend horizontally
                x=0.5  # Position at the center of the plot
            )
        )
        return fig

    # The hourly EDS prediction files are only plotted for Belgium.
    if country_code == "BE":
        st.header('EDS Forecasts by Hour')

        # (column prefix, figure title), in display order.
        hourly_plots = (
            ('Solar', 'Solar Predictions'),
            ('Wind_offshore', 'Wind Offshore Predictions'),
            ('Wind_onshore', 'Wind Onshore Predictions'),
            ('Load', 'Load Predictions'),
        )
        for prefix, plot_title in hourly_plots:
            st.plotly_chart(plot_category(forecast_dict, prefix, plot_title))

    # Scatter plots for error distribution
    st.subheader('Error Distribution')
    st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
    for actual_col, forecast_col in zip(forecast_columns[0::2], forecast_columns[1::2]):
        # Skip variables the selected country has no forecast for ...
        if forecast_col not in data.columns:
            continue
        # ... and make sure both series exist in the frame that is actually
        # plotted, which is `last_week` rather than `data`.
        if actual_col not in last_week.columns or forecast_col not in last_week.columns:
            continue

        obs = last_week[actual_col]
        pred = last_week[forecast_col]

        fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
        fig.update_layout(title=f'Error Distribution for {forecast_col}')
        st.plotly_chart(fig)


        
    st.subheader('Accuracy Metrics (Sorted by rMAE):')

    if country_code == "BE":

        # Combine the two DataFrames on their index
        df_combined = Historical_and_operational.join(Data_BE, how='inner')
        # List of model columns from historical_forecast
        model_columns = historical_forecast.columns

        # Mapping of variables to their corresponding naive models
        naive_models = {
            'Wind_onshore': 'Wind_onshore_DailyNaiveSeasonal',
            'Wind_offshore': 'Wind_offshore_DailyNaiveSeasonal',
            'Load': 'Load_WeeklyNaiveSeasonal',
            'Solar': 'Solar_DailyNaiveSeasonal'
        }

        # One result table per variable: {forecast column: {'MAE', 'RMSE', 'rMAE'}}
        results_by_variable = {variable: {} for variable in naive_models}

        def _score_forecast(predicted_col, variable_name):
            """Score one forecast column against the variable's observations.

            Returns a dict with MAE, RMSE and rMAE (MAE relative to the
            variable's naive model), or None when a required column is
            missing or no row has both forecast and observation.
            """
            entsoe_column = f'{variable_name}_entsoe'
            naive_model_col = naive_models[variable_name]
            required_columns = (predicted_col, entsoe_column, naive_model_col)
            if any(name not in df_combined.columns for name in required_columns):
                return None

            # Drop NaNs per pair of columns so each metric uses all rows it can
            valid_data = df_combined[[predicted_col, entsoe_column]].dropna()
            if valid_data.empty:
                # mean_squared_error raises on empty input
                return None
            valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()

            mae = np.mean(abs(valid_data[predicted_col] - valid_data[entsoe_column]))
            rmse = np.sqrt(mean_squared_error(valid_data[predicted_col], valid_data[entsoe_column]))
            mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
            rMAE = mae / mae_naive if mae_naive != 0 else np.inf
            return {'MAE': mae, 'RMSE': rmse, 'rMAE': rMAE}

        # Step 1: Calculate MAE, RMSE, and rMAE for each model
        for col in model_columns:
            # A model column belongs to the variable whose name it starts
            # with; columns of unknown variables are ignored.
            variable_name = next(
                (variable for variable in naive_models if col.startswith(f'{variable}_')),
                None,
            )
            if variable_name is None:
                continue
            metrics = _score_forecast(col, variable_name)
            if metrics is not None:
                results_by_variable[variable_name][f'{col}'] = metrics

        # Step 2: Calculate MAE, RMSE, and rMAE for ENTSO-E forecasts specifically
        for variable_name in naive_models:
            forecast_entsoe_column = f'{variable_name}_forecast_entsoe'
            metrics = _score_forecast(forecast_entsoe_column, variable_name)
            if metrics is not None:
                results_by_variable[variable_name][forecast_entsoe_column] = metrics

        # Convert each result table to a DataFrame, sort by rMAE and display
        section_headings = {
            'Wind_onshore': "##### Wind Onshore:",
            'Wind_offshore': "##### Wind Offshore:",
            'Load': "##### Load:",
            'Solar': "##### Solar:",
        }
        for variable_name, heading in section_headings.items():
            results_frame = pd.DataFrame.from_dict(results_by_variable[variable_name], orient='index')
            # An empty frame has no 'rMAE' column to sort by
            if not results_frame.empty:
                results_frame = results_frame.sort_values(by='rMAE')
            st.write(heading)
            st.dataframe(results_frame)



    else:
        accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])

        for i in range(0, len(forecast_columns), 2):
            actual_col = forecast_columns[i]
            forecast_col = forecast_columns[i + 1]
            if forecast_col in data.columns:
                obs = data[actual_col]
                pred = data[forecast_col]
                error = pred - obs
                
                mae = round(np.mean(np.abs(error)),2)
                if 'Load' in actual_col:
                    persistence = obs.shift(168)  # Weekly persistence
                else:
                    persistence = obs.shift(24)  # Daily persistence
                
                # Using the whole year's data for rMAE calculations
                rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
                
                row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
                accuracy_metrics.loc[row_label] = [mae, rmae]

        accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
        accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
        accuracy_metrics = accuracy_metrics.round(4)

        # Two-column layout with width ratio 3:2: metrics table on the left,
        # the equations behind the metrics on the right.
        col1, col2 = st.columns([3, 2])

        with col1:
            st.dataframe(accuracy_metrics)

        with col2:
            # Heading for the equations, styled through an inline CSS class
            # (raw HTML, hence unsafe_allow_html=True).
            st.markdown("""

                <style>

                .big-font {

                    font-size: 20px;

                    font-weight: 500;

                }

                </style>

                <div class="big-font">

                Equations

                </div>

                """, unsafe_allow_html=True)

            # Raw string so the LaTeX backslashes are passed through unchanged.
            st.markdown(r"""

            $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$

            

                        

            $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$

                        



            """)

    

    st.subheader('ACF plots of Errors')
    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three fields: Solar, Wind and Load.')

    for actual_col, forecast_col in zip(forecast_columns[0::2], forecast_columns[1::2]):
        if forecast_col not in data.columns:
            continue

        # Forecast error over the selected date range, without gaps
        error = (data[forecast_col] - data[actual_col]).dropna()
        if error.empty:
            # Nothing to correlate for this variable
            continue

        st.write(f"**ACF of Errors for {actual_col}**")
        fig, ax = plt.subplots(figsize=(10, 5))
        plot_acf(error, ax=ax)
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed; release it so
        # figures do not pile up across script reruns.
        plt.close(fig)
        
# Section 3: Insights
elif section == 'Insights':
    st.header("Insights")
    st.write("""

    This section provides insights derived from the data and forecasts.

    You can visualize trends, anomalies, and other important findings.

    """)

    # Scatter plots for correlation between wind, solar, and load
    st.subheader('Correlation between Wind, Solar, and Load')
    st.write('The below scatter plots for correlation between all three fields: Solar, Wind and Load.')

    combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]

    for x_col, y_col in combinations:
        if x_col in data.columns and y_col in data.columns:
            # For solar combinations, filter out zero values
            if 'Solar_entsoe' in x_col:
                filtered_data = data[data['Solar_entsoe'] > 0]
                x_values = filtered_data[x_col]
                y_values = filtered_data[y_col]
            else:
                x_values = data[x_col]
                y_values = data[y_col]

            corr_coef = x_values.corr(y_values)
            fig = px.scatter(
                x=x_values,
                y=y_values,
                labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'},
                title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey'])
            st.plotly_chart(fig)


    st.subheader('Weather vs. Generation/Demand')
    st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and generation/demand.')

    for weather_col in weather_columns:
        for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
            if weather_col in data.columns and actual_col in data.columns:
                clean_label = actual_col.replace('_entsoe', '')
                if weather_col == 'Temperature':
                    fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
                else:
                    fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label})
                fig.update_layout(title=f'{weather_col} vs {actual_col}')
                st.plotly_chart(fig)