import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import json
from datetime import datetime
def get_series_name_and_unit(series, dataset_description):
"""
Extract the name and unit from a time series using its dataset description.
Args:
series: Dictionary containing series data
dataset_description: Dictionary containing dataset field descriptions
Returns:
tuple: (name, unit) of the series
"""
field_id = series['datacellar:datasetFieldID']
field = next((f for f in dataset_description['datacellar:datasetFields']
if f['datacellar:datasetFieldID'] == field_id), None)
name = field['datacellar:fieldName'] if field else f'Series {field_id}'
unit = field['datacellar:type']['datacellar:unitText'] if field else 'Unknown'
# Override name if metadata contains loadType
if 'datacellar:timeSeriesMetadata' in series:
metadata = series['datacellar:timeSeriesMetadata']
if 'datacellar:loadType' in metadata:
name = metadata['datacellar:loadType']
return name, unit
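# A minimal sketch of the payload shapes this helper expects, inferred from the
# keys accessed above (the field name, unit, and loadType values are made up
# for illustration and are not taken from a real Data Cellar payload):
def _example_series_name_and_unit():
    dataset_description = {
        "datacellar:datasetFields": [{
            "datacellar:datasetFieldID": 0,
            "datacellar:fieldName": "consumption",
            "datacellar:type": {"datacellar:unitText": "kW"}
        }]
    }
    series = {
        "datacellar:datasetFieldID": 0,
        "datacellar:timeSeriesMetadata": {"datacellar:loadType": "baseload"}
    }
    # The metadata loadType overrides the field name, so this returns ("baseload", "kW")
    return get_series_name_and_unit(series, dataset_description)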
def process_series(series, dataset_description, is_input=False):
"""
Process a single time series into a pandas DataFrame.
Args:
series: Dictionary containing series data
dataset_description: Dictionary containing dataset field descriptions
is_input: Boolean indicating if this is input data
Returns:
tuple: (DataFrame, unit, name) of the processed series
"""
name, unit = get_series_name_and_unit(series, dataset_description)
df = pd.DataFrame(series['datacellar:dataPoints'])
# Convert timestamp to datetime and ensure values are numeric
df['datacellar:timeStamp'] = pd.to_datetime(df['datacellar:timeStamp'])
df['datacellar:value'] = pd.to_numeric(df['datacellar:value'], errors='coerce')
# Add series identifier
df['series_id'] = f'{name} (Input)' if is_input else name
return df, unit, name
def load_and_process_data(json_data, input_data=None):
"""
Load and process time series from the JSON data, filtering out empty series.
"""
series_by_unit = {}
try:
dataset_description = json_data['datacellar:datasetSelfDescription']
    except KeyError:
        # Fallback for responses that carry no self-description (e.g. anomaly
        # detection output): describe a single boolean "anomaly" field, wrapped
        # in the 'datacellar:datasetFields' list that get_series_name_and_unit expects.
        dataset_description = {
            "datacellar:datasetFields": [{
                "@type": "datacellar:DatasetField",
                "datacellar:datasetFieldID": 0,
                "datacellar:fieldName": "anomaly",
                "datacellar:description": "Anomalies",
                "datacellar:type": {
                    "@type": "datacellar:boolean",
                    "datacellar:unitText": "-"
                }
            }]
        }
# Process output series
try:
for series in json_data['datacellar:timeSeriesList']:
# Check if series has any data points
if series.get('datacellar:dataPoints'):
df, unit, _ = process_series(series, dataset_description)
# Additional check for non-empty DataFrame
if not df.empty and df['datacellar:value'].notna().any():
if unit not in series_by_unit:
series_by_unit[unit] = []
series_by_unit[unit].append(df)
except Exception as e:
st.error(f"Error processing series: {str(e)}")
# Process input series if provided
if input_data:
input_description = input_data['datacellar:datasetSelfDescription']
for series in input_data['datacellar:timeSeriesList']:
if series.get('datacellar:dataPoints'):
df, unit, _ = process_series(series, input_description, is_input=True)
if not df.empty and df['datacellar:value'].notna().any():
if unit not in series_by_unit:
series_by_unit[unit] = []
series_by_unit[unit].append(df)
# Concatenate and filter out units with no valid data
result = {}
for unit, dfs in series_by_unit.items():
if dfs: # Check if there are any DataFrames for this unit
combined_df = pd.concat(dfs)
if not combined_df.empty and combined_df['datacellar:value'].notna().any():
result[unit] = combined_df
return result
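# A small usage sketch: the helper returns one DataFrame per unit, so callers
# can iterate and build one chart per unit of measurement (the file name and
# surrounding flow are illustrative, not part of this module):
def _example_load_and_process():
    with open("response.json") as f:
        json_data = json.load(f)
    dfs_by_unit = load_and_process_data(json_data)
    for unit, df in dfs_by_unit.items():
        print(unit, df['series_id'].unique(), len(df))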
def create_time_series_plot(df, unit, service_type=None, fig=None):
    """
    Create visualization for time series data, handling empty series appropriately.
    Args:
        df: DataFrame containing the combined series for one unit
        unit: Unit of measurement, used for axis and title labels
        service_type: Optional service name; "Anomaly Detection" gets dedicated handling
        fig: Optional existing Plotly figure to add traces onto
    Returns:
        plotly Figure, or None if no series contains valid data
    """
if service_type == "Anomaly Detection":
        if fig is None:
fig = go.Figure()
# Filter for non-empty input data
input_data = df[df['series_id'].str.contains('Input')]
input_data = input_data[input_data['datacellar:value'].notna()]
if not input_data.empty:
fig.add_trace(go.Scatter(
x=input_data['datacellar:timeStamp'],
y=input_data['datacellar:value'],
mode='lines',
name='Energy Consumption',
line=dict(color='blue')
))
        # The anomaly flags live in the output series (series_id without the
        # '(Input)' suffix); to_numeric in process_series coerces the boolean
        # flags to 1.0/0.0, so compare against 1.
        anomalies = df[(~df['series_id'].str.contains('Input')) &
                       (df['datacellar:value'] == 1)]
        if not anomalies.empty:
            # Look up the consumption value at each anomalous timestamp so the
            # anomaly markers sit on the input curve.
            anomaly_values = []
            for timestamp in anomalies['datacellar:timeStamp']:
                value = input_data.loc[input_data['datacellar:timeStamp'] == timestamp, 'datacellar:value']
                anomaly_values.append(value.iloc[0] if not value.empty else None)
            fig.add_trace(go.Scatter(
                x=anomalies['datacellar:timeStamp'],
                y=anomaly_values,
                mode='markers',
                name='Anomalies',
                marker=dict(color='red', size=10)
            ))
fig.update_layout(
title=f'Time Series Data with Anomalies ({unit})',
xaxis_title="Time",
yaxis_title=f"Value ({unit})",
hovermode='x unified',
legend_title="Series"
)
return fig
else:
# Filter out series with no valid data
valid_series = []
for series_id in df['series_id'].unique():
series_data = df[df['series_id'] == series_id]
if not series_data.empty and series_data['datacellar:value'].notna().any():
valid_series.append(series_id)
# Create plot only for valid series
if valid_series:
filtered_df = df[df['series_id'].isin(valid_series)]
return px.line(
filtered_df,
x='datacellar:timeStamp',
y='datacellar:value',
color='series_id',
title=f'Time Series Data ({unit})'
).update_layout(
xaxis_title="Time",
yaxis_title=f"Value ({unit})",
hovermode='x unified',
legend_title="Series"
)
else:
# Return None or an empty figure if no valid series
return None
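# One plausible way to use the fig parameter above: accumulate the input curve
# and the anomaly markers from the per-unit DataFrames onto a single figure
# (sketch only; the calling code is not part of this excerpt):
def _example_anomaly_plot(dfs_by_unit):
    fig = None
    for unit, df in dfs_by_unit.items():
        fig = create_time_series_plot(df, unit, service_type="Anomaly Detection", fig=fig)
    if fig is not None:
        st.plotly_chart(fig)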
def display_statistics(dfs_by_unit):
"""
Display statistics only for non-empty series.
"""
for unit, df in dfs_by_unit.items():
st.write(f"## Measurements in {unit}")
for series_id in df['series_id'].unique():
series_data = df[df['series_id'] == series_id]
# Check if series has valid data
if not series_data.empty and series_data['datacellar:value'].notna().any():
st.write(f"### {series_id}")
cols = st.columns(4)
                metrics = [
                    ("Average", series_data['datacellar:value'].mean()),
                    ("Max", series_data['datacellar:value'].max()),
                    ("Min", series_data['datacellar:value'].min()),
                    # Energy total: converts the power samples to an energy
                    # value, assuming one sample every 6 seconds (6 s / 3600 s per hour)
                    ("Total", series_data['datacellar:value'].sum() * 6 / 3600)
                ]
for col, (label, value) in zip(cols, metrics):
with col:
unit_suffix = "h" if label == "Total" else ""
st.metric(label, f"{value:.2f} {unit}{unit_suffix}")
def call_api(file_content, token, service_endpoint):
"""
Call the analysis API with the provided data.
Args:
file_content: Binary content of the JSON file
token: API authentication token
service_endpoint: String indicating which API endpoint to call
Returns:
dict: JSON response from the API or None if the call fails
"""
try:
url = f'https://loki.linksfoundation.com/datacellar/{service_endpoint}'
response = requests.post(
url,
headers={'Authorization': f'Bearer {token}'},
files={'input_file': ('data.json', file_content, 'application/json')}
)
if response.status_code == 401:
st.error("Authentication failed. Please check your API token.")
return None
return response.json()
except Exception as e:
st.error(f"API Error: {str(e)}")
return None
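# A hypothetical call_api invocation (the token and endpoint name below are
# placeholders; only endpoints actually exposed by the Data Cellar deployment
# will work):
def _example_call_api(uploaded_file):
    token = "YOUR_API_TOKEN"  # placeholder credential
    response = call_api(uploaded_file.getvalue(), token, "anomaly-detection")
    if response is not None:
        st.json(response)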
def get_dataset_type(json_data):
"""
Determine the type of dataset from its description.
Args:
json_data: Dictionary containing the JSON data
Returns:
str: "production", "consumption", or "other"
"""
desc = json_data.get('datacellar:description', '').lower()
if 'production' in desc:
return "production"
elif 'consumption' in desc:
return "consumption"
return "other"
def get_forecast_horizon(json_data):
"""
Determine the forecast horizon from dataset description.
Args:
json_data: Dictionary containing the JSON data
Returns:
str: "long", "short", or None
"""
desc = json_data.get('datacellar:description', '').lower()
if 'long term' in desc:
return "long"
elif 'short term' in desc:
return "short"
return None
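# A minimal end-to-end sketch of how these helpers could fit together in the
# Streamlit page (assumed flow; the endpoint name, widget labels, and wiring
# below are illustrative, not taken from this file):
def _example_app_flow():
    token = st.text_input("API token", type="password")
    uploaded = st.file_uploader("Upload dataset", type="json")
    if uploaded and token:
        input_data = json.loads(uploaded.getvalue())
        response = call_api(uploaded.getvalue(), token, "forecasting")  # placeholder endpoint
        if response:
            dfs_by_unit = load_and_process_data(response, input_data=input_data)
            for unit, df in dfs_by_unit.items():
                fig = create_time_series_plot(df, unit)
                if fig is not None:
                    st.plotly_chart(fig)
            display_statistics(dfs_by_unit)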