import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import json
from datetime import datetime

def get_series_name_and_unit(series, dataset_description):
    """
    Extract the name and unit from a time series using its dataset description.
    
    Args:
        series: Dictionary containing series data
        dataset_description: Dictionary containing dataset field descriptions
        
    Returns:
        tuple: (name, unit) of the series
    """
    field_id = series['datacellar:datasetFieldID']
    field = next((f for f in dataset_description['datacellar:datasetFields'] 
                 if f['datacellar:datasetFieldID'] == field_id), None)
    
    name = field['datacellar:fieldName'] if field else f'Series {field_id}'
    unit = field['datacellar:type']['datacellar:unitText'] if field else 'Unknown'
    
    # Override name if metadata contains loadType
    if 'datacellar:timeSeriesMetadata' in series:
        metadata = series['datacellar:timeSeriesMetadata']
        if 'datacellar:loadType' in metadata:
            name = metadata['datacellar:loadType']
    
    return name, unit
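
# Illustrative sketch (not part of the original pipeline): the minimal shapes
# get_series_name_and_unit consumes, inferred from the lookups above. The
# field values are made up for demonstration.
#
# _example_description = {
#     "datacellar:datasetFields": [{
#         "datacellar:datasetFieldID": 0,
#         "datacellar:fieldName": "load",
#         "datacellar:type": {"datacellar:unitText": "kW"},
#     }]
# }
# _example_series = {"datacellar:datasetFieldID": 0}
# get_series_name_and_unit(_example_series, _example_description)  # -> ("load", "kW")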

def process_series(series, dataset_description, is_input=False):
    """
    Process a single time series into a pandas DataFrame.
    
    Args:
        series: Dictionary containing series data
        dataset_description: Dictionary containing dataset field descriptions
        is_input: Boolean indicating if this is input data
        
    Returns:
        tuple: (DataFrame, unit, name) of the processed series
    """
    name, unit = get_series_name_and_unit(series, dataset_description)
    df = pd.DataFrame(series['datacellar:dataPoints'])
    
    # Convert timestamp to datetime and ensure values are numeric
    df['datacellar:timeStamp'] = pd.to_datetime(df['datacellar:timeStamp'])
    df['datacellar:value'] = pd.to_numeric(df['datacellar:value'], errors='coerce')
    
    # Add series identifier
    df['series_id'] = f'{name} (Input)' if is_input else name
    
    return df, unit, name
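
# Usage sketch (hypothetical data points): process_series returns a tidy frame
# with 'datacellar:timeStamp', 'datacellar:value', and 'series_id' columns.
#
# df, unit, name = process_series(
#     {"datacellar:datasetFieldID": 0,
#      "datacellar:dataPoints": [
#          {"datacellar:timeStamp": "2024-01-01T00:00:00Z", "datacellar:value": 1.5},
#      ]},
#     _example_description,
# )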

def load_and_process_data(json_data, input_data=None):
    """
    Load and process time series from the JSON data, filtering out empty series.
    """
    series_by_unit = {}
    try:
        dataset_description = json_data['datacellar:datasetSelfDescription']
    except KeyError:
        # Fall back to a minimal self-description wrapping a single anomaly
        # field, shaped the way get_series_name_and_unit expects
        # (a 'datacellar:datasetFields' list).
        dataset_description = {
            "datacellar:datasetFields": [{
                "@type": "datacellar:DatasetField",
                "datacellar:datasetFieldID": 0,
                "datacellar:fieldName": "anomaly",
                "datacellar:description": "Anomalies",
                "datacellar:type": {
                    "@type": "datacellar:boolean",
                    "datacellar:unitText": "-"
                }
            }]
        }
    
    # Process output series
    try:
        for series in json_data['datacellar:timeSeriesList']:
            # Check if series has any data points
            if series.get('datacellar:dataPoints'):
                df, unit, _ = process_series(series, dataset_description)
                # Additional check for non-empty DataFrame
                if not df.empty and df['datacellar:value'].notna().any():
                    if unit not in series_by_unit:
                        series_by_unit[unit] = []
                    series_by_unit[unit].append(df)
    except Exception as e:
        st.error(f"Error processing series: {str(e)}")
    
    # Process input series if provided
    if input_data:
        input_description = input_data['datacellar:datasetSelfDescription']
        for series in input_data['datacellar:timeSeriesList']:
            if series.get('datacellar:dataPoints'):
                df, unit, _ = process_series(series, input_description, is_input=True)
                if not df.empty and df['datacellar:value'].notna().any():
                    if unit not in series_by_unit:
                        series_by_unit[unit] = []
                    series_by_unit[unit].append(df)
    
    # Concatenate and filter out units with no valid data
    result = {}
    for unit, dfs in series_by_unit.items():
        if dfs:  # Check if there are any DataFrames for this unit
            combined_df = pd.concat(dfs)
            if not combined_df.empty and combined_df['datacellar:value'].notna().any():
                result[unit] = combined_df
    
    return result
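
# Usage sketch, assuming `uploaded` is a Streamlit file-uploader result parsed
# with json.loads (the name `uploaded` is hypothetical). Keys of the result
# are units ("kW", "-", ...); values are concatenated DataFrames ready to plot.
#
# dfs_by_unit = load_and_process_data(json.loads(uploaded.getvalue()))
# for unit, df in dfs_by_unit.items():
#     st.plotly_chart(create_time_series_plot(df, unit))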

def create_time_series_plot(df, unit, service_type=None, fig=None):
    """
    Create a visualization for time series data, handling empty series appropriately.

    Returns:
        plotly Figure, or None if no series contains valid data
    """
    if service_type == "Anomaly Detection":
        if fig is None:
            fig = go.Figure()
        
        # Filter for non-empty input data
        input_data = df[df['series_id'].str.contains('Input')]
        input_data = input_data[input_data['datacellar:value'].notna()]
        
        if not input_data.empty:
            fig.add_trace(go.Scatter(
                x=input_data['datacellar:timeStamp'], 
                y=input_data['datacellar:value'],
                mode='lines',
                name='Energy Consumption',
                line=dict(color='blue')
            ))
        
            # Overlay anomalies: pick the anomaly-flag series (every row that
            # is not part of the "(Input)" series) where the flag is True.
            anomalies = df[(~df['series_id'].str.contains('Input')) &
                           (df['datacellar:value'] == True)]
            if not anomalies.empty:
                # Look up the input value at each anomalous timestamp so the
                # markers sit on the consumption curve itself.
                anomaly_values = []
                for timestamp in anomalies['datacellar:timeStamp']:
                    value = input_data.loc[input_data['datacellar:timeStamp'] == timestamp, 'datacellar:value']
                    anomaly_values.append(value.iloc[0] if not value.empty else None)

                fig.add_trace(go.Scatter(
                    x=anomalies['datacellar:timeStamp'],
                    y=anomaly_values,
                    mode='markers',
                    name='Anomalies',
                    marker=dict(color='red', size=10)
                ))
        
        fig.update_layout(
            title=f'Time Series Data with Anomalies ({unit})',
            xaxis_title="Time",
            yaxis_title=f"Value ({unit})",
            hovermode='x unified',
            legend_title="Series"
        )
        return fig
    else:
        # Filter out series with no valid data
        valid_series = []
        for series_id in df['series_id'].unique():
            series_data = df[df['series_id'] == series_id]
            if not series_data.empty and series_data['datacellar:value'].notna().any():
                valid_series.append(series_id)
        
        # Create plot only for valid series
        if valid_series:
            filtered_df = df[df['series_id'].isin(valid_series)]
            return px.line(
                filtered_df,
                x='datacellar:timeStamp',
                y='datacellar:value',
                color='series_id',
                title=f'Time Series Data ({unit})'
            ).update_layout(
                xaxis_title="Time",
                yaxis_title=f"Value ({unit})",
                hovermode='x unified',
                legend_title="Series"
            )
        else:
            # Return None or an empty figure if no valid series
            return None
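
# Rendering sketch: the returned figure can be None when no series has valid
# data, so guard before handing it to Streamlit.
#
# fig = create_time_series_plot(df, unit, service_type="Anomaly Detection")
# if fig is not None:
#     st.plotly_chart(fig, use_container_width=True)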

def display_statistics(dfs_by_unit):
    """
    Display statistics only for non-empty series.
    """
    for unit, df in dfs_by_unit.items():
        st.write(f"## Measurements in {unit}")
        for series_id in df['series_id'].unique():
            series_data = df[df['series_id'] == series_id]
            # Check if series has valid data
            if not series_data.empty and series_data['datacellar:value'].notna().any():
                st.write(f"### {series_id}")
                
                cols = st.columns(4)
                metrics = [
                    ("Average", series_data['datacellar:value'].mean()),
                    ("Max", series_data['datacellar:value'].max()),
                    ("Min", series_data['datacellar:value'].min()),
                    ("Total", series_data['datacellar:value'].sum() * 6/3600)
                ]
                
                for col, (label, value) in zip(cols, metrics):
                    with col:
                        unit_suffix = "h" if label == "Total" else ""
                        st.metric(label, f"{value:.2f} {unit}{unit_suffix}")

def call_api(file_content, token, service_endpoint):
    """
    Call the analysis API with the provided data.
    
    Args:
        file_content: Binary content of the JSON file
        token: API authentication token
        service_endpoint: String indicating which API endpoint to call
        
    Returns:
        dict: JSON response from the API or None if the call fails
    """
    try:
        url = f'https://loki.linksfoundation.com/datacellar/{service_endpoint}'
        response = requests.post(
            url,
            headers={'Authorization': f'Bearer {token}'},
            files={'input_file': ('data.json', file_content, 'application/json')},
            timeout=60  # avoid hanging indefinitely on a stalled connection
        )
        
        if response.status_code == 401:
            st.error("Authentication failed. Please check your API token.")
            return None
            
        return response.json()
    except Exception as e:
        st.error(f"API Error: {str(e)}")
        return None
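
# Usage sketch (the token source and endpoint name are hypothetical):
#
# result = call_api(uploaded.getvalue(), st.secrets["api_token"], "forecasting")
# if result is not None:
#     dfs_by_unit = load_and_process_data(result)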

def get_dataset_type(json_data):
    """
    Determine the type of dataset from its description.
    
    Args:
        json_data: Dictionary containing the JSON data
        
    Returns:
        str: "production", "consumption", or "other"
    """
    desc = json_data.get('datacellar:description', '').lower()
    if 'production' in desc:
        return "production"
    elif 'consumption' in desc:
        return "consumption"
    return "other"

def get_forecast_horizon(json_data):
    """
    Determine the forecast horizon from dataset description.
    
    Args:
        json_data: Dictionary containing the JSON data
        
    Returns:
        str: "long", "short", or None
    """
    desc = json_data.get('datacellar:description', '').lower()
    if 'long term' in desc:
        return "long"
    elif 'short term' in desc:
        return "short"
    return None
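
# Example: both helpers do case-insensitive substring matching on the free-text
# dataset description, e.g. with a hypothetical payload:
#
# doc = {"datacellar:description": "PV production, short term forecast"}
# get_dataset_type(doc)       # -> "production"
# get_forecast_horizon(doc)   # -> "short"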