# Source: CognitiveEDA / modules/timeseries.py
# Commit d426068 by mgbam: "Create modules/timeseries.py" (4.7 kB)
# modules/timeseries.py
# -*- coding: utf-8 -*-
#
# PROJECT: CognitiveEDA v5.0 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION: Specialized module for time-series analysis, including decomposition
# and stationarity testing (Augmented Dickey-Fuller).
import logging
from typing import Tuple
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
def _error_result(msg: str) -> Tuple[go.Figure, str]:
    """Return an empty figure titled with *msg* and the matching Markdown error text."""
    return go.Figure().update_layout(title=msg), f"❌ **Error:** {msg}"


def analyze_time_series(
    df: pd.DataFrame,
    date_col: str,
    value_col: str,
    period: int = 12,
) -> Tuple[go.Figure, str]:
    """
    Decompose a time series and test it for stationarity.

    The selected columns are copied out of ``df``, the date column is parsed
    with ``pd.to_datetime(errors='coerce')``, rows with an unparseable date or
    a missing value are dropped, and the series is sorted by date. The series
    is then split into trend, seasonal and residual components with an
    additive ``seasonal_decompose`` model, and an Augmented Dickey-Fuller test
    is run on the same series.

    The data is not resampled: ``period`` counts consecutive observations per
    seasonal cycle, so the default of 12 only corresponds to a yearly cycle
    when the rows are monthly.

    Args:
        df: The input DataFrame. It is not modified.
        date_col: The name of the column containing datetime information.
        value_col: The name of the numeric column to analyze.
        period: Number of observations per seasonal cycle (default 12).
            At least ``2 * period`` valid rows are needed for decomposition.

    Returns:
        A tuple containing:
        - A Plotly Figure: the four-panel decomposition, the raw series when
          there are too few points to decompose, or an empty figure on error.
        - A Markdown string: the ADF summary, or a warning/error message.

    No exception is propagated from the analysis itself; failures are logged
    and reported through the returned Markdown string.
    """
    # 1. Input Validation
    if not date_col or not value_col:
        return go.Figure(), "Please select both a date/time column and a value column to begin analysis."

    # Report a missing column as such, instead of as "not numeric" (value) or
    # as a generic unexpected error raised from the column lookup (date).
    if date_col not in df.columns:
        return _error_result(f"Date column '{date_col}' was not found in the dataset.")
    if value_col not in df.columns:
        return _error_result(f"Value column '{value_col}' was not found in the dataset.")

    # Selecting one column for both roles would yield a frame with duplicated
    # column labels, which the date parsing below cannot handle.
    if date_col == value_col:
        return _error_result("The date/time column and the value column must be different columns.")

    if not pd.api.types.is_numeric_dtype(df[value_col]):
        return _error_result(
            f"Value column '{value_col}' is not numeric. Please select a numeric column for analysis."
        )

    if period < 2:
        return _error_result(f"Seasonal period must be an integer of at least 2, got {period}.")

    try:
        logging.info("Analyzing time-series for date='%s' and value='%s'", date_col, value_col)

        # 2. Data Preparation
        ts_df = df[[date_col, value_col]].copy()
        # Unparseable dates become NaT so they can be dropped with the missing values.
        ts_df[date_col] = pd.to_datetime(ts_df[date_col], errors='coerce')
        ts_df = ts_df.dropna(subset=[date_col, value_col])

        if ts_df.empty:
            msg = f"No valid data remains after parsing dates in '{date_col}' and removing missing values."
            logging.warning(msg)
            return _error_result(msg)

        ts_data = ts_df.set_index(date_col).sort_index()[value_col]

        # 3. Decomposition
        # Require at least two full seasonal cycles.
        min_points = 2 * period
        if len(ts_data) < min_points:
            msg = (
                f"Insufficient data for reliable seasonal decomposition. "
                f"Found {len(ts_data)} points, require at least {min_points}."
            )
            logging.warning(msg)
            # Still return the raw plot if decomposition isn't possible
            fig_raw = px.line(ts_data, title=f"<b>Raw Time-Series of '{value_col}'</b>")
            return fig_raw, f"⚠️ **Warning:** {msg}"

        result = seasonal_decompose(ts_data, model='additive', period=period)
        # reset_index() turns the date index back into a column named date_col,
        # which px.line uses as the x axis.
        decomposition_data = pd.DataFrame({
            'Trend': result.trend,
            'Seasonal': result.seasonal,
            'Residual': result.resid,
            'Observed': result.observed,
        }).reset_index()

        fig_decomp = px.line(
            decomposition_data,
            x=date_col,
            y=['Observed', 'Trend', 'Seasonal', 'Residual'],
            title=f"<b>Time-Series Decomposition of '{value_col}'</b>",
            labels={'value': 'Value', date_col: 'Date'},
            facet_row='variable',
        ).update_yaxes(matches=None)  # Allow y-axes to have independent scales
        fig_decomp.update_layout(showlegend=False)

        # 4. Stationarity Test (Augmented Dickey-Fuller)
        # ts_data has no missing values left at this point, so it is passed as is.
        adf_result = adfuller(ts_data)
        p_value = adf_result[1]
        if p_value < 0.05:
            conclusion = 'likely **stationary** (p < 0.05)'
        else:
            conclusion = 'likely **non-stationary** (p >= 0.05)'

        # Assembled line by line so the indentation of this function never
        # ends up as leading whitespace inside the Markdown.
        adf_md = (
            "\n"
            "### Stationarity Analysis (Augmented Dickey-Fuller Test)\n"
            f"- **ADF Statistic:** `{adf_result[0]:.4f}`\n"
            f"- **p-value:** `{p_value:.4f}`\n"
            f"- **Conclusion:** The time-series is {conclusion}. "
            "Non-stationary series often require differencing before being used in "
            "forecasting models like ARIMA.\n"
        )
        return fig_decomp, adf_md

    except Exception as e:
        # Boundary handler: the caller always gets a (figure, message) pair;
        # the full traceback goes to the log.
        logging.exception("Time-series analysis failed: %s", e)
        error_msg = f"An unexpected error occurred during analysis. Please check column formats. Details: {e}"
        return go.Figure(), f"❌ **Error:** {error_msg}"