Spaces:
Sleeping
Sleeping
# modules/timeseries.py
# -*- coding: utf-8 -*-
#
# PROJECT: CognitiveEDA v5.0 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION: Specialized module for time-series analysis, including decomposition
# and stationarity testing (Augmented Dickey-Fuller).
import logging | |
from typing import Tuple | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from statsmodels.tsa.seasonal import seasonal_decompose | |
from statsmodels.tsa.stattools import adfuller | |
def analyze_time_series(
    df: pd.DataFrame,
    date_col: str,
    value_col: str,
    period: int = 12,
) -> Tuple[go.Figure, str]:
    """
    Decompose a time series and test it for stationarity.

    The selected columns are copied, the date column is parsed with
    ``pd.to_datetime(..., errors='coerce')``, rows with an unparseable date or
    a missing value are dropped, and the remaining values are sorted by date.
    An additive seasonal decomposition is then plotted, followed by an
    Augmented Dickey-Fuller (ADF) test on the same values.

    This function does not raise for bad input: every failure is reported
    through the returned Markdown string (and, where possible, the figure title).

    Args:
        df: The input DataFrame.
        date_col: The name of the column containing datetime information.
        value_col: The name of the numeric column to analyze.
        period: Number of observations per seasonal cycle. Defaults to 12
            (monthly data with a yearly cycle). At least ``2 * period``
            valid observations are required for the decomposition.

    Returns:
        A tuple containing:
        - A Plotly Figure: the four-panel decomposition, the raw series when
          there are too few points to decompose, or an empty figure on error.
        - A Markdown string with the ADF results, or a warning/error message.
    """
    # 1. Input validation
    if not date_col or not value_col:
        return go.Figure(), "Please select both a date/time column and a value column to begin analysis."

    # Report missing columns explicitly instead of mislabelling them as
    # "not numeric" or letting a KeyError surface as a generic failure.
    missing_cols = [col for col in (date_col, value_col) if col not in df.columns]
    if missing_cols:
        names = ", ".join(f"'{col}'" for col in missing_cols)
        msg = f"Column(s) {names} not found in the dataset."
        return go.Figure().update_layout(title=msg), f"❌ **Error:** {msg}"

    if not pd.api.types.is_numeric_dtype(df[value_col]):
        msg = f"Value column '{value_col}' is not numeric. Please select a numeric column for analysis."
        return go.Figure().update_layout(title=msg), f"❌ **Error:** {msg}"

    if period < 2:
        msg = f"Seasonal period must be at least 2, got {period}."
        return go.Figure().update_layout(title=msg), f"❌ **Error:** {msg}"

    try:
        logging.info("Analyzing time-series for date='%s' and value='%s'", date_col, value_col)

        # 2. Data preparation
        ts_df = df[[date_col, value_col]].copy()
        ts_df[date_col] = pd.to_datetime(ts_df[date_col], errors='coerce')
        # Drop rows where date conversion failed (NaT) or the value is missing.
        ts_df = ts_df.dropna(subset=[date_col, value_col])

        if ts_df.empty:
            msg = f"No valid data remains after parsing dates in '{date_col}' and removing missing values."
            logging.warning(msg)
            return go.Figure().update_layout(title=msg), f"❌ **Error:** {msg}"

        ts_data = ts_df.set_index(date_col).sort_index()[value_col]

        # 3. Decomposition -- requires at least two full seasonal cycles.
        min_points = 2 * period
        if len(ts_data) < min_points:
            msg = (
                f"Insufficient data for reliable seasonal decomposition. "
                f"Found {len(ts_data)} points, require at least {min_points}."
            )
            logging.warning(msg)
            # Still return the raw plot if decomposition isn't possible.
            fig_raw = px.line(ts_data, title=f"<b>Raw Time-Series of '{value_col}'</b>")
            return fig_raw, f"⚠️ **Warning:** {msg}"

        result = seasonal_decompose(ts_data, model='additive', period=period)
        decomposition_data = pd.DataFrame({
            'Trend': result.trend,
            'Seasonal': result.seasonal,
            'Residual': result.resid,
            'Observed': result.observed,
        }).reset_index()

        fig_decomp = px.line(
            decomposition_data,
            x=date_col,
            y=['Observed', 'Trend', 'Seasonal', 'Residual'],
            title=f"<b>Time-Series Decomposition of '{value_col}'</b>",
            labels={'value': 'Value', date_col: 'Date'},
            facet_row='variable',
        ).update_yaxes(matches=None)  # Allow y-axes to have independent scales
        fig_decomp.update_layout(showlegend=False)

        # 4. Stationarity test (Augmented Dickey-Fuller)
        # Handled separately so that an ADF failure (e.g. a constant series)
        # does not throw away the decomposition figure built above.
        # ts_data has no missing values here; they were dropped in step 2.
        try:
            adf_stat, p_value = adfuller(ts_data)[:2]
        except ValueError as adf_err:
            msg = f"Stationarity test could not be computed: {adf_err}"
            logging.warning(msg)
            return fig_decomp, f"⚠️ **Warning:** {msg}"

        if p_value < 0.05:
            conclusion = 'likely **stationary** (p < 0.05)'
        else:
            conclusion = 'likely **non-stationary** (p >= 0.05)'

        # Built line by line so no source indentation leaks into the Markdown
        # (leading spaces would render the report as a code block).
        adf_md = (
            "\n### Stationarity Analysis (Augmented Dickey-Fuller Test)\n"
            f"- **ADF Statistic:** `{adf_stat:.4f}`\n"
            f"- **p-value:** `{p_value:.4f}`\n"
            f"- **Conclusion:** The time-series is {conclusion}. "
            "Non-stationary series often require differencing before being used "
            "in forecasting models like ARIMA.\n"
        )
        return fig_decomp, adf_md

    except Exception as e:
        # Top-level boundary for a UI callback: log the traceback and report
        # the failure to the user instead of propagating it.
        logging.exception("Time-series analysis failed: %s", e)
        error_msg = f"An unexpected error occurred during analysis. Please check column formats. Details: {e}"
        return go.Figure(), f"❌ **Error:** {error_msg}"