Spaces:
Sleeping
Sleeping
Create modules/timeseries.py
Browse files- modules/timeseries.py +106 -0
modules/timeseries.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# modules/timeseries.py
|
2 |
+
|
3 |
+
# -*- coding: utf-8 -*-
|
4 |
+
#
|
5 |
+
# PROJECT: CognitiveEDA v5.0 - The QuantumLeap Intelligence Platform
|
6 |
+
#
|
7 |
+
# DESCRIPTION: Specialized module for time-series analysis, including decomposition
|
8 |
+
# and stationarity testing (Augmented Dickey-Fuller).
|
9 |
+
|
10 |
+
import logging
|
11 |
+
from typing import Tuple
|
12 |
+
|
13 |
+
import pandas as pd
|
14 |
+
import plotly.express as px
|
15 |
+
import plotly.graph_objects as go
|
16 |
+
from statsmodels.tsa.seasonal import seasonal_decompose
|
17 |
+
from statsmodels.tsa.stattools import adfuller
|
18 |
+
|
19 |
+
def _error_result(msg: str) -> Tuple[go.Figure, str]:
    """Build the (figure, markdown) pair used to report a user-facing error."""
    return go.Figure().update_layout(title=msg), f"❌ **Error:** {msg}"


def _adf_summary(series: pd.Series) -> str:
    """Run the Augmented Dickey-Fuller test and format the outcome as Markdown.

    A ``ValueError`` from the test itself is reported as a warning string
    rather than propagated, so a decomposition figure that was already built
    by the caller is not thrown away.
    """
    try:
        adf_result = adfuller(series.dropna())
    except ValueError as exc:
        # NOTE(review): statsmodels is expected to raise ValueError for inputs
        # it cannot test (for example a constant series) - confirm against the
        # installed statsmodels version.
        logging.warning("ADF test could not be computed: %s", exc)
        return (
            "⚠️ **Warning:** The stationarity test could not be computed "
            f"for this series. Details: {exc}"
        )

    statistic, p_value = adf_result[0], adf_result[1]
    if p_value < 0.05:
        conclusion = 'likely **stationary** (p < 0.05)'
    else:
        conclusion = 'likely **non-stationary** (p >= 0.05)'

    # Assembled line by line so that no source-code indentation leaks into the
    # Markdown (indented lines would be rendered as a code block).
    return (
        "\n"
        "### Stationarity Analysis (Augmented Dickey-Fuller Test)\n"
        f"- **ADF Statistic:** `{statistic:.4f}`\n"
        f"- **p-value:** `{p_value:.4f}`\n"
        f"- **Conclusion:** The time-series is {conclusion}. Non-stationary series often "
        "require differencing before being used in forecasting models like ARIMA.\n"
    )


def analyze_time_series(
    df: pd.DataFrame,
    date_col: str,
    value_col: str,
    period: int = 12,
) -> Tuple[go.Figure, str]:
    """
    Decompose a time series and test it for stationarity.

    The selected columns are copied, the date column is parsed with
    ``pd.to_datetime(errors='coerce')``, rows with an unparseable date or a
    missing value are dropped, and the remaining values are sorted by date.
    The series is then split into trend, seasonal and residual components with
    an additive model, and an Augmented Dickey-Fuller test is run on it.

    The data is NOT resampled: ``period`` is simply the number of consecutive
    observations that make up one seasonal cycle. The default of 12 therefore
    assumes one observation per month with yearly seasonality.

    Problems are reported through the return value (an empty or fallback
    figure plus an explanatory message); exceptions raised while preparing or
    analyzing the data are caught and logged.

    Args:
        df: The input DataFrame.
        date_col: The name of the column containing datetime information.
        value_col: The name of the numeric column to analyze.
        period: Number of observations per seasonal cycle. At least
            ``2 * period`` valid points are required for decomposition.

    Returns:
        A tuple containing:
        - A Plotly Figure of the decomposition (or of the raw series when
          there are too few points, or an empty figure on error).
        - A Markdown string with the stationarity summary, a warning, or an
          error message.
    """
    # 1. Input validation
    if not date_col or not value_col:
        return go.Figure(), "Please select both a date/time column and a value column to begin analysis."

    # Report a missing column as such instead of as "not numeric" / a raw KeyError.
    for column in (date_col, value_col):
        if column not in df.columns:
            return _error_result(f"Column '{column}' was not found in the data.")

    if not pd.api.types.is_numeric_dtype(df[value_col]):
        return _error_result(
            f"Value column '{value_col}' is not numeric. Please select a numeric column for analysis."
        )

    if period < 2:
        return _error_result(f"Seasonal period must be at least 2, got {period}.")

    try:
        logging.info("Analyzing time-series for date='%s' and value='%s'", date_col, value_col)

        # 2. Data preparation
        ts_df = df[[date_col, value_col]].copy()
        ts_df[date_col] = pd.to_datetime(ts_df[date_col], errors='coerce')

        # Drop rows where date conversion failed or the value is missing.
        ts_df = ts_df.dropna(subset=[date_col, value_col])

        if ts_df.empty:
            msg = f"No valid data remains after parsing dates in '{date_col}' and removing missing values."
            logging.warning(msg)
            return _error_result(msg)

        ts_data = ts_df.set_index(date_col).sort_index()[value_col]

        # 3. Decomposition - needs at least two full seasonal cycles.
        min_points = 2 * period
        if len(ts_data) < min_points:
            msg = (
                "Insufficient data for reliable seasonal decomposition. "
                f"Found {len(ts_data)} points, require at least {min_points}."
            )
            logging.warning(msg)
            # Still return the raw plot if decomposition isn't possible.
            fig_raw = px.line(ts_data, title=f"<b>Raw Time-Series of '{value_col}'</b>")
            return fig_raw, f"⚠️ **Warning:** {msg}"

        result = seasonal_decompose(ts_data, model='additive', period=period)

        decomposition_data = pd.DataFrame({
            'Trend': result.trend,
            'Seasonal': result.seasonal,
            'Residual': result.resid,
            'Observed': result.observed,
        }).reset_index()

        fig_decomp = px.line(
            decomposition_data,
            x=date_col,
            y=['Observed', 'Trend', 'Seasonal', 'Residual'],
            title=f"<b>Time-Series Decomposition of '{value_col}'</b>",
            labels={'value': 'Value', date_col: 'Date'},
            facet_row='variable',
        ).update_yaxes(matches=None)  # Allow y-axes to have independent scales
        fig_decomp.update_layout(showlegend=False)

        # 4. Stationarity test (Augmented Dickey-Fuller); a ValueError from
        # the test does not discard the decomposition figure built above.
        return fig_decomp, _adf_summary(ts_data)

    except Exception as exc:  # UI boundary: report the failure instead of crashing.
        logging.exception("Time-series analysis failed: %s", exc)
        error_msg = f"An unexpected error occurred during analysis. Please check column formats. Details: {exc}"
        return go.Figure(), f"❌ **Error:** {error_msg}"
|