mgbam committed on
Commit
d426068
·
verified ·
1 Parent(s): d9ea3f9

Create modules/timeseries.py

Browse files
Files changed (1) hide show
  1. modules/timeseries.py +106 -0
modules/timeseries.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/timeseries.py
2
+
3
+ # -*- coding: utf-8 -*-
4
+ #
5
+ # PROJECT: CognitiveEDA v5.0 - The QuantumLeap Intelligence Platform
6
+ #
7
+ # DESCRIPTION: Specialized module for time-series analysis, including decomposition
8
+ # and stationarity testing (Augmented Dickey-Fuller).
9
+
10
+ import logging
11
+ from typing import Tuple
12
+
13
+ import pandas as pd
14
+ import plotly.express as px
15
+ import plotly.graph_objects as go
16
+ from statsmodels.tsa.seasonal import seasonal_decompose
17
+ from statsmodels.tsa.stattools import adfuller
18
+
19
def analyze_time_series(
    df: pd.DataFrame,
    date_col: str,
    value_col: str,
    period: int = 12,
) -> Tuple[go.Figure, str]:
    """
    Decompose a time series and test it for stationarity.

    The selected columns are copied, the date column is parsed with
    ``pd.to_datetime(..., errors='coerce')``, rows with an unparseable date or
    a missing value are dropped, and the remaining values are sorted by date.
    An additive seasonal decomposition with the given ``period`` is plotted,
    and an Augmented Dickey-Fuller (ADF) test is run on the same values.

    The function never raises for bad user input; every failure is reported
    through the returned Markdown string so a UI can display it directly.

    Args:
        df: The input DataFrame.
        date_col: The name of the column containing datetime information.
        value_col: The name of the numeric column to analyze.
        period: Number of observations per seasonal cycle. Defaults to 12,
            which corresponds to yearly seasonality in monthly data. At least
            ``2 * period`` valid observations are required for decomposition.

    Returns:
        A tuple containing:
        - A Plotly Figure: the decomposition (observed, trend, seasonal,
          residual), the raw series when there are too few observations, or
          an empty/title-only figure when validation fails.
        - A Markdown string: the ADF summary, or a warning/error message.
    """
    # 1. Input validation -- each failure gets its own message so the user
    #    can tell a missing column apart from a non-numeric one.
    if not date_col or not value_col:
        return go.Figure(), "Please select both a date/time column and a value column to begin analysis."

    if df is None:
        return _timeseries_error("No dataset is loaded. Please load data before running time-series analysis.")

    for column in (date_col, value_col):
        if column not in df.columns:
            return _timeseries_error(f"Column '{column}' was not found in the dataset.")

    if date_col == value_col:
        return _timeseries_error("The date/time column and the value column must be different columns.")

    if not pd.api.types.is_numeric_dtype(df[value_col]):
        return _timeseries_error(
            f"Value column '{value_col}' is not numeric. Please select a numeric column for analysis."
        )

    if period < 2:
        return _timeseries_error(f"Seasonal period must be at least 2, got {period}.")

    try:
        logging.info("Analyzing time-series for date='%s' and value='%s'", date_col, value_col)

        # 2. Data preparation: unparseable dates become NaT and are dropped
        #    together with rows whose value is missing.
        ts_df = df[[date_col, value_col]].copy()
        ts_df[date_col] = pd.to_datetime(ts_df[date_col], errors='coerce')
        ts_df = ts_df.dropna(subset=[date_col, value_col])

        if ts_df.empty:
            msg = f"No valid data remains after parsing dates in '{date_col}' and removing missing values."
            logging.warning(msg)
            return _timeseries_error(msg)

        ts_data = ts_df.set_index(date_col).sort_index()[value_col]

        # 3. Decomposition needs at least two full seasonal cycles.
        min_points = 2 * period
        if len(ts_data) < min_points:
            msg = (
                f"Insufficient data for reliable seasonal decomposition. "
                f"Found {len(ts_data)} points, require at least {min_points}."
            )
            logging.warning(msg)
            # Still return the raw plot if decomposition isn't possible
            fig_raw = px.line(ts_data, title=f"<b>Raw Time-Series of '{value_col}'</b>")
            return fig_raw, f"⚠️ **Warning:** {msg}"

        result = seasonal_decompose(ts_data, model='additive', period=period)

        # reset_index() turns the datetime index back into a `date_col`
        # column so it can be used as the x-axis below.
        decomposition_data = pd.DataFrame({
            'Trend': result.trend,
            'Seasonal': result.seasonal,
            'Residual': result.resid,
            'Observed': result.observed,
        }).reset_index()

        fig_decomp = px.line(
            decomposition_data, x=date_col, y=['Observed', 'Trend', 'Seasonal', 'Residual'],
            title=f"<b>Time-Series Decomposition of '{value_col}'</b>",
            labels={'value': 'Value', date_col: 'Date'},
            facet_row='variable',
        ).update_yaxes(matches=None)  # Allow y-axes to have independent scales
        fig_decomp.update_layout(showlegend=False)

        # 4. Stationarity test (Augmented Dickey-Fuller). A failure here
        #    (e.g. a constant series) must not discard the decomposition
        #    figure, so it is handled separately from the outer boundary.
        try:
            adf_stat, p_value = adfuller(ts_data)[:2]
        except Exception as adf_err:
            logging.warning("Augmented Dickey-Fuller test failed: %s", adf_err, exc_info=True)
            return fig_decomp, (
                "⚠️ **Warning:** The Augmented Dickey-Fuller test could not be computed "
                f"for this series. Details: {adf_err}"
            )

        if p_value < 0.05:
            conclusion = 'likely **stationary** (p < 0.05)'
        else:
            conclusion = 'likely **non-stationary** (p >= 0.05)'

        # Built line by line without leading indentation: Markdown renders
        # lines indented by four or more spaces as a code block.
        adf_md = "\n".join([
            "",
            "### Stationarity Analysis (Augmented Dickey-Fuller Test)",
            f"- **ADF Statistic:** `{adf_stat:.4f}`",
            f"- **p-value:** `{p_value:.4f}`",
            f"- **Conclusion:** The time-series is {conclusion}. Non-stationary series often "
            "require differencing before being used in forecasting models like ARIMA.",
            "",
        ])
        return fig_decomp, adf_md

    except Exception as e:
        # Top-level boundary: report the failure to the caller instead of raising.
        logging.error("Time-series analysis failed: %s", e, exc_info=True)
        error_msg = f"An unexpected error occurred during analysis. Please check column formats. Details: {e}"
        return go.Figure(), f"❌ **Error:** {error_msg}"


def _timeseries_error(msg: str) -> Tuple[go.Figure, str]:
    """Return a title-only figure and a Markdown error line for ``msg``."""
    return go.Figure().update_layout(title=msg), f"❌ **Error:** {msg}"