Spaces:

mgbam
/

CognitiveEDA

Sleeping

App Files Files Community

CognitiveEDA / modules /timeseries.py

mgbam

Create modules/timeseries.py

d426068 verified 10 days ago

raw

history blame

4.7 kB

	# modules/timeseries.py

	# -*- coding: utf-8 -*-
	#
	# PROJECT: CognitiveEDA v5.0 - The QuantumLeap Intelligence Platform
	#
	# DESCRIPTION: Specialized module for time-series analysis, including decomposition
	# and stationarity testing (Augmented Dickey-Fuller).

	import logging
	from typing import Tuple

	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from statsmodels.tsa.seasonal import seasonal_decompose
	from statsmodels.tsa.stattools import adfuller

	def analyze_time_series(
	    df: pd.DataFrame,
	    date_col: str,
	    value_col: str,
	    period: int = 12,
	) -> Tuple[go.Figure, str]:
	    """Decompose a time series and test it for stationarity.

	    The selected columns are copied out of ``df``, the date column is parsed
	    with ``pd.to_datetime`` (unparseable values become missing), rows with a
	    missing date or value are dropped, and the remaining values are sorted by
	    date. The series is then split into trend, seasonal and residual
	    components with an additive ``seasonal_decompose`` and checked with an
	    Augmented Dickey-Fuller test.

	    This function does not raise for bad input: every failure is reported
	    through the returned figure/Markdown pair so it can be shown directly in
	    a UI.

	    Args:
	        df: The input DataFrame.
	        date_col: The name of the column containing datetime information.
	        value_col: The name of the numeric column to analyze.
	        period: Number of observations in one seasonal cycle. Defaults to 12,
	            which corresponds to yearly seasonality in monthly data. At least
	            ``2 * period`` valid observations are required for decomposition.

	    Returns:
	        A tuple containing:
	        - A Plotly Figure of the decomposition (or of the raw series when
	          there are too few observations; an empty/titled figure on error).
	        - A Markdown string summarizing the stationarity test results, or a
	          warning/error message.
	    """
	    # 1. Input Validation
	    if not date_col or not value_col:
	        return go.Figure(), "Please select both a date/time column and a value column to begin analysis."

	    # Report a missing column as "missing" rather than as a dtype problem or
	    # a raw KeyError from the generic handler further down.
	    for label, col in (("Date", date_col), ("Value", value_col)):
	        if col not in df.columns:
	            msg = f"{label} column '{col}' was not found in the data."
	            return go.Figure().update_layout(title=msg), f"❌ Error: {msg}"

	    # Selecting the same column twice would yield duplicate column labels and
	    # fail later with an unrelated datetime-assembly error.
	    if date_col == value_col:
	        msg = "The date/time column and the value column must be different."
	        return go.Figure().update_layout(title=msg), f"❌ Error: {msg}"

	    if not pd.api.types.is_numeric_dtype(df[value_col]):
	        msg = f"Value column '{value_col}' is not numeric. Please select a numeric column for analysis."
	        return go.Figure().update_layout(title=msg), f"❌ Error: {msg}"

	    try:
	        logging.info("Analyzing time-series for date='%s' and value='%s'", date_col, value_col)

	        if period < 2:
	            msg = f"Seasonal period must be at least 2, got {period}."
	            logging.warning(msg)
	            return go.Figure().update_layout(title=msg), f"❌ Error: {msg}"

	        # 2. Data Preparation with robust error handling
	        ts_df = df[[date_col, value_col]].copy()
	        ts_df[date_col] = pd.to_datetime(ts_df[date_col], errors='coerce')

	        # Drop rows where date conversion failed or value is missing
	        ts_df = ts_df.dropna(subset=[date_col, value_col])

	        if ts_df.empty:
	            msg = f"No valid data remains after parsing dates in '{date_col}' and removing missing values."
	            logging.warning(msg)
	            return go.Figure().update_layout(title=msg), f"❌ Error: {msg}"

	        ts_data = ts_df.set_index(date_col).sort_index()[value_col]

	        # 3. Decomposition
	        # Require at least 2 full seasonal cycles for decomposition.
	        min_points = 2 * period
	        if len(ts_data) < min_points:
	            msg = f"Insufficient data for reliable seasonal decomposition. Found {len(ts_data)} points, require at least {min_points}."
	            logging.warning(msg)
	            # Still return the raw plot if decomposition isn't possible
	            fig_raw = px.line(ts_data, title=f"<b>Raw Time-Series of '{value_col}'</b>")
	            return fig_raw, f"⚠️ Warning: {msg}"

	        result = seasonal_decompose(ts_data, model='additive', period=period)

	        # reset_index() turns the datetime index back into a `date_col` column
	        # so it can be used as the x-axis below.
	        decomposition_data = pd.DataFrame({
	            'Trend': result.trend,
	            'Seasonal': result.seasonal,
	            'Residual': result.resid,
	            'Observed': result.observed,
	        }).reset_index()

	        fig_decomp = px.line(
	            decomposition_data,
	            x=date_col,
	            y=['Observed', 'Trend', 'Seasonal', 'Residual'],
	            title=f"<b>Time-Series Decomposition of '{value_col}'</b>",
	            labels={'value': 'Value', date_col: 'Date'},
	            facet_row='variable',
	        ).update_yaxes(matches=None)  # Allow y-axes to have independent scales
	        fig_decomp.update_layout(showlegend=False)

	        # 4. Stationarity Test (Augmented Dickey-Fuller)
	        adf_result = adfuller(ts_data.dropna())
	        p_value = adf_result[1]
	        conclusion = 'likely stationary (p < 0.05)' if p_value < 0.05 else 'likely non-stationary (p >= 0.05)'

	        adf_md = f"""
	### Stationarity Analysis (Augmented Dickey-Fuller Test)
	- ADF Statistic: `{adf_result[0]:.4f}`
	- p-value: `{p_value:.4f}`
	- Conclusion: The time-series is {conclusion}. Non-stationary series often require differencing before being used in forecasting models like ARIMA.
	"""
	        return fig_decomp, adf_md

	    except Exception as e:
	        # UI boundary: log the traceback and hand the caller a displayable
	        # message instead of propagating the exception.
	        logging.exception("Time-series analysis failed: %s", e)
	        error_msg = f"An unexpected error occurred during analysis. Please check column formats. Details: {e}"
	        return go.Figure(), f"❌ Error: {error_msg}"