import os import tempfile import pandas as pd import plotly.express as px from typing import Union, Tuple def _save_fig(fig, prefix: str, output_dir: str) -> str: """ Save a Plotly figure as a PNG to a temp file and return its path. """ os.makedirs(output_dir, exist_ok=True) tmp = tempfile.NamedTemporaryFile(suffix='.png', prefix=prefix, dir=output_dir, delete=False) path = tmp.name tmp.close() try: fig.write_image(path, scale=2) except Exception as e: raise return path def histogram_tool( file_path: str, column: str, output_dir: str = '/tmp', bins: int = 30 ) -> Union[Tuple[px.Figure, str], str]: """ Build a histogram for a numeric column, return a Plotly Figure and PNG path, or an error string starting with '❌'. """ # Load data ext = os.path.splitext(file_path)[1].lower() try: df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path) except Exception as exc: return f"❌ Failed to load file: {exc}" # Validate column if column not in df.columns: return f"❌ Column '{column}' not found." # Coerce to numeric df[column] = pd.to_numeric(df[column], errors='coerce') series = df[column].dropna() if series.empty: return f"❌ No valid numeric data in '{column}'." # Create figure fig = px.histogram( df, x=column, nbins=bins, title=f"Histogram – {column}", template='plotly_dark' ) # Save PNG img_path = _save_fig(fig, f"hist_{column}_", output_dir) return fig, img_path def scatter_matrix_tool( file_path: str, cols: list[str], output_dir: str = '/tmp' ) -> Union[Tuple[px.Figure, str], str]: """ Build a scatter-matrix for selected numeric columns, return figure and PNG path, or an error string starting with '❌'. """ # Load data ext = os.path.splitext(file_path)[1].lower() try: df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path) except Exception as exc: return f"❌ Failed to load file: {exc}" # Validate columns missing = [c for c in cols if c not in df.columns] if missing: return f"❌ Missing columns: {', '.join(missing)}" # Filter numeric df_num = df[cols].apply(pd.to_numeric, errors='coerce').dropna() if df_num.empty: return f"❌ No valid numeric data in selected columns." # Create figure fig = px.scatter_matrix( df_num, dimensions=cols, title="Scatter-Matrix", template='plotly_dark' ) # Save PNG img_path = _save_fig(fig, "scatter_matrix_", output_dir) return fig, img_path def corr_heatmap_tool( file_path: str, output_dir: str = '/tmp', color_continuous_scale: str = 'RdBu' ) -> Union[Tuple[px.Figure, str], str]: """ Build a correlation heatmap for numeric columns, return figure and PNG path, or an error string starting with '❌'. """ # Load data ext = os.path.splitext(file_path)[1].lower() try: df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path) except Exception as exc: return f"❌ Failed to load file: {exc}" # Compute correlation df_num = df.select_dtypes(include='number').apply(pd.to_numeric, errors='coerce') if df_num.empty: return "❌ No numeric columns available for correlation." corr = df_num.corr() # Create figure fig = px.imshow( corr, color_continuous_scale=color_continuous_scale, title="Correlation Heatmap", labels=dict(color="Correlation"), template='plotly_dark' ) # Save PNG img_path = _save_fig(fig, "corr_heatmap_", output_dir) return fig, img_path