Spaces:
Sleeping
Sleeping
import os
import tempfile
from typing import Union, Tuple

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
def _save_fig(fig, prefix: str, output_dir: str) -> str:
    """
    Save a Plotly figure as a PNG to a temp file and return its path.

    Args:
        fig: Object exposing ``write_image(path, scale=...)``.
        prefix: Leading part of the generated file name. Any character that
            is not alphanumeric, ``-``, ``_`` or ``.`` is replaced by ``_``
            so that a prefix built from arbitrary text (e.g. a column name
            containing ``/``) cannot point outside ``output_dir``.
        output_dir: Directory that receives the PNG; created when missing.

    Returns:
        Path of the written PNG file.

    Raises:
        Exception: Whatever ``fig.write_image`` raises is propagated
            unchanged, after the empty placeholder file has been removed.
    """
    os.makedirs(output_dir, exist_ok=True)

    safe_prefix = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in prefix
    )

    # Reserve a unique file name; the handle is closed immediately so that
    # write_image can open the path itself.
    with tempfile.NamedTemporaryFile(
        suffix='.png', prefix=safe_prefix, dir=output_dir, delete=False
    ) as tmp:
        path = tmp.name

    try:
        fig.write_image(path, scale=2)
    except Exception:
        # Do not leave a zero-byte placeholder behind when export fails.
        try:
            os.remove(path)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise
    return path
def histogram_tool(
    file_path: str,
    column: str,
    output_dir: str = '/tmp',
    bins: int = 30
) -> Union[Tuple["go.Figure", str], str]:
    """
    Build a histogram for a numeric column, return a Plotly Figure and PNG path,
    or an error string starting with 'β'.

    Args:
        file_path: Table to read; ``.xls``/``.xlsx`` go through
            ``pd.read_excel``, everything else through ``pd.read_csv``.
        column: Name of the column to plot. Values are coerced with
            ``pd.to_numeric(errors='coerce')``, so non-numeric cells become NaN.
        output_dir: Directory handed to ``_save_fig`` for the PNG.
        bins: Passed to ``px.histogram`` as ``nbins``.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string for a
        load failure, an unknown column, or a column with no numeric values.

    Raises:
        Exception: Errors from figure creation or PNG export are not
            converted to error strings; they propagate to the caller.
    """
    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"β Failed to load file: {exc}"

    # Validate column
    if column not in df.columns:
        return f"β Column '{column}' not found."

    # Coerce to numeric
    df[column] = pd.to_numeric(df[column], errors='coerce')
    if df[column].dropna().empty:
        return f"β No valid numeric data in '{column}'."

    # Create figure
    fig = px.histogram(
        df,
        x=column,
        nbins=bins,
        title=f"Histogram β {column}",
        template='plotly_dark'
    )

    # Save PNG
    img_path = _save_fig(fig, f"hist_{column}_", output_dir)
    return fig, img_path
def scatter_matrix_tool(
    file_path: str,
    cols: list[str],
    output_dir: str = '/tmp'
) -> Union[Tuple["go.Figure", str], str]:
    """
    Build a scatter-matrix for selected numeric columns, return figure and PNG path,
    or an error string starting with 'β'.

    Args:
        file_path: Table to read; ``.xls``/``.xlsx`` go through
            ``pd.read_excel``, everything else through ``pd.read_csv``.
        cols: Column names to include. Repeated names are used once, in
            first-seen order.
        output_dir: Directory handed to ``_save_fig`` for the PNG.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string for a
        load failure, unknown columns, or when no row is fully numeric
        across the selected columns.

    Raises:
        Exception: Errors from figure creation or PNG export are not
            converted to error strings; they propagate to the caller.
    """
    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"β Failed to load file: {exc}"

    # Normalise to a de-duplicated list: a repeated name would yield duplicate
    # DataFrame columns, and a tuple would be treated as a single key by df[...].
    cols = list(dict.fromkeys(cols))

    # Validate columns
    missing = [c for c in cols if c not in df.columns]
    if missing:
        return f"β Missing columns: {', '.join(missing)}"

    # Filter numeric: coerce each column, then keep only rows that are
    # numeric in every selected column.
    df_num = df[cols].apply(pd.to_numeric, errors='coerce').dropna()
    if df_num.empty:
        return "β No valid numeric data in selected columns."

    # Create figure
    fig = px.scatter_matrix(
        df_num,
        dimensions=cols,
        title="Scatter-Matrix",
        template='plotly_dark'
    )

    # Save PNG
    img_path = _save_fig(fig, "scatter_matrix_", output_dir)
    return fig, img_path
def corr_heatmap_tool(
    file_path: str,
    output_dir: str = '/tmp',
    color_continuous_scale: str = 'RdBu'
) -> Union[Tuple["go.Figure", str], str]:
    """
    Build a correlation heatmap for numeric columns, return figure and PNG path,
    or an error string starting with 'β'.

    Args:
        file_path: Table to read; ``.xls``/``.xlsx`` go through
            ``pd.read_excel``, everything else through ``pd.read_csv``.
        output_dir: Directory handed to ``_save_fig`` for the PNG.
        color_continuous_scale: Colour scale name forwarded to ``px.imshow``.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string for a
        load failure or when the numeric selection is empty.

    Raises:
        Exception: Errors from figure creation or PNG export are not
            converted to error strings; they propagate to the caller.
    """
    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"β Failed to load file: {exc}"

    # Compute correlation. select_dtypes already restricts the frame to
    # numeric dtypes, so no further coercion is needed.
    df_num = df.select_dtypes(include='number')
    if df_num.empty:
        return "β No numeric columns available for correlation."
    corr = df_num.corr()

    # Create figure
    fig = px.imshow(
        corr,
        color_continuous_scale=color_continuous_scale,
        title="Correlation Heatmap",
        labels=dict(color="Correlation"),
        template='plotly_dark'
    )

    # Save PNG
    img_path = _save_fig(fig, "corr_heatmap_", output_dir)
    return fig, img_path