Spaces:
Sleeping
Sleeping
File size: 3,862 Bytes
dc5ae18 bf400de dc5ae18 bf400de dc5ae18 bf400de dc5ae18 bf400de dc5ae18 bf400de dc5ae18 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import os
import tempfile
from typing import Union, Tuple

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
def _save_fig(fig, prefix: str, output_dir: str) -> str:
    """
    Save a figure as a PNG in ``output_dir`` and return the file's path.

    Args:
        fig: Object exposing ``write_image(path, scale=...)``; the callers in
            this module pass Plotly figures.
        prefix: Leading part of the generated file name.
        output_dir: Directory that receives the PNG; created if missing.

    Returns:
        Path of the written PNG file.

    Raises:
        Any exception raised by ``fig.write_image`` is propagated unchanged.
        The placeholder file is removed first so failed exports do not
        accumulate empty PNGs in ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)
    # Reserve a unique file name. delete=False keeps the (empty) file on disk
    # after the handle is closed so write_image can overwrite it by path.
    with tempfile.NamedTemporaryFile(
        suffix='.png', prefix=prefix, dir=output_dir, delete=False
    ) as tmp:
        path = tmp.name
    try:
        fig.write_image(path, scale=2)
    except BaseException:
        # Export failed: drop the zero-byte placeholder, then re-raise.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
def histogram_tool(
    file_path: str,
    column: str,
    output_dir: str = '/tmp',
    bins: int = 30
) -> "Union[Tuple[go.Figure, str], str]":
    """
    Build a histogram of one column and export it as a PNG.

    The file is read with ``pd.read_excel`` when its extension is ``.xls`` or
    ``.xlsx`` and with ``pd.read_csv`` otherwise. The column is coerced to
    numeric; values that cannot be converted become NaN.

    Args:
        file_path: Path of the CSV or Excel file to load.
        column: Name of the column to plot.
        output_dir: Directory that receives the PNG.
        bins: Passed to ``px.histogram`` as ``nbins``.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string starting
        with 'β' (load failure, unknown column, or no numeric values).
        Errors raised while building or saving the figure are not caught.
    """
    # NOTE(review): the 'β' marker looks like a mis-encoded symbol; it is kept
    # byte-for-byte because callers may match on it. Confirm the intended
    # character.
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"β Failed to load file: {exc}"

    if column not in df.columns:
        return f"β Column '{column}' not found."

    # Non-numeric entries become NaN so they can be detected.
    df[column] = pd.to_numeric(df[column], errors='coerce')
    if df[column].dropna().empty:
        return f"β No valid numeric data in '{column}'."

    fig = px.histogram(
        df,
        x=column,
        nbins=bins,
        title=f"Histogram β {column}",
        template='plotly_dark'
    )

    # The column name goes into the temp-file prefix. A path separator there
    # (e.g. "price/unit") would point at a non-existent sub-directory and make
    # file creation fail, so keep only filename-safe characters.
    safe_name = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in str(column)
    )
    img_path = _save_fig(fig, f"hist_{safe_name}_", output_dir)
    return fig, img_path
def scatter_matrix_tool(
    file_path: str,
    cols: list[str],
    output_dir: str = '/tmp'
) -> "Union[Tuple[go.Figure, str], str]":
    """
    Build a scatter-matrix of the selected columns and export it as a PNG.

    The file is read with ``pd.read_excel`` when its extension is ``.xls`` or
    ``.xlsx`` and with ``pd.read_csv`` otherwise. Selected columns are coerced
    to numeric, and every row with a NaN in any selected column is dropped
    before plotting.

    Args:
        file_path: Path of the CSV or Excel file to load.
        cols: Names of the columns to include; any iterable of labels works.
        output_dir: Directory that receives the PNG.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string starting
        with 'β' (load failure, missing columns, or no fully numeric rows).
        Errors raised while building or saving the figure are not caught.
    """
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"β Failed to load file: {exc}"

    # Normalise to a list: pandas treats a tuple passed to df[...] as a single
    # key rather than as a selection of several columns.
    columns = list(cols)

    missing = [c for c in columns if c not in df.columns]
    if missing:
        # str() keeps the join safe when a label is not a string.
        return f"β Missing columns: {', '.join(str(c) for c in missing)}"

    # Coerce every selected column; rows with any non-numeric cell are dropped.
    df_num = df[columns].apply(pd.to_numeric, errors='coerce').dropna()
    if df_num.empty:
        return "β No valid numeric data in selected columns."

    fig = px.scatter_matrix(
        df_num,
        dimensions=columns,
        title="Scatter-Matrix",
        template='plotly_dark'
    )

    img_path = _save_fig(fig, "scatter_matrix_", output_dir)
    return fig, img_path
def corr_heatmap_tool(
    file_path: str,
    output_dir: str = '/tmp',
    color_continuous_scale: str = 'RdBu'
) -> "Union[Tuple[go.Figure, str], str]":
    """
    Build a correlation heatmap of the numeric columns and export it as a PNG.

    The file is read with ``pd.read_excel`` when its extension is ``.xls`` or
    ``.xlsx`` and with ``pd.read_csv`` otherwise. Only columns that pandas
    already loaded with a numeric dtype are used; text columns are not coerced.

    Args:
        file_path: Path of the CSV or Excel file to load.
        output_dir: Directory that receives the PNG.
        color_continuous_scale: Colour scale forwarded to ``px.imshow``.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string starting
        with 'β' (load failure, or no numeric data to correlate).
        Errors raised while building or saving the figure are not caught.
    """
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"β Failed to load file: {exc}"

    numeric = df.select_dtypes(include='number')
    # .empty is True when there are no numeric columns or no rows at all.
    if numeric.empty:
        return "β No numeric columns available for correlation."
    numeric = numeric.apply(pd.to_numeric, errors='coerce')
    corr = numeric.corr()

    fig = px.imshow(
        corr,
        color_continuous_scale=color_continuous_scale,
        title="Correlation Heatmap",
        labels=dict(color="Correlation"),
        template='plotly_dark'
    )

    img_path = _save_fig(fig, "corr_heatmap_", output_dir)
    return fig, img_path
|