# BizIntel_AI / tools / visuals.py
# Hugging Face file-viewer residue (not code): mgbam's picture | Update tools/visuals.py |
# 9538f35 verified | raw | history | blame | 5.47 kB
import os
import tempfile
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from scipy.cluster.hierarchy import linkage, leaves_list
from typing import Union, Tuple, List
def _save_fig(fig: go.Figure, prefix: str, output_dir: str) -> str:
    """
    Save a Plotly figure as a high-res PNG and return the file path.

    Args:
        fig: Figure to export; only its ``write_image`` method is used.
        prefix: Leading part of the generated file name. Any character that
            is not alphanumeric, ``-``, ``_`` or ``.`` is replaced by ``_``.
        output_dir: Directory that receives the PNG; created when missing.

    Returns:
        Path of the PNG file that was written.

    Raises:
        Exception: Whatever ``fig.write_image`` raises is re-raised after the
            empty placeholder file has been removed.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Callers build the prefix from column names. A path separator inside it
    # would make tempfile target a sub-directory that does not exist.
    safe_prefix = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in prefix
    )

    # delete=False: the handle is only used to reserve a unique name; the
    # image itself is written to that path after the handle is closed.
    with tempfile.NamedTemporaryFile(
        suffix='.png', prefix=safe_prefix, dir=output_dir, delete=False
    ) as tmp:
        path = tmp.name

    try:
        fig.write_image(path, scale=3)
    except Exception:
        # Do not leave a zero-byte placeholder behind when the export fails.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
def histogram_tool(
    file_path: str,
    column: str,
    bins: int = 30,
    kde: bool = True,
    output_dir: str = '/tmp'
) -> "Union[Tuple[go.Figure, str], str]":
    """
    Create a histogram with optional KDE overlay for a given numeric column.

    Args:
        file_path: Data file; ``.xls``/``.xlsx`` is read with
            ``pd.read_excel``, any other extension with ``pd.read_csv``.
        column: Column to plot. Values are coerced to numeric and
            non-numeric entries are dropped.
        bins: Number of bins; must be at least 1.
        kde: When true, draw a distplot with a density curve. Falls back to a
            plain histogram if every value is identical, because a zero
            range gives a zero bin size and no usable density estimate.
        output_dir: Directory for the exported PNG.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string.
    """
    # The return annotation is a string so it is never evaluated at import
    # time; go.Figure is the class both plotting branches return.

    # Load
    ext = os.path.splitext(file_path)[1].lower()
    df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)

    # Validate
    if column not in df.columns:
        return f"❌ Column '{column}' not found."
    series = pd.to_numeric(df[column], errors='coerce').dropna()
    if series.empty:
        return f"❌ No numeric data in '{column}'."
    if bins < 1:
        # Guards the division below and rejects meaningless bin counts.
        return "❌ 'bins' must be at least 1."

    # Build histogram + KDE
    title = f"Histogram – {column}"
    spread = series.max() - series.min()
    if kde and spread > 0:
        fig = ff.create_distplot([series], [column], bin_size=spread / bins)
        # create_distplot takes no title argument; set it so both branches match.
        fig.update_layout(title=title)
    else:
        fig = px.histogram(series, nbins=bins, title=title, template='plotly_dark')
    fig.update_layout(template='plotly_dark')

    # Save
    img_path = _save_fig(fig, f"hist_{column}_", output_dir)
    return fig, img_path
def boxplot_tool(
    file_path: str,
    column: str,
    output_dir: str = '/tmp'
) -> "Union[Tuple[go.Figure, str], str]":
    """
    Create a box plot with outliers for a numeric column.

    Args:
        file_path: Data file; ``.xls``/``.xlsx`` is read with
            ``pd.read_excel``, any other extension with ``pd.read_csv``.
        column: Column to plot. Values are coerced to numeric and
            non-numeric entries are dropped.
        output_dir: Directory for the exported PNG.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string.
    """
    # The return annotation is a string so it is never evaluated at import
    # time; go.Figure is the class px.box returns.
    ext = os.path.splitext(file_path)[1].lower()
    if ext in ('.xls', '.xlsx'):
        df = pd.read_excel(file_path)
    else:
        df = pd.read_csv(file_path)

    if column not in df.columns:
        return f"❌ Column '{column}' not found."

    series = pd.to_numeric(df[column], errors='coerce').dropna()
    if series.empty:
        return f"❌ No numeric data in '{column}'."

    fig = px.box(series, points='outliers', title=f"Boxplot – {column}", template='plotly_dark')
    img_path = _save_fig(fig, f"box_{column}_", output_dir)
    return fig, img_path
def violin_tool(
    file_path: str,
    column: str,
    output_dir: str = '/tmp'
) -> "Union[Tuple[go.Figure, str], str]":
    """
    Create a violin plot with inner box for a numeric column.

    Args:
        file_path: Data file; ``.xls``/``.xlsx`` is read with
            ``pd.read_excel``, any other extension with ``pd.read_csv``.
        column: Column to plot. Values are coerced to numeric and
            non-numeric entries are dropped.
        output_dir: Directory for the exported PNG.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string.
    """
    # The return annotation is a string so it is never evaluated at import
    # time; go.Figure is the class px.violin returns.
    ext = os.path.splitext(file_path)[1].lower()
    if ext in ('.xls', '.xlsx'):
        df = pd.read_excel(file_path)
    else:
        df = pd.read_csv(file_path)

    if column not in df.columns:
        return f"❌ Column '{column}' not found."

    series = pd.to_numeric(df[column], errors='coerce').dropna()
    if series.empty:
        return f"❌ No numeric data in '{column}'."

    # points='all' draws every observation next to the violin.
    fig = px.violin(series, box=True, points='all', title=f"Violin – {column}", template='plotly_dark')
    img_path = _save_fig(fig, f"violin_{column}_", output_dir)
    return fig, img_path
def scatter_matrix_tool(
    file_path: str,
    columns: List[str],
    output_dir: str = '/tmp',
    size: int = 5
) -> "Union[Tuple[go.Figure, str], str]":
    """
    Create an interactive scatter matrix for selected numeric columns.

    Args:
        file_path: Data file; ``.xls``/``.xlsx`` is read with
            ``pd.read_excel``, any other extension with ``pd.read_csv``.
        columns: Columns to include. Each is coerced to numeric, and rows
            with a missing value in any selected column are dropped.
        output_dir: Directory for the exported PNG.
        size: Marker size applied to every trace.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string.
    """
    # The return annotation is a string so it is never evaluated at import
    # time; go.Figure is the class px.scatter_matrix returns.
    ext = os.path.splitext(file_path)[1].lower()
    if ext in ('.xls', '.xlsx'):
        df = pd.read_excel(file_path)
    else:
        df = pd.read_csv(file_path)

    missing = [c for c in columns if c not in df.columns]
    if missing:
        return f"❌ Missing columns: {', '.join(missing)}"

    df_num = df[columns].apply(pd.to_numeric, errors='coerce').dropna()
    if df_num.empty:
        return "❌ No valid numeric data."

    fig = px.scatter_matrix(df_num, dimensions=columns, title="Scatter Matrix", template='plotly_dark')
    # Hide the diagonal panels, which would plot each variable against itself.
    fig.update_traces(diagonal_visible=False, marker={'size': size})
    img_path = _save_fig(fig, "scatter_matrix_", output_dir)
    return fig, img_path
def corr_heatmap_tool(
    file_path: str,
    columns: Union[List[str], None] = None,
    output_dir: str = '/tmp',
    cluster: bool = True
) -> "Union[Tuple[go.Figure, str], str]":
    """
    Create a correlation heatmap, with optional hierarchical clustering of variables.

    Args:
        file_path: Data file; ``.xls``/``.xlsx`` is read with
            ``pd.read_excel``, any other extension with ``pd.read_csv``.
        columns: Columns to correlate. ``None`` selects every numeric-dtype
            column. Named columns are coerced to numeric, and columns that
            end up entirely NaN are dropped.
        output_dir: Directory for the exported PNG.
        cluster: When true, reorder rows and columns by average-linkage
            hierarchical clustering of the correlation matrix rows.

    Returns:
        ``(figure, png_path)`` on success, otherwise an error string.
    """
    # The return annotation is a string so it is never evaluated at import
    # time; go.Figure is the class px.imshow returns.
    ext = os.path.splitext(file_path)[1].lower()
    if ext in ('.xls', '.xlsx'):
        df = pd.read_excel(file_path)
    else:
        df = pd.read_csv(file_path)

    if columns is None:
        df_num = df.select_dtypes(include='number')
    else:
        # Report unknown columns as an error string, matching
        # scatter_matrix_tool, instead of raising KeyError from df[columns].
        missing = [c for c in columns if c not in df.columns]
        if missing:
            return f"❌ Missing columns: {', '.join(missing)}"
        df_num = df[columns]

    df_num = df_num.apply(pd.to_numeric, errors='coerce').dropna(axis=1, how='all')
    if df_num.shape[1] < 2:
        return "❌ Need at least two numeric columns for correlation."

    corr = df_num.corr()
    if cluster:
        # Constant columns produce NaN correlations and linkage rejects
        # non-finite input. NaNs are treated as "no correlation" for the
        # ordering only; the plotted matrix keeps them.
        link = linkage(corr.fillna(0.0).to_numpy(), method='average')
        order = leaves_list(link)
        corr = corr.iloc[order, order]

    fig = px.imshow(
        corr,
        # Pin the diverging scale to the full correlation range so its
        # midpoint always represents zero correlation.
        zmin=-1,
        zmax=1,
        color_continuous_scale='RdBu',
        title="Correlation Heatmap",
        labels=dict(color="Correlation"),
        template='plotly_dark'
    )
    img_path = _save_fig(fig, "corr_heatmap_", output_dir)
    return fig, img_path