Spaces:
Sleeping
Sleeping
Update tools/visuals.py
Browse files- tools/visuals.py +124 -17
tools/visuals.py
CHANGED
@@ -1,25 +1,132 @@
|
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import plotly.express as px
|
|
|
3 |
|
4 |
-
def
|
5 |
-
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
def scatter_matrix_tool(file_path: str, cols: list[str]):
|
10 |
-
df = pd.read_csv(file_path)
|
11 |
-
fig = px.scatter_matrix(df[cols], title="Scatter‑Matrix", template="plotly_dark")
|
12 |
-
return fig
|
13 |
|
14 |
-
def
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
fig = px.imshow(
|
18 |
corr,
|
19 |
-
color_continuous_scale=
|
20 |
-
title="Correlation
|
21 |
-
|
22 |
-
|
23 |
-
template="plotly_dark",
|
24 |
)
|
25 |
-
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import tempfile
|
3 |
import pandas as pd
|
4 |
import plotly.express as px
|
5 |
+
from typing import Union, Tuple
|
6 |
|
7 |
+
def _save_fig(fig, prefix: str, output_dir: str) -> str:
|
8 |
+
"""
|
9 |
+
Save a Plotly figure as a PNG to a temp file and return its path.
|
10 |
+
"""
|
11 |
+
os.makedirs(output_dir, exist_ok=True)
|
12 |
+
tmp = tempfile.NamedTemporaryFile(suffix='.png', prefix=prefix, dir=output_dir, delete=False)
|
13 |
+
path = tmp.name
|
14 |
+
tmp.close()
|
15 |
+
try:
|
16 |
+
fig.write_image(path, scale=2)
|
17 |
+
except Exception as e:
|
18 |
+
raise
|
19 |
+
return path
|
20 |
|
|
|
|
|
|
|
|
|
21 |
|
22 |
+
def histogram_tool(
    file_path: str,
    column: str,
    output_dir: str = '/tmp',
    bins: int = 30
) -> Union[Tuple["plotly.graph_objects.Figure", str], str]:
    """
    Build a histogram for one column of a tabular file.

    The file is read with ``pd.read_excel`` when its extension is ``.xls`` or
    ``.xlsx`` and with ``pd.read_csv`` otherwise. The column is coerced to
    numeric (unparseable values become NaN) before plotting.

    Args:
        file_path: Path of the CSV or Excel file to load.
        column: Name of the column to plot.
        output_dir: Directory handed to ``_save_fig`` for the PNG export.
        bins: Passed to ``px.histogram`` as ``nbins``.

    Returns:
        ``(figure, png_path)`` on success, or an error string starting with
        '❌' when the file cannot be loaded, the column is missing, or the
        column holds no numeric values.
    """
    # NOTE: the return annotation is a string on purpose. plotly.express has
    # no ``Figure`` attribute, so ``px.Figure`` would raise AttributeError as
    # soon as this ``def`` is executed.

    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"❌ Failed to load file: {exc}"

    # Validate column
    if column not in df.columns:
        return f"❌ Column '{column}' not found."

    # Coerce to numeric
    df[column] = pd.to_numeric(df[column], errors='coerce')
    if df[column].dropna().empty:
        return f"❌ No valid numeric data in '{column}'."

    # Create figure
    fig = px.histogram(
        df,
        x=column,
        nbins=bins,
        title=f"Histogram – {column}",
        template='plotly_dark'
    )

    # Save PNG. The column name becomes part of a file name, so characters
    # such as '/' must not reach the temp-file prefix.
    safe_name = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in str(column)
    )
    img_path = _save_fig(fig, f"hist_{safe_name}_", output_dir)
    return fig, img_path
|
60 |
+
|
61 |
+
|
62 |
+
def scatter_matrix_tool(
    file_path: str,
    cols: list[str],
    output_dir: str = '/tmp'
) -> Union[Tuple["plotly.graph_objects.Figure", str], str]:
    """
    Build a scatter-matrix for the selected columns of a tabular file.

    The file is read with ``pd.read_excel`` when its extension is ``.xls`` or
    ``.xlsx`` and with ``pd.read_csv`` otherwise. Selected columns are coerced
    to numeric and every row containing a NaN in any of them is dropped.

    Args:
        file_path: Path of the CSV or Excel file to load.
        cols: Column names to plot. A single name given as a bare string is
            treated as a one-element list.
        output_dir: Directory handed to ``_save_fig`` for the PNG export.

    Returns:
        ``(figure, png_path)`` on success, or an error string starting with
        '❌' when the file cannot be loaded, columns are missing, or no fully
        numeric rows remain.
    """
    # NOTE: the return annotation is a string on purpose. plotly.express has
    # no ``Figure`` attribute, so ``px.Figure`` would raise AttributeError as
    # soon as this ``def`` is executed.

    # A bare string would otherwise be iterated character by character.
    if isinstance(cols, str):
        cols = [cols]
    else:
        cols = list(cols)

    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"❌ Failed to load file: {exc}"

    # Validate columns
    missing = [c for c in cols if c not in df.columns]
    if missing:
        # str() keeps the join safe for non-string labels.
        return f"❌ Missing columns: {', '.join(map(str, missing))}"

    # Filter numeric
    df_num = df[cols].apply(pd.to_numeric, errors='coerce').dropna()
    if df_num.empty:
        return "❌ No valid numeric data in selected columns."

    # Create figure
    fig = px.scatter_matrix(
        df_num,
        dimensions=cols,
        title="Scatter-Matrix",
        template='plotly_dark'
    )

    # Save PNG
    img_path = _save_fig(fig, "scatter_matrix_", output_dir)
    return fig, img_path
|
98 |
+
|
99 |
+
|
100 |
+
def corr_heatmap_tool(
    file_path: str,
    output_dir: str = '/tmp',
    color_continuous_scale: str = 'RdBu'
) -> Union[Tuple["plotly.graph_objects.Figure", str], str]:
    """
    Build a correlation heatmap over the numeric columns of a tabular file.

    The file is read with ``pd.read_excel`` when its extension is ``.xls`` or
    ``.xlsx`` and with ``pd.read_csv`` otherwise. Only columns selected by
    ``select_dtypes(include='number')`` take part in ``DataFrame.corr()``.

    Args:
        file_path: Path of the CSV or Excel file to load.
        output_dir: Directory handed to ``_save_fig`` for the PNG export.
        color_continuous_scale: Colour scale name forwarded to ``px.imshow``.

    Returns:
        ``(figure, png_path)`` on success, or an error string starting with
        '❌' when the file cannot be loaded or has no numeric data.
    """
    # NOTE: the return annotation is a string on purpose. plotly.express has
    # no ``Figure`` attribute, so ``px.Figure`` would raise AttributeError as
    # soon as this ``def`` is executed.

    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"❌ Failed to load file: {exc}"

    # Compute correlation (the selected columns are already numeric, so no
    # further coercion is needed).
    df_num = df.select_dtypes(include='number')
    if df_num.empty:
        return "❌ No numeric columns available for correlation."
    corr = df_num.corr()

    # Create figure. Correlations live in [-1, 1]; pinning the colour range
    # keeps 0 at the midpoint of the scale instead of wherever the data's
    # min/max happen to fall.
    fig = px.imshow(
        corr,
        color_continuous_scale=color_continuous_scale,
        zmin=-1,
        zmax=1,
        title="Correlation Heatmap",
        labels=dict(color="Correlation"),
        template='plotly_dark'
    )

    # Save PNG
    img_path = _save_fig(fig, "corr_heatmap_", output_dir)
    return fig, img_path
|