File size: 3,862 Bytes
dc5ae18
 
bf400de
 
dc5ae18
bf400de
dc5ae18
 
 
 
 
 
 
 
 
 
 
 
 
bf400de
 
dc5ae18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf400de
 
dc5ae18
 
 
 
bf400de
dc5ae18
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import tempfile
from typing import Union, Tuple

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

def _save_fig(fig, prefix: str, output_dir: str) -> str:
    """
    Save a figure as a PNG inside ``output_dir`` and return the file path.

    Args:
        fig: Object exposing ``write_image(path, scale=...)``; the callers in
            this module pass Plotly figures.
        prefix: Leading part of the generated file name. Path separators are
            replaced with ``_`` so the file is always created directly inside
            ``output_dir``.
        output_dir: Target directory; created if it does not exist.

    Returns:
        Path of the PNG file that was written.

    Raises:
        Exception: Whatever ``fig.write_image`` raises is propagated unchanged,
            after the placeholder file has been removed.
    """
    os.makedirs(output_dir, exist_ok=True)

    # A separator in the prefix (e.g. from a column named "a/b") would make
    # tempfile try to create the file in a sub-directory that does not exist.
    safe_prefix = prefix.replace(os.sep, "_")
    if os.altsep:
        safe_prefix = safe_prefix.replace(os.altsep, "_")

    # Reserve a unique file name; the handle is closed right away so that
    # write_image can open the path itself.
    with tempfile.NamedTemporaryFile(
        suffix='.png', prefix=safe_prefix, dir=output_dir, delete=False
    ) as tmp:
        path = tmp.name

    try:
        fig.write_image(path, scale=2)
    except BaseException:
        # Do not leave an empty placeholder PNG behind when the export fails.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path


def histogram_tool(
    file_path: str,
    column: str,
    output_dir: str = '/tmp',
    bins: int = 30
) -> Union[Tuple["go.Figure", str], str]:
    """
    Build a histogram for one numeric column of a CSV/Excel file.

    Args:
        file_path: Path to the data file. ``.xls``/``.xlsx`` files are read
            with ``pandas.read_excel``; anything else with ``pandas.read_csv``.
        column: Name of the column to plot. Values are coerced to numeric and
            non-convertible entries become NaN.
        output_dir: Directory in which the PNG is written.
        bins: Value passed to Plotly Express as ``nbins``.

    Returns:
        ``(figure, png_path)`` on success, or an error string starting with
        '❌' when the file cannot be loaded, the column is missing, or the
        column holds no numeric values.

    Raises:
        Exception: Errors raised while building or exporting the figure are
            not converted to an error string; they propagate to the caller.
    """
    # NOTE: the figure type lives in plotly.graph_objects, not plotly.express;
    # the return annotation is quoted so it is not evaluated at definition time.

    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"❌ Failed to load file: {exc}"

    # Validate column
    if column not in df.columns:
        return f"❌ Column '{column}' not found."

    # Coerce to numeric; unparseable cells turn into NaN
    df[column] = pd.to_numeric(df[column], errors='coerce')
    if df[column].dropna().empty:
        return f"❌ No valid numeric data in '{column}'."

    # Create figure
    fig = px.histogram(
        df,
        x=column,
        nbins=bins,
        title=f"Histogram – {column}",
        template='plotly_dark'
    )
    # Save PNG
    img_path = _save_fig(fig, f"hist_{column}_", output_dir)
    return fig, img_path


def scatter_matrix_tool(
    file_path: str,
    cols: list[str],
    output_dir: str = '/tmp'
) -> Union[Tuple["go.Figure", str], str]:
    """
    Build a scatter-matrix for selected columns of a CSV/Excel file.

    Args:
        file_path: Path to the data file. ``.xls``/``.xlsx`` files are read
            with ``pandas.read_excel``; anything else with ``pandas.read_csv``.
        cols: Columns to include. Each is coerced to numeric, and rows with a
            NaN in any selected column are dropped before plotting.
        output_dir: Directory in which the PNG is written.

    Returns:
        ``(figure, png_path)`` on success, or an error string starting with
        '❌' when the file cannot be loaded, a column is missing, or no row
        has numeric values in all selected columns.

    Raises:
        Exception: Errors raised while building or exporting the figure are
            not converted to an error string; they propagate to the caller.
    """
    # NOTE: the figure type lives in plotly.graph_objects, not plotly.express;
    # the return annotation is quoted so it is not evaluated at definition time.

    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"❌ Failed to load file: {exc}"

    # Validate columns
    missing = [c for c in cols if c not in df.columns]
    if missing:
        # str() keeps the message buildable for non-string column labels.
        return f"❌ Missing columns: {', '.join(map(str, missing))}"

    # Keep only rows that are numeric in every selected column
    df_num = df[cols].apply(pd.to_numeric, errors='coerce').dropna()
    if df_num.empty:
        return "❌ No valid numeric data in selected columns."

    # Create figure
    fig = px.scatter_matrix(
        df_num,
        dimensions=cols,
        title="Scatter-Matrix",
        template='plotly_dark'
    )
    # Save PNG
    img_path = _save_fig(fig, "scatter_matrix_", output_dir)
    return fig, img_path


def corr_heatmap_tool(
    file_path: str,
    output_dir: str = '/tmp',
    color_continuous_scale: str = 'RdBu'
) -> Union[Tuple["go.Figure", str], str]:
    """
    Build a correlation heatmap over the numeric columns of a CSV/Excel file.

    Args:
        file_path: Path to the data file. ``.xls``/``.xlsx`` files are read
            with ``pandas.read_excel``; anything else with ``pandas.read_csv``.
        output_dir: Directory in which the PNG is written.
        color_continuous_scale: Colour scale name forwarded to ``px.imshow``.

    Returns:
        ``(figure, png_path)`` on success, or an error string starting with
        '❌' when the file cannot be loaded or contains no numeric data.

    Raises:
        Exception: Errors raised while building or exporting the figure are
            not converted to an error string; they propagate to the caller.
    """
    # NOTE: the figure type lives in plotly.graph_objects, not plotly.express;
    # the return annotation is quoted so it is not evaluated at definition time.

    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"❌ Failed to load file: {exc}"

    # Compute correlation; select_dtypes already yields numeric columns only,
    # so no further numeric coercion is needed.
    df_num = df.select_dtypes(include='number')
    if df_num.empty:
        return "❌ No numeric columns available for correlation."
    corr = df_num.corr()

    # Create figure
    fig = px.imshow(
        corr,
        color_continuous_scale=color_continuous_scale,
        title="Correlation Heatmap",
        labels=dict(color="Correlation"),
        template='plotly_dark'
    )
    # Save PNG
    img_path = _save_fig(fig, "corr_heatmap_", output_dir)
    return fig, img_path