File size: 8,004 Bytes
964d389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce38001
964d389
 
 
 
 
 
 
 
 
 
ce38001
 
 
 
 
 
964d389
ce38001
 
 
 
 
 
 
 
 
964d389
 
 
ce38001
 
 
964d389
ce38001
 
 
964d389
ce38001
964d389
ce38001
 
964d389
ce38001
 
 
 
 
964d389
 
 
 
 
 
 
 
 
 
 
 
 
 
ce38001
964d389
ce38001
 
 
964d389
 
 
 
 
 
 
ce38001
964d389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce38001
964d389
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def ask_gpt4o_for_visualization(query, df, llm):
    """Ask the LLM which chart(s) best answer ``query`` given ``df``'s columns.

    Args:
        query: Natural-language question from the user; embedded verbatim in
            the prompt.
        df: Object exposing ``columns`` (e.g. a pandas DataFrame). The column
            labels are listed in the prompt so the model can pick axes.
        llm: Object with a ``generate(prompt)`` method returning the model's
            reply.

    Returns:
        The decoded JSON reply (the prompt asks for a list of suggestion
        dicts), or ``None`` after showing a Streamlit error when the reply
        cannot be parsed as JSON.
    """
    # Column labels are not guaranteed to be strings (e.g. integer headers),
    # and str.join rejects anything that is not a string.
    columns = ', '.join(map(str, df.columns))
    prompt = f"""
    Analyze the query and suggest one or more relevant visualizations.
    Query: "{query}"
    Available Columns: {columns}
    Respond in this JSON format (as a list if multiple suggestions):
    [
      {{
        "chart_type": "bar/box/line/scatter",
        "x_axis": "column_name",
        "y_axis": "column_name",
        "group_by": "optional_column_name"
      }}
    ]
    """
    response = llm.generate(prompt)

    # Chat models frequently wrap JSON in a Markdown code fence; unwrap it so
    # an otherwise valid reply is not rejected.
    if isinstance(response, str):
        response = _strip_code_fence(response)

    try:
        return json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers replies that are not text at all (e.g. None).
        st.error("⚠️ GPT-4o failed to generate a valid suggestion.")
        return None


def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown ``` fence, if present."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped

    body = stripped[3:]
    if body.endswith("```"):
        body = body[:-3]

    # The remainder of the opening fence line is an optional language tag
    # ("json"); drop it unless the payload itself starts on that line.
    tag, newline, remainder = body.partition("\n")
    if newline and not tag.strip().startswith(("[", "{")):
        body = remainder
    return body.strip()

def add_stats_to_figure(fig, df, y_axis, chart_type):
    """Overlay summary statistics for ``df[y_axis]`` on a Plotly figure.

    Bar and line charts receive an annotation box plus horizontal reference
    lines at the minimum, median, average and maximum. Scatter plots receive
    only the annotation box. Box plots are left untouched. Pie, heatmap and
    any other chart type only produce a Streamlit notice.

    Args:
        fig: Figure to decorate; must provide ``add_annotation`` and
            ``add_hline``. ``None`` is returned unchanged.
        df: DataFrame holding the plotted data.
        y_axis: Label of the column to summarise.
        chart_type: Chart kind, matched case-insensitively.

    Returns:
        The same ``fig`` object, with overlays added where applicable.
    """
    # A caller may hand over the result of a failed chart build.
    if fig is None:
        return fig

    if y_axis is None or y_axis not in df.columns:
        st.warning(f"⚠️ Cannot compute statistics: column '{y_axis}' not found.")
        return fig

    series = df[y_axis]
    if not pd.api.types.is_numeric_dtype(series):
        st.warning(f"⚠️ Cannot compute statistics for non-numeric column: {y_axis}")
        return fig

    # With no non-null values every statistic is NaN, which would yield
    # meaningless reference lines.
    if series.count() == 0:
        st.warning(f"⚠️ Cannot compute statistics: column '{y_axis}' has no values.")
        return fig

    min_val = series.min()
    max_val = series.max()
    avg_val = series.mean()
    median_val = series.median()
    std_dev_val = series.std()

    # Plotly annotation text understands a small HTML subset (<b>, <br>), not
    # Markdown, so the box is built with HTML tags and <br> line breaks.
    stats_text = "<br>".join([
        "📊 <b>Statistics</b>",
        f"• <b>Min:</b> ${min_val:,.2f}",
        f"• <b>Max:</b> ${max_val:,.2f}",
        f"• <b>Average:</b> ${avg_val:,.2f}",
        f"• <b>Median:</b> ${median_val:,.2f}",
        f"• <b>Std Dev:</b> ${std_dev_val:,.2f}",
    ])

    # Shared placement/styling of the statistics box, positioned just outside
    # the plot area via paper-relative coordinates.
    annotation_style = dict(
        xref="paper", yref="paper",
        x=1.02, y=1,
        showarrow=False,
        align="left",
        font=dict(size=12, color="black"),
        bordercolor="gray",
        borderwidth=1,
        bgcolor="rgba(255, 255, 255, 0.85)",
    )

    # (value, dash style, colour, label, label position) per reference line.
    reference_lines = (
        (min_val, "dot", "red", "Min", "bottom right"),
        (median_val, "dash", "orange", "Median", "top right"),
        (avg_val, "dashdot", "green", "Avg", "top right"),
        (max_val, "dot", "blue", "Max", "top right"),
    )

    # Callers may pass the raw suggestion value ("Bar", " line "), so compare
    # on a normalised copy while reporting the original in messages.
    kind = str(chart_type or "").strip().lower()

    if kind in ("bar", "line", "scatter"):
        fig.add_annotation(text=stats_text, **annotation_style)
        # Scatter plots get the box only; lines would clutter the point cloud.
        if kind != "scatter":
            for value, dash, color, label, position in reference_lines:
                fig.add_hline(
                    y=value,
                    line_dash=dash,
                    line_color=color,
                    annotation_text=label,
                    annotation_position=position,
                )

    elif kind == "box":
        # Box plots inherently show the distribution; nothing to add.
        pass

    elif kind == "pie":
        st.info("📊 Pie charts represent proportions. Additional stats are not applicable.")

    elif kind == "heatmap":
        st.info("📊 Heatmaps inherently reflect distribution. No additional stats added.")

    else:
        st.warning(f"⚠️ No statistical overlays applied for unsupported chart type: '{chart_type}'.")

    return fig


# Dynamically generate Plotly visualizations based on GPT-4o suggestions
def generate_visualization(suggestion, df):
    """Build a Plotly Express figure from a single chart suggestion.

    Args:
        suggestion: Mapping with the optional keys ``chart_type`` (defaults to
            ``"bar"``), ``x_axis``, ``y_axis`` and ``group_by``.
        df: DataFrame to plot.

    Returns:
        The figure with statistical overlays applied, or ``None`` (after a
        Streamlit warning or error) when the suggestion cannot be rendered.
    """
    # A malformed model reply can contain non-dict entries (e.g. strings).
    if not isinstance(suggestion, dict):
        st.warning("⚠️ Unable to determine appropriate columns for visualization.")
        return None

    # ``or "bar"`` also covers an explicit JSON null for chart_type.
    chart_type = str(suggestion.get("chart_type") or "bar").strip().lower()
    x_axis = suggestion.get("x_axis")
    y_axis = suggestion.get("y_axis")
    group_by = suggestion.get("group_by")

    # Step 1: Infer the Y-axis if it was not provided.
    if not y_axis:
        # Never plot a column against itself.
        numeric_columns = [
            col
            for col in df.select_dtypes(include='number').columns
            if col != x_axis
        ]

        # Prefer well-known monetary metrics, then any numeric column.
        priority_columns = ("salary_in_usd", "income", "earnings", "revenue")
        y_axis = next(
            (col for col in priority_columns if col in numeric_columns), None
        )
        if y_axis is None and numeric_columns:
            y_axis = numeric_columns[0]

    # Step 2: Validate axes.
    if not x_axis or not y_axis:
        st.warning("⚠️ Unable to determine appropriate columns for visualization.")
        return None

    # Step 3: Resolve the Plotly Express chart function by name. The name
    # comes from the model, so reject private attributes and anything on the
    # ``px`` module that is not callable (sub-modules, constants).
    plotly_function = (
        None if chart_type.startswith("_") else getattr(px, chart_type, None)
    )
    if not callable(plotly_function):
        st.warning(f"⚠️ Unsupported chart type '{chart_type}' suggested by GPT-4o.")
        return None

    # Step 4: Prepare the plot arguments; colour grouping is optional and is
    # silently skipped when the suggested column does not exist.
    plot_args = {"data_frame": df, "x": x_axis, "y": y_axis}
    if group_by and group_by in df.columns:
        plot_args["color"] = group_by

    # Column labels may be non-strings when the Y-axis was inferred.
    x_label = str(x_axis).replace('_', ' ').title()
    y_label = str(y_axis).replace('_', ' ').title()

    try:
        # Step 5: Generate the visualization.
        fig = plotly_function(**plot_args)
        fig.update_layout(
            title=f"{chart_type.title()} Plot of {y_label} by {x_label}",
            xaxis_title=x_label,
            yaxis_title=y_label,
        )

        # Step 6: Apply statistics using the resolved Y-axis. The helper in
        # this module is ``add_stats_to_figure``; the name previously used
        # here is not defined in the visible source.
        fig = add_stats_to_figure(fig, df, y_axis, chart_type)

        return fig

    except Exception as e:
        # UI boundary: report any plotting failure instead of crashing the app.
        st.error(f"⚠️ Failed to generate visualization: {e}")
        return None


def generate_multiple_visualizations(suggestions, df):
    """Render every suggestion that yields a figure.

    Args:
        suggestions: List of suggestion dicts. A single dict is treated as a
            one-element list and ``None`` as "no suggestions".
        df: DataFrame passed through to ``generate_visualization``.

    Returns:
        List of the figures that were built successfully. It never contains
        ``None`` and is empty when nothing could be rendered.
    """
    if suggestions is None:
        return []
    # Iterating a dict would walk its keys, not suggestions.
    if isinstance(suggestions, dict):
        suggestions = [suggestions]

    visualizations = []
    for suggestion in suggestions:
        # generate_visualization applies the statistical overlays itself,
        # using the Y-axis and chart type it resolved. Re-applying them here
        # duplicated the overlays and indexed suggestion["y_axis"] /
        # suggestion["chart_type"], which are missing whenever the values
        # were inferred or defaulted.
        fig = generate_visualization(suggestion, df)
        if fig is not None:
            visualizations.append(fig)

    if not visualizations and suggestions:
        # Every suggestion has already been tried above; retrying the first
        # one cannot succeed and previously put None into the result.
        st.warning("⚠️ No valid visualization could be generated from the suggestions.")

    return visualizations


def handle_visualization_suggestions(suggestions, df):
    """
    Render the chart(s) described by ``suggestions`` in the Streamlit app.

    A list with several entries is delegated to
    ``generate_multiple_visualizations``; a lone dict or a one-element list is
    rendered through ``generate_visualization``. Any other input produces no
    chart. A warning is shown when nothing could be rendered.
    """
    # Normalise the accepted shapes into one list of candidates.
    if isinstance(suggestions, dict):
        candidates = [suggestions]
    elif isinstance(suggestions, list):
        candidates = suggestions
    else:
        candidates = []

    if len(candidates) > 1:
        # Several suggestions: build them as a batch.
        charts = generate_multiple_visualizations(candidates, df)
    elif candidates:
        # Exactly one suggestion: build it directly.
        chart = generate_visualization(candidates[0], df)
        charts = [chart] if chart else []
    else:
        charts = []

    # Tell the user when there is nothing to show.
    if not charts:
        st.warning("⚠️ Unable to generate any visualization based on the suggestion.")

    # Display every chart that was produced.
    for chart in charts:
        st.plotly_chart(chart, use_container_width=True)