Update app.py
Browse files
app.py
CHANGED
@@ -7,8 +7,6 @@ import matplotlib.pyplot as plt
|
|
7 |
import seaborn as sns
|
8 |
import os
|
9 |
from groq import Groq
|
10 |
-
from dataclasses import dataclass
|
11 |
-
import tempfile
|
12 |
import base64
|
13 |
import io
|
14 |
|
@@ -39,7 +37,11 @@ class GroqLLM:
|
|
39 |
stream=False
|
40 |
)
|
41 |
|
42 |
-
|
|
|
|
|
|
|
|
|
43 |
|
44 |
except Exception as e:
|
45 |
error_msg = f"Error generating response: {str(e)}"
|
@@ -76,21 +78,7 @@ class DataAnalysisAgent(CodeAgent):
|
|
76 |
|
77 |
@tool
|
78 |
def analyze_basic_stats(data: pd.DataFrame) -> str:
|
79 |
-
"""Calculate basic statistical measures for numerical columns in the dataset.
|
80 |
-
|
81 |
-
This function computes fundamental statistical metrics including mean, median,
|
82 |
-
standard deviation, skewness, and counts of missing values for all numerical
|
83 |
-
columns in the provided DataFrame.
|
84 |
-
|
85 |
-
Args:
|
86 |
-
data: A pandas DataFrame containing the dataset to analyze. The DataFrame
|
87 |
-
should contain at least one numerical column for meaningful analysis.
|
88 |
-
|
89 |
-
Returns:
|
90 |
-
str: A string containing formatted basic statistics for each numerical column,
|
91 |
-
including mean, median, standard deviation, skewness, and missing value counts.
|
92 |
-
"""
|
93 |
-
# Access dataset from agent if no data provided
|
94 |
if data is None:
|
95 |
data = tool.agent.dataset
|
96 |
|
@@ -110,21 +98,7 @@ def analyze_basic_stats(data: pd.DataFrame) -> str:
|
|
110 |
|
111 |
@tool
|
112 |
def generate_correlation_matrix(data: pd.DataFrame) -> str:
|
113 |
-
"""Generate a visual correlation matrix for numerical columns in the dataset.
|
114 |
-
|
115 |
-
This function creates a heatmap visualization showing the correlations between
|
116 |
-
all numerical columns in the dataset. The correlation values are displayed
|
117 |
-
using a color-coded matrix for easy interpretation.
|
118 |
-
|
119 |
-
Args:
|
120 |
-
data: A pandas DataFrame containing the dataset to analyze. The DataFrame
|
121 |
-
should contain at least two numerical columns for correlation analysis.
|
122 |
-
|
123 |
-
Returns:
|
124 |
-
str: A base64 encoded string representing the correlation matrix plot image,
|
125 |
-
which can be displayed in a web interface or saved as an image file.
|
126 |
-
"""
|
127 |
-
# Access dataset from agent if no data provided
|
128 |
if data is None:
|
129 |
data = tool.agent.dataset
|
130 |
|
@@ -141,20 +115,7 @@ def generate_correlation_matrix(data: pd.DataFrame) -> str:
|
|
141 |
|
142 |
@tool
|
143 |
def analyze_categorical_columns(data: pd.DataFrame) -> str:
|
144 |
-
"""Analyze categorical columns in the dataset for distribution and frequencies.
|
145 |
-
|
146 |
-
This function examines categorical columns to identify unique values, top categories,
|
147 |
-
and missing value counts, providing insights into the categorical data distribution.
|
148 |
-
|
149 |
-
Args:
|
150 |
-
data: A pandas DataFrame containing the dataset to analyze. The DataFrame
|
151 |
-
should contain at least one categorical column for meaningful analysis.
|
152 |
-
|
153 |
-
Returns:
|
154 |
-
str: A string containing formatted analysis results for each categorical column,
|
155 |
-
including unique value counts, top categories, and missing value counts.
|
156 |
-
"""
|
157 |
-
# Access dataset from agent if no data provided
|
158 |
if data is None:
|
159 |
data = tool.agent.dataset
|
160 |
|
@@ -172,20 +133,7 @@ def analyze_categorical_columns(data: pd.DataFrame) -> str:
|
|
172 |
|
173 |
@tool
|
174 |
def suggest_features(data: pd.DataFrame) -> str:
|
175 |
-
"""Suggest potential feature engineering steps based on data characteristics.
|
176 |
-
|
177 |
-
This function analyzes the dataset's structure and statistical properties to
|
178 |
-
recommend possible feature engineering steps that could improve model performance.
|
179 |
-
|
180 |
-
Args:
|
181 |
-
data: A pandas DataFrame containing the dataset to analyze. The DataFrame
|
182 |
-
can contain both numerical and categorical columns.
|
183 |
-
|
184 |
-
Returns:
|
185 |
-
str: A string containing suggestions for feature engineering based on
|
186 |
-
the characteristics of the input data.
|
187 |
-
"""
|
188 |
-
# Access dataset from agent if no data provided
|
189 |
if data is None:
|
190 |
data = tool.agent.dataset
|
191 |
|
@@ -215,7 +163,8 @@ def main():
|
|
215 |
if 'agent' not in st.session_state:
|
216 |
st.session_state['agent'] = None
|
217 |
|
218 |
-
|
|
|
219 |
|
220 |
try:
|
221 |
if uploaded_file is not None:
|
|
|
7 |
import seaborn as sns
|
8 |
import os
|
9 |
from groq import Groq
|
|
|
|
|
10 |
import base64
|
11 |
import io
|
12 |
|
|
|
37 |
stream=False
|
38 |
)
|
39 |
|
40 |
+
# Return the first choice's content only if the completion actually contains one
|
41 |
+
if completion.choices and hasattr(completion.choices[0].message, 'content'):
|
42 |
+
return completion.choices[0].message.content
|
43 |
+
else:
|
44 |
+
return "Error: No valid response generated from the model."
|
45 |
|
46 |
except Exception as e:
|
47 |
error_msg = f"Error generating response: {str(e)}"
|
|
|
78 |
|
79 |
@tool
|
80 |
def analyze_basic_stats(data: pd.DataFrame) -> str:
|
81 |
+
"""Calculate basic statistical measures for numerical columns in the dataset."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
if data is None:
|
83 |
data = tool.agent.dataset
|
84 |
|
|
|
98 |
|
99 |
@tool
|
100 |
def generate_correlation_matrix(data: pd.DataFrame) -> str:
|
101 |
+
"""Generate a visual correlation matrix for numerical columns in the dataset."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
if data is None:
|
103 |
data = tool.agent.dataset
|
104 |
|
|
|
115 |
|
116 |
@tool
|
117 |
def analyze_categorical_columns(data: pd.DataFrame) -> str:
|
118 |
+
"""Analyze categorical columns in the dataset for distribution and frequencies."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
if data is None:
|
120 |
data = tool.agent.dataset
|
121 |
|
|
|
133 |
|
134 |
@tool
|
135 |
def suggest_features(data: pd.DataFrame) -> str:
|
136 |
+
"""Suggest potential feature engineering steps based on data characteristics."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
if data is None:
|
138 |
data = tool.agent.dataset
|
139 |
|
|
|
163 |
if 'agent' not in st.session_state:
|
164 |
st.session_state['agent'] = None
|
165 |
|
166 |
+
# Drag-and-drop file upload
|
167 |
+
uploaded_file = st.file_uploader("Drag and drop a CSV file here", type="csv")
|
168 |
|
169 |
try:
|
170 |
if uploaded_file is not None:
|