mgbam committed on
Commit
16f65e2
·
verified ·
1 Parent(s): 28e2398

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -62
app.py CHANGED
@@ -7,8 +7,6 @@ import matplotlib.pyplot as plt
7
  import seaborn as sns
8
  import os
9
  from groq import Groq
10
- from dataclasses import dataclass
11
- import tempfile
12
  import base64
13
  import io
14
 
@@ -39,7 +37,11 @@ class GroqLLM:
39
  stream=False
40
  )
41
 
42
- return completion.choices[0].message.content if completion.choices else "Error: No response generated"
 
 
 
 
43
 
44
  except Exception as e:
45
  error_msg = f"Error generating response: {str(e)}"
@@ -76,21 +78,7 @@ class DataAnalysisAgent(CodeAgent):
76
 
77
  @tool
78
  def analyze_basic_stats(data: pd.DataFrame) -> str:
79
- """Calculate basic statistical measures for numerical columns in the dataset.
80
-
81
- This function computes fundamental statistical metrics including mean, median,
82
- standard deviation, skewness, and counts of missing values for all numerical
83
- columns in the provided DataFrame.
84
-
85
- Args:
86
- data: A pandas DataFrame containing the dataset to analyze. The DataFrame
87
- should contain at least one numerical column for meaningful analysis.
88
-
89
- Returns:
90
- str: A string containing formatted basic statistics for each numerical column,
91
- including mean, median, standard deviation, skewness, and missing value counts.
92
- """
93
- # Access dataset from agent if no data provided
94
  if data is None:
95
  data = tool.agent.dataset
96
 
@@ -110,21 +98,7 @@ def analyze_basic_stats(data: pd.DataFrame) -> str:
110
 
111
  @tool
112
  def generate_correlation_matrix(data: pd.DataFrame) -> str:
113
- """Generate a visual correlation matrix for numerical columns in the dataset.
114
-
115
- This function creates a heatmap visualization showing the correlations between
116
- all numerical columns in the dataset. The correlation values are displayed
117
- using a color-coded matrix for easy interpretation.
118
-
119
- Args:
120
- data: A pandas DataFrame containing the dataset to analyze. The DataFrame
121
- should contain at least two numerical columns for correlation analysis.
122
-
123
- Returns:
124
- str: A base64 encoded string representing the correlation matrix plot image,
125
- which can be displayed in a web interface or saved as an image file.
126
- """
127
- # Access dataset from agent if no data provided
128
  if data is None:
129
  data = tool.agent.dataset
130
 
@@ -141,20 +115,7 @@ def generate_correlation_matrix(data: pd.DataFrame) -> str:
141
 
142
  @tool
143
  def analyze_categorical_columns(data: pd.DataFrame) -> str:
144
- """Analyze categorical columns in the dataset for distribution and frequencies.
145
-
146
- This function examines categorical columns to identify unique values, top categories,
147
- and missing value counts, providing insights into the categorical data distribution.
148
-
149
- Args:
150
- data: A pandas DataFrame containing the dataset to analyze. The DataFrame
151
- should contain at least one categorical column for meaningful analysis.
152
-
153
- Returns:
154
- str: A string containing formatted analysis results for each categorical column,
155
- including unique value counts, top categories, and missing value counts.
156
- """
157
- # Access dataset from agent if no data provided
158
  if data is None:
159
  data = tool.agent.dataset
160
 
@@ -172,20 +133,7 @@ def analyze_categorical_columns(data: pd.DataFrame) -> str:
172
 
173
  @tool
174
  def suggest_features(data: pd.DataFrame) -> str:
175
- """Suggest potential feature engineering steps based on data characteristics.
176
-
177
- This function analyzes the dataset's structure and statistical properties to
178
- recommend possible feature engineering steps that could improve model performance.
179
-
180
- Args:
181
- data: A pandas DataFrame containing the dataset to analyze. The DataFrame
182
- can contain both numerical and categorical columns.
183
-
184
- Returns:
185
- str: A string containing suggestions for feature engineering based on
186
- the characteristics of the input data.
187
- """
188
- # Access dataset from agent if no data provided
189
  if data is None:
190
  data = tool.agent.dataset
191
 
@@ -215,7 +163,8 @@ def main():
215
  if 'agent' not in st.session_state:
216
  st.session_state['agent'] = None
217
 
218
- uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 
219
 
220
  try:
221
  if uploaded_file is not None:
 
7
  import seaborn as sns
8
  import os
9
  from groq import Groq
 
 
10
  import base64
11
  import io
12
 
 
37
  stream=False
38
  )
39
 
40
+ # Ensure the response is properly formatted
41
+ if completion.choices and hasattr(completion.choices[0].message, 'content'):
42
+ return completion.choices[0].message.content
43
+ else:
44
+ return "Error: No valid response generated from the model."
45
 
46
  except Exception as e:
47
  error_msg = f"Error generating response: {str(e)}"
 
78
 
79
  @tool
80
  def analyze_basic_stats(data: pd.DataFrame) -> str:
81
+ """Calculate basic statistical measures for numerical columns in the dataset."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  if data is None:
83
  data = tool.agent.dataset
84
 
 
98
 
99
  @tool
100
  def generate_correlation_matrix(data: pd.DataFrame) -> str:
101
+ """Generate a visual correlation matrix for numerical columns in the dataset."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  if data is None:
103
  data = tool.agent.dataset
104
 
 
115
 
116
  @tool
117
  def analyze_categorical_columns(data: pd.DataFrame) -> str:
118
+ """Analyze categorical columns in the dataset for distribution and frequencies."""
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  if data is None:
120
  data = tool.agent.dataset
121
 
 
133
 
134
  @tool
135
  def suggest_features(data: pd.DataFrame) -> str:
136
+ """Suggest potential feature engineering steps based on data characteristics."""
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  if data is None:
138
  data = tool.agent.dataset
139
 
 
163
  if 'agent' not in st.session_state:
164
  st.session_state['agent'] = None
165
 
166
+ # Drag-and-drop file upload
167
+ uploaded_file = st.file_uploader("Drag and drop a CSV file here", type="csv")
168
 
169
  try:
170
  if uploaded_file is not None: