Ari committed on
Commit
ec5af14
·
verified ·
1 Parent(s): a3c9c61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -36
app.py CHANGED
@@ -20,7 +20,7 @@ if not openai_api_key:
20
  st.stop()
21
 
22
  # Step 1: Upload CSV data file (or use default)
23
- st.title("Natural Language to SQL Query App with Dynamic Insights")
24
  st.write("Upload a CSV file to get started, or use the default dataset.")
25
 
26
  csv_file = st.file_uploader("Upload your CSV file", type=["csv"])
@@ -71,27 +71,29 @@ sql_generation_chain = LLMChain(llm=llm, prompt=sql_prompt)
71
 
72
  # Insights Generation Chain
73
  insights_template = """
74
- You are an expert data scientist. Based on the SQL query result provided below, generate a concise and informative analysis that includes specific data-driven insights.
 
 
75
 
76
  SQL Query Result:
77
  {result}
78
 
79
- Analysis:
80
  """
81
- insights_prompt = PromptTemplate(template=insights_template, input_variables=['result'])
82
  insights_chain = LLMChain(llm=llm, prompt=insights_prompt)
83
 
84
- # Recommendations Generation Chain
85
- recommendations_template = """
86
- You are an expert data scientist. Based on the SQL query result provided below, generate actionable recommendations for improving performance.
87
 
88
- SQL Query Result:
89
- {result}
90
 
91
- Recommendations:
92
  """
93
- recommendations_prompt = PromptTemplate(template=recommendations_template, input_variables=['result'])
94
- recommendations_chain = LLMChain(llm=llm, prompt=recommendations_prompt)
95
 
96
  # Optional: Clean up function to remove incorrect COLLATE NOCASE usage
97
  def clean_sql_query(query):
@@ -119,7 +121,7 @@ def clean_sql_query(query):
119
  def classify_query(question):
120
  """Classify the user query as either 'SQL' or 'INSIGHTS'."""
121
  classification_template = """
122
- You are an AI assistant that classifies user queries into two categories: 'SQL' for specific data retrieval queries and 'INSIGHTS' for general analytical queries.
123
 
124
  Determine the appropriate category for the following user question.
125
 
@@ -135,6 +137,22 @@ def classify_query(question):
135
  else:
136
  return 'INSIGHTS'
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  # Define the callback function
139
  def process_input():
140
  user_prompt = st.session_state['user_input']
@@ -160,8 +178,19 @@ def process_input():
160
  }).strip()
161
 
162
  if generated_sql.upper() == "NO_SQL":
163
- assistant_response = "This query is too vague for generating SQL. Please ask a more specific question."
164
- st.session_state.history.append({"role": "assistant", "content": assistant_response})
 
 
 
 
 
 
 
 
 
 
 
165
  else:
166
  # Clean the SQL query
167
  cleaned_sql = clean_sql_query(generated_sql)
@@ -178,14 +207,14 @@ def process_input():
178
  # Convert the result to a string for the insights prompt
179
  result_str = result.head(10).to_string(index=False) # Limit to first 10 rows
180
 
181
- # Generate insights based on the query result
182
  insights = insights_chain.run({
 
183
  'result': result_str
184
  })
185
 
186
- # Display insights in a scrollable text area
187
- st.text_area("Insights", value=insights, height=300)
188
-
189
  # Append the result DataFrame to the history
190
  st.session_state.history.append({"role": "assistant", "content": result})
191
  except Exception as e:
@@ -193,25 +222,16 @@ def process_input():
193
  assistant_response = f"Error executing SQL query: {e}"
194
  st.session_state.history.append({"role": "assistant", "content": assistant_response})
195
  else: # INSIGHTS category
196
- if "recommendations" in user_prompt.lower():
197
- # Generate recommendations based on the query result
198
- dataset_summary = data.describe().to_string() # Summary for recommendations
199
- recommendations = recommendations_chain.run({
200
- 'result': dataset_summary
201
- })
202
 
203
- # Display recommendations in a scrollable text area
204
- st.text_area("Recommendations", value=recommendations, height=300)
205
-
206
- else:
207
- # Generate insights based on general insights (without recommendations)
208
- dataset_summary = data.describe().to_string() # Summary for insights
209
- insights = insights_chain.run({
210
- 'result': dataset_summary
211
- })
212
 
213
- # Display insights in a scrollable text area
214
- st.text_area("Insights", value=insights, height=300)
215
 
216
  except Exception as e:
217
  logging.error(f"An error occurred: {e}")
 
20
  st.stop()
21
 
22
  # Step 1: Upload CSV data file (or use default)
23
+ st.title("Natural Language to SQL Query App with Enhanced Insights")
24
  st.write("Upload a CSV file to get started, or use the default dataset.")
25
 
26
  csv_file = st.file_uploader("Upload your CSV file", type=["csv"])
 
71
 
72
  # Insights Generation Chain
73
  insights_template = """
74
+ You are an expert data scientist. Based on the user's question and the SQL query result provided below, generate a concise and informative analysis that includes data insights and actionable recommendations.
75
+
76
+ User's Question: {question}
77
 
78
  SQL Query Result:
79
  {result}
80
 
81
+ Analysis and Recommendations:
82
  """
83
+ insights_prompt = PromptTemplate(template=insights_template, input_variables=['question', 'result'])
84
  insights_chain = LLMChain(llm=llm, prompt=insights_prompt)
85
 
86
+ # General Insights and Recommendations Chain
87
+ general_insights_template = """
88
+ You are an expert data scientist. Based on the entire dataset provided below, generate a comprehensive analysis that includes key insights and actionable recommendations.
89
 
90
+ Dataset Summary:
91
+ {dataset_summary}
92
 
93
+ Analysis and Recommendations:
94
  """
95
+ general_insights_prompt = PromptTemplate(template=general_insights_template, input_variables=['dataset_summary'])
96
+ general_insights_chain = LLMChain(llm=llm, prompt=general_insights_prompt)
97
 
98
  # Optional: Clean up function to remove incorrect COLLATE NOCASE usage
99
  def clean_sql_query(query):
 
121
  def classify_query(question):
122
  """Classify the user query as either 'SQL' or 'INSIGHTS'."""
123
  classification_template = """
124
+ You are an AI assistant that classifies user queries into two categories: 'SQL' for specific data retrieval queries and 'INSIGHTS' for general analytical or recommendation queries.
125
 
126
  Determine the appropriate category for the following user question.
127
 
 
137
  else:
138
  return 'INSIGHTS'
139
 
140
+ # Function to generate dataset summary
141
+ def generate_dataset_summary(data):
142
+ """Generate a summary of the dataset for general insights."""
143
+ summary_template = """
144
+ You are an expert data scientist. Based on the dataset provided below, generate a concise summary that includes the number of records, number of columns, data types, and any notable features.
145
+
146
+ Dataset:
147
+ {data}
148
+
149
+ Dataset Summary:
150
+ """
151
+ summary_prompt = PromptTemplate(template=summary_template, input_variables=['data'])
152
+ summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
153
+ summary = summary_chain.run({'data': data.head().to_string(index=False)})
154
+ return summary
155
+
156
  # Define the callback function
157
  def process_input():
158
  user_prompt = st.session_state['user_input']
 
178
  }).strip()
179
 
180
  if generated_sql.upper() == "NO_SQL":
181
+ # Handle cases where no SQL should be generated
182
+ assistant_response = "Sure, let's discuss some general insights and recommendations based on the data."
183
+
184
+ # Generate dataset summary
185
+ dataset_summary = generate_dataset_summary(data)
186
+
187
+ # Generate general insights and recommendations
188
+ general_insights = general_insights_chain.run({
189
+ 'dataset_summary': dataset_summary
190
+ })
191
+
192
+ # Append the assistant's insights to the history
193
+ st.session_state.history.append({"role": "assistant", "content": general_insights})
194
  else:
195
  # Clean the SQL query
196
  cleaned_sql = clean_sql_query(generated_sql)
 
207
  # Convert the result to a string for the insights prompt
208
  result_str = result.head(10).to_string(index=False) # Limit to first 10 rows
209
 
210
+ # Generate insights and recommendations based on the query result
211
  insights = insights_chain.run({
212
+ 'question': user_prompt,
213
  'result': result_str
214
  })
215
 
216
+ # Append the assistant's insights to the history
217
+ st.session_state.history.append({"role": "assistant", "content": insights})
 
218
  # Append the result DataFrame to the history
219
  st.session_state.history.append({"role": "assistant", "content": result})
220
  except Exception as e:
 
222
  assistant_response = f"Error executing SQL query: {e}"
223
  st.session_state.history.append({"role": "assistant", "content": assistant_response})
224
  else: # INSIGHTS category
225
+ # Generate dataset summary
226
+ dataset_summary = generate_dataset_summary(data)
 
 
 
 
227
 
228
+ # Generate general insights and recommendations
229
+ general_insights = general_insights_chain.run({
230
+ 'dataset_summary': dataset_summary
231
+ })
 
 
 
 
 
232
 
233
+ # Append the assistant's insights to the history
234
+ st.session_state.history.append({"role": "assistant", "content": general_insights})
235
 
236
  except Exception as e:
237
  logging.error(f"An error occurred: {e}")