LeonceNsh committed on
Commit
f5a9d48
·
verified ·
1 Parent(s): f4801ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -111
app.py CHANGED
@@ -46,6 +46,10 @@ def get_schema():
46
 
47
  COLUMN_TYPES = {col['column_name']: col['column_type'] for col in get_schema()}
48
 
 
 
 
 
49
  def load_dataset_schema():
50
  con = duckdb.connect()
51
  try:
@@ -58,8 +62,6 @@ def load_dataset_schema():
58
  finally:
59
  con.close()
60
 
61
- load_dataset_schema()
62
-
63
  # =========================
64
  # OpenAI API Integration
65
  # =========================
@@ -71,68 +73,40 @@ def parse_query(nl_query):
71
  ]
72
 
73
  try:
74
- response = openai.chat.completions.create(
75
  model="gpt-4",
76
  messages=messages,
77
  temperature=0,
78
  max_tokens=150,
79
  )
80
- sql_query = response.choices[0].message.content.strip()
81
  return sql_query
82
  except Exception as e:
83
  return f"Error generating SQL query: {e}"
84
 
85
  def detect_plot_intent(nl_query):
86
- plot_keywords = ['plot', 'graph', 'chart', 'distribution', 'visualize', 'trend', 'histogram', 'bar', 'line', 'scatter', 'pie']
87
  return any(keyword in nl_query.lower() for keyword in plot_keywords)
88
 
89
-
90
- def execute_query(sql_query):
91
- """
92
- Executes the SQL query and returns the results.
93
- """
94
- if sql_query.startswith("Error"):
95
- return None, sql_query
96
-
97
- try:
98
- con = duckdb.connect()
99
- con.execute(f"CREATE OR REPLACE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
100
- result_df = con.execute(sql_query).fetchdf()
101
- con.close()
102
- return result_df, ""
103
- except Exception as e:
104
- return None, f"Error executing query: {e}"
105
-
106
- def generate_plot(nl_query, result_df):
107
- if not detect_plot_intent(nl_query):
108
- return None, ""
109
-
110
- columns = result_df.columns.tolist()
111
- if len(columns) < 2:
112
- return None, "Not enough data to generate a plot."
113
-
114
- if 'bar' in nl_query.lower():
115
- fig = px.bar(result_df, x=columns[0], y=columns[1], title='Bar Chart')
116
- elif 'line' in nl_query.lower():
117
- fig = px.line(result_df, x=columns[0], y=columns[1], title='Line Chart')
118
- elif 'scatter' in nl_query.lower():
119
- fig = px.scatter(result_df, x=columns[0], y=columns[1], title='Scatter Plot')
120
- elif 'pie' in nl_query.lower():
121
- fig = px.pie(result_df, names=columns[0], values=columns[1], title='Pie Chart')
122
- else:
123
- fig = px.bar(result_df, x=columns[0], y=columns[1], title='Bar Chart')
124
-
125
- fig.update_layout(title_x=0.5)
126
- return fig, ""
127
-
128
  # =========================
129
  # Gradio Application UI
130
  # =========================
131
 
132
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
133
  gr.Markdown("""
134
- <h1 style="text-align: center; font-size: 2.5em; color: #333333;">Parquet Data Explorer</h1>
135
- <p style="text-align: center; color: #666666;">Query and visualize your data effortlessly.</p>
 
 
 
 
 
 
 
 
 
 
 
136
  """)
137
 
138
  with gr.Row():
@@ -142,74 +116,39 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
142
  placeholder='e.g., "What are the total awards over 1M in California?"',
143
  lines=1
144
  )
145
- gr.Markdown("### Example Queries")
146
- with gr.Row():
147
- btn_example1 = gr.Button("Show awards over 1M in CA")
148
- btn_example2 = gr.Button("List all contracts in New York")
149
- btn_example3 = gr.Button("Show top 5 departments by award amount")
150
- btn_example4 = gr.Button("Execute: SELECT * from contract_data LIMIT 10;")
151
-
152
- error_out = gr.Markdown(
153
- value="",
154
- visible=False
155
- )
156
- with gr.Column(scale=2):
157
- results_out = gr.DataFrame(
158
- label="Results"
159
- )
160
-
161
- # Instructions
162
- gr.Markdown("""
163
- ## Instructions
164
- 1. **Enter a query**: Type in a natural language query in the textbox.
165
- 2. **Use Example Queries**: Click on any example query button above.
166
- 3. **Generate SQL and Plot**: Click "Execute" to see results.
167
- """)
168
 
169
  # =========================
170
- # Click Event Handlers
171
  # =========================
172
 
173
- def on_query_submit(nl_query):
174
- if not nl_query.strip():
175
- return gr.update(visible=True, value="Please enter a query."), None, None
176
-
177
  sql_query = parse_query(nl_query)
178
- if sql_query.startswith("Error"):
179
- return gr.update(visible=True, value=sql_query), None, None
180
-
181
- result_df, error_msg = execute_query(sql_query)
182
- if error_msg:
183
- return gr.update(visible=True, value=error_msg), None, None
184
-
185
- fig, plot_error = generate_plot(nl_query, result_df)
186
- if plot_error:
187
- return gr.update(visible=True, value=plot_error), None, None
188
-
189
- return gr.update(visible=False, value=""), result_df, fig
190
-
191
- def on_example_click(query_text):
192
- sql_query = parse_query(query_text)
193
- result_df, error_msg = execute_query(sql_query)
194
- if error_msg:
195
- return sql_query, None, None, error_msg
196
- fig, plot_error = generate_plot(query_text, result_df)
197
- return sql_query, result_df, fig, plot_error if plot_error else ""
198
-
199
- btn_example1.click(lambda: on_example_click("Show awards over 1M in CA"), outputs=[results_out, error_out])
200
- btn_example2.click(lambda: on_example_click("List all contracts in New York"), outputs=[results_out, error_out])
201
- btn_example3.click(lambda: on_example_click("Show top 5 departments by award amount"), outputs=[results_out, error_out])
202
- btn_example4.click(lambda: on_example_click("SELECT * from contract_data LIMIT 10;"), outputs=[results_out, error_out])
203
-
204
- query.submit(
205
- fn=on_query_submit,
206
- inputs=query,
207
- outputs=[error_out, results_out]
208
- )
209
 
210
- # =========================
211
- # Launch the Gradio App
212
- # =========================
 
 
 
 
 
 
 
 
 
 
213
 
214
- if __name__ == "__main__":
215
- demo.launch()
 
46
 
47
  COLUMN_TYPES = {col['column_name']: col['column_type'] for col in get_schema()}
48
 
49
+ # =========================
50
+ # Database Interaction
51
+ # =========================
52
+
53
  def load_dataset_schema():
54
  con = duckdb.connect()
55
  try:
 
62
  finally:
63
  con.close()
64
 
 
 
65
  # =========================
66
  # OpenAI API Integration
67
  # =========================
 
73
  ]
74
 
75
  try:
76
+ response = openai.ChatCompletion.create(
77
  model="gpt-4",
78
  messages=messages,
79
  temperature=0,
80
  max_tokens=150,
81
  )
82
+ sql_query = response.choices[0].message['content'].strip()
83
  return sql_query
84
  except Exception as e:
85
  return f"Error generating SQL query: {e}"
86
 
87
  def detect_plot_intent(nl_query):
88
+ plot_keywords = ['plot', 'graph', 'chart', 'distribution', 'visualize']
89
  return any(keyword in nl_query.lower() for keyword in plot_keywords)
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  # =========================
92
  # Gradio Application UI
93
  # =========================
94
 
95
+ with gr.Blocks() as demo:
96
  gr.Markdown("""
97
+ # Parquet SQL Query and Plotting App
98
+
99
+ **Query and visualize data** in `sample_contract_df.parquet`
100
+
101
+ ## Instructions
102
+
103
+ 1. **Describe the data you want**: e.g., `Show awards over 1M in CA`
104
+ 2. **Use Example Queries**: Click on any example query button below to execute.
105
+ 3. **Generate SQL**: Or, enter your own query and click "Generate SQL" to see the SQL query.
106
+ 4. **Execute Query**: Run the query to view results and plots.
107
+ 5. **Dataset Schema**: See available columns and types in the "Schema" tab.
108
+
109
+ ## Example Queries
110
  """)
111
 
112
  with gr.Row():
 
116
  placeholder='e.g., "What are the total awards over 1M in California?"',
117
  lines=1
118
  )
119
+ # Button to generate the SQL query from NL
120
+ btn_generate_sql = gr.Button("Generate SQL Query")
121
+ # Textbox to display generated SQL
122
+ sql_query_out = gr.Textbox(label="Generated SQL Query", interactive=False)
123
+ # Execute button
124
+ btn_execute_query = gr.Button("Execute Query")
125
+ error_out = gr.Markdown("", visible=False)
126
+
127
+ # Results and Plot output
128
+ results_out = gr.DataFrame(label="Query Results")
129
+ plot_out = gr.Plot(label="Plot")
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  # =========================
132
+ # Event Functions
133
  # =========================
134
 
135
def generate_sql(nl_query):
    """Translate a natural-language request into SQL text.

    Thin event-handler wrapper: delegates to ``parse_query`` and returns its
    result unchanged (including any error text ``parse_query`` produces).

    Args:
        nl_query: The user's natural-language query.

    Returns:
        Whatever ``parse_query`` returns for ``nl_query``.
    """
    return parse_query(nl_query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
def execute_sql_query(sql_query):
    """Run a SQL query against the parquet dataset and return its rows.

    A fresh DuckDB connection is opened per call, the dataset at
    ``dataset_path`` is exposed as the ``contract_data`` view, and the query
    result is fetched as a DataFrame.

    Args:
        sql_query: SQL text to execute, typically the output of
            ``parse_query``.

    Returns:
        tuple: ``(result_df, "")`` on success, or ``(None, message)`` when no
        query was supplied, when the input is itself an upstream error
        message, or when DuckDB raises while running it.
    """
    if not sql_query or not sql_query.strip():
        return None, "Error executing query: no SQL query provided."
    # parse_query reports failures as "Error ..." text rather than raising;
    # pass that message through instead of feeding it to DuckDB as SQL.
    if sql_query.startswith("Error"):
        return None, sql_query

    try:
        con = duckdb.connect()
        try:
            con.execute(f"CREATE OR REPLACE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
            return con.execute(sql_query).fetchdf(), ""
        finally:
            # Always release the connection, even when the query fails.
            con.close()
    except Exception as e:
        return None, f"Error executing query: {e}"
148
+
149
+ # Button click event handlers
150
+ btn_generate_sql.click(fn=generate_sql, inputs=query, outputs=sql_query_out)
151
+ btn_execute_query.click(fn=execute_sql_query, inputs=sql_query_out, outputs=[results_out, error_out])
152
 
153
+ # Launch the Gradio App
154
+ demo.launch()