Spaces:

LeonceNsh
/

usgov-contracts-rag

Sleeping

App Files Files Community

LeonceNsh committed on Nov 1, 2024

Commit

a1792a1

verified ·

1 Parent(s): c6f04cb

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -73

app.py CHANGED Viewed

@@ -73,31 +73,25 @@ def load_dataset_schema():
 # OpenAI API Integration
 # =========================
-def parse_query(nl_query):
     """
     Converts a natural language query into a SQL query using OpenAI's GPT-4-turbo model.
     """
-    # new
-    from openai import AsyncOpenAI
-    client = AsyncOpenAI()
-    completion = await client.chat.completions.create(model="gpt-3.5-turbo",
-                                                      messages = [{"role": "system",
-                                                                   "content": (
-                                                                       "You are an assistant that converts natural language queries into SQL queries "
-                                                                       "for a DuckDB database named 'contract_data'. Use the provided schema to form accurate SQL queries.")
-                                                                  },
-                                                                  {"role": "user",
-                                                                   "content": ( f"Schema:\n{json.dumps(schema, indent=2)}\n\n" f"Natural Language Query:\n\"{nl_query}\"\n\nSQL Query:"
-                                                                              )}
-                                                                 ])
     try:
-        response = openai.ChatCompletion.create(
-            model="gpt-4-turbo",
             messages=messages,
-            temperature=0,  # Set to 0 for deterministic output
             max_tokens=150,
         )
         sql_query = response.choices[0].message['content'].strip()
@@ -122,16 +116,15 @@ def detect_plot_intent(nl_query):
             return True
     return False
-def generate_sql_and_plot_code(query):
     """
     Generates SQL query and plotting code based on the natural language input.
     """
     is_plot = detect_plot_intent(query)
-    sql_query = parse_query(query)
     plot_code = ""
     if is_plot and not sql_query.startswith("Error"):
         # Generate plot code based on the query
-        # For simplicity, we'll generate a basic plot code
         plot_code = """
 import plotly.express as px
 fig = px.bar(result_df, x='x_column', y='y_column', title='Generated Plot')
@@ -148,13 +141,11 @@ def execute_query(sql_query):
     try:
         con = duckdb.connect()
-        # Ensure the view is created
         con.execute(f"CREATE OR REPLACE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
         result_df = con.execute(sql_query).fetchdf()
         con.close()
         return result_df, ""
     except Exception as e:
-        # In case of error, return None and error message
         return None, f"Error executing query: {e}"
 def generate_plot(plot_code, result_df):
@@ -164,7 +155,6 @@ def generate_plot(plot_code, result_df):
     if not plot_code.strip():
         return None, "No plot code provided."
     try:
-        # Replace placeholders in plot_code with actual column names
         if result_df.empty:
             return None, "Result DataFrame is empty."
         columns = result_df.columns.tolist()
@@ -173,14 +163,10 @@ def generate_plot(plot_code, result_df):
         plot_code = plot_code.replace('x_column', columns[0])
         plot_code = plot_code.replace('y_column', columns[1])
-        # Execute the plot code
         local_vars = {'result_df': result_df, 'px': px}
         exec(plot_code, {}, local_vars)
         fig = local_vars.get('fig', None)
-        if fig:
-            return fig, ""
-        else:
-            return None, "Plot could not be generated."
     except Exception as e:
         return None, f"Error generating plot: {e}"
@@ -208,31 +194,9 @@ with gr.Blocks() as demo:
     # Parquet SQL Query and Plotting App
     **Query and visualize data** in `sample_contract_df.parquet`
-    ## Instructions
-    1. **Describe the data you want to retrieve or plot**: For example:
-       - `Show all awards greater than 1,000,000 in California`
-       - `Plot the distribution of awards by state`
-       - `Show a bar chart of total awards per department`
-       - `List awardees who received multiple awards along with award amounts`
-       - `Number of awards issued by each department division`
-    2. **Generate SQL**: Click "Generate SQL" to see the SQL query that will be executed.
-    3. **Execute Query**: Click "Execute Query" to run the query and view the results.
-    4. **View Plot**: If your query involves plotting, the plot will be displayed.
-    5. **View Dataset Schema**: Check the "Dataset Schema" tab to understand available columns and their types.
-    ## Example Queries
-    - `Plot the total award amount by state`
-    - `Show a histogram of awards over time`
-    - `award greater than 1000000 and state equal to "CA"`
-    - `List awards where department_ind_agency contains "Defense"`
     """)
     with gr.Tabs():
-        # Query Tab
         with gr.TabItem("Query Data"):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -250,35 +214,21 @@ with gr.Blocks() as demo:
                     results_out = gr.Dataframe(label="Query Results", interactive=False)
                     plot_out = gr.Plot(label="Plot")
-        # Schema Tab
         with gr.TabItem("Dataset Schema"):
             gr.Markdown("### Dataset Schema")
             schema_display = gr.JSON(label="Schema", value=json.loads(get_schema_json()))
-    # =========================
-    # Click Event Handlers
-    # =========================
-    def on_generate_click(nl_query):
-        """
-        Handles the "Generate SQL" button click event.
-        """
-        sql_query, plot_code = generate_sql_and_plot_code(nl_query)
         return sql_query, plot_code
     def on_execute_click(sql_query, plot_code):
-        """
-        Handles the "Execute Query" button click event.
-        """
         result_df, error_msg = execute_query(sql_query)
         if error_msg:
             return None, None, error_msg
         if plot_code.strip():
             fig, plot_error = generate_plot(plot_code, result_df)
-            if plot_error:
-                return result_df, None, plot_error
-            else:
-                return result_df, fig, ""
         else:
             return result_df, None, ""
@@ -293,8 +243,4 @@ with gr.Blocks() as demo:
         outputs=[results_out, plot_out, error_out],
     )
-# =========================
-# Launch the Gradio App
-# =========================
 demo.launch()

 # OpenAI API Integration
 # =========================
+async def parse_query(nl_query):
     """
     Converts a natural language query into a SQL query using OpenAI's gpt-3.5-turbo model.
     """
+    messages = [
+        {"role": "system", "content": (
+            "You are an assistant that converts natural language queries into SQL queries "
+            "for a DuckDB database named 'contract_data'. Use the provided schema to form accurate SQL queries."
+        )},
+        {"role": "user", "content": (
+            f"Schema:\n{json.dumps(schema, indent=2)}\n\nNatural Language Query:\n\"{nl_query}\"\n\nSQL Query:"
+        )}
+    ]
     try:
+        response = await openai.ChatCompletion.acreate(
+            model="gpt-3.5-turbo",
             messages=messages,
+            temperature=0,
             max_tokens=150,
         )
         sql_query = response.choices[0].message['content'].strip()
             return True
     return False
+async def generate_sql_and_plot_code(query):
     """
     Generates SQL query and plotting code based on the natural language input.
     """
     is_plot = detect_plot_intent(query)
+    sql_query = await parse_query(query)
     plot_code = ""
     if is_plot and not sql_query.startswith("Error"):
         # Generate plot code based on the query
         plot_code = """
 import plotly.express as px
 fig = px.bar(result_df, x='x_column', y='y_column', title='Generated Plot')
     try:
         con = duckdb.connect()
         con.execute(f"CREATE OR REPLACE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
         result_df = con.execute(sql_query).fetchdf()
         con.close()
         return result_df, ""
     except Exception as e:
         return None, f"Error executing query: {e}"
 def generate_plot(plot_code, result_df):
     if not plot_code.strip():
         return None, "No plot code provided."
     try:
         if result_df.empty:
             return None, "Result DataFrame is empty."
         columns = result_df.columns.tolist()
         plot_code = plot_code.replace('x_column', columns[0])
         plot_code = plot_code.replace('y_column', columns[1])
         local_vars = {'result_df': result_df, 'px': px}
         exec(plot_code, {}, local_vars)
         fig = local_vars.get('fig', None)
+        return fig, "" if fig else "Plot could not be generated."
     except Exception as e:
         return None, f"Error generating plot: {e}"
     # Parquet SQL Query and Plotting App
     **Query and visualize data** in `sample_contract_df.parquet`
     """)
     with gr.Tabs():
         with gr.TabItem("Query Data"):
             with gr.Row():
                 with gr.Column(scale=1):
                     results_out = gr.Dataframe(label="Query Results", interactive=False)
                     plot_out = gr.Plot(label="Plot")
         with gr.TabItem("Dataset Schema"):
             gr.Markdown("### Dataset Schema")
             schema_display = gr.JSON(label="Schema", value=json.loads(get_schema_json()))
+    async def on_generate_click(nl_query):
         # Async callback: awaits generate_sql_and_plot_code() for the given
         # natural-language query and returns its (sql_query, plot_code) pair
         # unchanged. Declared async because the helper it awaits is a coroutine.
         # NOTE(review): the pre-commit docstring called this the "Generate SQL"
         # button handler; the click wiring is outside this view — confirm.
+        sql_query, plot_code = await generate_sql_and_plot_code(nl_query)
         return sql_query, plot_code
     def on_execute_click(sql_query, plot_code):
         # Runs the SQL via execute_query() and, when plot code is present,
         # builds a figure via generate_plot().
         # Always returns a 3-tuple: (result DataFrame, figure, error message).
         # NOTE(review): presumably mapped onto
         # outputs=[results_out, plot_out, error_out]; the .click() call that
         # registers this handler is outside this view — confirm.
         result_df, error_msg = execute_query(sql_query)
         if error_msg:
             # Query failed: no table and no plot, only the error text.
             return None, None, error_msg
         if plot_code.strip():
             fig, plot_error = generate_plot(plot_code, result_df)
             # The conditional expression binds only to the third tuple element,
             # so this returns (result_df, fig, plot_error-or-empty-string).
+            return result_df, fig, plot_error if plot_error else ""
         else:
             # No plot code supplied: return the query results alone.
             return result_df, None, ""
         outputs=[results_out, plot_out, error_out],
     )
 demo.launch()