Spaces:

LeonceNsh
/

usgov-contracts-rag

Sleeping

App Files Files Community

LeonceNsh committed on Nov 1, 2024

Commit

00c05fa

verified ·

1 Parent(s): a1792a1

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -12

app.py CHANGED Viewed

@@ -7,6 +7,9 @@ import plotly.express as px
 import openai
 import os
 # =========================
 # Configuration and Setup
 # =========================
@@ -75,23 +78,25 @@ def load_dataset_schema():
 async def parse_query(nl_query):
     """
-    Converts a natural language query into a SQL query using OpenAI's GPT-4-turbo model.
     """
     messages = [
-        {"role": "system", "content": (
-            "You are an assistant that converts natural language queries into SQL queries "
-            "for a DuckDB database named 'contract_data'. Use the provided schema to form accurate SQL queries."
-        )},
-        {"role": "user", "content": (
-            f"Schema:\n{json.dumps(schema, indent=2)}\n\nNatural Language Query:\n\"{nl_query}\"\n\nSQL Query:"
-        )}
     ]
     try:
         response = await openai.ChatCompletion.acreate(
             model="gpt-3.5-turbo",
             messages=messages,
-            temperature=0,
             max_tokens=150,
         )
         sql_query = response.choices[0].message['content'].strip()
@@ -114,7 +119,7 @@ def detect_plot_intent(nl_query):
     for keyword in plot_keywords:
         if keyword in nl_query.lower():
             return True
-    return False
 async def generate_sql_and_plot_code(query):
     """
@@ -125,6 +130,7 @@ async def generate_sql_and_plot_code(query):
     plot_code = ""
     if is_plot and not sql_query.startswith("Error"):
         # Generate plot code based on the query
         plot_code = """
 import plotly.express as px
 fig = px.bar(result_df, x='x_column', y='y_column', title='Generated Plot')
@@ -141,11 +147,13 @@ def execute_query(sql_query):
     try:
         con = duckdb.connect()
         con.execute(f"CREATE OR REPLACE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
         result_df = con.execute(sql_query).fetchdf()
         con.close()
         return result_df, ""
     except Exception as e:
         return None, f"Error executing query: {e}"
 def generate_plot(plot_code, result_df):
@@ -155,6 +163,7 @@ def generate_plot(plot_code, result_df):
     if not plot_code.strip():
         return None, "No plot code provided."
     try:
         if result_df.empty:
             return None, "Result DataFrame is empty."
         columns = result_df.columns.tolist()
@@ -163,10 +172,14 @@ def generate_plot(plot_code, result_df):
         plot_code = plot_code.replace('x_column', columns[0])
         plot_code = plot_code.replace('y_column', columns[1])
         local_vars = {'result_df': result_df, 'px': px}
         exec(plot_code, {}, local_vars)
         fig = local_vars.get('fig', None)
-        return fig, "" if fig else "Plot could not be generated."
     except Exception as e:
         return None, f"Error generating plot: {e}"
@@ -194,9 +207,31 @@ with gr.Blocks() as demo:
     # Parquet SQL Query and Plotting App
     **Query and visualize data** in `sample_contract_df.parquet`
     """)
     with gr.Tabs():
         with gr.TabItem("Query Data"):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -214,21 +249,35 @@ with gr.Blocks() as demo:
                     results_out = gr.Dataframe(label="Query Results", interactive=False)
                     plot_out = gr.Plot(label="Plot")
         with gr.TabItem("Dataset Schema"):
             gr.Markdown("### Dataset Schema")
             schema_display = gr.JSON(label="Schema", value=json.loads(get_schema_json()))
     async def on_generate_click(nl_query):
         sql_query, plot_code = await generate_sql_and_plot_code(nl_query)
         return sql_query, plot_code
     def on_execute_click(sql_query, plot_code):
         result_df, error_msg = execute_query(sql_query)
         if error_msg:
             return None, None, error_msg
         if plot_code.strip():
             fig, plot_error = generate_plot(plot_code, result_df)
-            return result_df, fig, plot_error if plot_error else ""
         else:
             return result_df, None, ""
@@ -243,4 +292,8 @@ with gr.Blocks() as demo:
         outputs=[results_out, plot_out, error_out],
     )
 demo.launch()

 import openai
 import os
+# Set OpenAI API key
+openai.api_key = os.getenv("OPENAI_API_KEY")
 # =========================
 # Configuration and Setup
 # =========================
 async def parse_query(nl_query):
     """
+    Converts a natural language query into a SQL query using OpenAI's GPT-3.5-turbo model.
     """
     messages = [
+        {"role": "system",
+         "content": (
+             "You are an assistant that converts natural language queries into SQL queries "
+             "for a DuckDB database named 'contract_data'. Use the provided schema to form accurate SQL queries.")
+        },
+        {"role": "user",
+         "content": f"Schema:\n{json.dumps(schema, indent=2)}\n\nNatural Language Query:\n\"{nl_query}\"\n\nSQL Query:"
+        }
     ]
     try:
         response = await openai.ChatCompletion.acreate(
             model="gpt-3.5-turbo",
             messages=messages,
+            temperature=0,  # Set to 0 for deterministic output
             max_tokens=150,
         )
         sql_query = response.choices[0].message['content'].strip()
     for keyword in plot_keywords:
         if keyword in nl_query.lower():
             return True
+        return False
 async def generate_sql_and_plot_code(query):
     """
     plot_code = ""
     if is_plot and not sql_query.startswith("Error"):
         # Generate plot code based on the query
+        # For simplicity, we'll generate a basic plot code
         plot_code = """
 import plotly.express as px
 fig = px.bar(result_df, x='x_column', y='y_column', title='Generated Plot')
     try:
         con = duckdb.connect()
+        # Ensure the view is created
         con.execute(f"CREATE OR REPLACE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
         result_df = con.execute(sql_query).fetchdf()
         con.close()
         return result_df, ""
     except Exception as e:
+        # In case of error, return None and error message
         return None, f"Error executing query: {e}"
 def generate_plot(plot_code, result_df):
     if not plot_code.strip():
         return None, "No plot code provided."
     try:
+        # Replace placeholders in plot_code with actual column names
         if result_df.empty:
             return None, "Result DataFrame is empty."
         columns = result_df.columns.tolist()
         plot_code = plot_code.replace('x_column', columns[0])
         plot_code = plot_code.replace('y_column', columns[1])
+        # Execute the plot code
         local_vars = {'result_df': result_df, 'px': px}
         exec(plot_code, {}, local_vars)
         fig = local_vars.get('fig', None)
+        if fig:
+            return fig, ""
+        else:
+            return None, "Plot could not be generated."
     except Exception as e:
         return None, f"Error generating plot: {e}"
     # Parquet SQL Query and Plotting App
     **Query and visualize data** in `sample_contract_df.parquet`
+    ## Instructions
+    1. **Describe the data you want to retrieve or plot**: For example:
+       - `Show all awards greater than 1,000,000 in California`
+       - `Plot the distribution of awards by state`
+       - `Show a bar chart of total awards per department`
+       - `List awardees who received multiple awards along with award amounts`
+       - `Number of awards issued by each department division`
+    2. **Generate SQL**: Click "Generate SQL" to see the SQL query that will be executed.
+    3. **Execute Query**: Click "Execute Query" to run the query and view the results.
+    4. **View Plot**: If your query involves plotting, the plot will be displayed.
+    5. **View Dataset Schema**: Check the "Dataset Schema" tab to understand available columns and their types.
+    ## Example Queries
+    - `Plot the total award amount by state`
+    - `Show a histogram of awards over time`
+    - `award greater than 1000000 and state equal to "CA"`
+    - `List awards where department_ind_agency contains "Defense"`
     """)
     with gr.Tabs():
+        # Query Tab
         with gr.TabItem("Query Data"):
             with gr.Row():
                 with gr.Column(scale=1):
                     results_out = gr.Dataframe(label="Query Results", interactive=False)
                     plot_out = gr.Plot(label="Plot")
+        # Schema Tab
         with gr.TabItem("Dataset Schema"):
             gr.Markdown("### Dataset Schema")
             schema_display = gr.JSON(label="Schema", value=json.loads(get_schema_json()))
+    # =========================
+    # Click Event Handlers
+    # =========================
     async def on_generate_click(nl_query):
+        """
+        Handles the "Generate SQL" button click event.
+        """
         sql_query, plot_code = await generate_sql_and_plot_code(nl_query)
         return sql_query, plot_code
     def on_execute_click(sql_query, plot_code):
+        """
+        Handles the "Execute Query" button click event.
+        """
         result_df, error_msg = execute_query(sql_query)
         if error_msg:
             return None, None, error_msg
         if plot_code.strip():
             fig, plot_error = generate_plot(plot_code, result_df)
+            if plot_error:
+                return result_df, None, plot_error
+            else:
+                return result_df, fig, ""
         else:
             return result_df, None, ""
         outputs=[results_out, plot_out, error_out],
     )
+# =========================
+# Launch the Gradio App
+# =========================
 demo.launch()