Spaces:

LeonceNsh
/

usgov-contracts-rag

Sleeping

App Files Files Community

LeonceNsh committed on Nov 2, 2024

Commit

78f16f0

verified ·

1 Parent(s): 48aeb6e

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -88

app.py CHANGED Viewed

@@ -11,13 +11,9 @@ import os
 # Configuration and Setup
 # =========================
-# Set OpenAI API key
 openai.api_key = os.getenv("OPENAI_API_KEY")
-# Load the Parquet dataset path
 dataset_path = 'sample_contract_df.parquet'  # Update with your Parquet file path
-# Provided schema
 schema = [
     {"column_name": "department_ind_agency", "column_type": "VARCHAR"},
     {"column_name": "cgac", "column_type": "BIGINT"},
@@ -50,14 +46,7 @@ def get_schema():
 COLUMN_TYPES = {col['column_name']: col['column_type'] for col in get_schema()}
-# =========================
-# Database Interaction
-# =========================
 def load_dataset_schema():
-    """
-    Loads the dataset schema into DuckDB by creating a view.
-    """
     con = duckdb.connect()
     try:
         con.execute("DROP VIEW IF EXISTS contract_data")
@@ -69,7 +58,6 @@ def load_dataset_schema():
     finally:
         con.close()
-# Load the dataset schema at startup
 load_dataset_schema()
 # =========================
@@ -77,9 +65,6 @@ load_dataset_schema()
 # =========================
 def parse_query(nl_query):
-    """
-    Converts a natural language query into a SQL query using OpenAI's API.
-    """
     messages = [
         {"role": "system", "content": "You are an assistant that converts natural language queries into SQL queries for the 'contract_data' table."},
         {"role": "user", "content": f"Schema:\n{json.dumps(schema, indent=2)}\n\nQuery:\n\"{nl_query}\"\n\nSQL:"}
@@ -87,7 +72,7 @@ def parse_query(nl_query):
     try:
         response = openai.chat.completions.create(
-            model="gpt-4o-mini",
             messages=messages,
             temperature=0,
             max_tokens=150,
@@ -97,21 +82,11 @@ def parse_query(nl_query):
     except Exception as e:
         return f"Error generating SQL query: {e}"
-# =========================
-# Plotting Utilities
-# =========================
 def detect_plot_intent(nl_query):
-    """
-    Detects if the user's query involves plotting.
-    """
     plot_keywords = ['plot', 'graph', 'chart', 'distribution', 'visualize', 'trend', 'histogram', 'bar', 'line', 'scatter', 'pie']
     return any(keyword in nl_query.lower() for keyword in plot_keywords)
 def generate_plot(nl_query, result_df):
-    """
-    Generates a Plotly figure based on the result DataFrame and the user's intent.
-    """
     if not detect_plot_intent(nl_query):
         return None, ""
@@ -119,7 +94,6 @@ def generate_plot(nl_query, result_df):
     if len(columns) < 2:
         return None, "Not enough data to generate a plot."
-    # Simple heuristic to choose plot type based on keywords
     if 'bar' in nl_query.lower():
         fig = px.bar(result_df, x=columns[0], y=columns[1], title='Bar Chart')
     elif 'line' in nl_query.lower():
@@ -129,7 +103,6 @@ def generate_plot(nl_query, result_df):
     elif 'pie' in nl_query.lower():
         fig = px.pie(result_df, names=columns[0], values=columns[1], title='Pie Chart')
     else:
-        # Default to bar chart
         fig = px.bar(result_df, x=columns[0], y=columns[1], title='Bar Chart')
     fig.update_layout(title_x=0.5)
@@ -143,7 +116,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.Markdown("""
     <h1 style="text-align: center; font-size: 2.5em; color: #333333;">Parquet Data Explorer</h1>
     <p style="text-align: center; color: #666666;">Query and visualize your data effortlessly.</p>
-    """, elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=1):
@@ -152,12 +125,13 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
                 placeholder='e.g., "What are the total awards over 1M in California?"',
                 lines=1
             )
-            # Hidden schema display that appears on focus
-            schema_display = gr.JSON(
-                label="Dataset Schema",
-                value=get_schema(),
-                visible=False
-            )
             error_out = gr.Markdown(
                 value="",
                 visible=False
@@ -170,24 +144,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
                 label="Visualization"
             )
     gr.Markdown("""
-    <style>
-    /* Center the content */
-    .gradio-container {
-        max-width: 1000px;
-        margin: auto;
-    }
-    /* Style the main title */
-    #main-title h1 {
-        font-weight: bold;
-    }
-    /* Style the error alert */
-    .gradio-container .alert-error {
-        background-color: #ffe6e6;
-        color: #cc0000;
-        border: 1px solid #cc0000;
-    }
-    </style>
     """)
     # =========================
@@ -195,9 +157,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     # =========================
     def on_query_submit(nl_query):
-        """
-        Handles the submission of a natural language query.
-        """
         if not nl_query.strip():
             return gr.update(visible=True, value="Please enter a query."), None, None
@@ -215,15 +174,18 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
         return gr.update(visible=False, value=""), result_df, fig
-    def on_input_focus():
-        """
-        Shows the dataset schema when the input box is focused.
-        """
-        return gr.update(visible=True)
-    # =========================
-    # Assign Event Handlers
-    # =========================
     query.submit(
         fn=on_query_submit,
@@ -231,31 +193,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
         outputs=[error_out, results_out, plot_out]
     )
-    query.focus(
-        fn=lambda: gr.update(visible=True),
-        inputs=None,
-        outputs=schema_display
-    )
-# =========================
-# Helper Functions
-# =========================
-def execute_query(sql_query):
-    """
-    Executes the SQL query and returns the results.
-    """
-    try:
-        con = duckdb.connect()
-        con.execute("PRAGMA threads=4")  # Optimize for performance
-        con.execute("DROP VIEW IF EXISTS contract_data")
-        con.execute(f"CREATE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
-        result_df = con.execute(sql_query).fetchdf()
-        con.close()
-        return result_df, ""
-    except Exception as e:
-        return None, f"Error executing query: {e}"
 # =========================
 # Launch the Gradio App
 # =========================

 # Configuration and Setup
 # =========================
 openai.api_key = os.getenv("OPENAI_API_KEY")
 dataset_path = 'sample_contract_df.parquet'  # Update with your Parquet file path
 schema = [
     {"column_name": "department_ind_agency", "column_type": "VARCHAR"},
     {"column_name": "cgac", "column_type": "BIGINT"},
 COLUMN_TYPES = {col['column_name']: col['column_type'] for col in get_schema()}
 def load_dataset_schema():
     con = duckdb.connect()
     try:
         con.execute("DROP VIEW IF EXISTS contract_data")
     finally:
         con.close()
 load_dataset_schema()
 # =========================
 # =========================
 def parse_query(nl_query):
     messages = [
         {"role": "system", "content": "You are an assistant that converts natural language queries into SQL queries for the 'contract_data' table."},
         {"role": "user", "content": f"Schema:\n{json.dumps(schema, indent=2)}\n\nQuery:\n\"{nl_query}\"\n\nSQL:"}
     try:
         response = openai.chat.completions.create(
+            model="gpt-4",
             messages=messages,
             temperature=0,
             max_tokens=150,
     except Exception as e:
         return f"Error generating SQL query: {e}"
 def detect_plot_intent(nl_query):
     """Report whether the query text mentions any plotting-related keyword.

     The check is a case-insensitive substring test, so a keyword embedded in
     a longer word (for example 'line' inside 'deadline') also counts.

     Args:
         nl_query: The natural-language query string.

     Returns:
         True if at least one keyword occurs in the lowercased query,
         otherwise False.
     """
     lowered = nl_query.lower()
     for term in ('plot', 'graph', 'chart', 'distribution', 'visualize', 'trend',
                  'histogram', 'bar', 'line', 'scatter', 'pie'):
         if term in lowered:
             return True
     return False
 def generate_plot(nl_query, result_df):
     if not detect_plot_intent(nl_query):
         return None, ""
     if len(columns) < 2:
         return None, "Not enough data to generate a plot."
     if 'bar' in nl_query.lower():
         fig = px.bar(result_df, x=columns[0], y=columns[1], title='Bar Chart')
     elif 'line' in nl_query.lower():
     elif 'pie' in nl_query.lower():
         fig = px.pie(result_df, names=columns[0], values=columns[1], title='Pie Chart')
     else:
         fig = px.bar(result_df, x=columns[0], y=columns[1], title='Bar Chart')
     fig.update_layout(title_x=0.5)
     gr.Markdown("""
     <h1 style="text-align: center; font-size: 2.5em; color: #333333;">Parquet Data Explorer</h1>
     <p style="text-align: center; color: #666666;">Query and visualize your data effortlessly.</p>
+    """)
     with gr.Row():
         with gr.Column(scale=1):
                 placeholder='e.g., "What are the total awards over 1M in California?"',
                 lines=1
             )
+            gr.Markdown("### Example Queries")
+            with gr.Row():
+                btn_example1 = gr.Button("Show awards over 1M in CA")
+                btn_example2 = gr.Button("List all contracts in New York")
+                btn_example3 = gr.Button("Show top 5 departments by award amount")
+                btn_example4 = gr.Button("Execute: SELECT * from contract_data LIMIT 10;")
             error_out = gr.Markdown(
                 value="",
                 visible=False
                 label="Visualization"
             )
+    # Instructions
     gr.Markdown("""
+    ## Instructions
+    1. **Enter a query**: Type in a natural language query in the textbox.
+    2. **Use Example Queries**: Click on any example query button above.
+    3. **Generate SQL and Plot**: Click "Execute" to see results and visualization.
     """)
     # =========================
     # =========================
     def on_query_submit(nl_query):
         if not nl_query.strip():
             return gr.update(visible=True, value="Please enter a query."), None, None
         return gr.update(visible=False, value=""), result_df, fig
+    def on_example_click(query_text):
+        sql_query = parse_query(query_text)
+        result_df, error_msg = execute_query(sql_query)
+        if error_msg:
+            return sql_query, None, None, error_msg
+        fig, plot_error = generate_plot(query_text, result_df)
+        return sql_query, result_df, fig, plot_error if plot_error else ""
+    btn_example1.click(lambda: on_example_click("Show awards over 1M in CA"), outputs=[results_out, plot_out, error_out])
+    btn_example2.click(lambda: on_example_click("List all contracts in New York"), outputs=[results_out, plot_out, error_out])
+    btn_example3.click(lambda: on_example_click("Show top 5 departments by award amount"), outputs=[results_out, plot_out, error_out])
+    btn_example4.click(lambda: on_example_click("SELECT * from contract_data LIMIT 10;"), outputs=[results_out, plot_out, error_out])
     query.submit(
         fn=on_query_submit,
         outputs=[error_out, results_out, plot_out]
     )
 # =========================
 # Launch the Gradio App
 # =========================