Spaces:

LeonceNsh
/

usgov-contracts-rag

Sleeping

App Files Files Community

LeonceNsh committed on Nov 1, 2024

Commit

ae610aa

verified ·

1 Parent(s): ff81feb

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -32

app.py CHANGED Viewed

@@ -7,6 +7,10 @@ import plotly.express as px
 import openai
 import os
 # Load the Parquet dataset path
 dataset_path = 'sample_contract_df.parquet'  # Update with your Parquet file path
@@ -45,9 +49,14 @@ def get_schema():
 # Map column names to their types
 COLUMN_TYPES = {col['column_name']: col['column_type'] for col in get_schema()}
-# Function to load the dataset schema into DuckDB
-@lru_cache(maxsize=1)
 def load_dataset_schema():
     con = duckdb.connect()
     try:
         # Drop the view if it exists to avoid errors
@@ -60,45 +69,51 @@ def load_dataset_schema():
     finally:
         con.close()
-# Advanced Natural Language to SQL Parser using OpenAI's ChatCompletion
 def parse_query(nl_query):
     """
-    Converts a natural language query into a SQL query using OpenAI's GPT-3.5-turbo.
     """
-    openai.api_key = os.getenv('OPENAI_API_KEY')  # It's recommended to set your API key as an environment variable
-    system_prompt = "You are an assistant that converts natural language queries into SQL queries for a DuckDB database named 'contract_data'. Use the provided schema to form accurate SQL queries."
-    user_prompt = f"""
     Schema:
     {json.dumps(schema, indent=2)}
-    Convert the following natural language query into a SQL query:
     "{nl_query}"
     """
     try:
-        response = openai.ChatCompletion.create(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_prompt}
-            ],
-            temperature=0,
             max_tokens=150,
             top_p=1,
-            n=1,
-            stop=None
         )
-        sql_query = response.choices[0].message['content'].strip()
         return sql_query
     except Exception as e:
         return f"Error generating SQL query: {e}"
-# Function to detect if the user wants a plot
 def detect_plot_intent(nl_query):
     """
-    Detects if the user's query involves plotting.
     """
     plot_keywords = [
         'plot', 'graph', 'chart', 'distribution', 'visualize', 'histogram',
@@ -109,7 +124,6 @@ def detect_plot_intent(nl_query):
             return True
     return False
-# Generate SQL and Plot Code based on user query
 def generate_sql_and_plot_code(query):
     """
     Generates SQL query and plotting code based on the natural language input.
@@ -127,10 +141,9 @@ fig.update_layout(title_x=0.5)
 """
     return sql_query, plot_code
-# Execute the SQL query and return results or error
 def execute_query(sql_query):
     """
-    Executes the SQL query and returns the results as a DataFrame.
     """
     if sql_query.startswith("Error"):
         return None, sql_query  # Pass the error message forward
@@ -146,7 +159,6 @@ def execute_query(sql_query):
         # In case of error, return None and error message
         return None, f"Error executing query: {e}"
-# Generate and display plot
 def generate_plot(plot_code, result_df):
     """
     Executes the plot code to generate a plot from the result DataFrame.
@@ -174,16 +186,25 @@ def generate_plot(plot_code, result_df):
     except Exception as e:
         return None, f"Error generating plot: {e}"
-# Cache the schema JSON for display
 @lru_cache(maxsize=1)
 def get_schema_json():
     return json.dumps(get_schema(), indent=2)
-# Initialize the dataset schema
 if not load_dataset_schema():
     raise Exception("Failed to load dataset schema. Please check the dataset path and format.")
-# Gradio app UI
 with gr.Blocks() as demo:
     gr.Markdown("""
     # Parquet SQL Query and Plotting App
@@ -202,7 +223,7 @@ with gr.Blocks() as demo:
     2. **Generate SQL**: Click "Generate SQL" to see the SQL query that will be executed.
     3. **Execute Query**: Click "Execute Query" to run the query and view the results.
     4. **View Plot**: If your query involves plotting, the plot will be displayed.
-    5. **View Dataset Schema**: to understand available columns and their types.
     ## Example Queries
@@ -236,12 +257,21 @@ with gr.Blocks() as demo:
             gr.Markdown("### Dataset Schema")
             schema_display = gr.JSON(label="Schema", value=json.loads(get_schema_json()))
-    # Set up click events
     def on_generate_click(nl_query):
         sql_query, plot_code = generate_sql_and_plot_code(nl_query)
         return sql_query, plot_code
     def on_execute_click(sql_query, plot_code):
         result_df, error_msg = execute_query(sql_query)
         if error_msg:
             return None, None, error_msg
@@ -265,5 +295,8 @@ with gr.Blocks() as demo:
         outputs=[results_out, plot_out, error_out],
     )
-# Launch the app
-demo.launch(share=True)

 import openai
 import os
+# =========================
+# Configuration and Setup
+# =========================
 # Load the Parquet dataset path
 dataset_path = 'sample_contract_df.parquet'  # Update with your Parquet file path
 # Map column names to their types
 COLUMN_TYPES = {col['column_name']: col['column_type'] for col in get_schema()}
+# =========================
+# Database Interaction
+# =========================
 def load_dataset_schema():
+    """
+    Loads the dataset schema into DuckDB by creating a view.
+    """
     con = duckdb.connect()
     try:
         # Drop the view if it exists to avoid errors
     finally:
         con.close()
+# =========================
+# OpenAI API Integration
+# =========================
 def parse_query(nl_query):
     """
+    Converts a natural language query into a SQL query using OpenAI's GPT-3 Completion API.
     """
+    openai.api_key = os.getenv('OPENAI_API_KEY')  # Ensure your API key is set as an environment variable
+    prompt = f"""
+    You are an assistant that converts natural language queries into SQL queries for a DuckDB database named 'contract_data'. Use the provided schema to form accurate SQL queries.
     Schema:
     {json.dumps(schema, indent=2)}
+    Natural Language Query:
     "{nl_query}"
+    SQL Query:
     """
     try:
+        response = openai.Completion.create(
+            engine="text-davinci-003",  # You can choose a different engine if preferred
+            prompt=prompt,
+            temperature=0,  # Set to 0 for deterministic output
             max_tokens=150,
             top_p=1,
+            frequency_penalty=0,
+            presence_penalty=0,
+            stop=["\n\n"]  # Stop generation after two newlines
         )
+        sql_query = response.choices[0].text.strip()
         return sql_query
     except Exception as e:
         return f"Error generating SQL query: {e}"
+# =========================
+# Plotting Utilities
+# =========================
 def detect_plot_intent(nl_query):
     """
+    Detects if the user's query involves plotting based on the presence of specific keywords.
     """
     plot_keywords = [
         'plot', 'graph', 'chart', 'distribution', 'visualize', 'histogram',
             return True
     return False
 def generate_sql_and_plot_code(query):
     """
     Generates SQL query and plotting code based on the natural language input.
 """
     return sql_query, plot_code
 def execute_query(sql_query):
     """
+    Executes the SQL query and returns results or an error message.
     """
     if sql_query.startswith("Error"):
         return None, sql_query  # Pass the error message forward
         # In case of error, return None and error message
         return None, f"Error executing query: {e}"
 def generate_plot(plot_code, result_df):
     """
     Executes the plot code to generate a plot from the result DataFrame.
     except Exception as e:
         return None, f"Error generating plot: {e}"
+# =========================
+# Schema Display
+# =========================
 @lru_cache(maxsize=1)
 def get_schema_json():
     return json.dumps(get_schema(), indent=2)
+# =========================
+# Initialize Dataset Schema
+# =========================
 if not load_dataset_schema():
     raise Exception("Failed to load dataset schema. Please check the dataset path and format.")
+# =========================
+# Gradio Application UI
+# =========================
 with gr.Blocks() as demo:
     gr.Markdown("""
     # Parquet SQL Query and Plotting App
     2. **Generate SQL**: Click "Generate SQL" to see the SQL query that will be executed.
     3. **Execute Query**: Click "Execute Query" to run the query and view the results.
     4. **View Plot**: If your query involves plotting, the plot will be displayed.
+    5. **View Dataset Schema**: Check the "Dataset Schema" tab to understand available columns and their types.
     ## Example Queries
             gr.Markdown("### Dataset Schema")
             schema_display = gr.JSON(label="Schema", value=json.loads(get_schema_json()))
+    # =========================
+    # Click Event Handlers
+    # =========================
     def on_generate_click(nl_query):
+        """
+        Handles the "Generate SQL" button click event.
+        """
         sql_query, plot_code = generate_sql_and_plot_code(nl_query)
         return sql_query, plot_code
     def on_execute_click(sql_query, plot_code):
+        """
+        Handles the "Execute Query" button click event.
+        """
         result_df, error_msg = execute_query(sql_query)
         if error_msg:
             return None, None, error_msg
         outputs=[results_out, plot_out, error_out],
     )
+# =========================
+# Launch the Gradio App
+# =========================
+demo.launch()