Spaces:

LeonceNsh
/

usgov-contracts-rag

Sleeping

App Files Files Community

LeonceNsh committed on Nov 1, 2024

Commit

1fa796c

verified ·

1 Parent(s): e3824aa

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -16

app.py CHANGED Viewed

@@ -2,10 +2,10 @@ import json
 import gradio as gr
 import duckdb
 from functools import lru_cache
-from transformers import pipeline
 import pandas as pd
 import plotly.express as px
 import openai
 # Load the Parquet dataset path
 dataset_path = 'sample_contract_df.parquet'  # Update with your Parquet file path
@@ -60,31 +60,37 @@ def load_dataset_schema():
     finally:
         con.close()
-# Advanced Natural Language to SQL Parser using OpenAI's GPT-3
 def parse_query(nl_query):
     """
-    Converts a natural language query into SQL query using OpenAI GPT-3.
     """
-    openai.api_key = 'YOUR_OPENAI_API_KEY'  # Replace with your OpenAI API key
-    prompt = f"""
-    Convert the following natural language query into a SQL query for a DuckDB database. Use 'contract_data' as the table name.
     Schema:
     {json.dumps(schema, indent=2)}
-    Query:
     "{nl_query}"
     """
     try:
-        response = openai.Completion.create(
-            engine="text-davinci-003",
-            prompt=prompt,
             temperature=0,
             max_tokens=150,
             top_p=1,
             n=1,
             stop=None
         )
-        sql_query = response.choices[0].text.strip()
         return sql_query
     except Exception as e:
         return f"Error generating SQL query: {e}"
@@ -94,7 +100,10 @@ def detect_plot_intent(nl_query):
     """
     Detects if the user's query involves plotting.
     """
-    plot_keywords = ['plot', 'graph', 'chart', 'distribution', 'visualize', 'histogram', 'bar chart', 'line chart', 'scatter plot', 'pie chart']
     for keyword in plot_keywords:
         if keyword in nl_query.lower():
             return True
@@ -108,12 +117,13 @@ def generate_sql_and_plot_code(query):
     is_plot = detect_plot_intent(query)
     sql_query = parse_query(query)
     plot_code = ""
-    if is_plot:
         # Generate plot code based on the query
         # For simplicity, we'll generate a basic plot code
         plot_code = """
 import plotly.express as px
-fig = px.bar(result_df, x='x_column', y='y_column')
 """
     return sql_query, plot_code
@@ -122,6 +132,9 @@ def execute_query(sql_query):
     """
     Executes the SQL query and returns the results as a DataFrame.
     """
     try:
         con = duckdb.connect()
         # Ensure the view is created
@@ -151,8 +164,8 @@ def generate_plot(plot_code, result_df):
         plot_code = plot_code.replace('y_column', columns[1])
         # Execute the plot code
-        local_vars = {'result_df': result_df}
-        exec(plot_code, {'px': px}, local_vars)
         fig = local_vars.get('fig', None)
         if fig:
             return fig, ""

 import gradio as gr
 import duckdb
 from functools import lru_cache
 import pandas as pd
 import plotly.express as px
 import openai
+import os
 # Load the Parquet dataset path
 dataset_path = 'sample_contract_df.parquet'  # Update with your Parquet file path
     finally:
         con.close()
+# Advanced Natural Language to SQL Parser using OpenAI's ChatCompletion
 def parse_query(nl_query):
     """
+    Converts a natural language query into a SQL query using OpenAI's GPT-3.5-turbo.
     """
+    openai.api_key = os.getenv('OPENAI_API_KEY')  # It's recommended to set your API key as an environment variable
+    system_prompt = "You are an assistant that converts natural language queries into SQL queries for a DuckDB database named 'contract_data'. Use the provided schema to form accurate SQL queries."
+    user_prompt = f"""
     Schema:
     {json.dumps(schema, indent=2)}
+    Convert the following natural language query into a SQL query:
     "{nl_query}"
     """
     try:
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ],
             temperature=0,
             max_tokens=150,
             top_p=1,
             n=1,
             stop=None
         )
+        sql_query = response.choices[0].message['content'].strip()
         return sql_query
     except Exception as e:
         return f"Error generating SQL query: {e}"
     """
     Detects if the user's query involves plotting.
     """
+    plot_keywords = [
+        'plot', 'graph', 'chart', 'distribution', 'visualize', 'histogram',
+        'bar chart', 'line chart', 'scatter plot', 'pie chart'
+    ]
     for keyword in plot_keywords:
         if keyword in nl_query.lower():
             return True
     is_plot = detect_plot_intent(query)
     sql_query = parse_query(query)
     plot_code = ""
+    if is_plot and not sql_query.startswith("Error"):
         # Generate plot code based on the query
         # For simplicity, we'll generate a basic plot code
         plot_code = """
 import plotly.express as px
+fig = px.bar(result_df, x='x_column', y='y_column', title='Generated Plot')
+fig.update_layout(title_x=0.5)
 """
     return sql_query, plot_code
     """
     Executes the SQL query and returns the results as a DataFrame.
     """
+    if sql_query.startswith("Error"):
+        return None, sql_query  # Pass the error message forward
     try:
         con = duckdb.connect()
         # Ensure the view is created
         plot_code = plot_code.replace('y_column', columns[1])
         # Execute the plot code
+        local_vars = {'result_df': result_df, 'px': px}
+        exec(plot_code, {}, local_vars)
         fig = local_vars.get('fig', None)
         if fig:
             return fig, ""