Spaces:

LeonceNsh
/

baho

Sleeping

App Files Community

LeonceNsh committed on Nov 8, 2024

Commit

1ee39a9

verified ·

1 Parent(s): 8c62fa6

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -42

app.py CHANGED Viewed

@@ -1,18 +1,18 @@
 import json
 import openai
-import gradio as gr
 import duckdb
 from functools import lru_cache
-import os
 # =========================
 # Configuration and Setup
 # =========================
 openai.api_key = os.getenv("OPENAI_API_KEY")
-dataset_path = 'hsas.parquet'  # Update with your Parquet file path
-schema = [
     {"column_name": "total_charges", "column_type": "BIGINT"},
     {"column_name": "medicare_prov_num", "column_type": "BIGINT"},
     {"column_name": "zip_cd_of_residence", "column_type": "VARCHAR"},
@@ -22,7 +22,7 @@ schema = [
 @lru_cache(maxsize=1)
 def get_schema():
-    return schema
 COLUMN_TYPES = {col['column_name']: col['column_type'] for col in get_schema()}
@@ -32,13 +32,22 @@ COLUMN_TYPES = {col['column_name']: col['column_type'] for col in get_schema()}
 def parse_query(nl_query):
     messages = [
-        {"role": "system", "content": "You are an assistant that converts natural language queries into SQL queries for the 'hsa_data' table."},
-        {"role": "user", "content": f"Schema:\n{json.dumps(schema, indent=2)}\n\nQuery:\n\"{nl_query}\"\n\nSQL:"}
     ]
     try:
         response = openai.chat.completions.create(
-            model="gpt-4",
             messages=messages,
             temperature=0,
             max_tokens=150,
@@ -54,8 +63,8 @@ def parse_query(nl_query):
 def execute_sql_query(sql_query):
     try:
-        con = duckdb.connect()
-        con.execute(f"CREATE OR REPLACE VIEW hsa_data AS SELECT * FROM '{dataset_path}'")
         result_df = con.execute(sql_query).fetchdf()
         con.close()
         return result_df, ""
@@ -68,40 +77,41 @@ def execute_sql_query(sql_query):
 with gr.Blocks() as demo:
     gr.Markdown("""
-    #  Text to SQL healthcare AI data Analyst agent to analyze U.S prescription data from the Center of Medicare and Medicaid
-    #  Replicate papers from academic journals on prescription drug prices
     ## Instructions
-    ### 1. **Describe the data you want**: e.g., `Show total days of care by zip`
-    ### 2. **Use Example Queries**: Click on any example query button below to execute.
-    ### 3. **Generate SQL**: Or, enter your own query and click "Generate SQL" to see the SQL query.
     ## Example Queries
     """)
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### Click on an example query:")
-            with gr.Row():
-                btn_example1 = gr.Button("Calculate the average total_charges by zip_cd_of_residence")
-                btn_example2 = gr.Button("For each zip_cd_of_residence, calculate the sum of total_charges")
-                btn_example3 = gr.Button("SELECT * from hsa_data where total_days_of_care > 40 LIMIT 30;")
             query_input = gr.Textbox(
                 label="Your Query",
-                placeholder='e.g., "What are the total awards over 1M in California?"',
-                lines=1
             )
             btn_generate_sql = gr.Button("Generate SQL Query")
             sql_query_out = gr.Code(label="Generated SQL Query", language="sql")
             btn_execute_query = gr.Button("Execute Query")
-            error_out = gr.Markdown("", visible=False)
         with gr.Column(scale=2):
-            results_out = gr.Dataframe(label="Query Results", interactive=False)
     with gr.Tab("Dataset Schema"):
         gr.Markdown("### Dataset Schema")
@@ -113,22 +123,27 @@ with gr.Blocks() as demo:
     def generate_sql(nl_query):
         sql_query, error = parse_query(nl_query)
         return sql_query, error
     def execute_query(sql_query):
         result_df, error = execute_sql_query(sql_query)
         return result_df, error
     def handle_example_click(example_query):
         if example_query.strip().upper().startswith("SELECT"):
             sql_query = example_query
             result_df, error = execute_sql_query(sql_query)
-            return sql_query, "", result_df, error
         else:
             sql_query, error = parse_query(example_query)
             if error:
                 return sql_query, error, None, error
             result_df, exec_error = execute_sql_query(sql_query)
             return sql_query, exec_error, result_df, exec_error
     # =========================
@@ -138,27 +153,21 @@ with gr.Blocks() as demo:
     btn_generate_sql.click(
         fn=generate_sql,
         inputs=query_input,
-        outputs=[sql_query_out, error_out]
     )
     btn_execute_query.click(
         fn=execute_query,
         inputs=sql_query_out,
-        outputs=[results_out, error_out]
     )
-    btn_example1.click(
-        fn=lambda: handle_example_click("Calculate the average total_charges by zip_cd_of_residence"),
-        outputs=[sql_query_out, error_out, results_out, error_out]
-    )
-    btn_example2.click(
-        fn=lambda: handle_example_click("For each zip_cd_of_residence, calculate the sum of total_charges"),
-        outputs=[sql_query_out, error_out, results_out, error_out]
-    )
-    btn_example3.click(
-        fn=lambda: handle_example_click("SELECT * from hsa_data where total_days_of_care > 40 LIMIT 30;"),
-        outputs=[sql_query_out, error_out, results_out, error_out]
-    )
 # Launch the Gradio App
-demo.launch()

+import os
 import json
 import openai
 import duckdb
+import gradio as gr
 from functools import lru_cache
 # =========================
 # Configuration and Setup
 # =========================
 openai.api_key = os.getenv("OPENAI_API_KEY")
+DATASET_PATH = 'hsas.parquet'  # Update with your Parquet file path
+SCHEMA = [
     {"column_name": "total_charges", "column_type": "BIGINT"},
     {"column_name": "medicare_prov_num", "column_type": "BIGINT"},
     {"column_name": "zip_cd_of_residence", "column_type": "VARCHAR"},
 @lru_cache(maxsize=1)
 def get_schema():
+    return SCHEMA
 COLUMN_TYPES = {col['column_name']: col['column_type'] for col in get_schema()}
 def parse_query(nl_query):
     messages = [
+        {
+            "role": "system",
+            "content": (
+                "You are an assistant that converts natural language queries into SQL queries for the 'hsa_data' table. "
+                "Ensure the SQL query is syntactically correct and uses only the columns provided in the schema."
+            ),
+        },
+        {
+            "role": "user",
+            "content": f"Schema:\n{json.dumps(get_schema(), indent=2)}\n\nQuery:\n\"{nl_query}\"\n\nSQL:",
+        },
     ]
     try:
         response = openai.chat.completions.create(
+            model="gpt-4o-mini",
             messages=messages,
             temperature=0,
             max_tokens=150,
 def execute_sql_query(sql_query):
     try:
+        con = duckdb.connect(database=':memory:')
+        con.execute(f"CREATE OR REPLACE VIEW hsa_data AS SELECT * FROM '{DATASET_PATH}'")
         result_df = con.execute(sql_query).fetchdf()
         con.close()
         return result_df, ""
 with gr.Blocks() as demo:
     gr.Markdown("""
+    # Text-to-SQL Healthcare Data Analyst Agent
+    Analyze U.S. prescription data from the Center of Medicare and Medicaid.
     ## Instructions
+    1. **Describe the data you want**: e.g., `Show total days of care by zip`
+    2. **Use Example Queries**: Click on any example query button below to execute.
+    3. **Generate SQL**: Or, enter your own query and click "Generate SQL".
     ## Example Queries
     """)
     with gr.Row():
         with gr.Column(scale=1):
+            gr.Markdown("### Example Queries:")
+            query_buttons = [
+                "Calculate the average total_charges by zip_cd_of_residence",
+                "For each zip_cd_of_residence, calculate the sum of total_charges",
+                "SELECT * FROM hsa_data WHERE total_days_of_care > 40 LIMIT 30;",
+            ]
+            btn_queries = [gr.Button(q) for q in query_buttons]
             query_input = gr.Textbox(
                 label="Your Query",
+                placeholder='e.g., "Show total charges over 1M by state"',
+                lines=1,
             )
             btn_generate_sql = gr.Button("Generate SQL Query")
             sql_query_out = gr.Code(label="Generated SQL Query", language="sql")
             btn_execute_query = gr.Button("Execute Query")
+            error_out = gr.Markdown(visible=False)
         with gr.Column(scale=2):
+            results_out = gr.Dataframe(label="Query Results")
     with gr.Tab("Dataset Schema"):
         gr.Markdown("### Dataset Schema")
     def generate_sql(nl_query):
         sql_query, error = parse_query(nl_query)
+        error_out.update(visible=bool(error))
         return sql_query, error
     def execute_query(sql_query):
         result_df, error = execute_sql_query(sql_query)
+        error_out.update(visible=bool(error))
         return result_df, error
     def handle_example_click(example_query):
         if example_query.strip().upper().startswith("SELECT"):
             sql_query = example_query
             result_df, error = execute_sql_query(sql_query)
+            error_out.update(visible=bool(error))
+            return sql_query, "", result_df, ""
         else:
             sql_query, error = parse_query(example_query)
             if error:
+                error_out.update(visible=True)
                 return sql_query, error, None, error
             result_df, exec_error = execute_sql_query(sql_query)
+            error_out.update(visible=bool(exec_error))
             return sql_query, exec_error, result_df, exec_error
     # =========================
     btn_generate_sql.click(
         fn=generate_sql,
         inputs=query_input,
+        outputs=[sql_query_out, error_out],
     )
     btn_execute_query.click(
         fn=execute_query,
         inputs=sql_query_out,
+        outputs=[results_out, error_out],
     )
+    for btn, query in zip(btn_queries, query_buttons):
+        btn.click(
+            fn=lambda q=query: handle_example_click(q),
+            outputs=[sql_query_out, error_out, results_out, error_out],
+        )
 # Launch the Gradio App
+if __name__ == "__main__":
+    demo.launch()