File size: 2,588 Bytes
06f01b3
b474ae1
06f01b3
b474ae1
5b4c268
b474ae1
7161a9c
5b4c268
b474ae1
 
 
 
 
 
 
5b4c268
b474ae1
 
06f01b3
 
 
b474ae1
06f01b3
 
 
b474ae1
06f01b3
 
 
 
 
 
 
b474ae1
 
 
 
 
 
 
 
 
 
 
5b4c268
06f01b3
 
 
b474ae1
 
06f01b3
b474ae1
 
 
 
 
 
 
 
 
 
 
 
8cb3a33
b474ae1
 
8cb3a33
b474ae1
 
 
 
 
 
 
 
 
 
 
 
 
5b4c268
06f01b3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import json
import gradio as gr
import duckdb

# Path of the Parquet file that the functions below expose to DuckDB as the
# `contract_data` view.
dataset_path = 'sample_contract_df.parquet'  # Update with your Parquet file's path

# Load the dataset with DuckDB and get schema information
def load_dataset():
    """Return the schema of the Parquet dataset as a list of column records.

    Opens a fresh DuckDB connection, exposes the file at ``dataset_path`` as
    the view ``contract_data`` and runs ``DESCRIBE`` on that view.

    Returns:
        list[dict]: One dict per row of the ``DESCRIBE`` result, as produced
        by ``DataFrame.to_dict(orient="records")``.

    Raises:
        Any error DuckDB raises while creating or describing the view is
        propagated unchanged (e.g. when the file cannot be read).
    """
    con = duckdb.connect()
    try:
        con.execute(f"CREATE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
        schema = con.execute("DESCRIBE contract_data").fetchdf()
    finally:
        # Release the connection even when one of the statements above fails.
        con.close()
    return schema.to_dict(orient="records")

# Generate SQL based on schema and user query
def generate_sql_query(features, query):
    """Build a ``SELECT`` statement over ``contract_data`` from a filter condition.

    Args:
        features: Schema description of the dataset. Kept for interface
            compatibility; it does not influence the generated statement.
        query: Text placed verbatim after ``WHERE``, e.g. ``amount > 1000``.
            ``None`` or a blank string means "no filter".

    Returns:
        str: ``SELECT * FROM contract_data WHERE <query>``, or the unfiltered
        ``SELECT * FROM contract_data`` when no condition was supplied.
    """
    base_select = "SELECT * FROM contract_data"

    # An empty condition would otherwise yield the invalid "... WHERE ".
    if query is None or not str(query).strip():
        return base_select

    # NOTE(security): the condition is interpolated as-is, so whoever supplies
    # `query` controls the SQL text. Acceptable for a local exploration tool;
    # do not expose this to untrusted users without validating the input.
    return f"{base_select} WHERE {query}"

# Execute the SQL query and display results
def execute_query(sql_query):
    """Run *sql_query* against the Parquet dataset and return a Markdown table.

    A fresh DuckDB connection is opened for every call and the file at
    ``dataset_path`` is exposed as the view ``contract_data`` before the
    statement runs.

    Args:
        sql_query: SQL text to execute. ``None`` or a blank string is not sent
            to DuckDB; a short notice is returned instead.

    Returns:
        str: The result set rendered with ``DataFrame.to_markdown()``, or a
        notice when there was nothing to execute.

    Raises:
        Any error DuckDB raises while running the statement is propagated
        unchanged.
    """
    if sql_query is None or not str(sql_query).strip():
        return "_No SQL query to execute. Generate or enter a query first._"

    # NOTE(security): the statement is executed verbatim, so the caller has
    # full SQL access through this connection. Restrict who can reach this
    # function if the app is ever exposed beyond trusted users.
    con = duckdb.connect()
    try:
        con.execute(f"CREATE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
        result_df = con.execute(sql_query).fetchdf()
    finally:
        # Release the connection even when the statement is invalid.
        con.close()
    return result_df.to_markdown()  # Convert result to markdown for display

# Gradio app UI
with gr.Blocks() as demo:
    gr.Markdown("""
    # Local Parquet SQL Query App
    Query and explore the data stored in `sample_contract_df.parquet` using DuckDB and SQL queries.
    """)

    # Read the schema once at build time and show it as pretty-printed JSON.
    schema = load_dataset()
    features = json.dumps(schema, indent=2)
    gr.Markdown(f"### Dataset Schema:\n\n```json\n{features}\n```")

    # Event listeners only accept Gradio components as inputs, so the schema
    # text is wrapped in a State component instead of being passed as a raw str.
    features_state = gr.State(features)

    # Condition typed by the user, the SQL derived from it, and the result table.
    query = gr.Textbox(label="Natural Language Query", placeholder="Enter a condition, e.g., 'amount > 1000'")
    sql_out = gr.Code(label="Generated SQL Query", language="sql")
    results_out = gr.Markdown(label="Query Results")

    # Buttons to generate and execute SQL
    with gr.Row():
        btn_generate = gr.Button("Generate SQL")
        btn_execute = gr.Button("Execute Query")

    # Generate SQL on button click: (schema text, condition) -> SQL code box.
    btn_generate.click(
        fn=generate_sql_query,
        inputs=[features_state, query],
        outputs=sql_out,
    )

    # Execute SQL on button click: SQL code box -> Markdown result table.
    btn_execute.click(
        fn=execute_query,
        inputs=sql_out,
        outputs=results_out,
    )

# Launch the app
demo.launch()