Spaces:
Sleeping
Sleeping
import os | |
import json | |
import gradio as gr | |
import duckdb | |
# Location of the Parquet file that backs the app; point this at your own file.
dataset_path = "sample_contract_df.parquet"
# Load the dataset with DuckDB and get schema information | |
def load_dataset():
    """Return the schema of the Parquet dataset as a list of row dicts.

    Opens a fresh DuckDB connection, exposes the file at ``dataset_path`` as
    the view ``contract_data``, and runs ``DESCRIBE`` on it.

    Returns:
        The ``DESCRIBE contract_data`` result converted with
        ``to_dict(orient="records")``, i.e. one dict per described column.

    Raises:
        Whatever DuckDB raises if the file cannot be read; the connection is
        closed before the exception propagates.
    """
    con = duckdb.connect()
    try:
        con.execute(f"CREATE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
        schema = con.execute("DESCRIBE contract_data").fetchdf()
    finally:
        # Close even when the view creation or DESCRIBE fails, so a bad path
        # does not leak the connection.
        con.close()
    return schema.to_dict(orient="records")
# Generate SQL based on schema and user query | |
def generate_sql_query(features, query):
    """Build a DuckDB ``SELECT`` over ``contract_data`` from a filter condition.

    Args:
        features: Text describing the dataset schema. Not used when building
            the SQL; kept so the signature stays compatible with callers.
        query: A SQL boolean condition such as ``amount > 1000``. Surrounding
            whitespace is ignored. ``None`` or a blank string means
            "no filter".

    Returns:
        ``SELECT * FROM contract_data WHERE <condition>``, or the bare
        ``SELECT * FROM contract_data`` when no condition was supplied (a
        dangling ``WHERE`` would be invalid SQL).
    """
    condition = (query or "").strip()
    if not condition:
        return "SELECT * FROM contract_data"
    # NOTE(security): the condition is interpolated verbatim, so the caller
    # fully controls the resulting SQL. Only expose this to trusted users.
    return f"SELECT * FROM contract_data WHERE {condition}"
# Execute the SQL query and display results | |
def execute_query(sql_query):
    """Run ``sql_query`` against the Parquet-backed ``contract_data`` view.

    A fresh DuckDB connection is opened for each call and the view over
    ``dataset_path`` is recreated before the query runs.

    Args:
        sql_query: SQL text to execute; it may reference ``contract_data``.

    Returns:
        The query result rendered by ``DataFrame.to_markdown()``.

    Raises:
        Whatever DuckDB raises for invalid SQL or an unreadable file; the
        connection is closed before the exception propagates.
    """
    con = duckdb.connect()
    try:
        con.execute(f"CREATE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
        # NOTE(security): sql_query is executed as given, with no validation.
        result_df = con.execute(sql_query).fetchdf()
    finally:
        # User-entered SQL fails often; make sure the connection is released
        # on that path too.
        con.close()
    return result_df.to_markdown()  # Convert result to markdown for display
# Gradio app UI | |
with gr.Blocks() as demo:
    gr.Markdown("""
    # Local Parquet SQL Query App
    Query and explore the data stored in `sample_contract_df.parquet` using DuckDB and SQL queries.
    """)

    # Read the schema once at build time and show it as pretty-printed JSON.
    schema = load_dataset()
    features = json.dumps(schema, indent=2)
    gr.Markdown(f"### Dataset Schema:\n\n```json\n{features}\n```")

    # Event inputs must be Gradio components, not plain Python values, so the
    # schema text is wrapped in a State holder to reach the handler.
    features_state = gr.State(features)

    # User input (a filter condition) and the two output areas.
    query = gr.Textbox(label="Natural Language Query", placeholder="Enter a condition, e.g., 'amount > 1000'")
    sql_out = gr.Code(label="Generated SQL Query", language="sql")
    results_out = gr.Markdown(label="Query Results")

    # Buttons to generate and execute SQL.
    with gr.Row():
        btn_generate = gr.Button("Generate SQL")
        btn_execute = gr.Button("Execute Query")

    # Build the SQL text from the schema and the user's condition.
    btn_generate.click(
        fn=generate_sql_query,
        inputs=[features_state, query],
        outputs=sql_out,
    )

    # Run whatever SQL is currently in the code box and render the result.
    btn_execute.click(
        fn=execute_query,
        inputs=sql_out,
        outputs=results_out,
    )

# Launch the app
demo.launch()