Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,53 +1,29 @@
|
|
1 |
-
import json
|
2 |
import os
|
3 |
-
import
|
4 |
import gradio as gr
|
5 |
-
import
|
6 |
-
from huggingface_hub import InferenceClient
|
7 |
-
|
8 |
-
# InferenceClient setup (you must add your HF token as an environment variable in Colab)
|
9 |
-
client = InferenceClient(
|
10 |
-
"meta-llama/Meta-Llama-3.1-70B-Instruct",
|
11 |
-
token=os.environ["HF_TOKEN"],
|
12 |
-
)
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
if not hub_repo_id:
|
17 |
-
raise ValueError("Hub repo id is required")
|
18 |
-
if sql_query:
|
19 |
-
sql_query = urllib.parse.quote(sql_query)
|
20 |
-
url = f"https://huggingface.co/datasets/{hub_repo_id}/embed/viewer?sql_console=true&sql={sql_query}"
|
21 |
-
else:
|
22 |
-
url = f"https://huggingface.co/datasets/{hub_repo_id}/embed/viewer"
|
23 |
-
iframe = f"""
|
24 |
-
<iframe src="{url}" frameborder="0" width="100%" height="800px"></iframe>
|
25 |
-
"""
|
26 |
-
return iframe
|
27 |
|
28 |
-
#
|
29 |
-
def
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
key = list(dataset_info.keys())[0]
|
36 |
-
features = json.dumps(dataset_info.get(key).get("features"), indent=2)
|
37 |
-
except Exception as e:
|
38 |
-
return f"Error getting column info: {e}"
|
39 |
-
return features
|
40 |
|
41 |
-
#
|
42 |
-
def
|
43 |
messages = [
|
44 |
{
|
45 |
"role": "system",
|
46 |
-
"content": "You are a SQL query expert assistant that
|
47 |
},
|
48 |
{
|
49 |
"role": "user",
|
50 |
-
"content": f"""table
|
51 |
# Features
|
52 |
{features}
|
53 |
# Query
|
@@ -55,36 +31,53 @@ def query_dataset(hub_repo_id, features, query):
|
|
55 |
""",
|
56 |
},
|
57 |
]
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
65 |
|
66 |
# Gradio app UI
|
67 |
with gr.Blocks() as demo:
|
68 |
gr.Markdown("""
|
69 |
-
#
|
70 |
-
|
71 |
-
Built with DuckDB, Huggingface's Inference API, and LLaMA 3.1 70B.
|
72 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
with gr.Row():
|
74 |
-
|
75 |
-
|
76 |
-
query = gr.Textbox(label="Natural Language Query", placeholder="Enter a query to generate SQL")
|
77 |
-
sql_out = gr.Code(label="SQL Query", language="sql")
|
78 |
-
with gr.Row():
|
79 |
-
btn = gr.Button("Show Dataset")
|
80 |
-
btn2 = gr.Button("Query Dataset")
|
81 |
-
with gr.Row():
|
82 |
-
search_out = gr.HTML(label="Search Results")
|
83 |
-
features = gr.Code(label="Features", language="json")
|
84 |
|
85 |
-
#
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
# Launch the app
|
90 |
demo.launch()
|
|
|
|
|
1 |
import os
|
2 |
+
import json
|
3 |
import gradio as gr
|
4 |
+
import duckdb
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
# Load the Parquet dataset from local disk. This path is read by
# load_dataset() and execute_query() when they create the DuckDB view,
# so changing it here repoints the whole app at a different file.
dataset_path = '/content/sample_contract_df.parquet'  # Update with your Parquet file's path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
# Load the dataset with DuckDB and get schema information
def load_dataset():
    """Return the schema of the Parquet dataset as a list of column records.

    Opens a short-lived DuckDB connection, exposes the Parquet file at
    ``dataset_path`` as the ``contract_data`` view, and runs ``DESCRIBE``
    on it.

    Returns:
        list[dict]: one dict per column, as produced by
        ``DataFrame.to_dict(orient="records")``.
    """
    con = duckdb.connect()
    try:
        con.execute(f"CREATE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
        schema = con.execute("DESCRIBE contract_data").fetchdf()
    finally:
        # Close even when CREATE VIEW / DESCRIBE raises (e.g. missing file),
        # so repeated calls do not leak connections.
        con.close()
    return schema.to_dict(orient="records")
|
|
|
|
|
|
|
|
|
|
|
# Generate SQL based on schema and user query
def generate_sql_query(features, query):
    """Build a DuckDB ``SELECT`` over ``contract_data`` from a user condition.

    Args:
        features: JSON description of the dataset schema. Currently unused —
            kept for backward compatibility with existing callers; it was only
            consumed by a now-removed LLM prompt (the dead ``messages`` list).
        query: a SQL WHERE-clause condition, e.g. ``"amount > 1000"``.

    Returns:
        str: the full SQL statement to run against the view.
    """
    # NOTE(review): `query` is interpolated directly into the SQL text, so any
    # condition can run arbitrary SQL against the DuckDB view. Acceptable for
    # a local demo; do not expose this to untrusted users without validation.
    return f"SELECT * FROM contract_data WHERE {query}"  # Simple example; adapt for complex queries
|
37 |
+
|
38 |
+
# Execute the SQL query and display results
|
39 |
+
def execute_query(sql_query):
|
40 |
+
con = duckdb.connect()
|
41 |
+
con.execute(f"CREATE VIEW contract_data AS SELECT * FROM '{dataset_path}'")
|
42 |
+
result_df = con.execute(sql_query).fetchdf()
|
43 |
+
con.close()
|
44 |
+
return result_df.to_markdown() # Convert result to markdown for display
|
# Gradio app UI
with gr.Blocks() as demo:
    gr.Markdown("""
    # Local Parquet SQL Query App
    Query and explore the data stored in `sample_contract_df.parquet` using DuckDB and SQL queries.
    """)

    # Display schema
    schema = load_dataset()
    features = json.dumps(schema, indent=2)
    gr.Markdown(f"### Dataset Schema:\n\n```json\n{features}\n```")

    # Hold the schema JSON in session state so it can be wired into the
    # click handler below: Gradio event `inputs` must be components, and
    # the bare Python str `features` is not one (passing it raises).
    features_state = gr.State(features)

    # User inputs for natural language query
    query = gr.Textbox(label="Natural Language Query", placeholder="Enter a condition, e.g., 'amount > 1000'")
    sql_out = gr.Code(label="Generated SQL Query", language="sql")
    results_out = gr.Markdown(label="Query Results")

    # Buttons to generate and execute SQL
    with gr.Row():
        btn_generate = gr.Button("Generate SQL")
        btn_execute = gr.Button("Execute Query")

    # Generate SQL on button click
    btn_generate.click(
        fn=generate_sql_query,
        inputs=[features_state, query],  # was [features, query]: a str is not a valid input component
        outputs=sql_out,
    )

    # Execute SQL on button click
    btn_execute.click(
        fn=execute_query,
        inputs=sql_out,
        outputs=results_out,
    )

# Launch the app
demo.launch()
|