Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,104 +1,93 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
import pyperclip
|
4 |
-
import openai
|
5 |
-
import os
|
6 |
-
import pandas as pd
|
7 |
-
from sqlalchemy import create_engine, inspect
|
8 |
-
from llama_index.legacy import (
|
9 |
-
VectorStoreIndex,
|
10 |
-
SQLDatabase,
|
11 |
-
ServiceContext,
|
12 |
-
)
|
13 |
-
from llama_index.legacy.indices.struct_store import NLSQLTableQueryEngine
|
14 |
-
from llama_index.legacy.llms import OpenAI
|
15 |
-
import sqlite3
|
16 |
-
|
17 |
-
# Set up OpenAI API Key
|
18 |
-
# Load the OpenAI API key from the environment variable
|
19 |
-
openai.api_key = os.getenv("OPENAI_API_KEY")
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
engine = create_engine("sqlite:///gov-contracts.db")
|
28 |
-
sql_database = SQLDatabase(engine)
|
29 |
-
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo-1106")
|
30 |
-
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")
|
31 |
-
return sql_database, service_context, engine
|
32 |
|
33 |
-
#
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
)
|
38 |
|
39 |
-
#
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
#
|
44 |
-
def
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
-
#
|
52 |
-
def
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
#
|
62 |
-
with gr.Blocks() as
|
63 |
-
gr.Markdown("
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
67 |
with gr.Row():
|
68 |
with gr.Column():
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
query_btn = gr.Button("Generate Query")
|
79 |
-
|
80 |
-
with gr.Column():
|
81 |
-
user_query = gr.Textbox(
|
82 |
-
label="Enter your natural language query about the database",
|
83 |
-
placeholder="Ask your question here..."
|
84 |
-
)
|
85 |
-
chat_output = gr.Textbox(
|
86 |
-
label="Generated SQL Query",
|
87 |
-
placeholder="SQL query will appear here..."
|
88 |
-
)
|
89 |
-
|
90 |
-
# Function to call on click
|
91 |
-
def query_callback(user_input, table_name, example_prompt):
|
92 |
-
return generate_response(user_input, selected_table=table_name, example_prompt=example_prompt)
|
93 |
-
|
94 |
-
# Button click event
|
95 |
-
query_btn.click(query_callback, inputs=[user_query, table_dropdown, example_prompt_box], outputs=chat_output)
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
- [GitHub](https://github.com/LNshuti)
|
101 |
-
""")
|
102 |
|
103 |
-
|
104 |
-
|
|
|
1 |
+
# Dependencies: gradio, requests, duckdb, huggingface_hub.
# NOTE: "!pip install ..." is IPython/Colab shell syntax and is a SyntaxError in
# a plain Python file such as app.py. Install the packages before running the
# app instead, e.g. list them in requirements.txt or run:
#     pip install gradio requests duckdb huggingface_hub
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
import urllib.parse
|
7 |
+
import gradio as gr
|
8 |
+
import requests
|
9 |
+
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
# Shared Hugging Face Inference API client used to turn questions into SQL.
# The access token is read from the HF_TOKEN environment variable, so it must
# be set in the runtime (a missing variable raises KeyError at start-up).
_HF_MODEL_ID = "meta-llama/Meta-Llama-3.1-70B-Instruct"
client = InferenceClient(_HF_MODEL_ID, token=os.environ["HF_TOKEN"])
|
16 |
|
17 |
+
# Function to generate iframe for dataset viewer
|
18 |
+
def get_iframe(hub_repo_id, sql_query=None):
    """Build the HTML ``<iframe>`` snippet embedding the Hub dataset viewer.

    Args:
        hub_repo_id: Dataset repository id on the Hugging Face Hub
            (for example ``"user/dataset"``).
        sql_query: Optional SQL text. When given, the viewer URL enables the
            SQL console and passes the query in the ``sql`` parameter.

    Returns:
        An HTML string containing a single ``<iframe>`` element.

    Raises:
        ValueError: If ``hub_repo_id`` is empty or only whitespace.
    """
    import html

    repo_id = (hub_repo_id or "").strip()
    if not repo_id:
        raise ValueError("Hub repo id is required")

    # The repo id comes straight from a text box: percent-encode it (keeping
    # the "/" between owner and name) so it cannot break out of the URL path.
    repo_path = urllib.parse.quote(repo_id, safe="/")
    url = f"https://huggingface.co/datasets/{repo_path}/embed/viewer"
    if sql_query:
        url += f"?sql_console=true&sql={urllib.parse.quote(sql_query)}"

    # Escape the URL for use inside a double-quoted HTML attribute; the
    # browser decodes "&amp;" back to "&" when it parses the attribute.
    src = html.escape(url, quote=True)
    return f"""
<iframe src="{src}" frameborder="0" width="100%" height="800px"></iframe>
"""
|
30 |
|
31 |
+
# Function to fetch dataset column information
|
32 |
+
def get_column_info(hub_repo_id):
    """Fetch the feature (column) description of a Hub dataset as JSON text.

    Calls the dataset viewer ``/info`` endpoint and returns the ``features``
    entry of the first configuration listed under ``dataset_info``,
    pretty-printed with two-space indentation.

    Args:
        hub_repo_id: Dataset repository id on the Hugging Face Hub.

    Returns:
        A JSON string describing the features, or a string starting with
        ``"Error getting column info: "`` when the lookup fails. This function
        reports failures through its return value instead of raising, because
        the result is shown directly in the UI.
    """
    try:
        # Keep the request inside the try block so network failures are
        # reported the same way as parsing failures. ``params`` lets requests
        # URL-encode the repo id, and the timeout prevents the UI callback
        # from hanging indefinitely.
        response = requests.get(
            "https://datasets-server.huggingface.co/info",
            params={"dataset": hub_repo_id},
            timeout=30,
        )
        data = response.json()
        dataset_info = data.get("dataset_info")
        if not dataset_info:
            # NOTE(review): error payloads appear to carry an "error" key;
            # confirm against the dataset viewer API documentation.
            reason = data.get("error") or (
                f"no dataset info returned (HTTP {response.status_code})"
            )
            return f"Error getting column info: {reason}"
        first_config = next(iter(dataset_info.values()))
        features = json.dumps(first_config.get("features"), indent=2)
    except Exception as e:  # UI boundary: surface any failure as text
        return f"Error getting column info: {e}"
    return features
|
43 |
|
44 |
+
# Function to generate SQL query based on natural language input
|
45 |
+
def query_dataset(hub_repo_id, features, query):
    """Translate a natural-language question into DuckDB SQL and embed the result.

    Sends the dataset features and the user's question to the chat model via
    the module-level ``client`` and extracts the SQL from the reply.

    Args:
        hub_repo_id: Dataset repository id on the Hugging Face Hub.
        features: Text describing the dataset columns, inserted verbatim into
            the prompt.
        query: The user's natural-language question.

    Returns:
        A ``(sql, iframe_html)`` tuple: the generated SQL text and the HTML
        from ``get_iframe`` for the dataset viewer pre-filled with that SQL.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a SQL query expert assistant that returns a DuckDB SQL query based on the user's natural language query and dataset features.",
        },
        {
            "role": "user",
            "content": f"""table train
# Features
{features}
# Query
{query}
""",
        },
    ]
    response = client.chat_completion(
        messages=messages,
        max_tokens=1000,
        stream=False,
    )
    # Chat models often wrap SQL in Markdown fences; passing that raw text to
    # the viewer's SQL console would not run, so keep only the SQL itself.
    sql = _extract_sql(response.choices[0].message.content)
    return sql, get_iframe(hub_repo_id, sql)


def _extract_sql(reply):
    """Return the SQL from a chat reply, dropping Markdown code fences if present."""
    import re

    text = (reply or "").strip()
    # Prefer the first fenced block (with an optional language tag such as "sql").
    fenced = re.search(r"```[a-zA-Z]*[ \t]*\n(.*?)```", text, flags=re.DOTALL)
    if fenced:
        return fenced.group(1).strip()
    # No fenced block: strip stray backticks from an inline-quoted query.
    return text.strip("`").strip()
|
68 |
|
69 |
+
# Gradio app UI
|
70 |
+
# Gradio app UI.
# NOTE(review): the original indentation was lost, so the nesting of the rows
# and columns below is a reconstruction; adjust it if the intended layout
# differs.
with gr.Blocks() as demo:
    # Header emoji restored from mis-encoded bytes (croissant, llama, hugging
    # face); confirm against the intended title.
    gr.Markdown("""
    # 🥐 🦙 🤗 Text To SQL Hub Datasets 🤗 🦙 🥐
    Use this tool to search and query datasets on Huggingface Hub.
    Built with DuckDB, Huggingface's Inference API, and LLaMA 3.1 70B.
    """)
    with gr.Row():
        with gr.Column():
            search_in = gr.Textbox(label="Search Huggingface Hub", placeholder="Search for datasets")
            query = gr.Textbox(label="Natural Language Query", placeholder="Enter a query to generate SQL")
            sql_out = gr.Code(label="SQL Query", language="sql")
    with gr.Row():
        btn = gr.Button("Show Dataset")
        btn2 = gr.Button("Query Dataset")
    with gr.Row():
        search_out = gr.HTML(label="Search Results")
        features = gr.Code(label="Features", language="json")

    # Event handling.
    # "Show Dataset" embeds the viewer and then, only if that succeeded, loads
    # the column info into the features box. Without this second step the
    # features box stays empty and query_dataset has no schema for its prompt.
    btn.click(fn=get_iframe, inputs=[search_in], outputs=[search_out]).success(
        fn=get_column_info, inputs=[search_in], outputs=[features]
    )
    # "Query Dataset" generates SQL from the question and the loaded features,
    # then refreshes the viewer with that SQL.
    btn2.click(fn=query_dataset, inputs=[search_in, features, query], outputs=[sql_out, search_out])

# Launch the app
demo.launch()
|