davidberenstein1957's picture
docs: update title
bde0dbe
raw
history blame
3.67 kB
import json
import os
import urllib.parse
import gradio as gr
import requests
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from huggingface_hub import InferenceClient
# Instantiate the Hub search component once and keep its example value.
# NOTE(review): `example` is not referenced anywhere else in the visible
# code — confirm it is still needed before removing it.
example = HuggingfaceHubSearch().example_value()
# Module-level huggingface_hub InferenceClient used by query_dataset() to
# turn natural-language questions into SQL. The token is read with
# os.environ[...], so a missing HF_TOKEN raises KeyError at import time.
client = InferenceClient(
"meta-llama/Meta-Llama-3.1-70B-Instruct",
token=os.environ["HF_TOKEN"],
)
def get_iframe(hub_repo_id, sql_query=None):
    """Build the HTML for an iframe embedding the Hub dataset viewer.

    Args:
        hub_repo_id: Dataset repository id, interpolated into the viewer URL.
        sql_query: Optional SQL text. When truthy it is percent-encoded and
            passed to the viewer through the ``sql`` query parameter with
            the SQL console enabled.

    Returns:
        An HTML string containing a single ``<iframe>`` element.
    """
    if not sql_query:
        # No query: link to the plain embedded viewer.
        url = f"https://huggingface.co/datasets/{hub_repo_id}/embed/viewer"
    else:
        encoded_sql = urllib.parse.quote(sql_query)
        url = f"https://huggingface.co/datasets/{hub_repo_id}/embed/viewer?sql_console=true&sql={encoded_sql}"
    # The literal is kept flush-left so the emitted HTML is unchanged.
    return f"""
<iframe
src="{url}"
frameborder="0"
width="100%"
height="800px"
></iframe>
"""
def get_column_info(hub_repo_id):
    """Fetch the feature schema of a Hub dataset as a JSON string.

    Queries the datasets-server ``/info`` endpoint and serialises the
    ``features`` entry of the first config listed under ``dataset_info``.

    Args:
        hub_repo_id: Dataset repository id, sent as the ``dataset`` query
            parameter.

    Returns:
        The features mapping of the first config, encoded with
        ``json.dumps``.

    Raises:
        gr.Error: If the request fails, returns an error status, or the
            response does not have the expected structure. Raising (rather
            than merely constructing) the error is what makes Gradio show
            it to the user.
    """
    try:
        # The request lives inside the try so network failures are reported
        # through the UI as well; the timeout keeps a stalled server from
        # hanging the event handler indefinitely.
        response = requests.get(
            "https://datasets-server.huggingface.co/info",
            params={"dataset": hub_repo_id},
            timeout=30,
        )
        response.raise_for_status()
        dataset_info = response.json().get("dataset_info")
        if not dataset_info:
            raise ValueError("response contains no dataset_info")
        # Only the first config is inspected, as before.
        first_config = next(iter(dataset_info))
        return json.dumps(dataset_info[first_config].get("features"))
    except Exception as e:
        raise gr.Error(f"Error getting column info: {e}") from e
def query_dataset(hub_repo_id, features, query):
    """Ask the chat model for a DuckDB SQL query and embed its result view.

    Args:
        hub_repo_id: Dataset repository id forwarded to ``get_iframe``.
        features: Text describing the dataset features, inserted verbatim
            into the user prompt.
        query: The user's natural-language question.

    Returns:
        A ``(sql, iframe_html)`` tuple: the content of the model's first
        choice, and the iframe HTML produced by ``get_iframe`` for that SQL.
    """
    system_prompt = "You are a helpful assistant that returns a DuckDB SQL query based on the user's query and dataset features. Only return the SQL query, no other text."
    # The literal is kept flush-left so the prompt text is unchanged.
    user_prompt = f"""table train
# Features
{features}
# Query
{query}
"""
    completion = client.chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=1000,
        stream=False,
    )
    sql = completion.choices[0].message.content
    return sql, get_iframe(hub_repo_id, sql)
# UI definition: a dataset search box, the embedded dataset viewer, a hidden
# features panel that feeds the prompt, and a natural-language query box
# whose generated SQL is shown and applied to the viewer.
with gr.Blocks() as demo:
    # The Markdown literal is kept flush-left: indented lines would render
    # as a code block.
    gr.Markdown("""# 🥐 🦙 🤗 Text To SQL Hub Datasets 🤗 🦙 🥐
This is a basic text to SQL tool that allows you to query datasets on Huggingface Hub.
It is built with [DuckDB](https://duckdb.org/), [Huggingface's Inference API](https://huggingface.co/docs/api-inference/index), and [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct).
Also, it uses the [dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
""")
    with gr.Row():
        with gr.Column():
            search_in = HuggingfaceHubSearch(
                label="Search Huggingface Hub",
                # The component searches datasets (search_type below), so
                # the placeholder says so too.
                placeholder="Search for datasets on Huggingface",
                search_type="dataset",
            )
            btn = gr.Button("Show Dataset")
    with gr.Row():
        search_out = gr.HTML(label="Search Results")
    with gr.Row():
        # Hidden: holds the features JSON passed to query_dataset.
        features = gr.Code(label="Features", language="json", visible=False)
    with gr.Row():
        query = gr.Textbox(
            label="Natural Language Query",
            placeholder="Enter a natural language query to generate SQL",
        )
    with gr.Row():
        sql_out = gr.Code(label="SQL Query")
    with gr.Row():
        btn2 = gr.Button("Query Dataset")

    # Selecting a dataset first renders the viewer, then loads its features.
    gr.on(
        [btn.click, search_in.submit],
        fn=get_iframe,
        inputs=[search_in],
        outputs=[search_out],
    ).then(
        fn=get_column_info,
        inputs=[search_in],
        outputs=[features],
    )
    # Generate SQL from the question and reload the viewer with that query.
    btn2.click(
        fn=query_dataset,
        inputs=[search_in, features, query],
        outputs=[sql_out, search_out],
    )

if __name__ == "__main__":
    demo.launch()