# Install the necessary libraries before running this script, e.g.:
#   pip install gradio requests duckdb huggingface_hub
# (the `!pip install ...` form only works inside a notebook cell, not in a plain .py file)
import json
import os
import urllib.parse
import gradio as gr
import requests
from huggingface_hub import InferenceClient
# InferenceClient setup (add your HF token as the HF_TOKEN environment variable)
client = InferenceClient(
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    token=os.environ["HF_TOKEN"],
)
# Function to generate iframe for dataset viewer
def get_iframe(hub_repo_id, sql_query=None):
    if not hub_repo_id:
        raise ValueError("Hub repo id is required")
    if sql_query:
        sql_query = urllib.parse.quote(sql_query)
        url = f"https://huggingface.co/datasets/{hub_repo_id}/embed/viewer?sql_console=true&sql={sql_query}"
    else:
        url = f"https://huggingface.co/datasets/{hub_repo_id}/embed/viewer"
    iframe = f"""
    <iframe src="{url}" frameborder="0" width="100%" height="800px"></iframe>
    """
    return iframe
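
# Illustrative example (hypothetical repo id):
#   get_iframe("user/my-dataset", "SELECT * FROM train LIMIT 10")
# returns an <iframe> whose src is roughly:
#   https://huggingface.co/datasets/user/my-dataset/embed/viewer?sql_console=true&sql=SELECT%20%2A%20FROM%20train%20LIMIT%2010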
# Function to fetch dataset column information
def get_column_info(hub_repo_id):
    url = f"https://datasets-server.huggingface.co/info?dataset={hub_repo_id}"
    response = requests.get(url)
    try:
        data = response.json()
        dataset_info = data.get("dataset_info")
        key = list(dataset_info.keys())[0]
        features = json.dumps(dataset_info.get(key).get("features"), indent=2)
    except Exception as e:
        return f"Error getting column info: {e}"
    return features
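
# Shape of the /info response this function assumes (illustrative values; real keys vary by dataset):
# {
#   "dataset_info": {
#     "<config name>": {
#       "features": {"text": {"dtype": "string", "_type": "Value"}, ...}
#     }
#   }
# }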
# Function to generate SQL query based on natural language input
def query_dataset(hub_repo_id, features, query):
    messages = [
        {
            "role": "system",
            "content": "You are a SQL query expert assistant that returns a DuckDB SQL query based on the user's natural language query and dataset features.",
        },
        {
            "role": "user",
            "content": f"""table train
# Features
{features}
# Query
{query}
""",
        },
    ]
    response = client.chat_completion(
        messages=messages,
        max_tokens=1000,
        stream=False,
    )
    query = response.choices[0].message.content
    return query, get_iframe(hub_repo_id, query)
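
# Illustrative example (not from a real run): for features that include a "language" column,
# a natural-language query like "count rows per language" might yield
#   SELECT language, COUNT(*) AS n FROM train GROUP BY language;
# The model's raw completion is returned unmodified, so any extra prose or markdown fences
# it emits are forwarded to the SQL console as-is.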
# Gradio app UI
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🦙 🤗 Text To SQL Hub Datasets 🤗 🦙
    Use this tool to view and query datasets on the Hugging Face Hub.
    Built with DuckDB, the Hugging Face Inference API, and Llama 3.1 70B.
    """)
    with gr.Row():
        with gr.Column():
            search_in = gr.Textbox(
                label="Hugging Face Hub Dataset",
                placeholder="Enter a dataset repo id, e.g. user/dataset-name",
            )
            query = gr.Textbox(label="Natural Language Query", placeholder="Enter a query to generate SQL")
            sql_out = gr.Code(label="SQL Query", language="sql")
    with gr.Row():
        btn = gr.Button("Show Dataset")
        btn2 = gr.Button("Query Dataset")
    with gr.Row():
        search_out = gr.HTML(label="Search Results")
        features = gr.Code(label="Features", language="json")
    # Event handling: showing a dataset also fetches its column info so the
    # Features panel (consumed by query_dataset) gets populated
    btn.click(fn=get_iframe, inputs=[search_in], outputs=[search_out]).then(
        fn=get_column_info, inputs=[search_in], outputs=[features]
    )
    btn2.click(fn=query_dataset, inputs=[search_in, features, query], outputs=[sql_out, search_out])
# Launch the app
demo.launch()
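
# To run locally (illustrative): set HF_TOKEN in your environment, then `python app.py`
# and open the URL Gradio prints (http://127.0.0.1:7860 by default).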