Spaces:
Sleeping
Sleeping
# Install necessary libraries | |
!pip install gradio requests duckdb huggingface_hub | |
import json | |
import os | |
import urllib.parse | |
import gradio as gr | |
import requests | |
from huggingface_hub import InferenceClient | |
# Shared chat-completion client for the Llama 3.1 70B Instruct model.
# The Hugging Face token is read from the HF_TOKEN environment variable; the
# lookup raises KeyError at import time if the variable is not set (e.g. add
# it as a secret/environment variable in Colab before running).
client = InferenceClient(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct",
    token=os.environ["HF_TOKEN"],
)
# Function to generate iframe for dataset viewer
def get_iframe(hub_repo_id, sql_query=None):
    """Build the HTML ``<iframe>`` that embeds the Hub dataset viewer.

    Args:
        hub_repo_id: Dataset repository id such as ``"user/dataset"``.
            Surrounding whitespace is ignored.
        sql_query: Optional SQL text. When given (and non-empty) the viewer
            is opened with its SQL console enabled and this query pre-filled.

    Returns:
        An HTML snippet containing a single ``<iframe>`` element.

    Raises:
        ValueError: If ``hub_repo_id`` is missing, empty, or only whitespace.
    """
    repo_id = hub_repo_id.strip() if hub_repo_id else ""
    if not repo_id:
        raise ValueError("Hub repo id is required")

    # The repo id is free-form user input that ends up inside an HTML
    # attribute. Percent-encoding it (keeping "/" as the owner/name separator)
    # prevents quotes or angle brackets from breaking out of src="..." and
    # keeps characters like "?" or "#" from changing the URL's meaning.
    repo_path = urllib.parse.quote(repo_id, safe="/")
    url = f"https://huggingface.co/datasets/{repo_path}/embed/viewer"
    if sql_query:
        url += f"?sql_console=true&sql={urllib.parse.quote(sql_query)}"

    return f"""
<iframe src="{url}" frameborder="0" width="100%" height="800px"></iframe>
"""
# Function to fetch dataset column information
def get_column_info(hub_repo_id):
    """Return a dataset's feature schema as pretty-printed JSON text.

    Calls the datasets-server ``/info`` endpoint and serialises the
    ``features`` entry of the first config listed under ``dataset_info``.

    Args:
        hub_repo_id: Dataset repository id such as ``"user/dataset"``.

    Returns:
        A JSON string (2-space indent) describing the features. On any
        failure (network error, timeout, non-JSON body, missing fields) the
        function does not raise; it returns a message that starts with
        ``"Error getting column info: "`` so the UI can display it.
    """
    try:
        # Passing the id via ``params`` lets requests URL-encode it, and the
        # timeout keeps a stalled server from blocking the UI callback
        # indefinitely. The call lives inside the ``try`` so connection
        # errors are reported the same way as parsing errors.
        response = requests.get(
            "https://datasets-server.huggingface.co/info",
            params={"dataset": hub_repo_id},
            timeout=30,
        )
        data = response.json()
        dataset_info = data.get("dataset_info")
        if not dataset_info:
            # NOTE(review): error responses appear to carry their message
            # under "error" — confirm against the API docs. Fall back to a
            # generic message when that key is absent.
            raise ValueError(data.get("error") or "no dataset_info in response")
        first_config = next(iter(dataset_info))
        features = json.dumps(dataset_info[first_config].get("features"), indent=2)
    except Exception as e:  # UI boundary: always hand back displayable text.
        return f"Error getting column info: {e}"
    return features
# Function to generate SQL query based on natural language input
def _extract_sql(text):
    """Return the bare SQL from a chat reply, unwrapping a Markdown code fence if present."""
    text = (text or "").strip()
    _, fence, after_open = text.partition("```")
    if not fence:
        return text
    body = after_open.partition("```")[0]
    # Drop the opening fence line when it is empty or just a language tag.
    first_line, newline, remainder = body.partition("\n")
    if newline and first_line.strip().lower() in {"", "sql", "duckdb"}:
        body = remainder
    # If the fence turned out to be empty, fall back to the whole reply.
    return body.strip() or text


def query_dataset(hub_repo_id, features, query):
    """Ask the chat model for a DuckDB query and embed the viewer running it.

    Args:
        hub_repo_id: Dataset repository id shown in the viewer.
        features: Text describing the dataset's features; inserted verbatim
            into the prompt.
        query: The user's natural-language question.

    Returns:
        A ``(sql, iframe_html)`` tuple: the SQL text taken from the model's
        reply and the viewer iframe produced by ``get_iframe`` for that SQL.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a SQL query expert assistant that returns a DuckDB SQL query based on the user's natural language query and dataset features.",
        },
        {
            "role": "user",
            "content": f"table train\n# Features\n{features}\n# Query\n{query}\n",
        },
    ]
    response = client.chat_completion(
        messages=messages,
        max_tokens=1000,
        stream=False,
    )
    # The reply may wrap the query in ```sql fences or surround it with
    # commentary; only the SQL itself should reach the code box and the
    # viewer URL.
    sql = _extract_sql(response.choices[0].message.content)
    return sql, get_iframe(hub_repo_id, sql)
# Gradio app UI
with gr.Blocks() as demo:
    gr.Markdown("""
# 💥 🦙 🤗 Text To SQL Hub Datasets 🤗 🦙 💥
Use this tool to search and query datasets on Huggingface Hub.
Built with DuckDB, Huggingface's Inference API, and LLaMA 3.1 70B.
""")
    # Inputs: dataset id, natural-language question, and the generated SQL.
    with gr.Row():
        with gr.Column():
            search_in = gr.Textbox(label="Search Huggingface Hub", placeholder="Search for datasets")
            query = gr.Textbox(label="Natural Language Query", placeholder="Enter a query to generate SQL")
            sql_out = gr.Code(label="SQL Query", language="sql")
    with gr.Row():
        btn = gr.Button("Show Dataset")
        btn2 = gr.Button("Query Dataset")
    # Outputs: embedded dataset viewer and the dataset's feature schema.
    with gr.Row():
        search_out = gr.HTML(label="Search Results")
        features = gr.Code(label="Features", language="json")

    # Event handling
    # Showing a dataset also loads its schema into `features`. Without this
    # chained step nothing ever fills that box, so query_dataset would prompt
    # the model with no column information.
    btn.click(fn=get_iframe, inputs=[search_in], outputs=[search_out]).then(
        fn=get_column_info, inputs=[search_in], outputs=[features]
    )
    btn2.click(fn=query_dataset, inputs=[search_in, features, query], outputs=[sql_out, search_out])

# Launch the app
demo.launch()