# Scraped page header (not part of the program): file uploaded by asoria
# (HF Staff), commit 554bcd2 "Adding parameters", file size 2 kB.
import gradio as gr
import duckdb
from huggingface_hub import HfFileSystem
from huggingface_hub.hf_file_system import safe_quote
import pandas as pd
# Revision of the dataset repository that the Parquet files are read from.
PARQUET_REVISION = "refs/convert/parquet"

# Placeholder the user's SQL must contain; greet() swaps it for the quoted
# path of the Parquet file being queried.
TABLE_WILDCARD = "{table}"

# Register the Hugging Face filesystem with DuckDB so that queries can
# reference hf:// locations.
fs = HfFileSystem()
duckdb.register_filesystem(fs)
def greet(dataset, config, split, sql):
    """Run a user-supplied SQL query against one Parquet split of a dataset.

    Every occurrence of ``TABLE_WILDCARD`` in ``sql`` is replaced with the
    single-quoted ``hf://`` location of ``{config}/{dataset}-{split}.parquet``
    on the ``PARQUET_REVISION`` revision of ``dataset``; the resulting query
    is then executed with DuckDB.

    Args:
        dataset: Dataset repository id (e.g. ``"mstz/iris"``).
        config: Dataset configuration name (e.g. ``"iris"``).
        split: Split name (e.g. ``"train"``). Only one split is queried.
        sql: SQL text that must contain ``TABLE_WILDCARD``.

    Returns:
        The query result as a pandas DataFrame. On any failure (invalid
        input or an error raised while running the query) a one-row
        DataFrame with a single ``"Error"`` column describing the problem
        is returned instead of raising, so the UI always has a table to show.
    """
    try:
        if not sql or TABLE_WILDCARD not in sql:
            raise Exception(f"Query must contains {TABLE_WILDCARD} wildcard.")

        # Blank fields would otherwise build a malformed path and surface as
        # an obscure filesystem/DuckDB error, so reject them up front.
        fields = {"dataset": dataset, "config": config, "split": split}
        missing = [
            name
            for name, value in fields.items()
            if value is None or not str(value).strip()
        ]
        if missing:
            raise ValueError(f"Missing required value(s): {', '.join(missing)}.")
        dataset, config, split = (str(value).strip() for value in fields.values())

        path = f"{config}/{dataset}-{split}.parquet"  # Only from one split
        location = f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"
        print(location)

        # The location is embedded as a SQL string literal; double any single
        # quote so user input cannot terminate the literal early.
        quoted_location = "'" + location.replace("'", "''") + "'"
        sql = sql.replace(TABLE_WILDCARD, quoted_location)
        result = duckdb.query(sql).to_df()
    except Exception as error:
        # UI boundary: report the failure in the result table rather than
        # letting the exception propagate to Gradio.
        print(f"Error: {str(error)}")
        return pd.DataFrame({"Error": [f"❌ {str(error)}"]})
    else:
        print("QUERY SUCCESSED")
        return result
# Assemble the demo page: four text inputs, a run button, and a result table.
with gr.Blocks() as demo:
    gr.Markdown(" ## DuckDB demo using parquet revision")

    # Inputs that identify the Parquet file to query.
    dataset = gr.Textbox(
        label="dataset",
        placeholder="mstz/iris",
    )
    config = gr.Textbox(
        label="config",
        placeholder="iris",
    )
    split = gr.Textbox(
        label="split",
        placeholder="train",
    )

    # The query itself; the placeholder shows where the table wildcard goes.
    sql = gr.Textbox(
        label="sql",
        placeholder=f"SELECT sepal_length FROM {TABLE_WILDCARD} LIMIT 3",
    )
    run_button = gr.Button("Run")

    gr.Markdown("### Result")
    cached_responses_table = gr.DataFrame()

    # Clicking "Run" passes the four textbox values to greet() and renders
    # the DataFrame it returns.
    run_button.click(
        greet,
        inputs=[dataset, config, split, sql],
        outputs=cached_responses_table,
    )

# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
# duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2").show()
# duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2")
# duckdb.query(f"SELECT max(idx) as max FROM '{location}' LIMIT 2")
# duckdb.query(f"SELECT idx FROM '{location}' ORDER BY idx DESC LIMIT 1").show()