Spaces:
Runtime error
Runtime error
import gradio as gr | |
import duckdb | |
from huggingface_hub import HfFileSystem | |
from huggingface_hub.hf_file_system import safe_quote | |
import pandas as pd | |
# Register the Hugging Face Hub filesystem with DuckDB; the query below reads
# from an "hf://" location.
fs = HfFileSystem()
duckdb.register_filesystem(fs)

# Parquet file to query: built from the dataset name, the revision that is
# quoted with safe_quote(), and the file path inside that revision.
dataset = "glue"
PARQUET_REVISION = "refs/convert/parquet"
path = "mnli/glue-train.parquet"  # Only from one split
# Alternative: path = "mnli/*.parquet" reads all parquet files, but the rows
# should then be grouped by split (split names obtained from the datasets server).
location = f"hf://datasets/{dataset}@{safe_quote(PARQUET_REVISION)}/{path}"

# Log the resolved location at startup.
print(location)
def greet(dataset, config, split, sql):
    """Run the fixed sample query and return its rows as a DataFrame.

    The query selects ``idx`` (as ``id``) and ``premise`` (as ``p``) from the
    module-level ``location`` and is limited to two rows.

    Args:
        dataset: Value of the "dataset" textbox. Currently not used.
        config: Value of the "config" textbox. Currently not used.
        split: Value of the "split" textbox. Currently not used.
        sql: Value of the "sql" textbox. Currently not used.

    Returns:
        The query result converted with ``.to_df()``. If anything raises, a
        one-row DataFrame with a single ``"Error"`` column holding the
        error message is returned instead.
    """
    try:
        frame = duckdb.query(
            f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2"
        ).to_df()
        print("QUERY SUCCESSED")
        return frame
    except Exception as exc:
        # Report the failure in the UI table instead of letting the handler raise.
        print(f"Error: {exc}")
        return pd.DataFrame({"Error": [f"❌ {exc}"]})
# Build the UI: four text inputs, a Run button and a table for the result.
with gr.Blocks() as demo:
    gr.Markdown(" ## DuckDB demo using parquet revision")

    # Input fields; their values are passed to greet() in this order.
    # NOTE: `dataset` rebinds the module-level name that held the dataset id;
    # `location` was already computed from it before this point.
    dataset = gr.Textbox(label="dataset", placeholder="mstz/iris")
    config = gr.Textbox(label="config", placeholder="iris")
    split = gr.Textbox(label="split", placeholder="train")
    sql = gr.Textbox(label="sql", placeholder="SELECT 1")
    run_button = gr.Button("Run")

    gr.Markdown("### Result")
    cached_responses_table = gr.DataFrame()

    # Clicking "Run" calls greet() and shows the returned frame in the table.
    run_button.click(
        fn=greet,
        inputs=[dataset, config, split, sql],
        outputs=cached_responses_table,
    )
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
# duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2").show()
# duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2")
# duckdb.query(f"SELECT max(idx) as max FROM '{location}' LIMIT 2")
# duckdb.query(f"SELECT idx FROM '{location}' ORDER BY idx DESC LIMIT 1").show()