Spaces:

asoria
/

duckdb-parquet-demo

Runtime error

App Files Files Community

duckdb-parquet-demo / app.py

asoria HF Staff

Trying to run query

9b95e7f about 2 years ago

raw

history blame

1.74 kB

	import gradio as gr
	import duckdb
	from huggingface_hub import HfFileSystem
	from huggingface_hub.hf_file_system import safe_quote
	import pandas as pd

	# Register the Hugging Face Hub filesystem with DuckDB
	# (presumably so that `hf://` paths resolve inside SQL -- confirm).
	fs = HfFileSystem()
	duckdb.register_filesystem(fs)

	dataset = "glue"
	PARQUET_REVISION = "refs/convert/parquet"

	# Only one split is read for now.
	path = "mnli/glue-train.parquet"
	# path = "mnli/*.parquet"  # Would read every parquet file, but the files
	# should then be grouped by split (split info coming from the datasets server).

	# The revision is passed through safe_quote() before being embedded in the URL.
	location = (
	    f"hf://datasets/{dataset}"
	    f"@{safe_quote(PARQUET_REVISION)}"
	    f"/{path}"
	)
	print(location)


	def greet(dataset, config, split, sql):
	    """Run the demo query and return its rows as a pandas DataFrame.

	    The four arguments are accepted (the UI passes its text boxes here) but
	    are not used: the SQL text and the parquet ``location`` are hard-coded.

	    Any exception is caught, printed, and reported as a one-row DataFrame
	    with a single ``"Error"`` column instead of being raised.
	    """
	    try:
	        relation = duckdb.query(
	            f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2"
	        )
	        frame = relation.to_df()
	        print("QUERY SUCCESSED")
	    except Exception as error:
	        # Return the failure as data so the caller gets a table either way.
	        message = str(error)
	        print(f"Error: {message}")
	        return pd.DataFrame({"Error": [f"❌ {message}"]})
	    return frame

	# UI layout: four text inputs, a Run button, and a table for the result.
	with gr.Blocks() as demo:
	    gr.Markdown(" ## DuckDB demo using parquet revision")

	    # NOTE: `dataset` rebinds the module-level name of the same value used
	    # earlier to build `location`; `location` is already computed by now.
	    dataset = gr.Textbox(label="dataset", placeholder="mstz/iris")
	    config = gr.Textbox(label="config", placeholder="iris")
	    split = gr.Textbox(label="split", placeholder="train")
	    sql = gr.Textbox(label="sql", placeholder="SELECT 1")

	    run_button = gr.Button("Run")

	    gr.Markdown("### Result")
	    cached_responses_table = gr.DataFrame()

	    # Clicking Run calls greet() with the four text boxes and shows the
	    # returned DataFrame in the result table.
	    run_button.click(
	        fn=greet,
	        inputs=[dataset, config, split, sql],
	        outputs=cached_responses_table,
	    )



	# Launch the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()




	# duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2").show()

	# duckdb.query(f"SELECT idx as id, premise as p FROM '{location}' LIMIT 2")
	# duckdb.query(f"SELECT max(idx) as max FROM '{location}' LIMIT 2")
	# duckdb.query(f"SELECT idx FROM '{location}' ORDER BY idx DESC LIMIT 1").show()