import { AsyncDuckDB, AsyncDuckDBConnection } from "@duckdb/duckdb-wasm"; import { useCallback, useEffect, useRef, useState } from "react"; // https://duckdb.org/docs/api/wasm/query#arrow-table-to-json // TODO: import the arrow lib and use the correct type // eslint-disable-next-line @typescript-eslint/no-explicit-any const arrowResultToJson = (arrowResult: any) => { // eslint-disable-next-line @typescript-eslint/no-explicit-any return arrowResult.toArray().map((row: any) => row.toJSON()) || []; }; export const useParquetTable = ( db: AsyncDuckDB | null, { datasetUrl, datasetQuery, setDatasetQuery: _setDatasetQuery, onQueryChanged, }: { datasetUrl: string; datasetQuery: string; setDatasetQuery: (query: string) => void; onQueryChanged?: (query: string) => void; } ) => { const [datasetLoaded, setDatasetLoaded] = useState(false); const [loading, setLoading] = useState(false); const [querying, setQuerying] = useState(false); const [dataset, setDataset] = useState(null); const [error, setError] = useState(null); const onQueryChangedRef = useRef(onQueryChanged); useEffect(() => { onQueryChangedRef.current = onQueryChanged; }, [onQueryChanged]); const datasetQueryRef = useRef(datasetQuery); useEffect(() => { datasetQueryRef.current = datasetQuery; }, [datasetQuery]); const setDatasetQuery = useCallback( (query: string) => { _setDatasetQuery(query); onQueryChangedRef.current?.(query); }, [_setDatasetQuery, onQueryChangedRef] ); const clearDataset = useCallback(() => { setDataset(null); setError(null); }, []); const loadDataset = useCallback(() => { if (db && datasetUrl) { setLoading(true); setDataset(null); setError(null); setDatasetLoaded(false); console.log("Loading", datasetUrl); const status: { conn: AsyncDuckDBConnection | null; killed: boolean } = { conn: null, killed: false, }; db.connect().then((conn) => { if (status.killed) { conn.close(); return; } status.conn = conn; conn // we get httpfs for free with cors restrictions // we get parquet downloaded as we load it // https://duckdb.org/docs/api/wasm/extensions // https://duckdb.org/docs/api/wasm/data_ingestion#parquet .query( `LOAD parquet;LOAD httpfs;DROP TABLE IF EXISTS dataset;CREATE TABLE dataset AS SELECT * FROM '${datasetUrl}';` ) .then(() => { setDatasetLoaded(true); if (!datasetQueryRef.current) { console.log("Setting default query"); setDatasetQuery(`SELECT * FROM dataset LIMIT 10;`); } }) .catch((err) => { console.error(err); setError(err.message); setDataset(null); setDatasetLoaded(false); setDatasetQuery(""); }) .finally(() => { conn.close(); setLoading(false); }); }); return () => { status.killed = true; if (status.conn) { console.log("Closing connection"); status.conn.close(); } }; } else if (db) { console.log("Resetting db"); setDataset(null); setError(null); setDatasetLoaded(false); setDatasetQuery(""); db.reset(); } }, [db, datasetUrl, setDatasetQuery, datasetQueryRef]); const performQuery = useCallback( (query: string) => { if (db && datasetLoaded) { setQuerying(true); db.connect() .then((conn) => { conn .query(query) .then((result) => setDataset(arrowResultToJson(result))) .catch(setError); }) .finally(() => setQuerying(false)); } }, [db, datasetLoaded] ); useEffect(() => { if (datasetLoaded && datasetQuery) { performQuery(datasetQuery); } }, [datasetLoaded, datasetQuery, performQuery]); useEffect(() => { loadDataset(); }, [loadDataset]); return { loading, dataset, error, clearDataset, loadDataset, querying, performQuery, }; };