Tony Powell committed on
Commit
5534d69
·
1 Parent(s): 843fee0

Move parquet table loading into custom hook

Browse files
Files changed (2) hide show
  1. src/App.tsx +12 -64
  2. src/useParquetTable.ts +76 -0
src/App.tsx CHANGED
@@ -1,22 +1,14 @@
1
- import { useCallback, useEffect, useState } from "react";
2
  import { Table } from "./table";
3
- import { AsyncDuckDBConnection } from "@duckdb/duckdb-wasm";
4
  import { Button } from "~/components/ui/button";
5
  import { Textarea } from "~/components/ui/textarea";
6
  import { useSearchParams } from "~/useSearchParams";
7
  import { useDuckDb } from "~/useDuckDb";
 
8
 
9
  const DEFAULT_DATASET_URL =
10
  "https://huggingface.co/datasets/openai/openai_humaneval/resolve/main/openai_humaneval/test-00000-of-00001.parquet";
11
 
12
- // https://duckdb.org/docs/api/wasm/query#arrow-table-to-json
13
- // TODO: import the arrow lib and use the correct type
14
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
15
- const arrowResultToJson = (arrowResult: any) => {
16
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
17
- return arrowResult.toArray().map((row: any) => row.toJSON());
18
- };
19
-
20
  const usePersistedTextfield = (fieldName: string) => {
21
  const { searchParams, setSearchParams } = useSearchParams();
22
  const searchParamsObj = new URLSearchParams(searchParams);
@@ -31,61 +23,13 @@ const usePersistedTextfield = (fieldName: string) => {
31
  };
32
 
33
  function App() {
34
- const [loading, setLoading] = useState(false);
35
  const [datasetUrl, setDatasetUrl] = usePersistedTextfield("datasetUrl");
36
  const [nextDatasetUrl, setNextDatasetUrl] = useState(() => datasetUrl);
37
- const [dataset, setDataset] = useState<null>(null);
38
- const [error, setError] = useState<string | null>(null);
39
  const db = useDuckDb();
40
-
41
- useEffect(() => {
42
- if (db && datasetUrl) {
43
- setLoading(true);
44
- setDataset(null);
45
- setError(null);
46
- console.log("Loading", datasetUrl);
47
- const status: { conn: AsyncDuckDBConnection | null; killed: boolean } = {
48
- conn: null,
49
- killed: false,
50
- };
51
- db.connect().then((conn) => {
52
- if (status.killed) {
53
- conn.close();
54
- return;
55
- }
56
- status.conn = conn;
57
- conn
58
- // we get httpfs for free with cors restrictions
59
- // we get parquet downloaded as we load it
60
- // https://duckdb.org/docs/api/wasm/extensions
61
- // https://duckdb.org/docs/api/wasm/data_ingestion#parquet
62
- .query(
63
- `LOAD parquet;LOAD httpfs;SELECT * FROM '${datasetUrl}' LIMIT 10`
64
- )
65
- .then((result) => setDataset(arrowResultToJson(result)))
66
- .catch((err) => {
67
- console.error(err);
68
- setError(err.message);
69
- setDataset(null);
70
- })
71
- .finally(() => {
72
- conn.close();
73
- setLoading(false);
74
- });
75
- });
76
-
77
- return () => {
78
- status.killed = true;
79
- if (status.conn) {
80
- console.log("Closing connection");
81
- status.conn.close();
82
- }
83
- };
84
- } else if (db) {
85
- console.log("Resetting db");
86
- db.reset();
87
- }
88
- }, [db, datasetUrl]);
89
 
90
  const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
91
  e.preventDefault();
@@ -111,7 +55,11 @@ function App() {
111
  <Button
112
  className="w-full"
113
  type="submit"
114
- disabled={!nextDatasetUrl || nextDatasetUrl === datasetUrl}
 
 
 
 
115
  >
116
  Load
117
  </Button>
@@ -130,7 +78,7 @@ function App() {
130
  variant={"destructive"}
131
  onClick={() => {
132
  setNextDatasetUrl("");
133
- setDataset(null);
134
  setDatasetUrl("");
135
  }}
136
  >
 
1
+ import { useCallback, useState } from "react";
2
  import { Table } from "./table";
 
3
  import { Button } from "~/components/ui/button";
4
  import { Textarea } from "~/components/ui/textarea";
5
  import { useSearchParams } from "~/useSearchParams";
6
  import { useDuckDb } from "~/useDuckDb";
7
+ import { useParquetTable } from "~/useParquetTable";
8
 
9
  const DEFAULT_DATASET_URL =
10
  "https://huggingface.co/datasets/openai/openai_humaneval/resolve/main/openai_humaneval/test-00000-of-00001.parquet";
11
 
 
 
 
 
 
 
 
 
12
  const usePersistedTextfield = (fieldName: string) => {
13
  const { searchParams, setSearchParams } = useSearchParams();
14
  const searchParamsObj = new URLSearchParams(searchParams);
 
23
  };
24
 
25
  function App() {
 
26
  const [datasetUrl, setDatasetUrl] = usePersistedTextfield("datasetUrl");
27
  const [nextDatasetUrl, setNextDatasetUrl] = useState(() => datasetUrl);
 
 
28
  const db = useDuckDb();
29
+ const { loading, dataset, error, clearDataset } = useParquetTable(
30
+ db,
31
+ datasetUrl
32
+ );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
35
  e.preventDefault();
 
55
  <Button
56
  className="w-full"
57
  type="submit"
58
+ disabled={
59
+ !nextDatasetUrl ||
60
+ (nextDatasetUrl === datasetUrl && !!dataset) ||
61
+ loading
62
+ }
63
  >
64
  Load
65
  </Button>
 
78
  variant={"destructive"}
79
  onClick={() => {
80
  setNextDatasetUrl("");
81
+ clearDataset();
82
  setDatasetUrl("");
83
  }}
84
  >
src/useParquetTable.ts ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { AsyncDuckDB, AsyncDuckDBConnection } from "@duckdb/duckdb-wasm";
2
+ import { useCallback, useEffect, useState } from "react";
3
+
/**
 * Convert an Arrow query result into an array of plain row objects.
 *
 * Calls `toArray()` on the result and `toJSON()` on every row, following
 * https://duckdb.org/docs/api/wasm/query#arrow-table-to-json
 *
 * TODO: import the arrow lib and use the correct type
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const arrowResultToJson = (arrowResult: any) =>
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  arrowResult.toArray().map((record: any) => record.toJSON());
11
+
/**
 * React hook that loads a preview (first 10 rows) of a remote parquet file
 * through DuckDB-WASM.
 *
 * The load runs automatically whenever `db` or `datasetUrl` changes. When the
 * inputs change or the component unmounts, the in-flight request is cancelled:
 * its connection is closed and its late result is ignored, so a slow response
 * for an old URL can never overwrite the state of a newer one.
 *
 * @param db - DuckDB instance, or `null` while it is still being created
 *   (nothing is loaded in that case).
 * @param datasetUrl - URL of the parquet file. An empty string resets the
 *   database instead of loading.
 * @returns `loading`, `dataset` and `error` state, `clearDataset` to drop the
 *   current rows/error, and `loadDataset` to trigger a load manually.
 *   `loadDataset` returns a cancel function when a request was started and
 *   `undefined` otherwise.
 */
export const useParquetTable = (db: AsyncDuckDB | null, datasetUrl: string) => {
  const [loading, setLoading] = useState(false);
  // NOTE(review): this state is declared as `null` although it stores the rows
  // produced by `arrowResultToJson` (which is `any`). The declaration is kept
  // as-is so existing consumers keep compiling; it should get a real row type
  // once the Arrow types are imported.
  const [dataset, setDataset] = useState<null>(null);
  const [error, setError] = useState<string | null>(null);

  const clearDataset = useCallback(() => {
    setDataset(null);
    setError(null);
  }, []);

  const loadDataset = useCallback((): (() => void) | undefined => {
    if (!db) {
      return undefined;
    }
    // Bind to a const so the non-null type is kept inside the async closure.
    const database = db;

    if (!datasetUrl) {
      console.log("Resetting db");
      // Promise.resolve() tolerates both sync and async reset(); a failed
      // reset is logged instead of becoming an unhandled rejection.
      void Promise.resolve(database.reset()).catch((err: unknown) =>
        console.error(err)
      );
      return undefined;
    }

    setLoading(true);
    setDataset(null);
    setError(null);
    console.log("Loading", datasetUrl);

    const status: { conn: AsyncDuckDBConnection | null; killed: boolean } = {
      conn: null,
      killed: false,
    };

    // Both the cancel function and the request itself may try to close the
    // connection; make sure close() is issued once and never rejects unhandled.
    let closed = false;
    const closeConnection = (conn: AsyncDuckDBConnection) => {
      if (closed) {
        return;
      }
      closed = true;
      void Promise.resolve(conn.close()).catch((err: unknown) =>
        console.error(err)
      );
    };

    const run = async () => {
      let conn: AsyncDuckDBConnection | null = null;
      try {
        conn = await database.connect();
        if (status.killed) {
          // Cancelled while connecting: `finally` closes the connection.
          return;
        }
        status.conn = conn;
        // The URL ends up inside a single-quoted SQL string literal, so
        // embedded quotes must be doubled to keep the statement well-formed.
        const escapedUrl = datasetUrl.replace(/'/g, "''");
        // we get httpfs for free with cors restrictions
        // we get parquet downloaded as we load it
        // https://duckdb.org/docs/api/wasm/extensions
        // https://duckdb.org/docs/api/wasm/data_ingestion#parquet
        const result = await conn.query(
          `LOAD parquet;LOAD httpfs;SELECT * FROM '${escapedUrl}' LIMIT 10`
        );
        if (!status.killed) {
          setDataset(arrowResultToJson(result));
        }
      } catch (err: unknown) {
        // Errors of a cancelled request (e.g. caused by closing its
        // connection) are not reported to the UI.
        if (!status.killed) {
          console.error(err);
          setError(err instanceof Error ? err.message : String(err));
          setDataset(null);
        }
      } finally {
        if (conn) {
          closeConnection(conn);
        }
        status.conn = null;
        // A cancelled request must not flip `loading` underneath a newer one.
        if (!status.killed) {
          setLoading(false);
        }
      }
    };
    void run();

    return () => {
      status.killed = true;
      if (status.conn) {
        console.log("Closing connection");
        closeConnection(status.conn);
      }
      // The cancelled request no longer touches `loading`, so clear it here;
      // a follow-up load sets it back to true right away.
      setLoading(false);
    };
  }, [db, datasetUrl]);

  // Return loadDataset's cancel function so React runs it as the effect
  // cleanup when the inputs change or the component unmounts.
  useEffect(() => loadDataset(), [loadDataset]);

  return { loading, dataset, error, clearDataset, loadDataset };
};