File size: 4,098 Bytes
34cfb9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b7a4b4
 
34cfb9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b7a4b4
 
 
34cfb9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b7a4b4
34cfb9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b7a4b4
 
34cfb9f
0b7a4b4
 
34cfb9f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import { useEffect, useState } from "react";
import "./App.css";
import { createDb } from "./duck";
import { Table } from "./table";
import { AsyncDuckDBConnection } from "@duckdb/duckdb-wasm";

/**
 * Parquet URL loaded by the "Load Default" button. Assembled from its path
 * segments (dataset repo, revision/sub-directory, file name) joined with "/".
 */
const DEFAULT_DATASET_URL = [
  "https://huggingface.co/datasets/openai/openai_humaneval",
  "resolve/main/openai_humaneval",
  "test-00000-of-00001.parquet",
].join("/");

// https://duckdb.org/docs/api/wasm/query#arrow-table-to-json
// TODO: import the arrow lib and use the correct type
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const arrowResultToJson = (arrowResult: any) => {
  // Materialise the result's rows first, then turn each row into a plain
  // object via its own `toJSON()` so the data can be handed to the UI.
  const rows = arrowResult.toArray();
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const toPlainObject = (row: any) => row.toJSON();
  return rows.map(toPlainObject);
};

/**
 * Escapes a value for embedding inside a single-quoted SQL string literal by
 * doubling every single quote, so user input cannot terminate the literal.
 */
const escapeSqlString = (value: string): string => value.replace(/'/g, "''");

/** Turns an arbitrary rejection reason into a message suitable for display. */
const toErrorMessage = (err: unknown): string =>
  err instanceof Error ? err.message : String(err);

/**
 * Root component.
 *
 * Creates a database instance on mount (terminating it on unmount), lets the
 * user enter a dataset URL, queries the first 10 rows of that URL through a
 * fresh connection and renders them with `Table`. Loading and error states
 * are shown below the form.
 */
function App() {
  const [loading, setLoading] = useState(false);
  const [datasetUrl, setDatasetUrl] = useState("");
  const [textField, setTextField] = useState("");
  const [db, setDb] = useState<Awaited<ReturnType<typeof createDb>> | null>(
    null
  );
  // NOTE(review): this state is typed as `null` only; storing rows compiles
  // solely because `arrowResultToJson` returns `any`. Tighten the type once
  // the prop type expected by `Table` is confirmed.
  const [dataset, setDataset] = useState<null>(null);
  const [error, setError] = useState<string | null>(null);

  // Create the database once on mount and terminate it on unmount.
  useEffect(() => {
    let cancelled = false;
    // Tracked locally so the cleanup can terminate the instance directly
    // instead of doing it as a side effect inside a state updater (updaters
    // must be pure and may be invoked more than once).
    let instance: Awaited<ReturnType<typeof createDb>> | null = null;

    const initDb = async () => {
      try {
        const created = await createDb();
        if (cancelled) {
          // Unmounted while the database was still starting up.
          created.terminate();
          return;
        }
        instance = created;
        setDb(created);
      } catch (err: unknown) {
        if (cancelled) {
          return;
        }
        console.error(err);
        setError(toErrorMessage(err));
      }
    };
    void initDb();

    return () => {
      cancelled = true;
      if (instance) {
        instance.terminate();
        instance = null;
      }
      setDb(null);
    };
  }, []);

  // Load the first rows of `datasetUrl` whenever the database or URL changes.
  useEffect(() => {
    if (!db) {
      return undefined;
    }

    if (!datasetUrl) {
      // Nothing requested (initial state or "Clear"): drop any leftover
      // loading/error state from a cancelled request, then reset the db.
      setLoading(false);
      setError(null);
      console.log("Resetting db");
      db.reset();
      return undefined;
    }

    setLoading(true);
    setDataset(null);
    setError(null);
    console.log("Loading", datasetUrl);

    const status: { conn: AsyncDuckDBConnection | null; killed: boolean } = {
      conn: null,
      killed: false,
    };

    // Closes the connection at most once, whichever of "query settled" or
    // "effect cleaned up" happens first.
    const closeConn = () => {
      const conn = status.conn;
      if (!conn) {
        return;
      }
      status.conn = null;
      conn.close().catch((err: unknown) => console.error(err));
    };

    db.connect()
      .then((conn) => {
        if (status.killed) {
          // Superseded before the connection was even established.
          return conn.close();
        }
        status.conn = conn;
        return (
          conn
            // we get httpfs for free with cors restrictions
            // we get parquet downloaded as we load it
            // https://duckdb.org/docs/api/wasm/extensions
            // https://duckdb.org/docs/api/wasm/data_ingestion#parquet
            .query(
              `LOAD parquet;LOAD httpfs;SELECT * FROM '${escapeSqlString(
                datasetUrl
              )}' LIMIT 10`
            )
            .then((result) => {
              // Ignore results of a request that has been superseded.
              if (!status.killed) {
                setDataset(arrowResultToJson(result));
              }
            })
            .finally(closeConn)
        );
      })
      .catch((err: unknown) => {
        // Covers both connection and query failures. Failures of a cancelled
        // request are expected (its connection was closed) and are ignored.
        if (status.killed) {
          return;
        }
        console.error(err);
        setError(toErrorMessage(err));
        setDataset(null);
      })
      .finally(() => {
        // Only the current request may clear the loading flag; a stale one
        // must not hide the indicator of the request that replaced it.
        if (!status.killed) {
          setLoading(false);
        }
      });

    return () => {
      status.killed = true;
      if (status.conn) {
        console.log("Closing connection");
      }
      closeConn();
    };
  }, [db, datasetUrl]);

  const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
    e.preventDefault();
    setDatasetUrl(textField);
  };

  return (
    <>
      <h1>Quack</h1>
      <h2>Enter a hugging face dataset url</h2>
      <form onSubmit={handleSubmit}>
        <input
          type="text"
          name="textField"
          onChange={(e) => setTextField(e.target.value)}
          value={textField}
        />
        <button type="submit" disabled={!textField || textField === datasetUrl}>
          Load
        </button>
        <button
          type="button"
          onClick={() => {
            setTextField(DEFAULT_DATASET_URL);
            setDatasetUrl(DEFAULT_DATASET_URL);
          }}
        >
          Load Default
        </button>
        <button
          type="button"
          onClick={() => {
            setTextField("");
            setDataset(null);
            setDatasetUrl("");
          }}
        >
          Clear
        </button>
      </form>
      <div>
        <h2>Dataset</h2>
        {loading && <p>Loading...</p>}
        {error && <p style={{ color: "red" }}>{error}</p>}
        {dataset && <Table data={dataset} />}
      </div>
    </>
  );
}

export default App;