julien-c HF staff committed on
Commit
7349b7e
·
verified ·
1 Parent(s): 05db436

Working prototype

Browse files
Files changed (3) hide show
  1. README.md +4 -4
  2. encoder.py +9 -1
  3. index.html +20 -6
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Gpt2 Tokenizer
3
- emoji: 🌖
4
- colorFrom: gray
5
- colorTo: gray
6
  sdk: static
7
  pinned: false
8
  ---
 
1
  ---
2
+ title: Pyodide GPT-2 Tokenizer
3
+ emoji: 🐍
4
+ colorFrom: green
5
+ colorTo: green
6
  sdk: static
7
  pinned: false
8
  ---
encoder.py CHANGED
@@ -114,4 +114,12 @@ def get_encoder(model_name, models_dir):
114
  return Encoder(
115
  encoder=encoder,
116
  bpe_merges=bpe_merges,
117
- )
 
 
 
 
 
 
 
 
 
114
  return Encoder(
115
  encoder=encoder,
116
  bpe_merges=bpe_merges,
117
+ )
118
+
119
+ def get_encoder_from_strings(vocab, bpe_data):
120
+ encoder = json.loads(vocab)
121
+ bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split('\n')[1:-1]]
122
+ return Encoder(
123
+ encoder=encoder,
124
+ bpe_merges=bpe_merges,
125
+ )
index.html CHANGED
@@ -7,15 +7,29 @@
7
  Pyodide test page <br>
8
  Open your browser console to see Pyodide output
9
  <script type="text/javascript">
10
- (async function main(){
 
 
 
 
 
 
 
 
 
 
11
  const pyodide = await loadPyodide({
12
  indexURL : "https://cdn.jsdelivr.net/pyodide/v0.19.1/full/"
13
  });
14
- console.log(pyodide.runPython(`
15
- import sys
16
- sys.version
17
- `));
18
- console.log(pyodide.runPython("print(1 + 2)"));
 
 
 
 
19
  })();
20
  </script>
21
  </body>
 
7
  Pyodide test page <br>
8
  Open your browser console to see Pyodide output
9
  <script type="text/javascript">
10
+ const URL_VOCAB = "https://huggingface.co/gpt2/resolve/main/vocab.json";
11
+ const URL_MERGES = "https://huggingface.co/gpt2/resolve/main/merges.txt";
12
+
13
+
14
+ (async function main() {
15
+ const vocab = await (await fetch(URL_VOCAB)).text();
16
+ const merges = await (await fetch(URL_MERGES)).text();
17
+
18
+ const py_code = await (await fetch("./encoder.py")).text();
19
+ const c = console;
20
+
21
  const pyodide = await loadPyodide({
22
  indexURL : "https://cdn.jsdelivr.net/pyodide/v0.19.1/full/"
23
  });
24
+
25
+ await pyodide.loadPackagesFromImports(py_code);
26
+ pyodide.runPython(py_code);
27
+
28
+ pyodide.globals.set("vocab", vocab);
29
+ pyodide.globals.set("merges", merges);
30
+ pyodide.runPython(`encoder = get_encoder_from_strings(vocab, merges)`);
31
+ const out = pyodide.runPython(`encoder.encode(${JSON.stringify("Hello my name is")})`);
32
+ c.log(Array.from(out));
33
  })();
34
  </script>
35
  </body>