jonasknobloch committed on
Commit
f75c015
·
verified ·
1 Parent(s): 31976f9

Upload 5 files

Browse files
Files changed (4) hide show
  1. app.js +105 -0
  2. index.html +35 -212
  3. main.wasm +2 -2
  4. style.css +176 -0
app.js ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Go WebAssembly runtime bridge. `Go` is not defined in this file; it is
// expected to be provided by wasm_exec.js, which the page loads first.
const go = new Go();

// Wire up the page once the DOM is parsed: start the WebAssembly tokenizer,
// place the diagram viewport, and hook up the "Tokenize" button.
document.addEventListener("DOMContentLoaded", function () {
    const tokenizeButton = document.getElementById("js-tokenize");
    const loadingMessage = document.getElementById("js-message");

    // The button stays disabled until the module is running, so a click can
    // never reach the tokenizer before it is available.
    WebAssembly.instantiateStreaming(fetch("main.wasm"), go.importObject)
        .then(result => {
            go.run(result.instance);

            tokenizeButton.disabled = false;
            loadingMessage.style.display = "none";
        })
        .catch(error => {
            // Without this handler a failed download or instantiation left
            // the page stuck on the loading text with no explanation.
            console.error("Failed to initialize tokenizers:", error);
            loadingMessage.textContent = "Failed to initialize tokenizers.";
        });

    const pinchZoom = document.getElementById("js-mermaid");

    // Initial viewport: unscaled and shifted down by 220px.
    // NOTE(review): the offset presumably keeps the diagram clear of the
    // control panel at the top of the page - confirm against style.css.
    pinchZoom.setTransform({
        scale: 1,
        x: 0,
        y: 220,
        allowChangeEvent: false,
    });

    tokenizeButton.addEventListener("click", () => tokenizeText());
});

// Chunks from the most recent successful tokenization. Written by
// tokenizeText(), read by updateTabs() and showResult().
let data = [];
27
+
28
/**
 * Tokenize the text currently in the input field with the selected model and
 * vocabulary size, then rebuild the chunk tabs and show the first chunk.
 *
 * Does nothing when the input is blank or the WebAssembly-provided
 * `tokenizeWeb` function is not available yet. When tokenization fails or
 * yields no chunks, the tab strip is replaced by a short message, the diagram
 * is cleared and the stored results are reset.
 *
 * @returns {Promise<void>} Resolves once the UI has been updated.
 */
async function tokenizeText() {
    const input = document.getElementById("js-input").value.trim();
    const model = document.getElementById("js-model").value;
    // `|| -1` is deliberate: an unparsable (NaN) or zero size falls back to -1.
    const vocab = Number.parseInt(document.getElementById("js-vocab").value, 10) || -1;

    if (!input || typeof tokenizeWeb === "undefined") {
        return;
    }

    const tabs = document.getElementById("js-tabs");
    const diagram = document.getElementById("js-mermaid");

    // The original #js-message element lives inside #js-tabs, is hidden after
    // start-up and is destroyed whenever the tabs are rebuilt, so a fresh,
    // visible message element is created each time one is needed.
    const showMessage = (text) => {
        const message = document.createElement("div");

        message.id = "js-message";
        message.classList.add("mbpe-panel__message");
        message.textContent = text;

        tabs.innerHTML = "";
        tabs.appendChild(message);
        diagram.innerHTML = "";

        // Drop stale chunks so nothing can render results that are no
        // longer represented by a tab.
        data = [];
    };

    let result;

    try {
        result = JSON.parse(tokenizeWeb(input, model, vocab));
    } catch (error) {
        console.error("Tokenization failed:", error);
        showMessage("Tokenization failed.");

        return;
    }

    if (!Array.isArray(result) || result.length < 1) {
        showMessage("No tokens found.");

        return;
    }

    data = result;

    updateTabs();
    showResult(0);
}
54
+
55
/**
 * Rebuild the tab strip in `#js-tabs` from the module-level `data` array.
 *
 * Each chunk becomes one button whose content is a list of the tokens of the
 * chunk's last segmentation. Clicking a tab scrolls it into view and shows
 * that chunk via showResult(). Any previous content of `#js-tabs` is removed.
 */
function updateTabs() {
    const tabs = document.getElementById("js-tabs");

    tabs.innerHTML = "";

    data.forEach((chunk, index) => {
        const tab = document.createElement("button");

        tab.classList.add("mbpe-tab");
        tab.dataset.index = index.toString();
        tab.onclick = () => {
            tab.scrollIntoView({
                behavior: 'smooth',
                block: 'nearest',
                inline: 'center',
            });

            showResult(index);
        };

        const list = document.createElement("ol");

        list.classList.add("mbpe-chunk");

        // The tab label shows the final segmentation; a chunk without any
        // segmentation yields an empty list instead of throwing.
        const segmentations = chunk.Segmentations ?? [];
        const finalTokens = segmentations[segmentations.length - 1] ?? [];

        finalTokens.forEach(token => {
            const item = document.createElement("li");

            item.textContent = token.Token;
            item.classList.add("mbpe-token");
            // The token ID is exposed as the item's tooltip.
            item.title = String(token.ID);

            list.appendChild(item);
        });

        tab.appendChild(list);
        tabs.appendChild(tab);
    });
}
93
+
94
// Sequence number of the most recent render started by showResult(); used to
// discard renders that finish after a newer one has been requested.
let latestRenderRequest = 0;

/**
 * Show the chunk at `index`: mark its tab as active and render its Mermaid
 * diagram into `#js-mermaid`.
 *
 * Indices that are not integers within the bounds of the module-level `data`
 * array are ignored. Rendering is asynchronous; if another call starts a
 * newer render before this one finishes, the older SVG is dropped so the
 * diagram always matches the active tab. Render failures are logged.
 *
 * @param {number} index - Position of the chunk in `data`.
 */
function showResult(index) {
    if (!Number.isInteger(index) || index < 0 || index >= data.length) {
        return;
    }

    // Exactly the tab carrying this index ends up active; a missing tab is
    // simply skipped instead of raising a TypeError.
    document.querySelectorAll(".mbpe-tab").forEach(tab => {
        tab.classList.toggle("mbpe-tab--active", tab.dataset.index === String(index));
    });

    latestRenderRequest += 1;
    const request = latestRenderRequest;

    mermaid.render("mermaidDiagram", data[index].Mermaid)
        .then(({ svg }) => {
            // A newer selection was made while this diagram was rendering.
            if (request !== latestRenderRequest) {
                return;
            }

            document.getElementById("js-mermaid").innerHTML = svg;
        })
        .catch(error => {
            console.error("Mermaid rendering error:", error);
        });
}
index.html CHANGED
@@ -5,221 +5,44 @@
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
  <title>mbpe-dyn</title>
7
  <script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
 
8
  <script src="wasm_exec.js"></script>
9
- <script>
10
- const go = new Go();
11
-
12
- document.addEventListener("DOMContentLoaded", function () {
13
- const tokenizeButton = document.querySelector("button[onclick='tokenizeText()']");
14
- const loadingMessage = document.getElementById("loadingMessage");
15
-
16
- WebAssembly.instantiateStreaming(fetch("main.wasm"), go.importObject).then(result => {
17
- go.run(result.instance);
18
-
19
- tokenizeButton.disabled = false;
20
- loadingMessage.style.display = "none";
21
- });
22
- });
23
-
24
- async function tokenizeText() {
25
- const input = document.getElementById("textInput").value.trim();
26
- const modelChoice = document.getElementById("modelSelect").value;
27
- const vocabSize = parseInt(document.getElementById("vocabSize").value, 10) || -1;
28
-
29
- if (!input) {
30
- alert("Please enter text to tokenize.");
31
- return;
32
- }
33
-
34
- if (typeof tokenizeWeb !== "undefined") {
35
- const resultJSON = tokenizeWeb(input, modelChoice, vocabSize);
36
-
37
- try {
38
- const result = JSON.parse(resultJSON);
39
- console.log(result);
40
-
41
- if (Array.isArray(result) && result.length > 0) {
42
- displayTabs(result);
43
- showResult(0); // Show first tab by default
44
- } else {
45
- document.getElementById("code").innerText = "No tokens found.";
46
- document.getElementById("mermaidContainer").innerHTML = "";
47
- }
48
- } catch (e) {
49
- console.error("Error parsing result:", e);
50
- }
51
- }
52
- }
53
-
54
- function displayTabs(results) {
55
- const tabsContainer = document.getElementById("wordTabs");
56
- tabsContainer.innerHTML = "";
57
-
58
- results.forEach((entry, index) => {
59
- const finalSegmentation = entry.Segmentations[entry.Segmentations.length - 1];
60
- const label = finalSegmentation.map(t => t.Token).join("");
61
-
62
- const tabButton = document.createElement("button");
63
- tabButton.textContent = label;
64
- tabButton.classList.add("word-tab");
65
- tabButton.dataset.index = index;
66
- tabButton.onclick = () => showResult(index);
67
-
68
- tabsContainer.appendChild(tabButton);
69
- });
70
- }
71
-
72
- function showResult(index) {
73
- const results = JSON.parse(tokenizeWeb(
74
- document.getElementById("textInput").value.trim(),
75
- document.getElementById("modelSelect").value,
76
- parseInt(document.getElementById("vocabSize").value, 10) || -1
77
- ));
78
-
79
- const selectedResult = results[index];
80
- const diagramCode = selectedResult.Mermaid.trim();
81
-
82
- if (diagramCode) {
83
- mermaid.render("mermaidDiagram", diagramCode).then(({ svg }) => {
84
- document.getElementById("mermaidContainer").innerHTML = svg;
85
- }).catch(error => {
86
- console.error("Mermaid rendering error:", error);
87
- });
88
- }
89
-
90
- // Update active tab
91
- document.querySelectorAll(".word-tab").forEach(tab => tab.classList.remove("active"));
92
- document.querySelector(`.word-tab[data-index="${index}"]`).classList.add("active");
93
- }
94
- </script>
95
- <style>
96
- /* General Styles */
97
- body {
98
- font-family: Arial, sans-serif;
99
- background: #f8f9fa;
100
- color: #333;
101
- text-align: center;
102
- padding: 20px;
103
- }
104
-
105
- h1 {
106
- color: #007bff;
107
- }
108
-
109
- /* Input and Select */
110
- .input-group {
111
- display: flex;
112
- justify-content: center;
113
- gap: 10px;
114
- margin-bottom: 20px;
115
- }
116
-
117
- input, select, button {
118
- padding: 10px;
119
- border: 1px solid #ccc;
120
- border-radius: 5px;
121
- font-size: 16px;
122
- }
123
-
124
- input {
125
- width: 300px;
126
- }
127
-
128
- select {
129
- width: 150px;
130
- background: white;
131
- cursor: pointer;
132
- }
133
-
134
- button {
135
- background: #007bff;
136
- color: white;
137
- font-weight: bold;
138
- cursor: pointer;
139
- transition: 0.3s;
140
- border: none;
141
- }
142
-
143
- button:hover {
144
- background: #0056b3;
145
- }
146
-
147
- button:disabled {
148
- background: #ccc;
149
- color: #666;
150
- cursor: not-allowed;
151
- }
152
-
153
- #modelSelect {
154
- width: 80px;
155
- }
156
-
157
- #vocabSize {
158
- width: 110px;
159
- }
160
-
161
- #loadingMessage {
162
- text-align: center;
163
- font-weight: bold;
164
- color: #555;
165
- margin-top: 10px;
166
- }
167
-
168
- /* Word Tabs */
169
- .word-tabs {
170
- display: flex;
171
- justify-content: center;
172
- gap: 10px;
173
- margin-top: 20px;
174
- margin-bottom: 20px;
175
- flex-wrap: wrap;
176
- }
177
-
178
- .word-tab {
179
- padding: 8px 15px;
180
- background: #e1ecf4;
181
- border: none;
182
- border-radius: 20px;
183
- cursor: pointer;
184
- font-weight: bold;
185
- transition: 0.3s;
186
- }
187
-
188
- .word-tab.active {
189
- background: #007bff;
190
- color: white;
191
- }
192
- </style>
193
  </head>
194
  <body>
195
-
196
- <h1>mbpe-dyn</h1>
197
-
198
- <div class="input-group">
199
- <select id="modelSelect">
200
- <option value="m000">m000</option>
201
- <option value="m010">m010</option>
202
- <option value="m020">m020</option>
203
- <option value="m030">m030</option>
204
- <option value="m040">m040</option>
205
- <option value="m050">m050</option>
206
- <option value="m060">m060</option>
207
- <option value="m070">m070</option>
208
- <option value="m080">m080</option>
209
- <option value="m090">m090</option>
210
- <option value="m100" selected>m100</option>
211
- </select>
212
- <select id="vocabSize">
213
- <option value="16384">2¹⁴ (16K)</option>
214
- <option value="32768">2¹⁵ (32K)</option>
215
- <option value="65536">2¹⁶ (64K)</option>
216
- <option value="131072" selected>2¹⁷ (128K)</option>
217
- </select>
218
- <input type="text" id="textInput" placeholder="Enter text here" value="airsickness">
219
- <button onclick="tokenizeText()" disabled>Tokenize</button>
 
 
 
 
 
 
220
  </div>
221
- <div id="loadingMessage">Initializing tokenizers...</div>
222
- <div id="wordTabs" class="word-tabs"></div>
223
- <div id="mermaidContainer"></div>
224
  </body>
225
  </html>
 
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
  <title>mbpe-dyn</title>
7
  <script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
8
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/pinch-zoom-min.js"></script>
9
  <script src="wasm_exec.js"></script>
10
+ <script src="app.js"></script>
11
+ <link rel="stylesheet" href="style.css">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  </head>
13
  <body>
14
+ <div class="mbpe-panel">
15
+ <div class="mbpe-panel__form">
16
+ <input type="text" id="js-input" class="mbpe-input" placeholder="Enter text here" value="I dislike flying because of my airsickness">
17
+ <div class="mbpe-panel__row">
18
+ <div class="mbpe-panel__group">
19
+ <select id="js-model" class="mbpe-select">
20
+ <option value="m000">m000</option>
21
+ <option value="m010">m010</option>
22
+ <option value="m020">m020</option>
23
+ <option value="m030">m030</option>
24
+ <option value="m040">m040</option>
25
+ <option value="m050">m050</option>
26
+ <option value="m060">m060</option>
27
+ <option value="m070">m070</option>
28
+ <option value="m080">m080</option>
29
+ <option value="m090">m090</option>
30
+ <option value="m100" selected>m100</option>
31
+ </select>
32
+ <select id="js-vocab" class="mbpe-select">
33
+ <option value="16384">2¹⁴ (16K)</option>
34
+ <option value="32768">2¹⁵ (32K)</option>
35
+ <option value="65536">2¹⁶ (64K)</option>
36
+ <option value="131072" selected>2¹⁷ (128K)</option>
37
+ </select>
38
+ </div>
39
+ <button id="js-tokenize" class="mbpe-button" disabled>Tokenize</button>
40
+ </div>
41
+ </div>
42
+ <div id="js-tabs" class="mbpe-panel__result">
43
+ <div id="js-message" class="mbpe-panel__message">Initializing tokenizers...</div>
44
+ </div>
45
  </div>
46
+ <pinch-zoom id="js-mermaid" class="my-pinch-zoom mbpe-container"></pinch-zoom>
 
 
47
  </body>
48
  </html>
main.wasm CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8b0c692cf0fd88106ec4636e9fdfbc2ca6723efd104aeefb4c344780132a5c4
3
- size 43684316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2429e25bb515989962bf15bccd600abc2799582fbbce28edc50255a7b9080fe
3
+ size 43689867
style.css ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary-color: #080808;
3
+ --secondary-color: #1d1d1d;
4
+ --background-color: #f8f9fa;
5
+ --border-color: #cccccc;
6
+ }
7
+
8
+ body {
9
+ box-sizing: border-box;
10
+ display: flex;
11
+ flex-direction: column;
12
+ align-items: center;
13
+ justify-content: center;
14
+ width: 100%;
15
+ min-height: 100vh;
16
+ margin: 0;
17
+ font-family: Arial, sans-serif;
18
+ font-size: 16px;
19
+ text-align: center;
20
+ background: var(--background-color);
21
+ background:
22
+ linear-gradient(90deg, var(--dot-bg) calc(var(--dot-space) - var(--dot-size)), transparent 1%) center / var(--dot-space) var(--dot-space),
23
+ linear-gradient(var(--dot-bg) calc(var(--dot-space) - var(--dot-size)), transparent 1%) center / var(--dot-space) var(--dot-space),
24
+ var(--dot-color);
25
+ --dot-bg: white;
26
+ --dot-color: black;
27
+ --dot-size: 1px;
28
+ --dot-space: 22px;
29
+ }
30
+
31
+ .mbpe-panel {
32
+ position: absolute;
33
+ top: 0;
34
+ z-index: 100;
35
+ box-sizing: border-box;
36
+ display: flex;
37
+ flex-direction: column;
38
+ align-items: center;
39
+ justify-content: center;
40
+ width: 800px;
41
+ max-width: calc(100vw - 40px);
42
+ margin: 20px;
43
+ overflow: hidden;
44
+ background-color: var(--background-color);
45
+ border: 1px solid var(--border-color);
46
+ border-radius: 5px;
47
+ }
48
+
49
+ .mbpe-panel__row {
50
+ display: flex;
51
+ flex-direction: row;
52
+ gap: 10px;
53
+ justify-content: space-between;
54
+ width: 100%;
55
+ }
56
+
57
+ .mbpe-panel__group {
58
+ display: flex; flex-direction: row; gap: 10px;
59
+ }
60
+
61
+ .mbpe-panel__form {
62
+ box-sizing: border-box;
63
+ display: flex;
64
+ flex-direction: column;
65
+ gap: 10px;
66
+ width: 100%;
67
+ padding: 10px;
68
+ }
69
+
70
+ .mbpe-input {
71
+ box-sizing: border-box;
72
+ padding: 10px;
73
+ font-size: 16px;
74
+ border: 1px solid var(--border-color);
75
+ border-radius: 5px;
76
+ }
77
+
78
+ .mbpe-select {
79
+ padding: 10px 35px 10px 10px;
80
+ font-size: 16px;
81
+ appearance: none;
82
+ cursor: pointer;
83
+ background-color: white;
84
+ background-image: url('data:image/svg+xml;utf8,<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M6 9L12 15L18 9" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" stroke="currentColor"/></svg>');
85
+ background-repeat: no-repeat;
86
+ background-position: right 10px center;
87
+ background-size: 18px;
88
+ border: 1px solid var(--border-color);
89
+ border-radius: 5px;
90
+ }
91
+
92
+ .mbpe-button {
93
+ display: flex;
94
+ align-items: center;
95
+ justify-content: center;
96
+ padding: 10px;
97
+ font-size: 0.875rem;
98
+ font-weight: 700;
99
+ color: white;
100
+ text-transform: uppercase;
101
+ letter-spacing: 0.2px;
102
+ cursor: pointer;
103
+ background-color: var(--primary-color);
104
+ border: none;
105
+ border-radius: 5px;
106
+ transition: 0.3s;
107
+ }
108
+
109
+ .mbpe-button:disabled {
110
+ cursor: not-allowed;
111
+ }
112
+
113
+
114
+ .mbpe-panel__message {
115
+ font-weight: bold;
116
+ color: var(--secondary-color);
117
+ text-align: center;
118
+ }
119
+
120
+ .mbpe-panel__result {
121
+ box-sizing: border-box;
122
+ display: flex;
123
+ flex-direction: row;
124
+ flex-wrap: nowrap;
125
+ gap: 4px;
126
+ align-items: center;
127
+ justify-content: start;
128
+ width: 100%;
129
+ max-width: calc(100vw - 40px);
130
+ min-height: 66px;
131
+ padding: 10px;
132
+ overflow: scroll;
133
+ border-top: 1px solid var(--border-color);
134
+ }
135
+
136
+ .mbpe-tab {
137
+ padding: 4px;
138
+ font-weight: bold;
139
+ cursor: pointer;
140
+ background: transparent;
141
+ border: 1px dashed var(--primary-color);
142
+ border-radius: 24px;
143
+ transition: 0.3s;
144
+ }
145
+
146
+ .mbpe-tab:hover {
147
+ background: var(--primary-color);
148
+ }
149
+
150
+ .mbpe-tab--active {
151
+ background: var(--primary-color);
152
+ }
153
+
154
+ .mbpe-chunk {
155
+ display: flex;
156
+ gap: 4px;
157
+ padding: 0;
158
+ margin: 0;
159
+ list-style: none;
160
+ }
161
+
162
+ .mbpe-token {
163
+ display: flex;
164
+ flex-direction: row;
165
+ align-items: center;
166
+ justify-content: center;
167
+ padding: 8px;
168
+ font-size: 0.875rem;
169
+ background-color: var(--background-color);
170
+ border: 1px solid var(--primary-color);
171
+ border-radius: 20px;
172
+ }
173
+
174
+ .mbpe-container {
175
+ width: 100vw; height: 100vh;
176
+ }