File size: 2,869 Bytes
f75c015
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// NOTE(review): `Go` is not defined in this file; presumably it is the global
// provided by Go's wasm_exec.js support script — confirm it is loaded first.
const go = new Go();

/**
 * Page bootstrap: wires up the tokenize button, loads and starts the WASM
 * module, and sets the initial transform of the diagram viewport.
 */
document.addEventListener("DOMContentLoaded", () => {
    const tokenizeButton = document.getElementById("js-tokenize");
    const loadingMessage = document.getElementById("js-message");
    const pinchZoom = document.getElementById("js-mermaid");

    // Registered first so that a failure further down (e.g. in setTransform)
    // cannot leave the button without a handler. tokenizeText() itself bails
    // out while the WASM export is not available yet.
    tokenizeButton.addEventListener("click", () => tokenizeText());

    WebAssembly.instantiateStreaming(fetch("main.wasm"), go.importObject)
        .then(({ instance }) => {
            // Deliberately not awaited: the UI is unlocked right after the
            // program has been started.
            // NOTE(review): wasm_exec.js `run` is expected to settle only when
            // the Go program exits — confirm.
            go.run(instance);

            tokenizeButton.disabled = false;
            loadingMessage.style.display = "none";
        })
        .catch((error) => {
            // Without this handler a failed fetch/compile would leave the
            // loading message on screen forever with no explanation.
            console.error("Failed to load main.wasm:", error);
            loadingMessage.textContent = "Failed to load the tokenizer. Please reload the page.";
        });

    // NOTE(review): assumes #js-mermaid exposes a setTransform() method
    // (e.g. a <pinch-zoom> custom element) — confirm against the markup.
    pinchZoom.setTransform({
        scale: 1,
        x: 0,
        y: 220,
        allowChangeEvent: false,
    });
});

// Chunks returned by the most recent successful tokenization. Read by
// updateTabs() and showResult().
let data = [];

/**
 * Reads the form inputs, runs the WASM tokenizer and displays the result.
 *
 * Does nothing when the input is empty or the `tokenizeWeb` global is not
 * available. When the tokenizer fails or returns no chunks, the previous
 * result (tabs, diagram and `data`) is cleared and a message is shown.
 *
 * @returns {Promise<void>} Resolves once the UI has been updated; it never
 *     rejects because of a tokenizer or JSON parsing error.
 */
async function tokenizeText() {
    const input = document.getElementById("js-input").value.trim();
    const model = document.getElementById("js-model").value;
    // A missing, non-numeric or zero vocabulary size is passed on as -1.
    const vocab = Number.parseInt(document.getElementById("js-vocab").value, 10) || -1;

    const message = document.getElementById("js-message");
    const diagram = document.getElementById("js-mermaid");

    if (!input || typeof tokenizeWeb === "undefined") {
        return;
    }

    // Drops the previous result so stale tabs cannot be clicked, and makes the
    // message element visible again (it is hidden once the WASM module loads).
    const showFailure = (text) => {
        data = [];
        updateTabs(data);
        diagram.innerHTML = "";
        message.textContent = text;
        message.style.display = "";
    };

    let result;

    try {
        result = JSON.parse(tokenizeWeb(input, model, vocab));
    } catch (error) {
        console.error("Tokenization failed:", error);
        showFailure("Tokenization failed.");

        return;
    }

    if (!Array.isArray(result) || result.length < 1) {
        showFailure("No tokens found.");

        return;
    }

    // Hide any message left over from a previous failed attempt.
    message.style.display = "none";

    data = result;

    updateTabs(result);
    showResult(0);
}

/**
 * Rebuilds the tab strip in #js-tabs: one button per chunk, each listing the
 * tokens of the chunk's last segmentation.
 *
 * @param {Array<{Segmentations: Array<Array<{Token: string, ID: *}>>}>} [chunks=data]
 *     Chunks to render. Defaults to the module-level `data`, which keeps
 *     argument-less calls working.
 */
function updateTabs(chunks = data) {
    const tabs = document.getElementById("js-tabs");

    tabs.innerHTML = "";

    chunks.forEach((chunk, index) => {
        const tab = document.createElement("button");

        // Buttons default to type "submit"; make sure a tab click can never
        // submit a surrounding form.
        tab.type = "button";
        tab.classList.add("mbpe-tab");
        tab.dataset.index = String(index);
        tab.addEventListener("click", () => {
            tab.scrollIntoView({
                behavior: "smooth",
                block: "nearest",
                inline: "center",
            });

            showResult(index);
        });

        const list = document.createElement("ol");

        list.classList.add("mbpe-chunk");

        // A chunk without any segmentation still gets a (token-less) tab
        // instead of aborting the whole rebuild with a TypeError.
        const segmentations = Array.isArray(chunk.Segmentations) ? chunk.Segmentations : [];
        const lastSegmentation = segmentations.length > 0 ? segmentations[segmentations.length - 1] : [];

        lastSegmentation.forEach((token) => {
            const item = document.createElement("li");

            item.textContent = token.Token;
            item.classList.add("mbpe-token");
            item.title = String(token.ID);

            list.appendChild(item);
        });

        tab.appendChild(list);
        tabs.appendChild(tab);
    });
}

/**
 * Renders the Mermaid diagram of `data[index]` into #js-mermaid and marks the
 * matching tab as active.
 *
 * Indices that are not integers within the bounds of `data` are ignored. If
 * Mermaid fails to render the diagram, the error is logged and the container
 * is cleared so that no diagram of another chunk stays under the active tab.
 *
 * @param {number} index Position of the chunk in `data`.
 */
function showResult(index) {
    if (!Number.isInteger(index) || index < 0 || index >= data.length) {
        return;
    }

    const container = document.getElementById("js-mermaid");

    mermaid.render("mermaidDiagram", data[index].Mermaid)
        .then(({ svg }) => {
            container.innerHTML = svg;
        })
        .catch((error) => {
            console.error("Failed to render diagram:", error);
            container.innerHTML = "";
        });

    // Single pass over the tabs; also avoids dereferencing a missing element
    // when no tab carries this index.
    const activeIndex = String(index);

    document.querySelectorAll(".mbpe-tab").forEach((tab) => {
        tab.classList.toggle("mbpe-tab--active", tab.dataset.index === activeIndex);
    });
}