File size: 1,777 Bytes
50d8483 6887755 50d8483 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
<script lang="ts">
import type { Model } from "$lib/types/Model";
import { AutoTokenizer, PreTrainedTokenizer } from "@xenova/transformers";
// Extra CSS classes forwarded to the wrapper <div>.
export let classNames = "";
// Text whose token count is computed and displayed.
export let prompt = "";
// Tokenizer spec from the model config: either a model id string or an
// object with explicit tokenizer/config URLs.
export let modelTokenizer: Exclude<Model["tokenizer"], undefined>;
// Optional token limit; when the count exceeds it, the counter turns red.
export let truncate: number | undefined = undefined;
// Lazily loaded tokenizer instance; undefined until the reactive loader completes.
let tokenizer: PreTrainedTokenizer | undefined = undefined;
/**
 * Builds a tokenizer for the given model configuration.
 *
 * @param _modelTokenizer either a model id (string), resolved via
 *   `AutoTokenizer.from_pretrained`, or an object carrying explicit URLs
 *   for the tokenizer JSON and its config.
 * @returns a ready-to-use `PreTrainedTokenizer`
 * @throws if either tokenizer file cannot be fetched or is not valid JSON
 */
async function getTokenizer(_modelTokenizer: Exclude<Model["tokenizer"], undefined>) {
	if (typeof _modelTokenizer === "string") {
		// Model id: let transformers.js resolve and download the tokenizer.
		return await AutoTokenizer.from_pretrained(_modelTokenizer);
	}
	// Explicit URLs: fetch tokenizer JSON and config in parallel (they are
	// independent resources), then construct the tokenizer manually.
	const { tokenizerUrl, tokenizerConfigUrl } = _modelTokenizer satisfies {
		tokenizerUrl: string;
		tokenizerConfigUrl: string;
	};
	const [tokenizerRes, configRes] = await Promise.all([
		fetch(tokenizerUrl),
		fetch(tokenizerConfigUrl),
	]);
	// Fail fast with a clear message instead of a confusing JSON parse error
	// when either URL returns a non-2xx response.
	if (!tokenizerRes.ok || !configRes.ok) {
		throw new Error(
			`Failed to fetch tokenizer files (${tokenizerRes.status}, ${configRes.status})`
		);
	}
	const tokenizerJSON = await tokenizerRes.json();
	const tokenizerConfig = await configRes.json();
	return new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig);
}
/**
 * Counts the tokens produced for `text`, or returns undefined when the
 * tokenizer has not finished loading yet.
 */
async function tokenizeText(text: string) {
	if (tokenizer === undefined) {
		return undefined;
	}
	const encoded = await tokenizer(text);
	return encoded.input_ids.size;
}
// Reactive: (re)load the tokenizer whenever `modelTokenizer` changes.
// The trailing catch prevents an unhandled promise rejection when loading
// fails (e.g. a network error while fetching the tokenizer files).
$: (async () => {
	tokenizer = await getTokenizer(modelTokenizer);
})().catch((err) => {
	console.error("Failed to load tokenizer:", err);
});
</script>
{#if tokenizer}
	<!-- Re-tokenize whenever `prompt` changes; nTokens is the resulting count
	     (undefined only in the race where the tokenizer resets mid-await). -->
	{#await tokenizeText(prompt) then nTokens}
		<!-- Over-limit when a `truncate` cap is set and exceeded; no cap means Infinity. -->
		{@const exceedLimit = nTokens > (truncate || Infinity)}
		<div class={classNames}>
			<p
				class="peer text-sm {exceedLimit
					? 'text-red-500 opacity-100'
					: 'opacity-60 hover:opacity-90'}"
			>
				{nTokens}{truncate ? `/${truncate}` : ""}
			</p>
			<!-- Tooltip revealed via peer-hover when the counter above is hovered. -->
			<div
				class="invisible absolute -top-6 right-0 whitespace-nowrap rounded bg-black px-1 text-sm text-white peer-hover:visible"
			>
				Tokens usage
			</div>
		</div>
	{/await}
{/if}
|