Spaces:
Running
Running
File size: 5,537 Bytes
b82d373 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import { main_api } from '../../../script.js';
import { getContext } from '../../extensions.js';
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
import { getFriendlyTokenizerName, getTextTokens, getTokenCountAsync, tokenizers } from '../../tokenizers.js';
import { resetScrollHeight, debounce } from '../../utils.js';
import { debounce_timeout } from '../../constants.js';
import { POPUP_TYPE, callGenericPopup } from '../../popup.js';
function rgb2hex(rgb) {
rgb = rgb.match(/^rgba?[\s+]?\([\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?/i);
return (rgb && rgb.length === 4) ? '#' +
('0' + parseInt(rgb[1], 10).toString(16)).slice(-2) +
('0' + parseInt(rgb[2], 10).toString(16)).slice(-2) +
('0' + parseInt(rgb[3], 10).toString(16)).slice(-2) : '';
}
$('button').click(function () {
var hex = rgb2hex($('input').val());
$('.result').html(hex);
});
async function doTokenCounter() {
const { tokenizerName, tokenizerId } = getFriendlyTokenizerName(main_api);
const html = `
<div class="wide100p">
<h3>Token Counter</h3>
<div class="justifyLeft flex-container flexFlowColumn">
<h4>Type / paste in the box below to see the number of tokens in the text.</h4>
<p>Selected tokenizer: ${tokenizerName}</p>
<div>Input:</div>
<textarea id="token_counter_textarea" class="wide100p textarea_compact" rows="1"></textarea>
<div>Tokens: <span id="token_counter_result">0</span></div>
<hr>
<div>Tokenized text:</div>
<div id="tokenized_chunks_display" class="wide100p">β</div>
<hr>
<div>Token IDs:</div>
<textarea id="token_counter_ids" class="wide100p textarea_compact" readonly rows="1">β</textarea>
</div>
</div>`;
const dialog = $(html);
const countDebounced = debounce(async () => {
const text = String($('#token_counter_textarea').val());
const ids = main_api == 'openai' ? getTextTokens(tokenizers.OPENAI, text) : getTextTokens(tokenizerId, text);
if (Array.isArray(ids) && ids.length > 0) {
$('#token_counter_ids').text(`[${ids.join(', ')}]`);
$('#token_counter_result').text(ids.length);
if (Object.hasOwnProperty.call(ids, 'chunks')) {
drawChunks(Object.getOwnPropertyDescriptor(ids, 'chunks').value, ids);
}
} else {
const count = await getTokenCountAsync(text);
$('#token_counter_ids').text('β');
$('#token_counter_result').text(count);
$('#tokenized_chunks_display').text('β');
}
await resetScrollHeight($('#token_counter_textarea'));
await resetScrollHeight($('#token_counter_ids'));
}, debounce_timeout.relaxed);
dialog.find('#token_counter_textarea').on('input', () => countDebounced());
callGenericPopup(dialog, POPUP_TYPE.TEXT, '', { wide: true, large: true, allowVerticalScrolling: true });
}
/**
* Draws the tokenized chunks in the UI
* @param {string[]} chunks
* @param {number[]} ids
*/
function drawChunks(chunks, ids) {
const pastelRainbow = [
//main_text_color,
//italics_text_color,
//quote_text_color,
'#FFB3BA',
'#FFDFBA',
'#FFFFBA',
'#BFFFBF',
'#BAE1FF',
'#FFBAF3',
];
$('#tokenized_chunks_display').empty();
for (let i = 0; i < chunks.length; i++) {
let chunk = chunks[i].replace(/β/g, ' '); // This is a leading space in sentencepiece. More info: Lower one eighth block (U+2581)
// If <0xHEX>, decode it
if (/^<0x[0-9A-F]+>$/i.test(chunk)) {
const code = parseInt(chunk.substring(3, chunk.length - 1), 16);
chunk = String.fromCodePoint(code);
}
// If newline - insert a line break
if (chunk === '\n') {
$('#tokenized_chunks_display').append('<br>');
continue;
}
const color = pastelRainbow[i % pastelRainbow.length];
const chunkHtml = $('<code></code>');
chunkHtml.css('background-color', color);
chunkHtml.text(chunk);
chunkHtml.attr('title', ids[i]);
$('#tokenized_chunks_display').append(chunkHtml);
}
}
async function doCount() {
// get all of the messages in the chat
const context = getContext();
const messages = context.chat.filter(x => x.mes && !x.is_system).map(x => x.mes);
//concat all the messages into a single string
const allMessages = messages.join(' ');
console.debug('All messages:', allMessages);
//toastr success with the token count of the chat
const count = await getTokenCountAsync(allMessages);
toastr.success(`Token count: ${count}`);
return count;
}
jQuery(() => {
const buttonHtml = `
<div id="token_counter" class="list-group-item flex-container flexGap5">
<div class="fa-solid fa-1 extensionsMenuExtensionButton" /></div>
Token Counter
</div>`;
$('#token_counter_wand_container').append(buttonHtml);
$('#token_counter').on('click', doTokenCounter);
SlashCommandParser.addCommandObject(SlashCommand.fromProps({
name: 'count',
callback: async () => String(await doCount()),
returns: 'number of tokens',
helpString: 'Counts the number of tokens in the current chat.',
}));
});
|