Spaces:
Running
Running
/**
 * Copyright (c) 2023 MERCENARIES.AI PTE. LTD.
 * All rights reserved.
 */
//@ts-check | |
import { encode, isWithinTokenLimit } from 'gpt-tokenizer'; | |
import { Tokenizer } from './tokenizer.js'; | |
// https://www.npmjs.com/package/gpt-tokenizer
// By default, importing from gpt-tokenizer uses the cl100k_base encoding, which is used by gpt-3.5-turbo and gpt-4.
/**
 * Tokenizer implementation that delegates to the `gpt-tokenizer` package
 * (`encode` and `isWithinTokenLimit`, imported at the top of this file).
 */
class Tokenizer_Openai extends Tokenizer {
  constructor() {
    super();
  }

  /**
   * Encode a piece of text into tokens.
   *
   * @param {string} text - Text to tokenize.
   * @returns {number[]} Whatever `encode` from gpt-tokenizer returns for `text`.
   */
  // @ts-ignore
  encodeText(text) {
    return encode(text);
  }

  /**
   * Count how many tokens `text` encodes to.
   *
   * @param {string} text - Text to tokenize.
   * @returns {number} Number of tokens; 0 when the encoder yields nothing.
   */
  // @ts-ignore
  countTextTokens(text) {
    const tokens = encode(text);
    // `== null` covers both null and undefined; an empty result has length 0.
    return tokens == null ? 0 : tokens.length;
  }

  /**
   * Check `text` against a token budget.
   *
   * @param {string} text - Text to check.
   * @param {number} token_limit - Maximum number of tokens allowed.
   * @returns {false | number} The result of gpt-tokenizer's
   *   `isWithinTokenLimit`, passed through unchanged.
   */
  // @ts-ignore
  textIsWithinTokenLimit(text, token_limit) {
    return isWithinTokenLimit(text, token_limit);
  }
}

export { Tokenizer_Openai };