File size: 919 Bytes
b39afbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/**
 * Copyright (c) 2023 MERCENARIES.AI PTE. LTD.
 * All rights reserved.
 */

//@ts-check
import { encode, isWithinTokenLimit } from 'gpt-tokenizer';
import { Tokenizer } from './tokenizer.js';
// https://www.npmjs.com/package/gpt-tokenizer
// By default, importing from gpt-tokenizer uses cl100k_base encoding, used by gpt-3.5-turbo and gpt-4.

/**
 * Tokenizer backed by the `gpt-tokenizer` package (cl100k_base encoding,
 * as used by gpt-3.5-turbo and gpt-4).
 *
 * Implements the {@link Tokenizer} interface for OpenAI-style models.
 */
class Tokenizer_Openai extends Tokenizer {
  // NOTE: no explicit constructor — the implicit default constructor
  // already forwards to super(), so declaring one was dead code.

  /**
   * Encode a string into an array of token ids.
   *
   * @param {string} text - The text to tokenize.
   * @returns {number[]} The token ids for `text`.
   */
  encodeText(text) {
    return encode(text);
  }

  /**
   * Count the number of tokens in a string.
   *
   * @param {string} text - The text to measure.
   * @returns {number} The token count; 0 for empty input or if encoding
   *   yields no tokens.
   */
  countTextTokens(text) {
    // encode() returns an array, so its length is the count directly;
    // `?? 0` keeps the original defensive fallback for a nullish result.
    return encode(text)?.length ?? 0;
  }

  /**
   * Check whether a string fits within a token budget without fully
   * encoding it when the limit is exceeded.
   *
   * @param {string} text - The text to check.
   * @param {number} token_limit - Maximum allowed token count.
   * @returns {false | number[]} `false` if over the limit, otherwise the
   *   encoded tokens (per the gpt-tokenizer API).
   */
  textIsWithinTokenLimit(text, token_limit) {
    return isWithinTokenLimit(text, token_limit);
  }
}

export { Tokenizer_Openai };