/**
 * @license
 * Copyright Big Vision Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import 'jasmine'; | |
/**
 * Smoke test for the `./sentencepiece_bpe` module.
 *
 * Guards against the module failing to load or losing its `Tokenizer` export.
 * A spec with an empty body passes without checking anything, so this spec
 * carries an explicit expectation.
 */
describe('sentencepiece bpe test', () => {
  it('exports a Tokenizer constructor', () => {
    // Classes are functions at runtime, so a constructable export reports
    // 'function' here.
    expect(typeof bpe.Tokenizer).toBe('function');
  });
});
import * as bpe from './sentencepiece_bpe'; | |
import {TOKEN_SEPARATOR, Vocabulary} from './common'; | |
/**
 * Minimal vocabulary fixture shared by the specs in this file.
 *
 * The trailing comment on each entry is its position in the array; the
 * expected token ids in the specs are these positions. The second tuple
 * element is presumably the piece's score (0 for the separator and single
 * characters, negative for multi-character pieces) -- TODO confirm against
 * the `Vocabulary` definition in `./common`.
 */
const vocab: Vocabulary = [
  [TOKEN_SEPARATOR, 0],  // id 0
  ['a', 0],              // id 1
  ['e', 0],              // id 2
  ['s', 0],              // id 3
  ['t', 0],              // id 4
  ['te', -1],            // id 5
  ['st', -2],            // id 6
  ['test', -3],          // id 7
  ['tes', -4],           // id 8
];
/**
 * Specs for `bpe.Tokenizer`, run against the `vocab` fixture declared in
 * this file.
 */
describe('BPE Tokenizer', () => {
  // One instance is built before the specs run and shared by all of them.
  let bpeTokenizer: bpe.Tokenizer;

  beforeAll(() => {
    bpeTokenizer = new bpe.Tokenizer(vocab);
  });

  it('should tokenize correctly', () => {
    const input = 'a test';
    // Positions in `vocab`: 0 = TOKEN_SEPARATOR, 1 = 'a', 7 = 'test'.
    const expectedIds = [0, 1, 0, 7];

    const actualIds = bpeTokenizer.encode(input);

    expect(actualIds).toEqual(expectedIds);
  });
});