// pranavSIT: added pali inference (74e8f2f)
/**
* @license
* Copyright Big Vision Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Copied from
// https://github.com/tensorflow/tfjs-models/blob/master/universal-sentence-encoder/src/tokenizer/trie.ts
import {stringToChars} from './common';
/** Payload carried by a trie node, laid out as [token, score, index]. */
type OutputNode = [string[], number, number];

/**
 * One node of the trie.
 *
 * A freshly constructed node is detached (`parent` is null), has no
 * children, is not marked as the end of a token, and carries an empty
 * payload `[[], 0, 0]`.
 */
class TrieNode {
  /** Node this one hangs off; stays null until a caller links it. */
  parent: TrieNode|null = null;
  /** Child nodes keyed by the symbol that leads to them. */
  children: {[firstSymbol: string]: TrieNode} = {};
  /** Whether a token terminates at this node. */
  end = false;
  /** [token, score, index] payload for this node. */
  word: OutputNode = [[], 0, 0];
}
/**
 * Simple Trie datastructure.
 *
 * Tokens are stored symbol by symbol starting at `root`. Every node keeps
 * the symbols on the path leading to it in `word[0]`; nodes at which an
 * inserted token ends additionally have `end === true` and carry that
 * token's score and index in `word[1]` and `word[2]`.
 */
export class Trie {
  root: TrieNode;

  constructor() {
    this.root = new TrieNode();
  }

  /**
   * Returns the child of `node` reached via `symbol`, or undefined.
   *
   * `children` is a plain object, so a bare `children[symbol]` would also
   * resolve inherited members such as "constructor" or "toString". Only own
   * properties are real trie edges.
   */
  private static childOf(node: TrieNode, symbol: string): TrieNode|undefined {
    return Object.prototype.hasOwnProperty.call(node.children, symbol) ?
        node.children[symbol] :
        undefined;
  }

  /**
   * Inserts a token into the trie.
   *
   * Missing nodes along the path are created. Inserting a token that is
   * already present overwrites its score and index. If `stringToChars`
   * yields no symbols for `word`, the trie is left unchanged.
   *
   * @param word The token text; split into symbols with `stringToChars`.
   * @param score Score stored on the token's final node.
   * @param index Index stored on the token's final node.
   */
  insert(word: string, score: number, index: number): void {
    const symbols = stringToChars(word);
    let node = this.root;

    for (let i = 0; i < symbols.length; i++) {
      const symbol = symbols[i];
      let child = Trie.childOf(node, symbol);
      if (!child) {
        child = new TrieNode();
        child.parent = node;
        // A node's token is its parent's token plus the connecting symbol.
        child.word[0] = node.word[0].concat(symbol);
        node.children[symbol] = child;
      }
      node = child;
    }

    // Only mark a terminal node when at least one symbol was consumed, so
    // the root itself never becomes a token.
    if (symbols.length > 0) {
      node.end = true;
      node.word[1] = score;
      node.word[2] = index;
    }
  }

  /**
   * Returns every inserted token that is a prefix of `ss`, shortest first.
   *
   * The returned entries are the nodes' own `word` tuples (not copies).
   * When no inserted token matches, a single fallback entry
   * `[[ss[0]], 0, 0]` is returned. An empty `ss` yields an empty array,
   * since there is no first symbol to build the fallback from.
   *
   * @param ss The symbols to match on.
   * @returns Matching `[token, score, index]` entries.
   */
  commonPrefixSearch(ss: string[]): OutputNode[] {
    const output: OutputNode[] = [];
    if (ss.length === 0) {
      return output;
    }

    let node: TrieNode|undefined = this.root;
    for (let i = 0; i < ss.length; i++) {
      node = Trie.childOf(node, ss[i]);
      if (!node) {
        break;  // No stored token continues with this symbol.
      }
      if (node.end) {
        output.push(node.word);
      }
    }

    if (output.length === 0) {
      output.push([[ss[0]], 0, 0]);
    }
    return output;
  }
}