# automatically generated by the FlatBuffers compiler, do not modify

# namespace: libtextclassifier3

import flatbuffers
from flatbuffers.compat import import_numpy
np = import_numpy()

class GrammarTokenizerOptions(object):
    __slots__ = ['_tab']

    @classmethod
    def GetRootAsGrammarTokenizerOptions(cls, buf, offset):
        n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
        x = GrammarTokenizerOptions()
        x.Init(buf, n + offset)
        return x

    @classmethod
    def GrammarTokenizerOptionsBufferHasIdentifier(cls, buf, offset, size_prefixed=False):
        return flatbuffers.util.BufferHasIdentifier(buf, offset, b"\x54\x43\x32\x20", size_prefixed=size_prefixed)

    # GrammarTokenizerOptions
    def Init(self, buf, pos):
        self._tab = flatbuffers.table.Table(buf, pos)

    # GrammarTokenizerOptions
    def TokenizationType(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
        if o != 0:
            return self._tab.Get(flatbuffers.number_types.Int32Flags, o + self._tab.Pos)
        return 2

    # GrammarTokenizerOptions
    def IcuPreserveWhitespaceTokens(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
        if o != 0:
            return bool(self._tab.Get(flatbuffers.number_types.BoolFlags, o + self._tab.Pos))
        return False

    # GrammarTokenizerOptions
    def TokenizationCodepointConfig(self, j):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        if o != 0:
            x = self._tab.Vector(o)
            x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
            x = self._tab.Indirect(x)
            from libtextclassifier3.TokenizationCodepointRange import TokenizationCodepointRange
            obj = TokenizationCodepointRange()
            obj.Init(self._tab.Bytes, x)
            return obj
        return None

    # GrammarTokenizerOptions
    def TokenizationCodepointConfigLength(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        if o != 0:
            return self._tab.VectorLen(o)
        return 0

    # GrammarTokenizerOptions
    def TokenizationCodepointConfigIsNone(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        return o == 0

    # GrammarTokenizerOptions
    def InternalTokenizerCodepointRanges(self, j):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
        if o != 0:
            x = self._tab.Vector(o)
            x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
            x = self._tab.Indirect(x)
            from libtextclassifier3.CodepointRange import CodepointRange
            obj = CodepointRange()
            obj.Init(self._tab.Bytes, x)
            return obj
        return None

    # GrammarTokenizerOptions
    def InternalTokenizerCodepointRangesLength(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
        if o != 0:
            return self._tab.VectorLen(o)
        return 0

    # GrammarTokenizerOptions
    def InternalTokenizerCodepointRangesIsNone(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
        return o == 0

    # GrammarTokenizerOptions
    def TokenizeOnScriptChange(self):
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(12))
        if o != 0:
            return bool(self._tab.Get(flatbuffers.number_types.BoolFlags, o + self._tab.Pos))
        return False
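
# Illustrative sketch, not flatc output: reading the table-vector fields of a
# parsed GrammarTokenizerOptions `opts` (a hypothetical instance). The *IsNone
# accessors distinguish an absent vector from an empty one.
#
#   if not opts.TokenizationCodepointConfigIsNone():
#       for j in range(opts.TokenizationCodepointConfigLength()):
#           rng = opts.TokenizationCodepointConfig(j)  # a TokenizationCodepointRange table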

def GrammarTokenizerOptionsStart(builder): builder.StartObject(5)
def GrammarTokenizerOptionsAddTokenizationType(builder, tokenizationType): builder.PrependInt32Slot(0, tokenizationType, 2)
def GrammarTokenizerOptionsAddIcuPreserveWhitespaceTokens(builder, icuPreserveWhitespaceTokens): builder.PrependBoolSlot(1, icuPreserveWhitespaceTokens, 0)
def GrammarTokenizerOptionsAddTokenizationCodepointConfig(builder, tokenizationCodepointConfig): builder.PrependUOffsetTRelativeSlot(2, flatbuffers.number_types.UOffsetTFlags.py_type(tokenizationCodepointConfig), 0)
def GrammarTokenizerOptionsStartTokenizationCodepointConfigVector(builder, numElems): return builder.StartVector(4, numElems, 4)
def GrammarTokenizerOptionsAddInternalTokenizerCodepointRanges(builder, internalTokenizerCodepointRanges): builder.PrependUOffsetTRelativeSlot(3, flatbuffers.number_types.UOffsetTFlags.py_type(internalTokenizerCodepointRanges), 0)
def GrammarTokenizerOptionsStartInternalTokenizerCodepointRangesVector(builder, numElems): return builder.StartVector(4, numElems, 4)
def GrammarTokenizerOptionsAddTokenizeOnScriptChange(builder, tokenizeOnScriptChange): builder.PrependBoolSlot(4, tokenizeOnScriptChange, 0)
def GrammarTokenizerOptionsEnd(builder): return builder.EndObject()
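
# A minimal round-trip sketch (an addition for illustration, not flatc output):
# build a GrammarTokenizerOptions table with the helpers above, then read the
# scalar fields back through the accessor class. The field values are arbitrary.
if __name__ == '__main__':
    builder = flatbuffers.Builder(0)
    GrammarTokenizerOptionsStart(builder)
    GrammarTokenizerOptionsAddTokenizationType(builder, 1)  # overrides the schema default of 2
    GrammarTokenizerOptionsAddIcuPreserveWhitespaceTokens(builder, True)
    GrammarTokenizerOptionsAddTokenizeOnScriptChange(builder, True)
    options = GrammarTokenizerOptionsEnd(builder)
    builder.Finish(options)

    buf = builder.Output()
    opts = GrammarTokenizerOptions.GetRootAsGrammarTokenizerOptions(buf, 0)
    assert opts.TokenizationType() == 1
    assert opts.IcuPreserveWhitespaceTokens() is True
    assert opts.TokenizeOnScriptChange() is True
    assert opts.TokenizationCodepointConfigIsNone()  # vector field was never set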