|
|
|
|
|
|
|
|
|
import flatbuffers
from flatbuffers.compat import import_numpy

# Module-level numpy handle: whatever flatbuffers' compat helper returns.
np = import_numpy()
|
|
|
class FeatureProcessorOptions(object):
    """Accessor over a ``FeatureProcessorOptions`` FlatBuffers table.

    Each public getter resolves its field through the table's vtable
    (``self._tab.Offset(<vtable offset>)``). A resolved offset of 0 is treated
    as "field not stored", in which case the getter returns the fallback value
    hard-coded for that field.
    """

    __slots__ = ['_tab']

    @classmethod
    def GetRootAsFeatureProcessorOptions(cls, buf, offset):
        """Build an accessor for the table referenced at `offset` in `buf`.

        Reads a uoffset at `offset` and initialises the accessor at
        ``offset + <that uoffset>``.
        """
        n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
        x = FeatureProcessorOptions()
        x.Init(buf, n + offset)
        return x

    @classmethod
    def FeatureProcessorOptionsBufferHasIdentifier(cls, buf, offset, size_prefixed=False):
        """Return whether `buf` carries the identifier bytes ``b"TC2 "``."""
        return flatbuffers.util.BufferHasIdentifier(
            buf, offset, b"\x54\x43\x32\x20", size_prefixed=size_prefixed)

    def Init(self, buf, pos):
        """Bind this accessor to the table located at `pos` inside `buf`."""
        self._tab = flatbuffers.table.Table(buf, pos)

    # ------------------------------------------------------------------
    # Private helpers shared by the getters below.
    # ------------------------------------------------------------------

    def _field_offset(self, vtable_offset):
        """Return the table-relative offset of a field (0 means not stored)."""
        return flatbuffers.number_types.UOffsetTFlags.py_type(
            self._tab.Offset(vtable_offset))

    def _scalar(self, vtable_offset, flags, default):
        """Read a scalar of type `flags`, or return `default` if not stored."""
        rel = self._field_offset(vtable_offset)
        if rel == 0:
            return default
        return self._tab.Get(flags, rel + self._tab.Pos)

    def _flag(self, vtable_offset, default):
        """Read a bool field, or return `default` if not stored."""
        rel = self._field_offset(vtable_offset)
        if rel == 0:
            return default
        return bool(self._tab.Get(flatbuffers.number_types.BoolFlags,
                                  rel + self._tab.Pos))

    def _int32_element(self, vtable_offset, j):
        """Read element `j` (4-byte stride) of an int32 vector; 0 if not stored."""
        rel = self._field_offset(vtable_offset)
        if rel == 0:
            return 0
        start = self._tab.Vector(rel)
        return self._tab.Get(
            flatbuffers.number_types.Int32Flags,
            start + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4))

    def _int32_numpy(self, vtable_offset):
        """Return an int32 vector via ``GetVectorAsNumpy``; 0 if not stored."""
        rel = self._field_offset(vtable_offset)
        if rel == 0:
            return 0
        return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Int32Flags, rel)

    def _string_element(self, vtable_offset, j):
        """Read element `j` (4-byte stride) of a string vector; "" if not stored."""
        rel = self._field_offset(vtable_offset)
        if rel == 0:
            return ""
        start = self._tab.Vector(rel)
        return self._tab.String(
            start + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4))

    def _vector_length(self, vtable_offset):
        """Return ``VectorLen`` of a vector field; 0 if not stored."""
        rel = self._field_offset(vtable_offset)
        if rel == 0:
            return 0
        return self._tab.VectorLen(rel)

    def _is_absent(self, vtable_offset):
        """Return True when the field's resolved offset is 0."""
        return self._field_offset(vtable_offset) == 0

    def _table_element_pos(self, vtable_offset, j):
        """Return the position of table element `j` of a vector, or None."""
        rel = self._field_offset(vtable_offset)
        if rel == 0:
            return None
        pos = self._tab.Vector(rel)
        pos += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
        return self._tab.Indirect(pos)

    def _wrap_table(self, table_cls, pos):
        """Instantiate `table_cls` and bind it to this buffer at `pos`."""
        obj = table_cls()
        obj.Init(self._tab.Bytes, pos)
        return obj

    # ------------------------------------------------------------------
    # Field getters.
    # ------------------------------------------------------------------

    def NumBuckets(self):
        """Int32 at vtable offset 4; -1 if not stored."""
        return self._scalar(4, flatbuffers.number_types.Int32Flags, -1)

    def EmbeddingSize(self):
        """Int32 at vtable offset 6; -1 if not stored."""
        return self._scalar(6, flatbuffers.number_types.Int32Flags, -1)

    def EmbeddingQuantizationBits(self):
        """Int32 at vtable offset 8; 8 if not stored."""
        return self._scalar(8, flatbuffers.number_types.Int32Flags, 8)

    def ContextSize(self):
        """Int32 at vtable offset 10; -1 if not stored."""
        return self._scalar(10, flatbuffers.number_types.Int32Flags, -1)

    def MaxSelectionSpan(self):
        """Int32 at vtable offset 12; -1 if not stored."""
        return self._scalar(12, flatbuffers.number_types.Int32Flags, -1)

    def ChargramOrders(self, j):
        """Element `j` of the int32 vector at vtable offset 14; 0 if not stored."""
        return self._int32_element(14, j)

    def ChargramOrdersAsNumpy(self):
        """The int32 vector at vtable offset 14 as numpy; 0 if not stored."""
        return self._int32_numpy(14)

    def ChargramOrdersLength(self):
        """Length of the vector at vtable offset 14; 0 if not stored."""
        return self._vector_length(14)

    def ChargramOrdersIsNone(self):
        """True when the vector at vtable offset 14 is not stored."""
        return self._is_absent(14)

    def MaxWordLength(self):
        """Int32 at vtable offset 16; 20 if not stored."""
        return self._scalar(16, flatbuffers.number_types.Int32Flags, 20)

    def UnicodeAwareFeatures(self):
        """Bool at vtable offset 18; False if not stored."""
        return self._flag(18, False)

    def ExtractCaseFeature(self):
        """Bool at vtable offset 20; False if not stored."""
        return self._flag(20, False)

    def ExtractSelectionMaskFeature(self):
        """Bool at vtable offset 22; False if not stored."""
        return self._flag(22, False)

    def RegexpFeature(self, j):
        """Element `j` of the string vector at vtable offset 24; "" if not stored."""
        return self._string_element(24, j)

    def RegexpFeatureLength(self):
        """Length of the vector at vtable offset 24; 0 if not stored."""
        return self._vector_length(24)

    def RegexpFeatureIsNone(self):
        """True when the vector at vtable offset 24 is not stored."""
        return self._is_absent(24)

    def RemapDigits(self):
        """Bool at vtable offset 26; False if not stored."""
        return self._flag(26, False)

    def LowercaseTokens(self):
        """Bool at vtable offset 28; False if not stored."""
        return self._flag(28, False)

    def SelectionReducedOutputSpace(self):
        """Bool at vtable offset 30; True if not stored."""
        return self._flag(30, True)

    def Collections(self, j):
        """Element `j` of the string vector at vtable offset 32; "" if not stored."""
        return self._string_element(32, j)

    def CollectionsLength(self):
        """Length of the vector at vtable offset 32; 0 if not stored."""
        return self._vector_length(32)

    def CollectionsIsNone(self):
        """True when the vector at vtable offset 32 is not stored."""
        return self._is_absent(32)

    def DefaultCollection(self):
        """Int32 at vtable offset 34; -1 if not stored."""
        return self._scalar(34, flatbuffers.number_types.Int32Flags, -1)

    def OnlyUseLineWithClick(self):
        """Bool at vtable offset 36; False if not stored."""
        return self._flag(36, False)

    def SplitTokensOnSelectionBoundaries(self):
        """Bool at vtable offset 38; False if not stored."""
        return self._flag(38, False)

    def TokenizationCodepointConfig(self, j):
        """Element `j` of the table vector at vtable offset 40; None if not stored."""
        pos = self._table_element_pos(40, j)
        if pos is None:
            return None
        # Imported only when the field is present, as in the original getter.
        from libtextclassifier3.TokenizationCodepointRange import TokenizationCodepointRange
        return self._wrap_table(TokenizationCodepointRange, pos)

    def TokenizationCodepointConfigLength(self):
        """Length of the vector at vtable offset 40; 0 if not stored."""
        return self._vector_length(40)

    def TokenizationCodepointConfigIsNone(self):
        """True when the vector at vtable offset 40 is not stored."""
        return self._is_absent(40)

    def CenterTokenSelectionMethod(self):
        """Int32 at vtable offset 42; 0 if not stored."""
        return self._scalar(42, flatbuffers.number_types.Int32Flags, 0)

    def SnapLabelSpanBoundariesToContainingTokens(self):
        """Bool at vtable offset 44; False if not stored."""
        return self._flag(44, False)

    def SupportedCodepointRanges(self, j):
        """Element `j` of the table vector at vtable offset 46; None if not stored."""
        pos = self._table_element_pos(46, j)
        if pos is None:
            return None
        # Imported only when the field is present, as in the original getter.
        from libtextclassifier3.CodepointRange import CodepointRange
        return self._wrap_table(CodepointRange, pos)

    def SupportedCodepointRangesLength(self):
        """Length of the vector at vtable offset 46; 0 if not stored."""
        return self._vector_length(46)

    def SupportedCodepointRangesIsNone(self):
        """True when the vector at vtable offset 46 is not stored."""
        return self._is_absent(46)

    def InternalTokenizerCodepointRanges(self, j):
        """Element `j` of the table vector at vtable offset 48; None if not stored."""
        pos = self._table_element_pos(48, j)
        if pos is None:
            return None
        # Imported only when the field is present, as in the original getter.
        from libtextclassifier3.CodepointRange import CodepointRange
        return self._wrap_table(CodepointRange, pos)

    def InternalTokenizerCodepointRangesLength(self):
        """Length of the vector at vtable offset 48; 0 if not stored."""
        return self._vector_length(48)

    def InternalTokenizerCodepointRangesIsNone(self):
        """True when the vector at vtable offset 48 is not stored."""
        return self._is_absent(48)

    def MinSupportedCodepointRatio(self):
        """Float32 at vtable offset 50; 0.0 if not stored."""
        return self._scalar(50, flatbuffers.number_types.Float32Flags, 0.0)

    def FeatureVersion(self):
        """Int32 at vtable offset 52; 0 if not stored."""
        return self._scalar(52, flatbuffers.number_types.Int32Flags, 0)

    def TokenizationType(self):
        """Int32 at vtable offset 54; 1 if not stored."""
        return self._scalar(54, flatbuffers.number_types.Int32Flags, 1)

    def IcuPreserveWhitespaceTokens(self):
        """Bool at vtable offset 56; False if not stored."""
        return self._flag(56, False)

    def IgnoredSpanBoundaryCodepoints(self, j):
        """Element `j` of the int32 vector at vtable offset 58; 0 if not stored."""
        return self._int32_element(58, j)

    def IgnoredSpanBoundaryCodepointsAsNumpy(self):
        """The int32 vector at vtable offset 58 as numpy; 0 if not stored."""
        return self._int32_numpy(58)

    def IgnoredSpanBoundaryCodepointsLength(self):
        """Length of the vector at vtable offset 58; 0 if not stored."""
        return self._vector_length(58)

    def IgnoredSpanBoundaryCodepointsIsNone(self):
        """True when the vector at vtable offset 58 is not stored."""
        return self._is_absent(58)

    def BoundsSensitiveFeatures(self):
        """Nested table at vtable offset 60; None if not stored."""
        rel = self._field_offset(60)
        if rel == 0:
            return None
        pos = self._tab.Indirect(rel + self._tab.Pos)
        # Imported only when the field is present, as in the original getter.
        from libtextclassifier3.FeatureProcessorOptions_.BoundsSensitiveFeatures import BoundsSensitiveFeatures
        return self._wrap_table(BoundsSensitiveFeatures, pos)

    def AllowedChargrams(self, j):
        """Element `j` of the string vector at vtable offset 62; "" if not stored."""
        return self._string_element(62, j)

    def AllowedChargramsLength(self):
        """Length of the vector at vtable offset 62; 0 if not stored."""
        return self._vector_length(62)

    def AllowedChargramsIsNone(self):
        """True when the vector at vtable offset 62 is not stored."""
        return self._is_absent(62)

    def TokenizeOnScriptChange(self):
        """Bool at vtable offset 64; False if not stored."""
        return self._flag(64, False)

    def UsePipeCharacterForNewline(self):
        """Bool at vtable offset 66; True if not stored."""
        return self._flag(66, True)
|
|
|
def FeatureProcessorOptionsStart(builder):
    """Call ``builder.StartObject(32)`` to open a 32-slot table; returns None."""
    builder.StartObject(32)
|
def FeatureProcessorOptionsAddNumBuckets(builder, numBuckets):
    """Pass `numBuckets` to ``PrependInt32Slot`` for slot 0 (default argument -1)."""
    builder.PrependInt32Slot(0, numBuckets, -1)
|
def FeatureProcessorOptionsAddEmbeddingSize(builder, embeddingSize):
    """Pass `embeddingSize` to ``PrependInt32Slot`` for slot 1 (default argument -1)."""
    builder.PrependInt32Slot(1, embeddingSize, -1)
|
def FeatureProcessorOptionsAddEmbeddingQuantizationBits(builder, embeddingQuantizationBits):
    """Pass `embeddingQuantizationBits` to ``PrependInt32Slot`` for slot 2 (default argument 8)."""
    builder.PrependInt32Slot(2, embeddingQuantizationBits, 8)
|
def FeatureProcessorOptionsAddContextSize(builder, contextSize):
    """Pass `contextSize` to ``PrependInt32Slot`` for slot 3 (default argument -1)."""
    builder.PrependInt32Slot(3, contextSize, -1)
|
def FeatureProcessorOptionsAddMaxSelectionSpan(builder, maxSelectionSpan):
    """Pass `maxSelectionSpan` to ``PrependInt32Slot`` for slot 4 (default argument -1)."""
    builder.PrependInt32Slot(4, maxSelectionSpan, -1)
|
def FeatureProcessorOptionsAddChargramOrders(builder, chargramOrders):
    """Pass the offset `chargramOrders` to ``PrependUOffsetTRelativeSlot`` for slot 5.

    The offset is first converted with ``UOffsetTFlags.py_type``; the default
    argument is 0.
    """
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(chargramOrders)
    builder.PrependUOffsetTRelativeSlot(5, vector_offset, 0)
|
def FeatureProcessorOptionsStartChargramOrdersVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the chargram-orders vector."""
    return builder.StartVector(4, numElems, 4)
|
def FeatureProcessorOptionsAddMaxWordLength(builder, maxWordLength):
    """Pass `maxWordLength` to ``PrependInt32Slot`` for slot 6 (default argument 20)."""
    builder.PrependInt32Slot(6, maxWordLength, 20)
|
def FeatureProcessorOptionsAddUnicodeAwareFeatures(builder, unicodeAwareFeatures):
    """Pass `unicodeAwareFeatures` to ``PrependBoolSlot`` for slot 7 (default argument 0)."""
    builder.PrependBoolSlot(7, unicodeAwareFeatures, 0)
|
def FeatureProcessorOptionsAddExtractCaseFeature(builder, extractCaseFeature):
    """Pass `extractCaseFeature` to ``PrependBoolSlot`` for slot 8 (default argument 0)."""
    builder.PrependBoolSlot(8, extractCaseFeature, 0)
|
def FeatureProcessorOptionsAddExtractSelectionMaskFeature(builder, extractSelectionMaskFeature):
    """Pass `extractSelectionMaskFeature` to ``PrependBoolSlot`` for slot 9 (default argument 0)."""
    builder.PrependBoolSlot(9, extractSelectionMaskFeature, 0)
|
def FeatureProcessorOptionsAddRegexpFeature(builder, regexpFeature):
    """Pass the offset `regexpFeature` to ``PrependUOffsetTRelativeSlot`` for slot 10.

    The offset is first converted with ``UOffsetTFlags.py_type``; the default
    argument is 0.
    """
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(regexpFeature)
    builder.PrependUOffsetTRelativeSlot(10, vector_offset, 0)
|
def FeatureProcessorOptionsStartRegexpFeatureVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the regexp-feature vector."""
    return builder.StartVector(4, numElems, 4)
|
def FeatureProcessorOptionsAddRemapDigits(builder, remapDigits):
    """Pass `remapDigits` to ``PrependBoolSlot`` for slot 11 (default argument 0)."""
    builder.PrependBoolSlot(11, remapDigits, 0)
|
def FeatureProcessorOptionsAddLowercaseTokens(builder, lowercaseTokens):
    """Pass `lowercaseTokens` to ``PrependBoolSlot`` for slot 12 (default argument 0)."""
    builder.PrependBoolSlot(12, lowercaseTokens, 0)
|
def FeatureProcessorOptionsAddSelectionReducedOutputSpace(builder, selectionReducedOutputSpace):
    """Pass `selectionReducedOutputSpace` to ``PrependBoolSlot`` for slot 13 (default argument 1)."""
    builder.PrependBoolSlot(13, selectionReducedOutputSpace, 1)
|
def FeatureProcessorOptionsAddCollections(builder, collections):
    """Pass the offset `collections` to ``PrependUOffsetTRelativeSlot`` for slot 14.

    The offset is first converted with ``UOffsetTFlags.py_type``; the default
    argument is 0.
    """
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(collections)
    builder.PrependUOffsetTRelativeSlot(14, vector_offset, 0)
|
def FeatureProcessorOptionsStartCollectionsVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the collections vector."""
    return builder.StartVector(4, numElems, 4)
|
def FeatureProcessorOptionsAddDefaultCollection(builder, defaultCollection):
    """Pass `defaultCollection` to ``PrependInt32Slot`` for slot 15 (default argument -1)."""
    builder.PrependInt32Slot(15, defaultCollection, -1)
|
def FeatureProcessorOptionsAddOnlyUseLineWithClick(builder, onlyUseLineWithClick):
    """Pass `onlyUseLineWithClick` to ``PrependBoolSlot`` for slot 16 (default argument 0)."""
    builder.PrependBoolSlot(16, onlyUseLineWithClick, 0)
|
def FeatureProcessorOptionsAddSplitTokensOnSelectionBoundaries(builder, splitTokensOnSelectionBoundaries):
    """Pass `splitTokensOnSelectionBoundaries` to ``PrependBoolSlot`` for slot 17 (default argument 0)."""
    builder.PrependBoolSlot(17, splitTokensOnSelectionBoundaries, 0)
|
def FeatureProcessorOptionsAddTokenizationCodepointConfig(builder, tokenizationCodepointConfig):
    """Pass the offset `tokenizationCodepointConfig` to ``PrependUOffsetTRelativeSlot`` for slot 18.

    The offset is first converted with ``UOffsetTFlags.py_type``; the default
    argument is 0.
    """
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(tokenizationCodepointConfig)
    builder.PrependUOffsetTRelativeSlot(18, vector_offset, 0)
|
def FeatureProcessorOptionsStartTokenizationCodepointConfigVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the tokenization-codepoint-config vector."""
    return builder.StartVector(4, numElems, 4)
|
def FeatureProcessorOptionsAddCenterTokenSelectionMethod(builder, centerTokenSelectionMethod):
    """Pass `centerTokenSelectionMethod` to ``PrependInt32Slot`` for slot 19 (default argument 0)."""
    builder.PrependInt32Slot(19, centerTokenSelectionMethod, 0)
|
def FeatureProcessorOptionsAddSnapLabelSpanBoundariesToContainingTokens(builder, snapLabelSpanBoundariesToContainingTokens):
    """Pass `snapLabelSpanBoundariesToContainingTokens` to ``PrependBoolSlot`` for slot 20 (default argument 0)."""
    builder.PrependBoolSlot(20, snapLabelSpanBoundariesToContainingTokens, 0)
|
def FeatureProcessorOptionsAddSupportedCodepointRanges(builder, supportedCodepointRanges):
    """Pass the offset `supportedCodepointRanges` to ``PrependUOffsetTRelativeSlot`` for slot 21.

    The offset is first converted with ``UOffsetTFlags.py_type``; the default
    argument is 0.
    """
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(supportedCodepointRanges)
    builder.PrependUOffsetTRelativeSlot(21, vector_offset, 0)
|
def FeatureProcessorOptionsStartSupportedCodepointRangesVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the supported-codepoint-ranges vector."""
    return builder.StartVector(4, numElems, 4)
|
def FeatureProcessorOptionsAddInternalTokenizerCodepointRanges(builder, internalTokenizerCodepointRanges):
    """Pass the offset `internalTokenizerCodepointRanges` to ``PrependUOffsetTRelativeSlot`` for slot 22.

    The offset is first converted with ``UOffsetTFlags.py_type``; the default
    argument is 0.
    """
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(internalTokenizerCodepointRanges)
    builder.PrependUOffsetTRelativeSlot(22, vector_offset, 0)
|
def FeatureProcessorOptionsStartInternalTokenizerCodepointRangesVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the internal-tokenizer-codepoint-ranges vector."""
    return builder.StartVector(4, numElems, 4)
|
def FeatureProcessorOptionsAddMinSupportedCodepointRatio(builder, minSupportedCodepointRatio):
    """Pass `minSupportedCodepointRatio` to ``PrependFloat32Slot`` for slot 23 (default argument 0.0)."""
    builder.PrependFloat32Slot(23, minSupportedCodepointRatio, 0.0)
|
def FeatureProcessorOptionsAddFeatureVersion(builder, featureVersion):
    """Pass `featureVersion` to ``PrependInt32Slot`` for slot 24 (default argument 0)."""
    builder.PrependInt32Slot(24, featureVersion, 0)
|
def FeatureProcessorOptionsAddTokenizationType(builder, tokenizationType):
    """Pass `tokenizationType` to ``PrependInt32Slot`` for slot 25 (default argument 1)."""
    builder.PrependInt32Slot(25, tokenizationType, 1)
|
def FeatureProcessorOptionsAddIcuPreserveWhitespaceTokens(builder, icuPreserveWhitespaceTokens):
    """Pass `icuPreserveWhitespaceTokens` to ``PrependBoolSlot`` for slot 26 (default argument 0)."""
    builder.PrependBoolSlot(26, icuPreserveWhitespaceTokens, 0)
|
def FeatureProcessorOptionsAddIgnoredSpanBoundaryCodepoints(builder, ignoredSpanBoundaryCodepoints):
    """Pass the offset `ignoredSpanBoundaryCodepoints` to ``PrependUOffsetTRelativeSlot`` for slot 27.

    The offset is first converted with ``UOffsetTFlags.py_type``; the default
    argument is 0.
    """
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(ignoredSpanBoundaryCodepoints)
    builder.PrependUOffsetTRelativeSlot(27, vector_offset, 0)
|
def FeatureProcessorOptionsStartIgnoredSpanBoundaryCodepointsVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the ignored-span-boundary-codepoints vector."""
    return builder.StartVector(4, numElems, 4)
|
def FeatureProcessorOptionsAddBoundsSensitiveFeatures(builder, boundsSensitiveFeatures):
    """Pass the offset `boundsSensitiveFeatures` to ``PrependUOffsetTRelativeSlot`` for slot 28.

    The offset is first converted with ``UOffsetTFlags.py_type``; the default
    argument is 0.
    """
    table_offset = flatbuffers.number_types.UOffsetTFlags.py_type(boundsSensitiveFeatures)
    builder.PrependUOffsetTRelativeSlot(28, table_offset, 0)
|
def FeatureProcessorOptionsAddAllowedChargrams(builder, allowedChargrams):
    """Pass the offset `allowedChargrams` to ``PrependUOffsetTRelativeSlot`` for slot 29.

    The offset is first converted with ``UOffsetTFlags.py_type``; the default
    argument is 0.
    """
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(allowedChargrams)
    builder.PrependUOffsetTRelativeSlot(29, vector_offset, 0)
|
def FeatureProcessorOptionsStartAllowedChargramsVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the allowed-chargrams vector."""
    return builder.StartVector(4, numElems, 4)
|
def FeatureProcessorOptionsAddTokenizeOnScriptChange(builder, tokenizeOnScriptChange):
    """Pass `tokenizeOnScriptChange` to ``PrependBoolSlot`` for slot 30 (default argument 0)."""
    builder.PrependBoolSlot(30, tokenizeOnScriptChange, 0)
|
def FeatureProcessorOptionsAddUsePipeCharacterForNewline(builder, usePipeCharacterForNewline):
    """Pass `usePipeCharacterForNewline` to ``PrependBoolSlot`` for slot 31 (default argument 1)."""
    builder.PrependBoolSlot(31, usePipeCharacterForNewline, 1)
|
def FeatureProcessorOptionsEnd(builder):
    """Return whatever ``builder.EndObject()`` returns for the finished table."""
    return builder.EndObject()
|
|