BioTokenizer-BFD-WPC-800 / tokenizer.json
dotan1111's picture
Upload 2 files
d2c01af
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<UNK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Lowercase"
},
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordPiece",
"unk_token": "<UNK>",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 10000,
"vocab": {
"<UNK>": 0,
"a": 1,
"b": 2,
"c": 3,
"d": 4,
"e": 5,
"f": 6,
"g": 7,
"h": 8,
"i": 9,
"k": 10,
"l": 11,
"m": 12,
"n": 13,
"o": 14,
"p": 15,
"q": 16,
"r": 17,
"s": 18,
"t": 19,
"u": 20,
"v": 21,
"w": 22,
"x": 23,
"y": 24,
"z": 25,
"##p": 26,
"##e": 27,
"##d": 28,
"##g": 29,
"##r": 30,
"##l": 31,
"##f": 32,
"##t": 33,
"##a": 34,
"##q": 35,
"##s": 36,
"##v": 37,
"##h": 38,
"##c": 39,
"##i": 40,
"##w": 41,
"##y": 42,
"##n": 43,
"##m": 44,
"##k": 45,
"##x": 46,
"##z": 47,
"##b": 48,
"##o": 49,
"##u": 50,
"##aa": 51,
"##ll": 52,
"##la": 53,
"##gg": 54,
"##rr": 55,
"##va": 56,
"##ga": 57,
"##ra": 58,
"##lv": 59,
"##pa": 60,
"##lg": 61,
"##sa": 62,
"##lr": 63,
"##ea": 64,
"##vv": 65,
"##da": 66,
"##ta": 67,
"##ls": 68,
"##lp": 69,
"##ia": 70,
"##ld": 71,
"##gr": 72,
"##le": 73,
"##ss": 74,
"##gv": 75,
"##lt": 76,
"##gs": 77,
"##er": 78,
"##gt": 79,
"##gd": 80,
"##li": 81,
"##pp": 82,
"##vr": 83,
"##ge": 84,
"##qa": 85,
"##fa": 86,
"##lk": 87,
"##vt": 88,
"##vs": 89,
"##gi": 90,
"##vd": 91,
"##ve": 92,
"##lf": 93,
"##pr": 94,
"##ka": 95,
"##dr": 96,
"##lq": 97,
"##ps": 98,
"##ee": 99,
"##tt": 100,
"##gk": 101,
"##na": 102,
"##sr": 103,
"##pd": 104,
"##vi": 105,
"##pe": 106,
"##gf": 107,
"##ln": 108,
"##pt": 109,
"##gq": 110,
"##ha": 111,
"##st": 112,
"##dd": 113,
"##qr": 114,
"##gp": 115,
"##ei": 116,
"##ya": 117,
"##kk": 118,
"##gn": 119,
"##lh": 120,
"##vp": 121,
"##tr": 122,
"##vf": 123,
"##si": 124,
"##de": 125,
"##ma": 126,
"##ly": 127,
"##aaa": 128,
"##ir": 129,
"##vk": 130,
"##gy": 131,
"##ts": 132,
"##ti": 133,
"##vn": 134,
"##kr": 135,
"##gh": 136,
"##vq": 137,
"##sd": 138,
"##se": 139,
"##sf": 140,
"##ie": 141,
"##id": 142,
"##lm": 143,
"##hr": 144,
"##fr": 145,
"##laa": 146,
"##sp": 147,
"##td": 148,
"##ke": 149,
"##te": 150,
"##nr": 151,
"##fd": 152,
"##tp": 153,
"##yr": 154,
"##gm": 155,
"##ki": 156,
"##qq": 157,
"##pi": 158,
"##ff": 159,
"##pv": 160,
"##kd": 161,
"##ca": 162,
"##sn": 163,
"##ed": 164,
"##gw": 165,
"##sq": 166,
"##sv": 167,
"##lw": 168,
"##tf": 169,
"##pq": 170,
"##re": 171,
"##lla": 172,
"##tv": 173,
"##sk": 174,
"##pf": 175,
"##ii": 176,
"##eq": 177,
"##tn": 178,
"##lc": 179,
"##rd": 180,
"##vh": 181,
"##pn": 182,
"##vy": 183,
"##vg": 184,
"##kn": 185,
"##di": 186,
"##tq": 187,
"##fe": 188,
"##wa": 189,
"##sy": 190,
"##mr": 191,
"##qi": 192,
"##pk": 193,
"##ek": 194,
"##gc": 195,
"##gaa": 196,
"##ni": 197,
"##vm": 198,
"##th": 199,
"##tk": 200,
"##yd": 201,
"##fi": 202,
"##nd": 203,
"##ri": 204,
"##sh": 205,
"##ph": 206,
"##lrr": 207,
"##qe": 208,
"##lva": 209,
"##vaa": 210,
"##ty": 211,
"##gga": 212,
"##qd": 213,
"##sg": 214,
"##lga": 215,
"##wr": 216,
"##py": 217,
"##pg": 218,
"##lra": 219,
"##nn": 220,
"##vc": 221,
"##sm": 222,
"##hd": 223,
"##rra": 224,
"##lar": 225,
"##ne": 226,
"##kq": 227,
"##lgg": 228,
"##cr": 229,
"##tg": 230,
"##df": 231,
"##tm": 232,
"##ye": 233,
"##aar": 234,
"##rrr": 235,
"##fs": 236,
"##he": 237,
"##lpa": 238,
"##if": 239,
"##pm": 240,
"##dq": 241,
"##is": 242,
"##gla": 243,
"##lsa": 244,
"##vla": 245,
"##nf": 246,
"##lda": 247,
"##vw": 248,
"##rs": 249,
"##paa": 250,
"##ks": 251,
"##ef": 252,
"##kt": 253,
"##rq": 254,
"##et": 255,
"##kf": 256,
"##yf": 257,
"##lea": 258,
"##in": 259,
"##dv": 260,
"##gll": 261,
"##es": 262,
"##en": 263,
"##lta": 264,
"##it": 265,
"##ev": 266,
"##eh": 267,
"##dp": 268,
"##iv": 269,
"##qf": 270,
"##rf": 271,
"##qs": 272,
"##ky": 273,
"##ep": 274,
"##ds": 275,
"##qt": 276,
"##rp": 277,
"##hh": 278,
"##qn": 279,
"##gva": 280,
"##dy": 281,
"##gra": 282,
"##qp": 283,
"##kp": 284,
"##dt": 285,
"##em": 286,
"##lia": 287,
"##vva": 288,
"##vll": 289,
"##dh": 290,
"##rt": 291,
"##dn": 292,
"##qk": 293,
"##iy": 294,
"##sw": 295,
"##grr": 296,
"##ft": 297,
"##glv": 298,
"##glg": 299,
"##fn": 300,
"##sc": 301,
"##rh": 302,
"##km": 303,
"##ip": 304,
"##qh": 305,
"##rv": 306,
"##ey": 307,
"##ik": 308,
"##qv": 309,
"##dk": 310,
"##pw": 311,
"##ih": 312,
"##saa": 313,
"##gvv": 314,
"##rn": 315,
"##fy": 316,
"##iq": 317,
"##tw": 318,
"##taa": 319,
"##kh": 320,
"##ny": 321,
"##rar": 322,
"##llr": 323,
"##qy": 324,
"##vlv": 325,
"##gsa": 326,
"##qm": 327,
"##vga": 328,
"##dm": 329,
"##glr": 330,
"##gls": 331,
"##ggr": 332,
"##ns": 333,
"##np": 334,
"##glp": 335,
"##kv": 336,
"##fv": 337,
"##yy": 338,
"##nt": 339,
"##fh": 340,
"##lfa": 341,
"##gar": 342,
"##mi": 343,
"##gta": 344,
"##eaa": 345,
"##dw": 346,
"##lka": 347,
"##lqa": 348,
"##iaa": 349,
"##hp": 350,
"##vlg": 351,
"##tc": 352,
"##gpa": 353,
"##daa": 354,
"##lae": 355,
"##gia": 356,
"##var": 357,
"##vgg": 358,
"##ler": 359,
"##gld": 360,
"##gda": 361,
"##pc": 362,
"##glt": 363,
"##gea": 364,
"##im": 365,
"##nq": 366,
"##hf": 367,
"##sll": 368,
"##lgr": 369,
"##lad": 370,
"##vra": 371,
"##nk": 372,
"##gss": 373,
"xm": 374,
"##fp": 375,
"##fq": 376,
"##yi": 377,
"##sla": 378,
"##par": 379,
"##yt": 380,
"##hi": 381,
"##ys": 382,
"##rk": 383,
"##yn": 384,
"##pga": 385,
"##fk": 386,
"##pll": 387,
"##sga": 388,
"##sgg": 389,
"##ew": 390,
"##tla": 391,
"##gle": 392,
"##vrr": 393,
"##vlr": 394,
"##vld": 395,
"##ger": 396,
"##md": 397,
"##vsa": 398,
"##mt": 399,
"##vda": 400,
"##me": 401,
"##vea": 402,
"##yq": 403,
"##vta": 404,
"##nh": 405,
"##vpa": 406,
"##lld": 407,
"##cd": 408,
"##pla": 409,
"##tga": 410,
"##lna": 411,
"##pgg": 412,
"##lvr": 413,
"##hq": 414,
"##wi": 415,
"##ppa": 416,
"##vls": 417,
"##ldr": 418,
"##pva": 419,
"##gli": 420,
"##ela": 421,
"##fm": 422,
"##ell": 423,
"##ic": 424,
"##tll": 425,
"##hs": 426,
"##laaa": 427,
"##nm": 428,
"##ry": 429,
"##vgv": 430,
"##vle": 431,
"##gfa": 432,
"##via": 433,
"##prr": 434,
"##qaa": 435,
"##ssa": 436,
"##fc": 437,
"##fw": 438,
"##vlp": 439,
"##mk": 440,
"##plp": 441,
"##lsr": 442,
"##dla": 443,
"##lha": 444,
"##ms": 445,
"##dll": 446,
"##faa": 447,
"##ht": 448,
"##yk": 449,
"##vgr": 450,
"##sar": 451,
"##ear": 452,
"##iw": 453,
"##gvr": 454,
"##tva": 455,
"##sva": 456,
"##dc": 457,
"##yh": 458,
"##vlt": 459,
"##mm": 460,
"##qw": 461,
"##era": 462,
"##vgd": 463,
"##ver": 464,
"##srr": 465,
"##ec": 466,
"##nw": 467,
"##sls": 468,
"##sgs": 469,
"##pra": 470,
"##lre": 471,
"##slg": 472,
"##kaa": 473,
"##tgg": 474,
"##pgr": 475,
"##plv": 476,
"##pvv": 477,
"##gad": 478,
"##hy": 479,
"##psa": 480,
"##mn": 481,
"##cc": 482,
"##sra": 483,
"##lrd": 484,
"##tar": 485,
"##dar": 486,
"##gpr": 487,
"##slv": 488,
"##lle": 489,
"##kw": 490,
"##gqa": 491,
"##lya": 492,
"##spa": 493,
"##lpr": 494,
"##sgr": 495,
"##gka": 496,
"##iar": 497,
"##vad": 498,
"##vss": 499,
"##glf": 500,
"##vvr": 501,
"##mq": 502,
"##hn": 503,
"##tpa": 504,
"##vgs": 505,
"##wd": 506,
"##ppr": 507,
"##gdr": 508,
"##vae": 509,
"##slr": 510,
"##pda": 511,
"##lgi": 512,
"##tlv": 513,
"##glk": 514,
"##tlt": 515,
"##lgd": 516,
"##aae": 517,
"##sgt": 518,
"##mf": 519,
"##tls": 520,
"##nc": 521,
"##slp": 522,
"##lvd": 523,
"##aad": 524,
"##lma": 525,
"##pss": 526,
"##tsa": 527,
"##vge": 528,
"##gsr": 529,
"##vgt": 530,
"##qla": 531,
"##pgv": 532,
"##tvt": 533,
"##lve": 534,
"##lde": 535,
"##gggg": 536,
"##iga": 537,
"##yw": 538,
"##gpp": 539,
"##ila": 540,
"##qll": 541,
"##lge": 542,
"##tta": 543,
"##pta": 544,
"##ce": 545,
"##tgt": 546,
"##pea": 547,
"##vli": 548,
"##pls": 549,
"##ltr": 550,
"##sta": 551,
"##pgt": 552,
"##plr": 553,
"##tss": 554,
"##tgr": 555,
"##tra": 556,
"##vvd": 557,
"##hk": 558,
"##lqr": 559,
"##iva": 560,
"##gna": 561,
"##fla": 562,
"##sgv": 563,
"##wq": 564,
"##xx": 565,
"##aaaa": 566,
"##ggd": 567,
"##gps": 568,
"##we": 569,
"##pgs": 570,
"##tlr": 571,
"##slt": 572,
"##wn": 573,
"##qc": 574,
"##dad": 575,
"##lee": 576,
"##fll": 577,
"##gvd": 578,
"##eae": 579,
"##kc": 580,
"##kll": 581,
"##tgs": 582,
"##ill": 583,
"##tvv": 584,
"##aai": 585,
"##vfa": 586,
"##ldd": 587,
"##tlg": 588,
"##hc": 589,
"##wf": 590,
"##pgd": 591,
"##trr": 592,
"##glq": 593,
"##gpd": 594,
"##plg": 595,
"##tlp": 596,
"##svv": 597,
"##ssd": 598,
"##gae": 599,
"##vdr": 600,
"##kla": 601,
"##gha": 602,
"##igg": 603,
"##vgi": 604,
"##pvr": 605,
"##plt": 606,
"##vsr": 607,
"##vpp": 608,
"##pad": 609,
"##yc": 610,
"##ssr": 611,
"##hw": 612,
"##pge": 613,
"##gge": 614,
"##sia": 615,
"##qar": 616,
"##gya": 617,
"##pvt": 618,
"##see": 619,
"##tgv": 620,
"##tia": 621,
"##lai": 622,
"##vlf": 623,
"##lpd": 624,
"##err": 625,
"##haa": 626,
"##ym": 627,
"##gln": 628,
"##laq": 629,
"##psr": 630,
"##svs": 631,
"##svt": 632,
"##sgd": 633,
"##sle": 634,
"##naa": 635,
"##vka": 636,
"##gai": 637,
"##cf": 638,
"##nll": 639,
"##sge": 640,
"##wk": 641,
"##rad": 642,
"##ci": 643,
"##gpt": 644,
"##rae": 645,
"##vve": 646,
"##lkr": 647,
"##sld": 648,
"##vqa": 649,
"##tle": 650,
"##per": 651,
"##dae": 652,
"##stt": 653,
"##vrd": 654,
"##hm": 655,
"##qrr": 656,
"##tge": 657,
"##lsd": 658,
"##fgg": 659,
"##grd": 660,
"##tld": 661,
"##sea": 662,
"##gve": 663,
"##vai": 664,
"##lpe": 665,
"##sps": 666,
"##tvs": 667,
"##vgk": 668,
"##vre": 669,
"##vpr": 670,
"##fga": 671,
"##vlk": 672,
"##sda": 673,
"##qra": 674,
"##pvs": 675,
"##pdr": 676,
"##far": 677,
"##sli": 678,
"##tgd": 679,
"##lx": 680,
"##hll": 681,
"##pae": 682,
"##str": 683,
"##sad": 684,
"##gri": 685,
"##my": 686,
"##fva": 687,
"##gtr": 688,
"##tea": 689,
"##svr": 690,
"##tvr": 691,
"##gqr": 692,
"##lfr": 693,
"##gpe": 694,
"##led": 695,
"##iad": 696,
"##tda": 697,
"##glh": 698,
"##vlq": 699,
"##wm": 700,
"##spr": 701,
"##pve": 702,
"##ptt": 703,
"##spp": 704,
"##ch": 705,
"##iae": 706,
"##eai": 707,
"##vna": 708,
"##lli": 709,
"##vps": 710,
"##lse": 711,
"##dlv": 712,
"##tpp": 713,
"##pld": 714,
"##sgi": 715,
"##maa": 716,
"##nla": 717,
"##qlr": 718,
"##lgf": 719,
"##rrd": 720,
"##tst": 721,
"##vpe": 722,
"##qva": 723,
"##gly": 724,
"##wh": 725,
"##mh": 726,
"##gre": 727,
"##yaa": 728,
"##dai": 729,
"##ead": 730,
"##lir": 731,
"##tve": 732,
"##pia": 733,
"##tad": 734,
"##gkr": 735,
"##lid": 736,
"##rre": 737,
"##ww": 738,
"##gma": 739,
"##cn": 740,
"##ggi": 741,
"##vdd": 742,
"##gir": 743,
"##vpd": 744,
"##qlv": 745,
"##gaaa": 746,
"##pppp": 747,
"##gid": 748,
"##vln": 749,
"##ple": 750,
"##ltd": 751,
"##ere": 752,
"##kar": 753,
"##vgf": 754,
"##eri": 755,
"##rai": 756,
"##sfa": 757,
"##lak": 758,
"##gvi": 759,
"##sae": 760,
"##pee": 761,
"##sai": 762,
"##thth": 763,
"##pgp": 764,
"##lca": 765,
"##llf": 766,
"##wy": 767,
"##lpi": 768,
"##lvi": 769,
"##laf": 770,
"##nga": 771,
"##tae": 772,
"##vtr": 773,
"##vpt": 774,
"##rri": 775,
"##pvd": 776,
"##sgk": 777,
"##vee": 778,
"##lrq": 779,
"##lek": 780,
"##pvi": 781,
"##gdd": 782,
"##gx": 783,
"##lhr": 784,
"##vaaa": 785,
"##tsr": 786,
"##tli": 787,
"##vgq": 788,
"##tps": 789,
"##tpt": 790,
"##spd": 791,
"##tvd": 792,
"##slf": 793,
"##lsi": 794,
"##dva": 795,
"##cq": 796,
"##sve": 797,
"##pli": 798,
"##svd": 799
}
}
}