modern_french_normalisation / vocab-src.json
Rachel Bawden
added converted fs model
08d8e1c
raw
history blame
15.5 kB
{
"<s>": 0,
"<pad>": 1,
"</s>": 2,
"<unk>": 3,
",": 4,
"'": 5,
"▁de": 6,
".": 7,
"▁que": 8,
"▁la": 9,
"▁l": 10,
"s": 11,
"er": 12,
"▁le": 13,
"▁qu": 14,
"▁d": 15,
"▁vous": 16,
"▁à": 17,
"é": 18,
"es": 19,
"▁m": 20,
"▁les": 21,
"▁s": 22,
"▁et": 23,
"▁en": 24,
"il": 25,
"▁ne": 26,
"▁&": 27,
"▁n": 28,
"-": 29,
"a": 30,
"u": 31,
"e": 32,
"ent": 33,
"▁qui": 34,
"r": 35,
"t": 36,
"▁f": 37,
"▁des": 38,
"▁": 39,
"▁je": 40,
"▁ce": 41,
"en": 42,
"i": 43,
"ant": 44,
"▁pour": 45,
"on": 46,
"l": 47,
"c": 48,
"est": 49,
"▁par": 50,
";": 51,
"in": 52,
"ir": 53,
"m": 54,
"▁p": 55,
"p": 56,
"▁c": 57,
"?": 58,
"▁v": 59,
"re": 60,
"▁il": 61,
"▁au": 62,
"▁est": 63,
"▁a": 64,
"▁si": 65,
"▁C": 66,
"te": 67,
"▁Et": 68,
"▁t": 69,
"▁pas": 70,
"▁me": 71,
"▁plus": 72,
"d": 73,
"it": 74,
"h": 75,
"ait": 76,
"▁un": 77,
"ai": 78,
"us": 79,
"v": 80,
"is": 81,
"ois": 82,
"▁b": 83,
"▁du": 84,
"▁se": 85,
"eur": 86,
"▁j": 87,
"▁r": 88,
"se": 89,
"▁S": 90,
"y": 91,
"at": 92,
"▁mon": 93,
"ais": 94,
"au": 95,
"oit": 96,
"un": 97,
"elle": 98,
"▁son": 99,
"g": 100,
"ez": 101,
"al": 102,
"▁bien": 103,
"▁A": 104,
"▁dans": 105,
"n": 106,
"ons": 107,
"le": 108,
"o": 109,
"▁sa": 110,
"ment": 111,
"ou": 112,
"ie": 113,
":": 114,
"▁h": 115,
"ay": 116,
"ar": 117,
"or": 118,
"as": 119,
"▁nous": 120,
"ée": 121,
"ce": 122,
"▁P": 123,
"▁M": 124,
"oy": 125,
"▁pr": 126,
"▁re": 127,
"▁Je": 128,
"f": 129,
"ont": 130,
"▁tout": 131,
"▁Mais": 132,
"ch": 133,
"▁tr": 134,
"el": 135,
"▁L": 136,
"▁Il": 137,
"▁ma": 138,
"▁point": 139,
"ur": 140,
"▁D": 141,
"oi": 142,
"qu": 143,
"▁fait": 144,
"▁in": 145,
"av": 146,
"ion": 147,
"me": 148,
"z": 149,
"end": 150,
"et": 151,
"▁ch": 152,
"▁sur": 153,
"ol": 154,
"▁V": 155,
"tre": 156,
"▁ie": 157,
"b": 158,
"oir": 159,
"▁une": 160,
"E": 161,
"eu": 162,
"age": 163,
"les": 164,
"▁cette": 165,
"!": 166,
"os": 167,
"ér": 168,
"és": 169,
"pr": 170,
"▁ses": 171,
"ance": 172,
"▁mes": 173,
"▁sans": 174,
"▁g": 175,
"▁Qu": 176,
"▁i": 177,
"an": 178,
"▁faire": 179,
"▁T": 180,
"▁tous": 181,
"▁dé": 182,
"ui": 183,
"ité": 184,
"▁ces": 185,
"A": 186,
"x": 187,
"▁pl": 188,
"ne": 189,
"▁R": 190,
"une": 191,
"res": 192,
"è": 193,
"am": 194,
"ac": 195,
"▁lui": 196,
"mes": 197,
"▁Que": 198,
"▁mais": 199,
"▁ou": 200,
"eurs": 201,
"ort": 202,
"▁cr": 203,
"ils": 204,
"ut": 205,
"ence": 206,
"▁comme": 207,
"▁vos": 208,
"▁leur": 209,
"and": 210,
"▁é": 211,
"ier": 212,
"▁luy": 213,
"ans": 214,
"our": 215,
"onn": 216,
"iv": 217,
"▁elle": 218,
"ure": 219,
"▁F": 220,
"aire": 221,
"ic": 222,
"ag": 223,
"j": 224,
"I": 225,
"ien": 226,
"ff": 227,
"▁Vous": 228,
"▁sont": 229,
"▁J": 230,
"able": 231,
"▁faut": 232,
"▁su": 233,
"ous": 234,
"▁avec": 235,
"ét": 236,
"▁puis": 237,
"▁N": 238,
"▁votre": 239,
"ille": 240,
"ens": 241,
"▁cœur": 242,
"â": 243,
"▁B": 244,
"▁peut": 245,
"ter": 246,
"tr": 247,
"▁ser": 248,
"▁y": 249,
"che": 250,
"ble": 251,
"ement": 252,
"▁peu": 253,
"▁E": 254,
"de": 255,
"ire": 256,
"bl": 257,
"iez": 258,
"▁De": 259,
"▁où": 260,
"ue": 261,
"▁part": 262,
"▁O": 263,
"ra": 264,
"▁aux": 265,
"▁tant": 266,
"▁quelque": 267,
"que": 268,
"▁dis": 269,
"▁cons": 270,
"eux": 271,
"à": 272,
"▁grand": 273,
"ang": 274,
"ain": 275,
"ç": 276,
"oin": 277,
"ap": 278,
"▁G": 279,
"dre": 280,
"▁per": 281,
"▁dit": 282,
"oient": 283,
"▁moy": 284,
"▁Seigneur": 285,
"▁voir": 286,
"aut": 287,
"omp": 288,
"▁o": 289,
"▁dev": 290,
"▁H": 291,
"ouv": 292,
"id": 293,
"▁vn": 294,
"ti": 295,
"ère": 296,
"▁suis": 297,
"ere": 298,
"ig": 299,
"▁rien": 300,
"▁dou": 301,
"ers": 302,
"aient": 303,
"▁fort": 304,
"▁yeux": 305,
"tes": 306,
"▁sou": 307,
"▁ét": 308,
"cher": 309,
"▁En": 310,
"▁vostre": 311,
"▁I": 312,
"vous": 313,
"▁Le": 314,
"em": 315,
"im": 316,
"cr": 317,
"ours": 318,
"ge": 319,
"▁ai": 320,
"▁on": 321,
"st": 322,
"▁moi": 323,
"▁av": 324,
"uit": 325,
"mp": 326,
"▁encore": 327,
"▁dont": 328,
"▁dire": 329,
"uy": 330,
"▁quel": 331,
"eure": 332,
"▁con": 333,
"▁sç": 334,
"▁toute": 335,
"▁Ce": 336,
"elles": 337,
"ert": 338,
"onne": 339,
"▁mal": 340,
"▁ex": 341,
"ond": 342,
"▁vie": 343,
"▁voy": 344,
"ise": 345,
"pl": 346,
"▁jamais": 347,
"▁Madame": 348,
"▁Si": 349,
"orte": 350,
"ab": 351,
"▁moins": 352,
"▁br": 353,
"▁mar": 354,
"ation": 355,
"▁comm": 356,
"▁temps": 357,
"▁pro": 358,
"▁;": 359,
"ard": 360,
"▁fl": 361,
"si": 362,
"▁donc": 363,
"▁trop": 364,
"ace": 365,
"▁toutes": 366,
"▁Qui": 367,
"▁cour": 368,
"▁monde": 369,
"▁aussi": 370,
"ord": 371,
"ass": 372,
"▁Pour": 373,
"tres": 374,
"▁sort": 375,
"▁voul": 376,
"▁mort": 377,
"ine": 378,
"▁cela": 379,
"▁eu": 380,
"roit": 381,
"▁Ch": 382,
"▁fr": 383,
"▁com": 384,
"ès": 385,
"▁ont": 386,
"▁deux": 387,
"ill": 388,
"tez": 389,
"ées": 390,
"▁gr": 391,
"uis": 392,
"▁ar": 393,
"▁La": 394,
"ad": 395,
"▁pass": 396,
"▁leurs": 397,
"endre": 398,
"iens": 399,
"ions": 400,
"▁Dieu": 401,
"enir": 402,
"▁fut": 403,
"ess": 404,
"ût": 405,
"are": 406,
"▁ent": 407,
"lle": 408,
"ot": 409,
"pos": 410,
"um": 411,
"▁ils": 412,
"▁nos": 413,
"▁am": 414,
"▁quand": 415,
"ct": 416,
"ul": 417,
"▁pouv": 418,
"aux": 419,
"▁ceux": 420,
"▁veux": 421,
"je": 422,
"▁cel": 423,
"▁conn": 424,
"...": 425,
"aine": 426,
"gn": 427,
"ettre": 428,
"▁cher": 429,
"ice": 430,
"▁Ie": 431,
"▁jour": 432,
"omb": 433,
"▁fais": 434,
"este": 435,
"ci": 436,
"▁:": 437,
"fin": 438,
"▁Monsieur": 439,
"▁trouv": 440,
"omme": 441,
"▁soit": 442,
"ôt": 443,
"▁autre": 444,
"▁choses": 445,
"ë": 446,
"▁ven": 447,
"▁Mon": 448,
"ole": 449,
"▁là": 450,
"rez": 451,
"▁vne": 452,
"▁app": 453,
"att": 454,
"ép": 455,
"oire": 456,
"ques": 457,
"▁même": 458,
"aur": 459,
"ors": 460,
"mer": 461,
"om": 462,
"▁cet": 463,
"▁chose": 464,
"▁entre": 465,
"ieux": 466,
"R": 467,
"▁assez": 468,
"O": 469,
"▁rec": 470,
"▁tu": 471,
"ê": 472,
"être": 473,
"▁rend": 474,
"..": 475,
"▁avait": 476,
"û": 477,
"▁Par": 478,
"auv": 479,
"cl": 480,
"ost": 481,
"▁ni": 482,
"ect": 483,
"esp": 484,
"▁all": 485,
"▁autres": 486,
"él": 487,
"▁car": 488,
"autre": 489,
"▁donn": 490,
"oc": 491,
"▁Ah": 492,
"▁main": 493,
"iss": 494,
"ique": 495,
"▁att": 496,
"œ": 497,
"▁Ne": 498,
"▁coup": 499,
"▁être": 500,
"▁trou": 501,
"N": 502,
"▁dés": 503,
"▁Ar": 504,
"▁ad": 505,
"▁aff": 506,
"ces": 507,
"emp": 508,
"▁long": 509,
"app": 510,
"én": 511,
"▁ver": 512,
"ib": 513,
"▁donner": 514,
"▁contre": 515,
"▁jus": 516,
"ins": 517,
"▁pens": 518,
"ler": 519,
"▁lieu": 520,
"▁mesme": 521,
"▁non": 522,
"▁fer": 523,
"ec": 524,
"ite": 525,
"jet": 526,
"sez": 527,
"▁voi": 528,
"ex": 529,
"iere": 530,
"ière": 531,
"▁Roy": 532,
"▁eff": 533,
"▁fois": 534,
"S": 535,
"op": 536,
"▁homme": 537,
"▁prem": 538,
"ob": 539,
"out": 540,
"ante": 541,
"tant": 542,
"▁ay": 543,
"▁doit": 544,
"▁malh": 545,
"▁Prince": 546,
"▁nom": 547,
"▁était": 548,
"▁es": 549,
"ign": 550,
"▁avoir": 551,
"eureux": 552,
"▁laiss": 553,
"▁Elle": 554,
"▁ob": 555,
"▁ret": 556,
"estre": 557,
"▁ap": 558,
"▁dem": 559,
"▁toujours": 560,
"▁Am": 561,
"▁contr": 562,
"▁ab": 563,
"▁serv": 564,
"aint": 565,
"ieu": 566,
"▁cond": 567,
"▁Les": 568,
"▁char": 569,
"▁estoit": 570,
"▁parler": 571,
"▁ren": 572,
"eau": 573,
"endant": 574,
"▁arr": 575,
"oux": 576,
"▁On": 577,
"▁prop": 578,
"avoir": 579,
"esse": 580,
"ains": 581,
"▁Ciel": 582,
"▁conf": 583,
"▁estre": 584,
"▁vou": 585,
"▁gloire": 586,
"▁te": 587,
"aiss": 588,
"▁ré": 589,
"amour": 590,
"ist": 591,
"▁Au": 592,
"ale": 593,
"ret": 594,
"▁amour": 595,
"▁veu": 596,
"cc": 597,
"▁ass": 598,
"▁bon": 599,
"▁hommes": 600,
"ore": 601,
"▁peine": 602,
"là": 603,
"ve": 604,
"dr": 605,
"▁imp": 606,
"▁port": 607,
"fois": 608,
"ime": 609,
"vn": 610,
"î": 611,
"▁enfin": 612,
"ger": 613,
"uer": 614,
"▁fin": 615,
"gne": 616,
"ï": 617,
"▁esprit": 618,
"▁plais": 619,
"H": 620,
"cy": 621,
"ient": 622,
"par": 623,
"▁comb": 624,
"▁Pr": 625,
"▁demand": 626,
"▁ny": 627,
"▁souv": 628,
"aill": 629,
"▁avoit": 630,
"▁vis": 631,
"▁rendre": 632,
"ile": 633,
"nem": 634,
"ux": 635,
"lez": 636,
"▁rép": 637,
"▁sent": 638,
"▁vois": 639,
"iers": 640,
"âme": 641,
"▁raison": 642,
"U": 643,
"▁nouv": 644,
"▁vr": 645,
"▁seul": 646,
"▁resp": 647,
"ud": 648,
"cip": 649,
"yr": 650,
"▁pré": 651,
"agn": 652,
"▁encor": 653,
"▁mieux": 654,
"▁pres": 655,
"ats": 656,
"▁Dieux": 657,
"aires": 658,
"ann": 659,
"▁pu": 660,
"▁À": 661,
"gl": 662,
"ss": 663,
"▁acc": 664,
"▁sang": 665,
"oü": 666,
"irs": 667,
"▁Car": 668,
"anc": 669,
"pris": 670,
"▁croy": 671,
"▁personne": 672,
"▁veut": 673,
"ente": 674,
"▁regard": 675,
"asse": 676,
"▁auec": 677,
"▁doute": 678,
"ense": 679,
"▁Princes": 680,
"▁reste": 681,
"ô": 682,
"▁grande": 683,
"▁mour": 684,
"▁viv": 685,
"V": 686,
"ables": 687,
"aisse": 688,
"prit": 689,
"▁sais": 690,
"mé": 691,
"▁donne": 692,
"▁Rome": 693,
"▁belle": 694,
"▁gens": 695,
"▁vain": 696,
"ants": 697,
"emb": 698,
"▁1": 699,
"▁déf": 700,
"▁pre": 701,
"ette": 702,
"ourd": 703,
"▁ca": 704,
"oign": 705,
"▁cert": 706,
"▁cru": 707,
"▁jours": 708,
"▁vôtre": 709,
"▁mis": 710,
"ages": 711,
"aître": 712,
"té": 713,
"▁beau": 714,
"▁emp": 715,
"▁puiss": 716,
"▁vient": 717,
"onneur": 718,
"oû": 719,
"▁beaucoup": 720,
"▁petit": 721,
"▁quoi": 722,
"▁sous": 723,
"avez": 724,
"ies": 725,
"puis": 726,
"▁celle": 727,
"▁fit": 728,
"if": 729,
"▁ac": 730,
"▁deu": 731,
"▁perd": 732,
"▁seulement": 733,
"▁desse": 734,
"▁esp": 735,
"▁sec": 736,
"▁Cour": 737,
"▁notre": 738,
"ffr": 739,
"▁Un": 740,
"▁mot": 741,
"▁soup": 742,
"ché": 743,
"▁sens": 744,
"ête": 745,
"▁An": 746,
"▁va": 747,
"▁Non": 748,
"all": 749,
"ienne": 750,
"ug": 751,
"▁Tr": 752,
"▁for": 753,
"quer": 754,
"▁prom": 755,
"▁été": 756,
"cess": 757,
"int": 758,
"même": 759,
"▁écl": 760,
"X": 761,
"arde": 762,
"êtes": 763,
"▁tour": 764,
"ature": 765,
"▁diff": 766,
"éd": 767,
"▁nostre": 768,
"ü": 769,
"▁terre": 770,
"▁vers": 771,
"aî": 772,
"▁im": 773,
"omm": 774,
"ordin": 775,
"▁extr": 776,
"antage": 777,
"ict": 778,
"▁esté": 779,
"▁moment": 780,
"▁pris": 781,
"auté": 782,
"rer": 783,
"▁reg": 784,
"igne": 785,
"ll": 786,
"▁Roi": 787,
"▁pourr": 788,
"▁É": 789,
"▁prendre": 790,
"2": 791,
"itié": 792,
"▁hum": 793,
"▁ton": 794,
"tement": 795,
"▁env": 796,
"▁guer": 797,
"▁man": 798,
"▁Hé": 799,
"▁Ph": 800,
"▁feu": 801,
"gré": 802,
"▁cause": 803,
"▁secret": 804,
"▁sera": 805,
"air": 806,
"ame": 807,
"▁Tout": 808,
"▁avez": 809,
"▁chang": 810,
"enez": 811,
"ouu": 812,
"temp": 813,
"▁Fr": 814,
"▁croire": 815,
"▁dest": 816,
"blig": 817,
"ù": 818,
"▁prés": 819,
"prendre": 820,
"tu": 821,
"quel": 822,
"▁seule": 823,
"▁venir": 824,
"▁ainsi": 825,
"▁sembl": 826,
"õ": 827,
"▁vertu": 828,
"T": 829,
"ainte": 830,
"onner": 831,
"▁(": 832,
"▁grands": 833,
"iment": 834,
"irer": 835,
"L": 836,
"▁jusqu": 837,
"▁lors": 838,
"▁porte": 839,
")": 840,
"▁apr": 841,
"▁ins": 842,
"▁pers": 843,
"aindre": 844,
"▁lieux": 845,
"▁mer": 846,
"▁rev": 847,
"▁trouver": 848,
"▁content": 849,
"▁fai": 850,
"▁femme": 851,
"▁mains": 852,
"▁tes": 853,
"amp": 854,
"ém": 855,
"▁Dans": 856,
"armes": 857,
"▁disc": 858,
"▁mille": 859,
"êt": 860,
"▁cor": 861,
"▁dern": 862,
"▁trait": 863,
"iter": 864,
"▁après": 865,
"▁force": 866,
"▁quelques": 867,
"▁div": 868,
"ver": 869,
"▁ici": 870,
"▁suiv": 871,
"enant": 872,
"isse": 873,
"od": 874,
"▁Peu": 875,
"▁ple": 876,
"oins": 877,
"up": 878,
"▁enf": 879,
"▁fus": 880,
"▁lib": 881,
"▁tém": 882,
"ible": 883,
"reux": 884,
"▁Ma": 885,
"▁soins": 886,
"▁vi": 887,
"itable": 888,
"ix": 889,
"ude": 890,
"▁Ou": 891,
"▁bou": 892,
"▁red": 893,
"ié": 894,
"É": 895,
"▁e": 896,
"▁di": 897,
"▁sem": 898,
"amais": 899,
"▁Rom": 900,
"▁cont": 901,
"ẽ": 902,
"use": 903,
"gr": 904,
"▁tou": 905,
"eine": 906,
"▁as": 907,
"igneur": 908,
"jours": 909,
"onc": 910,
"6": 911,
"▁gl": 912,
"▁Se": 913,
"▁rais": 914,
"M": 915,
"oup": 916,
"œur": 917,
"▁sui": 918,
"ommes": 919,
"C": 920,
"ême": 921,
"▁ter": 922,
"ã": 923,
"▁at": 924,
"▁fem": 925,
"3": 926,
"Z": 927,
"1": 928,
"▁vert": 929,
"5": 930,
"otre": 931,
"0": 932,
"Un": 933,
"oint": 934,
"[": 935,
"]": 936,
"4": 937,
"G": 938,
"D": 939,
"q": 940,
"\"": 941,
"Y": 942,
"ussi": 943,
"▁pet": 944,
"ôtre": 945,
"ong": 946,
"iel": 947,
"7": 948,
"§": 949,
"ß": 950,
"8": 951,
"P": 952,
"„": 953,
"*": 954,
"Ô": 955,
"9": 956,
"▁temp": 957,
"▁Q": 958,
"B": 959,
"▁Mad": 960,
"▁mom": 961,
"▁Mons": 962,
"Ê": 963,
"Â": 964,
"▁der": 965,
"ainsi": 966,
"ieur": 967,
"ÿ": 968,
"J": 969,
"φ": 970,
"Q": 971,
"æ": 972,
"▁seu": 973,
"F": 974,
"▁enc": 975,
"▁Prin": 976,
"È": 977,
"Î": 978,
"coup": 979,
"Œ": 980,
">": 981,
"★": 982,
"Ç": 983,
"&": 984,
"<": 985,
"k": 986,
"¬": 987,
"(": 988,
"w": 989,
"ú": 990,
"/": 991,
"=": 992,
"ñ": 993,
"ā": 994,
"ũ": 995,
"À": 996,
"Æ": 997,
"ĩ": 998,
"ō": 999
}