import sys def parseArgv(argv): data = { '--model-type': 'b', '--vocab-size': 4096, '--ngram-n': 4, '--transformer-n': 32, '--kaggle': False, '--rhyme-size': 4, '--meter-size': 3, } startParse = 1 if len(argv) > 1 and argv[1] in ['n','t','b']: data['--model-type'] = argv[1] startParse = 2 for i in range(startParse, len(argv)): if argv[i] in data: if argv[i] == '--kaggle': data[argv[i]] = True elif argv[i] == '--model-type': data[argv[i]] = argv[i+1] else: data[argv[i]] = int(argv[i+1]) return data sysArgs = parseArgv(sys.argv) VOCAB_SIZE = sysArgs['--vocab-size'] NGRAM_N = sysArgs['--ngram-n'] TRANSFORMER_N = sysArgs['--transformer-n'] MODEL_TYPE = sysArgs['--model-type'] # n: ngram, t: transformer, b: bard KAGGLE = sysArgs['--kaggle'] TOKEN_SKIP = 2 if MODEL_TYPE == 'n' else TRANSFORMER_N-1 RHYME_STACK_SIZE = sysArgs['--rhyme-size'] METER_STACK_SIZE = sysArgs['--meter-size'] VOWEL_TYPES = 14 CONSONANT_TYPES = 10 TITLE = " " NEWLINE = " <NEWLINE> " BRITISH_OUR = ['neighbor','color','flavor','splendor','labor','favor','fervor','savior','vapor','endeavor','parlor', 'clamor','harbor','splendor','behavior','rumor','humor','savor','valor','armor','honor','odor'] est_set = set(['for','t','n','liv','b','di','j','r','p','v','w','b','gu', 'l','eld','pr','inter','sever','hug','earn','smil', 'qu','ch','bl','conqu','pri']) ed_set = set(['he','you','they','we','will','mov','w','wretch','fe','wav','gre', 'till','far','fell','de','b','f','l','re','hopp','ne','br', 'mann','bann','bl','pleas','mark','m','sh','se','spe','ble', 'lov','ste','rous','arm','bar','di','unmov','asham','cre']) d_set = set(['be', 'she','we','see','re','fe','rowe','fee','le','seale','dee','ne', 'reveale','traine','warme','coole','saile','sweate','mowe','cooke', 'gree','warne','aire','seate','ree','temp','doome','helpe','feare', 'neare','designe','adde','parte','repeate','gaine','parke','mourne', 'backe','cleane','raine','charme','climbe','wee','fle','barbe','roote', 'waite','fixe','hee','ende','wounde','pointe','earne','cree','matte', 'kisse','haire','marke','neede','summe','farme','poure','owne','showe', 'crowne','entere','evene','turne','crouche','laye','jade','recorde', 'flowe','looke','nee','calle','learne','spe','ble','fille','washe', 'boxe','talke','returne','sacre','dreame','pulle','seeme','calle', 'prie','forme','ruine','lighte','appeare','adorne','aske','locke', 'crosse','misse','arme','towe','shoute','heade','burne','faile','bowe', 'rolle','walke','heape','obtaine']) c_ed_set = set(['ad','cares','jag','pis','kis','mat','er','mis','cal','pas','fil','wo']) y_ed_set = set(['drapery','city','weary']) s_set = set(['','a','i','it','his','her',"'",'their','one','will','your','our','down','pant','wa', 'god','well','other','saw','good','new','ye','leave','right','wood', 'ha','thi','hi','jesu','riche','specie','alway','ala','grasse','glorie', 'goe','doe','mas','pis','mi','pi','selve','wherea','prie','masse', 'beautie','jame','misse','san','la','lo','politic','u','ga','bu','tos', 'len']) st_set = set(['be','we','ne','re','tempe','le','mode', 'fore','le','que','riche','cre','pe', 'harde','sweete','cleane','je','te','che','highe','earne','deepe','meane','prie', 'olde']) c_est_set = set(['ful','smal']) er_set = set(['with','she','h','quak','curr','hopp','minist','eth','thund','whisp','whit', 'fev','rememb','inn','rend','de','beak','wand','port','heath','clos','should', 'wrapp','cap','cow','lett','moth','chart','prop','danc','dinn','slumb','tend', 'sever','ladd','falt','eld','aft','hind','flatt','murd','show','flow','sob', 'pray','s','numb','pond','ev','und','wint','shiv','ang','fin','hov','teach', 'clov','ov','oth','riv','barb','post','nev','discov','wat','draw','wait', 'suff','deliv','quiv','silv','cov','shelt','los','m','slipp','batt','plast', 'bitt','p','be','pe','ti','pi','ve','se','us','ton','min','sew','lit','tig', 'lat','inn','out','off','ent','low','pow','less','wond','mann','care','lov', 'rath','form','summ','bett','found','quart','tap','pap','record','shudd','pitch', 'shatt','tatt','rid','butt','mis','bould','bord','glimm','answ','wav','walk', 'glitt','gath','stick','care','temp','fish','corn','flick','dress','feath','met', 'broth','both','lock','tow','conqu','che','encount','head','alt','mutt','san']) c_er_set = set(['of','in','but','up','man','let','shut','sum','slip','din','flit', 'mat','bat','bit','lad','ban','bet','ad','flat','pe','ful','smal','up', 'pis','kis','slip','lat','cop','begin','shud','washe','shat','tat','lit', 'glim','lay','lad','cal','glit','pas','fil','ham','sup','pep','rub','chat', 'skip','alte','flut','mut','scat','dip','stag','wo']) r_set = set(['he',"'re",'rule','cottage','quake','cove','clove','warble','prime','lowe', 'cape','tempe','late','e','rive','dee','eve','wave','me','rathe','meter', 'anothe','mothe','mowe','sweate','saile','leade','hithe','warme','coole', 'reaveale','traine','chee','manne','shee','uppe','withe','designe','neare', 'barbe','darke','banne','pete','faste','soone','oute','rende','parke', 'keepe','lee','rooste','cleane','sweete','bothe','harde','sleepe','poste', 'loude','climbe','flowe','drawe','waite','highe','lathe','summe','fathe', 'cove','farme','lose','showe','deepe','longe','hove','teache','pe','rule', 'freeze','compute','consume','recorde','fille','washe','boxe','talke', 'spide','meane','outside','inside','laye','lighte','reade','ladde', 'eage','forme','coppe','answe','aske','dinne','wave','glitte','feve', 'butte','gathe','pape','broke','matte','time','locke','olde','towe','inne', 'shoute','heade','cunne','burne','singe','mutte','rolle','dippe','walke']) ing_set = set(['','us','s','st','n','wan','din','k','heav','w','morn','cloth','br','wav', 'even','cl','noth','charm','th','spr','bl','p','r','d','tempt','m','s','z', 'ch','mean','exact','bless','train','lov','str','build','pleas','slid','light', 'stock','feel','bo','gap']) c_ing_set = set(['er','wed','ad','ear','begin','pis','kis','er','mis','cal','pas','fil']) e_ing_set = set(['the','we','bee','bore','lute','ne','re','please','displease','tide','clothe','ke', 'neare','wounde','che','feare','doome','helpe','designe','evene','dye', 'adde','parte','repeate','gaine','parke','mourne','backe','cleane','charme', 'climbe','waite','fixe','raine','ende','wounde','pointe','earne','neede', 'summe','poure','owne','crowne','entere','turne','crouche','ble','laye', 'recorde','flowe','calle','morne','learne','fille','washe','boxe','talke', 'kisse','returne','dreame','pulle','seeme','matte','forme','meane','ruine', 'lighte','reade','appeare','adorne','stocke','aske','locke','calle','crosse', 'misse','towe','shoute','feele','heade','burne','singe','faile','bowe', 'rolle','walke','heape','obtaine']) y_s_set = set(['ry']) y_er_set = set(['by']) y_est_set = set(['pry']) BANNED_TOKENS = ['1','2','3','y','e','l','maud','olaf','lorenzo','de','oscar', 'r','d','f','p','agnes','eulalie','kate','niam','thel','asius', 'saadi','\\\\','juanna','johnson','dudù','moore','xanthus', 'arjun','pandav','draupadi','bhishma','karna','pandu','bhima', 'duryodhan','drona','abhimanyu','yudhishthir','agamemnon','narad', 'antilochus','diomed','helen','ulysses','achilles','nestor', 'menelaus','patroclus','hector','aeneas','laertes','priam', 'penelope','eumaeus','telemachus','euryclea','sarpedon','peleus', 'polydamas','glaucus','antenor','idomeneus','rishi','boreas', 'phaeacian','savitri','kuru','diana','panchala','ida','ithaca', 'matsya','pritha','salya','kripa','hastina','sisupala','vidura', 'dhrita','rashtra','jayadratha','lamia','medon','highth','haydée', 'haidée', 'edward','ithacus', 'lenore','à','negro','juan','harold','etc','allan','adeline', '+++++++++++++','c','j','h','4','5','6','7','8','9','10', '11','12','*','x','b','/','k','g','ii','s','u','da','el', 'le','que','~','000','m','thu','thir','13','14','15','16','17', '18','19','20','30','th','bu','ri','w','v','al','iv','wi', 'la','las','t','ma','ha','mee','ne','em','ry','di','st', 'yr','ful','iii','bo','faire','tos','ai','en','et','sug', 'ga','wel','hee','hon','n','wan','ut','te','ad','hym','na'] PUNCT = set(['.', ',', '!', '?', ':', ';', '-']) VOWELS = set(['a','e','i','o','u']) SOMETIMES_VOWELS = VOWELS.union(['y','w']) DEFINED_RHYMES = { "'ll": [4,1], "=er": [13,0], "the": [4,-1], 'a': [4,-1], 'we': [8,-1], 'ye': [8,-1], 'e': [8,-1], 'zimbabwe': [7,-1], 'one': [4,2], 'two': [11,-1], 'oh': [10,-1], 'ah': [12,-1], 'i': [9,-1], 'you': [11,-1], 'own': [10,2], 'know': [10,-1], 'do': [11,-1], 'upon': [3,2], 'whereon': [3,2], 'world': [13,4], 'learn': [13,2], 'earn': [13,2], 'yearn': [13,2], 'of': [4,5], 'service': [4,6], 'practice': [4,6], 'police': [8,6], 'through': [11,-1], 'tough': [4,5], 'enough': [4,5], 'thorough': [10,-1], 'dough': [10,-1], 'rough': [4,5], 'cough': [3,5], 'snow': [10,-1], 'w': [11,-1], 'walk': [3,7], 'talk': [3,7], 'son':[4,2], 'iron': [13,2], 'anon': [3,2], 'full': [11,1], 'pull': [11,1], 'bull': [11,1], 'put': [11,1], 'push': [11,6], 'book': [11,7], 'won': [4,2], 'what': [4,4], 'who': [11,-1], 'whose': [11,6], 'where': [7,0], 'there': [7,0], 'their': [7,0], 'theirs': [7,6], 'bear': [7,0], 'wear': [7,0], 'show': [10,-1], 'tow': [10,-1], 'sow': [10,-1], 'brow': [5,-1], 'prow': [5,-1], 'allow': [5,-1], 'laugh': [0,5], 'elbow': [10,-1], 'window': [10,-1], 'rainbow': [10,-1], 'shadow': [10,-1], 'ancient': [1,4], 'meant': [1,4], 'dreamt': [1,4], 'learnt': [13,4], 'hymn': [2,2], 'could': [11,4], 'should': [11,4], 'to': [11,-1], 'was': [4,6], 'were': [13,0], 'love': [4,5], 'eye': [9,-1], 'bury': [8,-1], 'your': [11,0], 'heart': [12,4], 'some': [4,2], 'come': [4,2], 'from': [4,2], 'become': [4,2], 'would': [11,4], 'pour': [10,0],'figure': [13,0], 'author': [4,0], 'sure': [11,0], 'rhythm': [4,2], 'every': [8,-1], 'very': [8,-1], 'many': [8,-1], 'any': [8,-1], 'busy': [8,-1], 'easy': [8,-1], 'happy': [8,-1], 'live': [2,5], 'into': [11,-1], 'soul': [10,2], 'only': [8,-1], 'earth': [13,10], 'though': [10,-1], 'thought': [3,4], 'bought': [3,4], 'brought': [3,4], 'ought': [3,4], 'said': [1,4], 'dead': [1,4], 'word': [13,4], 'heard': [13,4], 'death': [1,10], 'head': [1,4], 'once': [4,6], 'great': [7,4], 'young': [4,2], 'among': [4,2], 'yon': [3,2], 'wh': [-1,-1], 'door': [10,0], 'find': [9,4], 'mind': [9,4], 'kind': [9,4], 'behind': [9,4], 'blind': [9,4], 'wild': [9,4], 'give': [2,5], 'beauty': [8,-1], 'duty': [8,-1], 'move': [11,5], 'above': [4,5], 'prove': [11,5], 'have': [0,5], 'whom': [11,2], 'warm': [10,2], 'done': [4,2], 'gone': [3,2], 'behind': [9,4], 'none': [4,2], 'most': [10,4], 'ghost': [10,4], 'host': [10,4], 'post': [10,4], 'travel': [4,1], 'broad': [3,4],'veil': [7,1],'tread': [1,4], 'bread': [1,4], 'ocean': [4,2], 'truth': [11,10], 'human': [4,2], 'woman': [4,2], 'unto': [11,-1], 'worm': [13,4], 'blood': [4,4], 'instead': [1,4], 'spread': [1,4], 'ahead': [1,4], 'breadth': [1,10], 'breath': [1,10], 'valley': [8,-1], 'key': [8,-1], 'journey': [8,-1], 'honey': [8,-1], 'money': [8,-1], 'chimney': [8,-1], 'monkey': [8,-1], 'donkey': [8,-1], 'alley': [8,-1], 'trolley': [8,-1], 'galley': [8,-1], 'silly': [8,-1], 'lily': [8,-1], 'barley': [8,-1], 'quiet': [4,4], 'else': [1,1], 'christian': [4,2], 'shadow': [10,-1], 'meadow': [10,-1], 'mow': [10,-1], 'bestow': [10,-1], 'widow': [10,-1], 'friend': [1,4], 'source': [10,6], 'course': [10,6], 'lyre': [9,0], 'curse': [13,6], 'rehearse': [13,6], 'are': [12,0], 'genuine': [2,2], 'fly': [9,-1], 'july': [9,-1], 'reply': [9,-1], 'butterfly': [9,-1], 'ply': [9,-1], 'supply': [9,-1], 'folk': [10,7], 'welcome': [4,2], 'wash': [3,6], 'child': [9,4], 'deaf': [1,4], 'league': [8,7], 'plague': [7,7], 'vague': [7,7], 'overhead': [1,4] } DEFINED_METERS = { "'re": 0, "'ve": 0, 'shakespeare': 2, 'every': 2, 'leaves': 1, 'evening': 2, 'tongue': 1, 'lovely': 2, 'quiet': 2, 'people': 2, 'something': 2, 'beautiful': 3, 'lyre': 1, 'hymn': 1, 'forego': 2, 'therefore': 2, 'somewhere': 2 } for word in BRITISH_OUR: DEFINED_RHYMES[word] = [4,0]