# bardgpt / constants.py
# crackalamoo's picture
# Upload files for inference
# 1427339
import sys
def parseArgv(argv):
    """Parse a command-line argument list into a dict of option values.

    The result always contains every known option, pre-filled with its
    default. ``argv[0]`` is skipped. If ``argv[1]`` is one of ``'n'``,
    ``'t'`` or ``'b'`` it is taken as a positional model type. Every other
    element is looked up as an option name; unknown elements are ignored.

    ``--kaggle`` is a boolean switch and takes no value. ``--model-type``
    takes the following element as a string. All other options take the
    following element and convert it with ``int``.

    Args:
        argv: Sequence of argument strings, laid out like ``sys.argv``.

    Returns:
        dict mapping each option name (including the leading ``--``) to
        its parsed value.

    Raises:
        ValueError: If a value-taking option is the last element (so it
            has no value), or if an integer option is given a value that
            ``int`` cannot convert.
    """
    data = {
        '--model-type': 'b',
        '--vocab-size': 4096,
        '--ngram-n': 4,
        '--transformer-n': 32,
        '--kaggle': False,
        '--rhyme-size': 4,
        '--meter-size': 3,
    }
    startParse = 1
    if len(argv) > 1 and argv[1] in ('n', 't', 'b'):
        data['--model-type'] = argv[1]
        startParse = 2
    for i in range(startParse, len(argv)):
        flag = argv[i]
        if flag not in data:
            continue
        if flag == '--kaggle':
            # Boolean switch: its presence alone enables it.
            data[flag] = True
            continue
        # Every remaining option needs a value right after it. Without this
        # check a trailing flag would index past the end of argv.
        if i + 1 >= len(argv):
            raise ValueError("missing value for option {}".format(flag))
        value = argv[i + 1]
        if flag == '--model-type':
            data[flag] = value
        else:
            try:
                data[flag] = int(value)
            except ValueError:
                # Re-raise with the option name so the user can tell which
                # argument was malformed.
                raise ValueError(
                    "option {} expects an integer value, got {!r}".format(flag, value)
                )
    return data
# Parse the process's command line once, at import time; everything below
# is read straight out of the resulting option dict.
sysArgs = parseArgv(sys.argv)

# Integer sizes supplied by (or defaulted in) the command-line options.
VOCAB_SIZE = sysArgs['--vocab-size']
NGRAM_N = sysArgs['--ngram-n']
TRANSFORMER_N = sysArgs['--transformer-n']
RHYME_STACK_SIZE = sysArgs['--rhyme-size']
METER_STACK_SIZE = sysArgs['--meter-size']

# n: ngram, t: transformer, b: bard
MODEL_TYPE = sysArgs['--model-type']
KAGGLE = sysArgs['--kaggle']

# The n-gram model uses a fixed skip of 2; every other model type uses one
# less than TRANSFORMER_N.
if MODEL_TYPE == 'n':
    TOKEN_SKIP = 2
else:
    TOKEN_SKIP = TRANSFORMER_N - 1
# NOTE(review): presumably the number of distinct vowel / consonant classes
# used by the rhyme encoding — confirm against the code that consumes them.
# DEFINED_RHYMES contains second-position values up to 10, so check how that
# lines up with CONSONANT_TYPES = 10.
VOWEL_TYPES = 14
CONSONANT_TYPES = 10
# Marker strings; each one carries a single space of padding on both sides.
TITLE = " <TITLE> "
NEWLINE = " <NEWLINE> "
# American "-or" spellings. The loop after DEFINED_METERS gives every word in
# this list the rhyme entry [4,0]. 'splendor' is listed twice, which is
# harmless for that loop.
BRITISH_OUR = ['neighbor','color','flavor','splendor','labor','favor','fervor','savior','vapor','endeavor','parlor',
'clamor','harbor','splendor','behavior','rumor','humor','savor','valor','armor','honor','odor']
# Lookup sets named after a word ending (est, ed, d, s, st, ...).
# NOTE(review): presumably these hold the stems that need special handling
# when the matching ending is split off a word — confirm against the
# tokenizer that imports them. Repeated items inside a set are harmless.
est_set = {
    'for','t','n','liv','b','di','j','r','p','v','w','b','gu',
    'l','eld','pr','inter','sever','hug','earn','smil',
    'qu','ch','bl','conqu','pri',
}
ed_set = {
    'he','you','they','we','will','mov','w','wretch','fe','wav','gre',
    'till','far','fell','de','b','f','l','re','hopp','ne','br',
    'mann','bann','bl','pleas','mark','m','sh','se','spe','ble',
    'lov','ste','rous','arm','bar','di','unmov','asham','cre',
}
d_set = {
    'be', 'she','we','see','re','fe','rowe','fee','le','seale','dee','ne',
    'reveale','traine','warme','coole','saile','sweate','mowe','cooke',
    'gree','warne','aire','seate','ree','temp','doome','helpe','feare',
    'neare','designe','adde','parte','repeate','gaine','parke','mourne',
    'backe','cleane','raine','charme','climbe','wee','fle','barbe','roote',
    'waite','fixe','hee','ende','wounde','pointe','earne','cree','matte',
    'kisse','haire','marke','neede','summe','farme','poure','owne','showe',
    'crowne','entere','evene','turne','crouche','laye','jade','recorde',
    'flowe','looke','nee','calle','learne','spe','ble','fille','washe',
    'boxe','talke','returne','sacre','dreame','pulle','seeme','calle',
    'prie','forme','ruine','lighte','appeare','adorne','aske','locke',
    'crosse','misse','arme','towe','shoute','heade','burne','faile','bowe',
    'rolle','walke','heape','obtaine',
}
c_ed_set = {'ad','cares','jag','pis','kis','mat','er','mis','cal','pas','fil','wo'}
y_ed_set = {'drapery','city','weary'}
s_set = {
    '','a','i','it','his','her',"'",'their','one','will','your','our','down','pant','wa',
    'god','well','other','saw','good','new','ye','leave','right','wood',
    'ha','thi','hi','jesu','riche','specie','alway','ala','grasse','glorie',
    'goe','doe','mas','pis','mi','pi','selve','wherea','prie','masse',
    'beautie','jame','misse','san','la','lo','politic','u','ga','bu','tos',
    'len',
}
st_set = {
    'be','we','ne','re','tempe','le','mode', 'fore','le','que','riche','cre','pe',
    'harde','sweete','cleane','je','te','che','highe','earne','deepe','meane','prie',
    'olde',
}
c_est_set = {'ful','smal'}
# Lookup sets for the "er" / "r" endings, written as set displays.
# NOTE(review): presumably stems that need special handling when an
# -er / -r ending is split off — confirm against the code that imports them.
er_set = {
    'with','she','h','quak','curr','hopp','minist','eth','thund','whisp','whit',
    'fev','rememb','inn','rend','de','beak','wand','port','heath','clos','should',
    'wrapp','cap','cow','lett','moth','chart','prop','danc','dinn','slumb','tend',
    'sever','ladd','falt','eld','aft','hind','flatt','murd','show','flow','sob',
    'pray','s','numb','pond','ev','und','wint','shiv','ang','fin','hov','teach',
    'clov','ov','oth','riv','barb','post','nev','discov','wat','draw','wait',
    'suff','deliv','quiv','silv','cov','shelt','los','m','slipp','batt','plast',
    'bitt','p','be','pe','ti','pi','ve','se','us','ton','min','sew','lit','tig',
    'lat','inn','out','off','ent','low','pow','less','wond','mann','care','lov',
    'rath','form','summ','bett','found','quart','tap','pap','record','shudd','pitch',
    'shatt','tatt','rid','butt','mis','bould','bord','glimm','answ','wav','walk',
    'glitt','gath','stick','care','temp','fish','corn','flick','dress','feath','met',
    'broth','both','lock','tow','conqu','che','encount','head','alt','mutt','san',
}
c_er_set = {
    'of','in','but','up','man','let','shut','sum','slip','din','flit',
    'mat','bat','bit','lad','ban','bet','ad','flat','pe','ful','smal','up',
    'pis','kis','slip','lat','cop','begin','shud','washe','shat','tat','lit',
    'glim','lay','lad','cal','glit','pas','fil','ham','sup','pep','rub','chat',
    'skip','alte','flut','mut','scat','dip','stag','wo',
}
r_set = {
    'he',"'re",'rule','cottage','quake','cove','clove','warble','prime','lowe',
    'cape','tempe','late','e','rive','dee','eve','wave','me','rathe','meter',
    'anothe','mothe','mowe','sweate','saile','leade','hithe','warme','coole',
    'reaveale','traine','chee','manne','shee','uppe','withe','designe','neare',
    'barbe','darke','banne','pete','faste','soone','oute','rende','parke',
    'keepe','lee','rooste','cleane','sweete','bothe','harde','sleepe','poste',
    'loude','climbe','flowe','drawe','waite','highe','lathe','summe','fathe',
    'cove','farme','lose','showe','deepe','longe','hove','teache','pe','rule',
    'freeze','compute','consume','recorde','fille','washe','boxe','talke',
    'spide','meane','outside','inside','laye','lighte','reade','ladde',
    'eage','forme','coppe','answe','aske','dinne','wave','glitte','feve',
    'butte','gathe','pape','broke','matte','time','locke','olde','towe','inne',
    'shoute','heade','cunne','burne','singe','mutte','rolle','dippe','walke',
}
# Lookup sets for the "ing" ending and the y-variants of s / er / est.
# NOTE(review): presumably stems that need special handling when the
# matching ending is split off — confirm against the code that imports them.
ing_set = {
    '','us','s','st','n','wan','din','k','heav','w','morn','cloth','br','wav',
    'even','cl','noth','charm','th','spr','bl','p','r','d','tempt','m','s','z',
    'ch','mean','exact','bless','train','lov','str','build','pleas','slid','light',
    'stock','feel','bo','gap',
}
c_ing_set = {'er','wed','ad','ear','begin','pis','kis','er','mis','cal','pas','fil'}
e_ing_set = {
    'the','we','bee','bore','lute','ne','re','please','displease','tide','clothe','ke',
    'neare','wounde','che','feare','doome','helpe','designe','evene','dye',
    'adde','parte','repeate','gaine','parke','mourne','backe','cleane','charme',
    'climbe','waite','fixe','raine','ende','wounde','pointe','earne','neede',
    'summe','poure','owne','crowne','entere','turne','crouche','ble','laye',
    'recorde','flowe','calle','morne','learne','fille','washe','boxe','talke',
    'kisse','returne','dreame','pulle','seeme','matte','forme','meane','ruine',
    'lighte','reade','appeare','adorne','stocke','aske','locke','calle','crosse',
    'misse','towe','shoute','feele','heade','burne','singe','faile','bowe',
    'rolle','walke','heape','obtaine',
}
y_s_set = {'ry'}
y_er_set = {'by'}
y_est_set = {'pry'}
# Tokens on the ban list: single letters, numerals, symbol fragments, short
# word pieces and many proper names. Kept as a list, so order and duplicates
# are preserved for whoever consumes it.
# NOTE(review): how the ban is applied (vocabulary building, generation, or
# both) is not visible in this file — confirm against the importing modules.
BANNED_TOKENS = ['1','2','3','y','e','l','maud','olaf','lorenzo','de','oscar',
'r','d','f','p','agnes','eulalie','kate','niam','thel','asius',
'saadi','\\\\','juanna','johnson','dudù','moore','xanthus',
'arjun','pandav','draupadi','bhishma','karna','pandu','bhima',
'duryodhan','drona','abhimanyu','yudhishthir','agamemnon','narad',
'antilochus','diomed','helen','ulysses','achilles','nestor',
'menelaus','patroclus','hector','aeneas','laertes','priam',
'penelope','eumaeus','telemachus','euryclea','sarpedon','peleus',
'polydamas','glaucus','antenor','idomeneus','rishi','boreas',
'phaeacian','savitri','kuru','diana','panchala','ida','ithaca',
'matsya','pritha','salya','kripa','hastina','sisupala','vidura',
'dhrita','rashtra','jayadratha','lamia','medon','highth','haydée',
'haidée', 'edward','ithacus',
'lenore','à','negro','juan','harold','etc','allan','adeline',
'+++++++++++++','c','j','h','4','5','6','7','8','9','10',
'11','12','*','x','b','/','k','g','ii','s','u','da','el',
'le','que','~','000','m','thu','thir','13','14','15','16','17',
'18','19','20','30','th','bu','ri','w','v','al','iv','wi',
'la','las','t','ma','ha','mee','ne','em','ry','di','st',
'yr','ful','iii','bo','faire','tos','ai','en','et','sug',
'ga','wel','hee','hon','n','wan','ut','te','ad','hym','na']
# Single-character punctuation marks.
PUNCT = {'.', ',', '!', '?', ':', ';', '-'}
# The five plain vowels.
VOWELS = {'a', 'e', 'i', 'o', 'u'}
# A new set: the plain vowels plus 'y' and 'w'. VOWELS itself is not modified.
SOMETIMES_VOWELS = VOWELS | {'y', 'w'}
# Hand-written rhyme overrides: word -> [a, b], a list of two small integers.
# Many entries use -1 as the second number, and 'wh' maps to [-1,-1].
# The loop after DEFINED_METERS adds one more entry ([4,0]) per BRITISH_OUR word.
# NOTE(review): presumably [vowel class, final-consonant class], with -1
# meaning "none" — confirm against the code that reads this table.
# NOTE(review): 'behind' and 'shadow' each appear twice with identical values;
# the repeats are harmless but redundant.
# NOTE(review): 'deaf': [1,4] and 'worm': [13,4] share their second number with
# 'dead' / 'word' rather than with 'laugh' [0,5] / 'warm' [10,2]. These may be
# copy-paste slips; verify before changing, since anything already built from
# these values would be affected.
DEFINED_RHYMES = {
"'ll": [4,1], "=er": [13,0], "the": [4,-1], 'a': [4,-1], 'we': [8,-1], 'ye': [8,-1], 'e': [8,-1],
'zimbabwe': [7,-1], 'one': [4,2], 'two': [11,-1], 'oh': [10,-1], 'ah': [12,-1], 'i': [9,-1],
'you': [11,-1], 'own': [10,2], 'know': [10,-1], 'do': [11,-1], 'upon': [3,2], 'whereon': [3,2],
'world': [13,4], 'learn': [13,2], 'earn': [13,2], 'yearn': [13,2], 'of': [4,5], 'service': [4,6],
'practice': [4,6], 'police': [8,6], 'through': [11,-1], 'tough': [4,5], 'enough': [4,5],
'thorough': [10,-1], 'dough': [10,-1], 'rough': [4,5], 'cough': [3,5], 'snow': [10,-1],
'w': [11,-1], 'walk': [3,7], 'talk': [3,7], 'son':[4,2], 'iron': [13,2], 'anon': [3,2],
'full': [11,1], 'pull': [11,1], 'bull': [11,1], 'put': [11,1], 'push': [11,6], 'book': [11,7],
'won': [4,2], 'what': [4,4], 'who': [11,-1], 'whose': [11,6], 'where': [7,0], 'there': [7,0],
'their': [7,0], 'theirs': [7,6], 'bear': [7,0], 'wear': [7,0], 'show': [10,-1], 'tow': [10,-1],
'sow': [10,-1], 'brow': [5,-1], 'prow': [5,-1], 'allow': [5,-1], 'laugh': [0,5],
'elbow': [10,-1], 'window': [10,-1], 'rainbow': [10,-1], 'shadow': [10,-1], 'ancient': [1,4],
'meant': [1,4], 'dreamt': [1,4], 'learnt': [13,4], 'hymn': [2,2], 'could': [11,4], 'should': [11,4],
'to': [11,-1], 'was': [4,6], 'were': [13,0], 'love': [4,5], 'eye': [9,-1], 'bury': [8,-1],
'your': [11,0], 'heart': [12,4], 'some': [4,2], 'come': [4,2], 'from': [4,2], 'become': [4,2],
'would': [11,4], 'pour': [10,0],'figure': [13,0], 'author': [4,0], 'sure': [11,0], 'rhythm': [4,2],
'every': [8,-1], 'very': [8,-1], 'many': [8,-1], 'any': [8,-1], 'busy': [8,-1], 'easy': [8,-1],
'happy': [8,-1], 'live': [2,5], 'into': [11,-1], 'soul': [10,2], 'only': [8,-1], 'earth': [13,10],
'though': [10,-1], 'thought': [3,4], 'bought': [3,4], 'brought': [3,4], 'ought': [3,4],
'said': [1,4], 'dead': [1,4], 'word': [13,4], 'heard': [13,4], 'death': [1,10], 'head': [1,4],
'once': [4,6], 'great': [7,4], 'young': [4,2], 'among': [4,2], 'yon': [3,2], 'wh': [-1,-1],
'door': [10,0], 'find': [9,4], 'mind': [9,4], 'kind': [9,4], 'behind': [9,4], 'blind': [9,4],
'wild': [9,4], 'give': [2,5], 'beauty': [8,-1], 'duty': [8,-1], 'move': [11,5], 'above': [4,5],
'prove': [11,5], 'have': [0,5], 'whom': [11,2], 'warm': [10,2], 'done': [4,2], 'gone': [3,2],
'behind': [9,4], 'none': [4,2], 'most': [10,4], 'ghost': [10,4], 'host': [10,4], 'post': [10,4],
'travel': [4,1], 'broad': [3,4],'veil': [7,1],'tread': [1,4], 'bread': [1,4], 'ocean': [4,2],
'truth': [11,10], 'human': [4,2], 'woman': [4,2], 'unto': [11,-1], 'worm': [13,4], 'blood': [4,4],
'instead': [1,4], 'spread': [1,4], 'ahead': [1,4], 'breadth': [1,10], 'breath': [1,10],
'valley': [8,-1], 'key': [8,-1], 'journey': [8,-1], 'honey': [8,-1], 'money': [8,-1],
'chimney': [8,-1], 'monkey': [8,-1], 'donkey': [8,-1], 'alley': [8,-1], 'trolley': [8,-1],
'galley': [8,-1], 'silly': [8,-1], 'lily': [8,-1], 'barley': [8,-1], 'quiet': [4,4],
'else': [1,1], 'christian': [4,2], 'shadow': [10,-1], 'meadow': [10,-1], 'mow': [10,-1],
'bestow': [10,-1], 'widow': [10,-1], 'friend': [1,4], 'source': [10,6], 'course': [10,6],
'lyre': [9,0], 'curse': [13,6], 'rehearse': [13,6], 'are': [12,0], 'genuine': [2,2],
'fly': [9,-1], 'july': [9,-1], 'reply': [9,-1], 'butterfly': [9,-1], 'ply': [9,-1],
'supply': [9,-1], 'folk': [10,7], 'welcome': [4,2], 'wash': [3,6], 'child': [9,4],
'deaf': [1,4], 'league': [8,7], 'plague': [7,7], 'vague': [7,7], 'overhead': [1,4]
}
# Hand-written meter overrides: word (or contraction fragment) -> small integer.
# NOTE(review): the values look like syllable counts ("'re" -> 0,
# 'beautiful' -> 3) — presumably they override a computed count; confirm
# against the code that reads this table.
DEFINED_METERS = {
"'re": 0, "'ve": 0, 'shakespeare': 2, 'every': 2, 'leaves': 1, 'evening': 2,
'tongue': 1, 'lovely': 2, 'quiet': 2, 'people': 2, 'something': 2,
'beautiful': 3, 'lyre': 1, 'hymn': 1, 'forego': 2, 'therefore': 2,
'somewhere': 2
}
# Give every word in BRITISH_OUR the same rhyme entry. Each word gets its own
# fresh [4, 0] list, so changing one entry later cannot affect the others.
for word in BRITISH_OUR:
    DEFINED_RHYMES[word] = [4, 0]